- Add the ability to upload the files to an S3 object store.

- Change the server's port and the port of the controller-api.
- Update dependencies.
This commit is contained in:
Lampros Smyrnaios 2021-07-29 09:01:53 +03:00
parent 6307cda23a
commit 6cc2673fca
6 changed files with 15 additions and 10 deletions

View File

@ -1,11 +1,11 @@
buildscript { buildscript {
ext { ext {
springSecurityVersion = "5.5.0" springSecurityVersion = "5.5.1"
} }
} }
plugins { plugins {
id 'org.springframework.boot' version '2.5.0' id 'org.springframework.boot' version '2.5.3'
id 'io.spring.dependency-management' version '1.0.11.RELEASE' id 'io.spring.dependency-management' version '1.0.11.RELEASE'
id 'java' id 'java'
} }

View File

@ -1,5 +1,5 @@
distributionBase=GRADLE_USER_HOME distributionBase=GRADLE_USER_HOME
distributionPath=wrapper/dists distributionPath=wrapper/dists
distributionUrl=https\://services.gradle.org/distributions/gradle-7.0.2-bin.zip distributionUrl=https\://services.gradle.org/distributions/gradle-7.1.1-bin.zip
zipStoreBase=GRADLE_USER_HOME zipStoreBase=GRADLE_USER_HOME
zipStorePath=wrapper/dists zipStorePath=wrapper/dists

View File

@ -1,6 +1,6 @@
package eu.openaire.urls_worker.components; package eu.openaire.urls_worker.components;
import eu.openaire.urls_worker.plugins.publicationsRetriever.PublicationsRetrieverPlugin; import eu.openaire.urls_worker.plugins.PublicationsRetrieverPlugin;
import eu.openaire.urls_worker.util.AssignmentHandler; import eu.openaire.urls_worker.util.AssignmentHandler;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;

View File

@ -1,10 +1,11 @@
package eu.openaire.urls_worker.plugins.publicationsRetriever; package eu.openaire.urls_worker.plugins;
import com.google.common.hash.Hashing; import com.google.common.hash.Hashing;
import com.google.common.io.Files; import com.google.common.io.Files;
import edu.uci.ics.crawler4j.url.URLCanonicalizer; import edu.uci.ics.crawler4j.url.URLCanonicalizer;
import eu.openaire.publications_retriever.PublicationsRetriever; import eu.openaire.publications_retriever.PublicationsRetriever;
import eu.openaire.publications_retriever.util.file.FileUtils; import eu.openaire.publications_retriever.util.file.FileUtils;
import eu.openaire.publications_retriever.util.file.S3ObjectStoreMinIO;
import eu.openaire.publications_retriever.util.http.ConnSupportUtils; import eu.openaire.publications_retriever.util.http.ConnSupportUtils;
import eu.openaire.publications_retriever.util.http.HttpConnUtils; import eu.openaire.publications_retriever.util.http.HttpConnUtils;
import eu.openaire.publications_retriever.util.url.DataToBeLogged; import eu.openaire.publications_retriever.util.url.DataToBeLogged;
@ -12,7 +13,6 @@ import eu.openaire.publications_retriever.util.url.LoaderAndChecker;
import eu.openaire.publications_retriever.util.url.UrlUtils; import eu.openaire.publications_retriever.util.url.UrlUtils;
import eu.openaire.urls_worker.models.Assignment; import eu.openaire.urls_worker.models.Assignment;
import eu.openaire.urls_worker.models.Payload; import eu.openaire.urls_worker.models.Payload;
import eu.openaire.urls_worker.models.Task;
import eu.openaire.urls_worker.models.UrlReport; import eu.openaire.urls_worker.models.UrlReport;
import eu.openaire.urls_worker.util.AssignmentHandler; import eu.openaire.urls_worker.util.AssignmentHandler;
import org.slf4j.Logger; import org.slf4j.Logger;
@ -50,8 +50,13 @@ public class PublicationsRetrieverPlugin {
LoaderAndChecker.retrieveDocuments = true; LoaderAndChecker.retrieveDocuments = true;
LoaderAndChecker.retrieveDatasets = false; LoaderAndChecker.retrieveDatasets = false;
FileUtils.shouldDownloadDocFiles = true; FileUtils.shouldDownloadDocFiles = true;
FileUtils.shouldUploadFilesToS3 = true;
FileUtils.docFileNameType = FileUtils.DocFileNameType.idName;
PublicationsRetriever.targetUrlType = "docUrl"; PublicationsRetriever.targetUrlType = "docUrl";
if ( FileUtils.shouldUploadFilesToS3 )
new S3ObjectStoreMinIO(); // Check here on how to create the credentials-file: https://github.com/LSmyrnaios/PublicationsRetriever/blob/master/README.md
int workerThreadsCount = Runtime.getRuntime().availableProcessors() * PublicationsRetriever.threadsMultiplier; int workerThreadsCount = Runtime.getRuntime().availableProcessors() * PublicationsRetriever.threadsMultiplier;
logger.info("Use " + workerThreadsCount + " worker-threads."); logger.info("Use " + workerThreadsCount + " worker-threads.");
PublicationsRetriever.executor = Executors.newFixedThreadPool(workerThreadsCount); PublicationsRetriever.executor = Executors.newFixedThreadPool(workerThreadsCount);

View File

@ -8,7 +8,7 @@ import eu.openaire.urls_worker.models.Assignment;
import eu.openaire.urls_worker.models.UrlReport; import eu.openaire.urls_worker.models.UrlReport;
import eu.openaire.urls_worker.payloads.requests.AssignmentRequest; import eu.openaire.urls_worker.payloads.requests.AssignmentRequest;
import eu.openaire.urls_worker.payloads.responces.WorkerReport; import eu.openaire.urls_worker.payloads.responces.WorkerReport;
import eu.openaire.urls_worker.plugins.publicationsRetriever.PublicationsRetrieverPlugin; import eu.openaire.urls_worker.plugins.PublicationsRetrieverPlugin;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import org.springframework.boot.web.client.RestTemplateBuilder; import org.springframework.boot.web.client.RestTemplateBuilder;
@ -31,7 +31,7 @@ public class AssignmentHandler {
public static AssignmentRequest requestAssignments() public static AssignmentRequest requestAssignments()
{ {
RestTemplate restTemplate = new RestTemplateBuilder().build(); RestTemplate restTemplate = new RestTemplateBuilder().build();
String url = "http://localhost:8080/api/urls/test?workerId=" + WorkerConstants.WORKER_ID + "&assignmentsLimit=" + WorkerConstants.ASSIGNMENTS_LIMIT; String url = "http://localhost:1880/api/urls/test?workerId=" + WorkerConstants.WORKER_ID + "&workerAssignmentsLimit=" + WorkerConstants.ASSIGNMENTS_LIMIT;
String json = null; String json = null;
try { try {
json = restTemplate.getForObject(url, String.class); json = restTemplate.getForObject(url, String.class);
@ -113,7 +113,7 @@ public class AssignmentHandler {
public static boolean postWorkerReport(Long assignmentRequestCounter) public static boolean postWorkerReport(Long assignmentRequestCounter)
{ {
RestTemplate restTemplate = new RestTemplateBuilder().build(); RestTemplate restTemplate = new RestTemplateBuilder().build();
String url = "http://localhost:8080/api/urls/addWorkerReport"; String url = "http://localhost:1880/api/urls/addWorkerReport";
try { try {
ResponseEntity<String> responseEntity = restTemplate.postForEntity(url, new WorkerReport(WorkerConstants.WORKER_ID, assignmentRequestCounter, urlReports), String.class); ResponseEntity<String> responseEntity = restTemplate.postForEntity(url, new WorkerReport(WorkerConstants.WORKER_ID, assignmentRequestCounter, urlReports), String.class);

View File

@ -10,7 +10,7 @@
#server.error.include-stacktrace=never #server.error.include-stacktrace=never
# HTTP CONFIGURATION # HTTP CONFIGURATION
server.port = 8081 server.port = 1881
# Server api path # Server api path
server.servlet.context-path=/api server.servlet.context-path=/api