From fd5b56e3c6ac688704e8974abd959f7ffa20042d Mon Sep 17 00:00:00 2001 From: LSmyrnaios Date: Tue, 7 Dec 2021 00:52:40 +0200 Subject: [PATCH] - Allow the user to set the "maxAssignmentsLimitPerBatch" value. - Set increased lower and upper limits for the Java Heap Size. - Update the "ServerBaseURL" to the Public IP Address of the machine which is running the app. - Improve two log-messages. --- build.gradle | 5 +++ installAndRun.sh | 5 ++- .../urls_worker/UrlsWorkerApplication.java | 23 +++++++++---- .../controllers/FullTextsController.java | 4 +-- .../controllers/GeneralController.java | 2 +- .../plugins/PublicationsRetrieverPlugin.java | 4 +-- .../urls_worker/util/AssignmentsHandler.java | 16 ++++++--- .../openaire/urls_worker/util/UriBuilder.java | 34 ++++++++++++++++--- 8 files changed, 73 insertions(+), 20 deletions(-) diff --git a/build.gradle b/build.gradle index a75ca9e..76d0ca6 100644 --- a/build.gradle +++ b/build.gradle @@ -48,6 +48,11 @@ configurations { all*.exclude group: 'ch.qos.logback' } +// Set increased lower and upper limits for the java-execution. +tasks.withType(JavaExec) { + jvmArgs = ['-Xms512m', '-Xmx8g'] +} + test { useJUnitPlatform() } diff --git a/installAndRun.sh b/installAndRun.sh index 4966cca..60ffd25 100755 --- a/installAndRun.sh +++ b/installAndRun.sh @@ -17,11 +17,14 @@ if [[ ! -f $inputDataFile ]]; then echo "Give the ID of this worker:" read -r workerId + echo -e "\nGive the max-assignments-limit-per-batch for the Worker to handle: " + read -r maxAssignmentsLimitPerBatch + echo -e "\nGive the baseUrl of the controller (e.g.: http://IP:PORT/api/):" read -r controllerBaseUrl touch $inputDataFile - echo "$workerId,$controllerBaseUrl" >> $inputDataFile + echo "$workerId,$maxAssignmentsLimitPerBatch,$controllerBaseUrl" >> $inputDataFile echo -e "\n\n" fi diff --git a/src/main/java/eu/openaire/urls_worker/UrlsWorkerApplication.java b/src/main/java/eu/openaire/urls_worker/UrlsWorkerApplication.java index 7bee1cc..e6f12c3 100644 --- a/src/main/java/eu/openaire/urls_worker/UrlsWorkerApplication.java +++ b/src/main/java/eu/openaire/urls_worker/UrlsWorkerApplication.java @@ -3,7 +3,9 @@ package eu.openaire.urls_worker; import eu.openaire.publications_retriever.PublicationsRetriever; import eu.openaire.publications_retriever.util.file.FileUtils; import eu.openaire.urls_worker.plugins.PublicationsRetrieverPlugin; +import eu.openaire.urls_worker.util.AssignmentsHandler; import eu.openaire.urls_worker.util.UriBuilder; +import eu.openaire.urls_worker.util.WorkerConstants; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.springframework.boot.CommandLineRunner; @@ -34,6 +36,7 @@ public class UrlsWorkerApplication { private static final String inputDataFilePath = FileUtils.workingDir + "inputData.txt"; public static String workerId = null; + public static int maxAssignmentsLimitPerBatch = 0; public static String controllerBaseUrl = null; // BaseUrl template: "http://IP:PORT/api/" @@ -41,6 +44,7 @@ public class UrlsWorkerApplication { setInputData(); // This may cause the Server to terminate early, in case the workerId or the controllerBaseUrl cannot be found. new PublicationsRetrieverPlugin(); + new AssignmentsHandler(); SpringApplication.run(UrlsWorkerApplication.class, args); @@ -109,26 +113,33 @@ public class UrlsWorkerApplication { myReader = new Scanner(inputDataFile); if ( myReader.hasNextLine() ) { String[] data = myReader.nextLine().split(","); - if ( data.length < 2 ) { + if ( data.length < 3 ) { String errorMsg = "Not all data were retrieved from file \"" + inputDataFilePath + "\"!"; logger.error(errorMsg); System.err.println(errorMsg); System.exit(61); } workerId = data[0].trim(); - controllerBaseUrl = data[1].trim(); + String maxAssignmentsLimitStr = data[1].trim(); + try { + maxAssignmentsLimitPerBatch = Integer.parseInt(maxAssignmentsLimitStr); + } catch (NumberFormatException nfe) { + logger.warn("The given \"maxAssignmentsLimitPerBatch\" (" + maxAssignmentsLimitStr + ") was not a number! Will use the default one: " + WorkerConstants.ASSIGNMENTS_LIMIT); + maxAssignmentsLimitPerBatch = WorkerConstants.ASSIGNMENTS_LIMIT; + } + controllerBaseUrl = data[2].trim(); if ( !controllerBaseUrl.endsWith("/") ) - controllerBaseUrl += "/"; // Make sure the whole urls will not break later. + controllerBaseUrl += "/"; // Make sure the other urls will not break later. } - if ( (workerId == null) || (controllerBaseUrl == null) ) { - String errorMsg = "No \"workerId\" or/and \"controllerBaseUrl\" could be retrieved from the file: " + inputDataFilePath; + if ( (workerId == null) || (maxAssignmentsLimitPerBatch == 0) || (controllerBaseUrl == null) ) { + String errorMsg = "No \"workerId\" or/and \"maxAssignmentsLimitPerBatch\" or/and \"controllerBaseUrl\" could be retrieved from the file: " + inputDataFilePath; logger.error(errorMsg); System.err.println(errorMsg); System.exit(62); } - logger.info("workerId: " + workerId + ", controllerBaseUrl: " + controllerBaseUrl); // It's safe and helpful to show them in the logs. + logger.info("workerId: " + workerId + ", maxAssignmentsLimitPerBatch: " + maxAssignmentsLimitPerBatch + ", controllerBaseUrl: " + controllerBaseUrl); // It's safe and helpful to show them in the logs. } catch (Exception e) { String errorMsg = "An error prevented the retrieval of the workerId and the controllerBaseUrl from the file: " + inputDataFilePath + "\n" + e.getMessage(); diff --git a/src/main/java/eu/openaire/urls_worker/controllers/FullTextsController.java b/src/main/java/eu/openaire/urls_worker/controllers/FullTextsController.java index b7930b9..03ea547 100644 --- a/src/main/java/eu/openaire/urls_worker/controllers/FullTextsController.java +++ b/src/main/java/eu/openaire/urls_worker/controllers/FullTextsController.java @@ -50,7 +50,7 @@ public class FullTextsController { return ResponseEntity.badRequest().body(errorMsg); } - logger.info("Received a \"getMultipleFullTexts\" request for returning a zip-file containing " + fileNamesListNum + " full-texts, from assignments_" + assignmentsCounter + ", for batch_" + zipBatchCounter); + logger.info("Received a \"getMultipleFullTexts\" request for returning a zip-file containing " + fileNamesListNum + " full-texts, from assignments_" + assignmentsCounter + ", for batch_" + zipBatchCounter + " (out of " + totalZipBatches + ")."); String currentAssignmentsBaseFullTextsPath = assignmentsBaseDir + "assignments_" + assignmentsCounter + "_fullTexts" + File.separator; @@ -82,7 +82,7 @@ public class FullTextsController { // If this is the last batch for this assignments-count, then make sure it is deleted in the next scheduled delete-operation. if ( zipBatchCounter == totalZipBatches ) { assignmentsNumsHandledAndLocallyDeleted.put(assignmentsCounter, false); - logger.debug("Will return the last batch of Assignments_" + assignmentsCounter + " to the Controller and these assignments will be deleted later."); + logger.debug("Will return the last batch (" + zipBatchCounter + ") of Assignments_" + assignmentsCounter + " to the Controller and these assignments will be deleted later."); } String contentType = request.getServletContext().getMimeType(zipFileFullPath); diff --git a/src/main/java/eu/openaire/urls_worker/controllers/GeneralController.java b/src/main/java/eu/openaire/urls_worker/controllers/GeneralController.java index 4112c73..e600191 100644 --- a/src/main/java/eu/openaire/urls_worker/controllers/GeneralController.java +++ b/src/main/java/eu/openaire/urls_worker/controllers/GeneralController.java @@ -40,7 +40,7 @@ public class GeneralController { if ( AssignmentsHandler.isAvailableForWork ) { logger.info("The worker is available for an assignment."); - return ResponseEntity.status(200).body(new WorkerResponse(UrlsWorkerApplication.workerId, WorkerConstants.ASSIGNMENTS_LIMIT)); + return ResponseEntity.status(200).body(new WorkerResponse(UrlsWorkerApplication.workerId, UrlsWorkerApplication.maxAssignmentsLimitPerBatch)); } else { logger.info("The worker is busy with another assignment."); return ResponseEntity.status(HttpStatus.SERVICE_UNAVAILABLE).build(); diff --git a/src/main/java/eu/openaire/urls_worker/plugins/PublicationsRetrieverPlugin.java b/src/main/java/eu/openaire/urls_worker/plugins/PublicationsRetrieverPlugin.java index 0a881a6..9df3ee7 100644 --- a/src/main/java/eu/openaire/urls_worker/plugins/PublicationsRetrieverPlugin.java +++ b/src/main/java/eu/openaire/urls_worker/plugins/PublicationsRetrieverPlugin.java @@ -9,13 +9,13 @@ import eu.openaire.publications_retriever.util.http.HttpConnUtils; import eu.openaire.publications_retriever.util.url.DataToBeLogged; import eu.openaire.publications_retriever.util.url.LoaderAndChecker; import eu.openaire.publications_retriever.util.url.UrlUtils; +import eu.openaire.urls_worker.UrlsWorkerApplication; import eu.openaire.urls_worker.models.Assignment; import eu.openaire.urls_worker.models.Error; import eu.openaire.urls_worker.models.Payload; import eu.openaire.urls_worker.models.UrlReport; import eu.openaire.urls_worker.services.FileStorageService; import eu.openaire.urls_worker.util.AssignmentsHandler; -import eu.openaire.urls_worker.util.WorkerConstants; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -42,7 +42,7 @@ public class PublicationsRetrieverPlugin { FileUtils.shouldDownloadDocFiles = true; FileUtils.docFileNameType = FileUtils.DocFileNameType.idName; PublicationsRetriever.targetUrlType = "docUrl"; - FileUtils.jsonBatchSize = WorkerConstants.ASSIGNMENTS_LIMIT; + FileUtils.jsonBatchSize = UrlsWorkerApplication.maxAssignmentsLimitPerBatch; assignmentsBasePath = FileStorageService.assignmentsLocation.toString(); if ( !assignmentsBasePath.endsWith(File.separator) ) diff --git a/src/main/java/eu/openaire/urls_worker/util/AssignmentsHandler.java b/src/main/java/eu/openaire/urls_worker/util/AssignmentsHandler.java index f35b54a..9693f68 100644 --- a/src/main/java/eu/openaire/urls_worker/util/AssignmentsHandler.java +++ b/src/main/java/eu/openaire/urls_worker/util/AssignmentsHandler.java @@ -25,10 +25,10 @@ public class AssignmentsHandler { private static final Logger logger = LoggerFactory.getLogger(AssignmentsHandler.class); public static boolean isAvailableForWork = true; - public static final List urlReports = new ArrayList<>(WorkerConstants.ASSIGNMENTS_LIMIT); + public static List urlReports = null; private static final int expectedDatasourcesPerRequest = 1400; // Per 10_000 assignments. - private static final int expectedAssignmentsPerDatasource = (WorkerConstants.ASSIGNMENTS_LIMIT / expectedDatasourcesPerRequest); - public static final Multimap assignmentsForPlugins = HashMultimap.create(expectedDatasourcesPerRequest, expectedAssignmentsPerDatasource); + private static int expectedAssignmentsPerDatasource = 0; + public static Multimap assignmentsForPlugins = null; private static final boolean askForTest = false; // Enable this only for testing. private static final Duration requestConnectTimeoutDuration = Duration.ofMinutes(1); // 1 minute. @@ -37,9 +37,17 @@ public class AssignmentsHandler { public static final RestTemplate restTemplate = new RestTemplateBuilder().setConnectTimeout(requestConnectTimeoutDuration).setReadTimeout(requestReadTimeoutDuration).build(); + + public AssignmentsHandler() + { + urlReports = new ArrayList<>(UrlsWorkerApplication.maxAssignmentsLimitPerBatch); + expectedAssignmentsPerDatasource = (UrlsWorkerApplication.maxAssignmentsLimitPerBatch / expectedDatasourcesPerRequest); + assignmentsForPlugins = HashMultimap.create(expectedDatasourcesPerRequest, expectedAssignmentsPerDatasource); + } + public static AssignmentsRequest requestAssignments() { - String requestUrl = UrlsWorkerApplication.controllerBaseUrl + "urls" + (askForTest ? "/test" : "") + "?workerId=" + UrlsWorkerApplication.workerId + "&workerAssignmentsLimit=" + WorkerConstants.ASSIGNMENTS_LIMIT; + String requestUrl = UrlsWorkerApplication.controllerBaseUrl + "urls" + (askForTest ? "/test" : "") + "?workerId=" + UrlsWorkerApplication.workerId + "&workerAssignmentsLimit=" + UrlsWorkerApplication.maxAssignmentsLimitPerBatch; logger.info("Going to request assignments from the controller-server: " + requestUrl); AssignmentsRequest assignmentRequest = null; diff --git a/src/main/java/eu/openaire/urls_worker/util/UriBuilder.java b/src/main/java/eu/openaire/urls_worker/util/UriBuilder.java index 35e848f..004ef96 100644 --- a/src/main/java/eu/openaire/urls_worker/util/UriBuilder.java +++ b/src/main/java/eu/openaire/urls_worker/util/UriBuilder.java @@ -5,7 +5,11 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.springframework.core.env.Environment; +import java.io.BufferedReader; +import java.io.InputStreamReader; import java.net.InetAddress; +import java.net.MalformedURLException; +import java.net.URL; public class UriBuilder { @@ -25,7 +29,10 @@ public class UriBuilder { baseUrl += "://"; - String hostName = InetAddress.getLoopbackAddress().getHostName(); // Non-null. + String hostName = getPublicIP(); + if ( hostName == null ) + hostName = InetAddress.getLoopbackAddress().getHostName(); // Non-null. + baseUrl += hostName; String serverPort = environment.getProperty("server.port"); @@ -36,11 +43,11 @@ public class UriBuilder { baseUrl += ":" + serverPort; String baseInternalPath = environment.getProperty("server.servlet.context-path"); - if (baseInternalPath != null) { - if (!baseInternalPath.startsWith("/")) + if ( baseInternalPath != null ) { + if ( !baseInternalPath.startsWith("/") ) baseUrl += "/"; baseUrl += baseInternalPath; - if (!baseInternalPath.endsWith("/")) + if ( !baseInternalPath.endsWith("/") ) baseUrl += "/"; } else { logger.warn("No property \"server.servlet.context-path\" was found in \"application.properties\"!"); // Yes it's expected. @@ -50,6 +57,25 @@ public class UriBuilder { logger.debug("ServerBaseURL: " + baseUrl); } + private static String getPublicIP() + { + String publicIpAddress = ""; + URL url_name; + try { + url_name = new URL("https://api.ipify.org/"); + } catch (MalformedURLException mue) { + logger.warn(mue.getMessage()); + return null; + } + try ( BufferedReader bf = new BufferedReader(new InputStreamReader(url_name.openStream()))) { + publicIpAddress = bf.readLine().trim(); + } catch (Exception e) { + logger.warn("Cannot get the publicIP address for this machine!", e); + return null; + } + return publicIpAddress; + } + public static String getBaseUrl() { return baseUrl; }