From 4d908462611c3234f51fa0ec9f3570727a6f0720 Mon Sep 17 00:00:00 2001 From: LSmyrnaios Date: Mon, 22 May 2023 21:25:22 +0300 Subject: [PATCH] - In case the specified "controllerIP" is actually a domain-name, find its IP-address, so that a proper IP-to-IP comparison can be performed and the "securityChecks" can pass. - Increase the "read-timeout" when searching for the host's machine public-IP. - Update dependencies. - Code polishing. --- build.gradle | 4 +- .../components/AssignmentsHandler.java | 12 +++--- .../plugins/PublicationsRetrieverPlugin.java | 1 + .../controllers/GeneralController.java | 41 ++++++++++++------- .../openaire/urls_worker/util/UriBuilder.java | 4 +- 5 files changed, 36 insertions(+), 26 deletions(-) diff --git a/build.gradle b/build.gradle index 1eb46dd..8b7b218 100644 --- a/build.gradle +++ b/build.gradle @@ -1,5 +1,5 @@ plugins { - id 'org.springframework.boot' version '2.7.11' + id 'org.springframework.boot' version '2.7.12' id 'io.spring.dependency-management' version '1.1.0' id 'java' } @@ -42,7 +42,7 @@ dependencies { implementation group: 'com.google.guava', name: 'guava', version: '31.1-jre' implementation 'org.apache.commons:commons-compress:1.23.0' - implementation 'com.github.luben:zstd-jni:1.5.5-2' // Even though this is part of the above dependency, it is needed separately as well, specifically here, in the Worker. + implementation 'com.github.luben:zstd-jni:1.5.5-3' // Even though this is part of the above dependency, it is needed separately as well, specifically here, in the Worker. 
testImplementation 'org.springframework.security:spring-security-test' testImplementation "org.springframework.boot:spring-boot-starter-test" diff --git a/src/main/java/eu/openaire/urls_worker/components/AssignmentsHandler.java b/src/main/java/eu/openaire/urls_worker/components/AssignmentsHandler.java index 095f0bd..bee9601 100644 --- a/src/main/java/eu/openaire/urls_worker/components/AssignmentsHandler.java +++ b/src/main/java/eu/openaire/urls_worker/components/AssignmentsHandler.java @@ -90,8 +90,7 @@ public class AssignmentsHandler { public AssignmentsRequest requestAssignments() { - logger.info("Going to request " + this.maxAssignmentsLimitPerBatch + " assignments from the controller-server: " + requestUrl); - + logger.info("Going to request " + this.maxAssignmentsLimitPerBatch + " assignments from the Controller: " + requestUrl); AssignmentsRequest assignmentRequest = null; try { // Here, the HTTP-request is executed. assignmentRequest = restTemplateForRequest.getForObject(requestUrl, AssignmentsRequest.class); @@ -103,7 +102,6 @@ public class AssignmentsHandler { logger.error("Could not retrieve the assignments, as the provided Controller's url was malformed!\n" + iae.getMessage()); UrlsWorkerApplication.gentleAppShutdown(); } - //logger.debug(assignmentRequest.toString()); // DEBUG! return assignmentRequest; } @@ -141,13 +139,12 @@ public class AssignmentsHandler { assignments = getAssignmentsSpacedOutByDomain(assignments, assignmentsSize, false); // Iterate over the tasks and add each task in its own list depending on the DATASOURCE in order to decide which plugin to use later. - for ( Assignment assignment : assignments ) { // Add each task in its own HashSet. try { assignmentsForPlugins.put(assignment.getDatasource().getId(), assignment); } catch (NullPointerException npe) { - logger.warn("An NPE was thrown when splitting the assignments based on the datasource-types. 
The assignment was: " + assignment); // Do not use "assignment.toString()", it may cause an NPE. + logger.warn("An NPE was thrown when splitting the assignments based on the datasource-types. The problematic assignment was: " + assignment); // Do not use "assignment.toString()", it may cause an NPE. } } @@ -209,9 +206,10 @@ public class AssignmentsHandler { } // Note: Cannot call this method, here, retrospectively, as if it runs 100s of times, the memory-stack may break.. - // The scheduler will handle calling it every 15 mins, in case the Worker is available for work.. + // The scheduler will handle calling it repetitively, in case the Worker is available for work.. } + public static HashSet assignmentsNumsHandled = new HashSet<>(); @@ -260,7 +258,7 @@ public class AssignmentsHandler { // Note: It is possible that one or more full-texts-batches, are not sent to the Controller, or that the Controller failed to process them. // In that case, the related "attempt"-records will keep their "success" state, but the related "payload" records will not be inserted into the database. - // When all the id-urls are processed at least one time, the Controller will start returning all the "couldRetry" records without a related "payload"-record. + // When all the id-urls are processed at least one time, the Service will start reprocessing all the "couldRetry" records without a related "payload"-record. } diff --git a/src/main/java/eu/openaire/urls_worker/components/plugins/PublicationsRetrieverPlugin.java b/src/main/java/eu/openaire/urls_worker/components/plugins/PublicationsRetrieverPlugin.java index 2fba042..d79f51a 100644 --- a/src/main/java/eu/openaire/urls_worker/components/plugins/PublicationsRetrieverPlugin.java +++ b/src/main/java/eu/openaire/urls_worker/components/plugins/PublicationsRetrieverPlugin.java @@ -148,6 +148,7 @@ public class PublicationsRetrieverPlugin { if ( numFailedTasks == -1 ) { // The unknown exception is logged inside the above method. 
System.err.println("Invoking and/or executing the callableTasks failed with the exception written in the log files!"); UrlsWorkerApplication.gentleAppShutdown(); + return; // Not really needed, but have it for code-readability. } if ( numFailedTasks > 0 ) diff --git a/src/main/java/eu/openaire/urls_worker/controllers/GeneralController.java b/src/main/java/eu/openaire/urls_worker/controllers/GeneralController.java index 8a02c29..3d74c46 100644 --- a/src/main/java/eu/openaire/urls_worker/controllers/GeneralController.java +++ b/src/main/java/eu/openaire/urls_worker/controllers/GeneralController.java @@ -12,6 +12,9 @@ import org.springframework.web.bind.annotation.RequestMapping; import org.springframework.web.bind.annotation.RestController; import javax.servlet.http.HttpServletRequest; +import java.io.File; +import java.net.UnknownHostException; +import java.util.regex.Pattern; @RestController @@ -21,10 +24,27 @@ public class GeneralController { private static final Logger logger = LoggerFactory.getLogger(GeneralController.class); private final String controllerIp; + //private final String workerReportsDirPath; + private final String workerId; - public GeneralController(@Value("${info.controllerIp}") String controllerIp) { - this.controllerIp = controllerIp; + private static final Pattern DOMAIN_DETECTOR = Pattern.compile("^.*[a-zA-Z].*$"); + + public GeneralController(@Value("${info.controllerIp}") String controllerIp, /*@Value("${workerReportsDirPath}") String workerReportsDirPath,*/ @Value("${info.workerId}") String workerId) + { + if ( DOMAIN_DETECTOR.matcher(controllerIp).matches() ) { + try { + this.controllerIp = java.net.InetAddress.getByName(controllerIp).getHostAddress(); + } catch (UnknownHostException uhe) { + String errorMsg = "The domain given for the Controller (" + controllerIp + ") is unknown to the world! 
So its IP cannot be retrieved!"; + logger.error(errorMsg); + throw new RuntimeException(errorMsg); + } + } else + this.controllerIp = controllerIp; + + //this.workerReportsDirPath = workerReportsDirPath; + this.workerId = workerId; } @@ -40,18 +60,18 @@ public class GeneralController { @GetMapping("shutdownWorker") public ResponseEntity shutdownWorkerGracefully(HttpServletRequest request) { - String initMsg = "Received a \"shutdownWorker\" request."; + String initMsg = "Received a \"shutdownWorker\" request. "; ResponseEntity responseEntity = passSecurityChecks(request, initMsg); if ( responseEntity != null ) return responseEntity; String finalMsg = ""; if ( shouldShutdownWorker ) - finalMsg = " The worker has already received a \"shutdownWorker\" (which was not canceled afterwards)."; + finalMsg = "The worker has already received a \"shutdownWorker\" (which was not canceled afterwards)."; else shouldShutdownWorker = true; - finalMsg += " The worker will shutdown, after finishing current work."; + finalMsg += "The worker will shutdown, after finishing current work."; logger.info(initMsg + finalMsg); return ResponseEntity.ok().body(finalMsg + "\n"); } @@ -86,22 +106,13 @@ public class GeneralController { return ResponseEntity.internalServerError().build(); } String remoteAddr = request.getHeader("X-FORWARDED-FOR"); - if ( remoteAddr == null || "".equals(remoteAddr) ) + if ( remoteAddr == null || remoteAddr.isEmpty() ) remoteAddr = request.getRemoteAddr(); if ( ! 
(remoteAddr.equals("127.0.0.1") || remoteAddr.equals(UriBuilder.ip) || remoteAddr.equals(controllerIp)) ) { logger.error(initMsg + " The request came from another IP: " + remoteAddr + " | while this worker has the IP: " + UriBuilder.ip); return ResponseEntity.status(HttpStatus.FORBIDDEN).build(); } - - // TODO - If the Controller's ip is actually a domain name, then we have to calculate its IP-address one time - // Ad a regex to check that the controller's "IP" contains only numbers and dots, if not, then we have a DOMAIN. - // So use the following code to get the number-IP address: - /* - String address = java.net.InetAddress.getByName(controllerIp).getHostAddress(); - // TOdo - checks for potential "null" or exceptions in the above code, in case the domain resolution failed - * */ - return null; // The checks are passing. } diff --git a/src/main/java/eu/openaire/urls_worker/util/UriBuilder.java b/src/main/java/eu/openaire/urls_worker/util/UriBuilder.java index 0897110..83f31ce 100644 --- a/src/main/java/eu/openaire/urls_worker/util/UriBuilder.java +++ b/src/main/java/eu/openaire/urls_worker/util/UriBuilder.java @@ -58,8 +58,8 @@ public class UriBuilder { String urlString = "https://api.ipify.org/"; try { conn = (HttpURLConnection) new URL(urlString).openConnection(); - conn.setConnectTimeout(60_000); - conn.setReadTimeout(60_000); + conn.setConnectTimeout(60_000); // 1 minute + conn.setReadTimeout(120_000); // 2 minutes conn.setRequestMethod("GET"); conn.connect();