From 2ffb44a615cc9ef4e6f353db39d0ab54bab300dc Mon Sep 17 00:00:00 2001 From: LSmyrnaios Date: Wed, 22 Sep 2021 16:36:48 +0300 Subject: [PATCH] - Update the "installAndRun.sh": --Ask the user to give the "workerId" and the "controllerBaseUrl". --Make sure the "libs" directory is created, if not exists. --Make sure the "unzip" package is installed. - Change the data-type of the "UrlReport.status" to be "enum StatusType", in order to increase consistency and comparability. - Update the guidelines in the README. --- README.md | 5 +- installAndRun.sh | 20 ++++++- .../urls_worker/UrlsWorkerApplication.java | 52 ++++++++++++------- .../controllers/GeneralController.java | 3 +- .../urls_worker/models/UrlReport.java | 15 ++++-- .../plugins/PublicationsRetrieverPlugin.java | 7 +-- .../urls_worker/util/AssignmentHandler.java | 4 +- .../urls_worker/util/WorkerConstants.java | 1 - 8 files changed, 71 insertions(+), 36 deletions(-) diff --git a/README.md b/README.md index 6e8cc73..b59a2ff 100644 --- a/README.md +++ b/README.md @@ -7,8 +7,9 @@ It posts the results to the controller, which in turn, puts them in a database.< To install and run the application: - Run ```git clone``` and then ```cd UrlsWorker```. -- Create the file ```controllerBaseUrl.txt``` which contains just one line with the controller's base api-url, for example: ```http://IP:PORT/api/```. -- Execute the ```installAndRun.sh``` script.
+- Create the file ```S3_minIO_credentials.txt```, which contains just one line with the ___S3_url___, ___S3_username___, ___S3_password___, ___S3_server_region___ and the ___S3_bucket___, separated by a _comma_ ```,```. +- [Optional] Create the file ```inputData.txt```, which contains just one line with the ___workerId___ and the ___controller's base api-url___, separated by a _comma_ ```,```. For example: ```worker_1,http://IP:PORT/api/```. +- Execute the ```installAndRun.sh``` script. In case the above file (_inputData.txt_) does not exist, it will request the current ___worker's ID___ and the ___Controller's Url___, and it will create the _inputData.txt_ file.
That script, installs the [PublicationsRetriever](https://github.com/LSmyrnaios/PublicationsRetriever), as a library and then compiles and runs the whole Application.
If you want to just run the app, then run the script with the argument "1": ```./installAndRun.sh 1```.
diff --git a/installAndRun.sh b/installAndRun.sh index 5195c82..f71cbd0 100755 --- a/installAndRun.sh +++ b/installAndRun.sh @@ -8,11 +8,27 @@ elif [[ $# -gt 1 ]]; then echo -e "Wrong number of arguments given: ${#}\nPlease execute it like: script.sh "; exit 1 fi +# Check of the "inputData.txt" file exist, if not, ask to fill it. +inputDataFile="inputData.txt" + +if [[ ! -f $inputDataFile ]]; then + echo -e "The file \"$inputDataFile\" does not exist. Going to create it..\n" + + echo "Give the ID of this worker:" + read -r workerId + + echo -e "\nGive the baseUrl of the controller (e.g.: http://IP:PORT/api/):" + read -r controllerBaseUrl + + touch $inputDataFile + echo "$workerId,$controllerBaseUrl" >> $inputDataFile +fi + gradleVersion="7.2" if [[ justInstall -eq 0 ]]; then - cd libs || exit + cd libs || (mkdir libs && (cd libs || exit)) git clone https://github.com/LSmyrnaios/PublicationsRetriever.git # We assume there is no previously source-code here, if so, it will be overwritten. # Do not need to perform a string-replace in "build.gradle", since it automatically gets all ".jar" files. 
@@ -35,7 +51,7 @@ if [[ justInstall -eq 0 ]]; then wget https://services.gradle.org/distributions/gradle-${gradleVersion}-bin.zip sudo mkdir /opt/gradle - sudo unzip -d /opt/gradle gradle-${gradleVersion}-bin.zip + sudo apt install -y unzip && sudo unzip -d /opt/gradle gradle-${gradleVersion}-bin.zip #ls /opt/gradle/gradle-${gradleVersion} # For debugging installation export PATH=$PATH:/opt/gradle/gradle-${gradleVersion}/bin diff --git a/src/main/java/eu/openaire/urls_worker/UrlsWorkerApplication.java b/src/main/java/eu/openaire/urls_worker/UrlsWorkerApplication.java index d2ce51d..18bf404 100644 --- a/src/main/java/eu/openaire/urls_worker/UrlsWorkerApplication.java +++ b/src/main/java/eu/openaire/urls_worker/UrlsWorkerApplication.java @@ -20,16 +20,19 @@ public class UrlsWorkerApplication { private static final Logger logger = LoggerFactory.getLogger(UrlsWorkerApplication.class); - private static final String controllerBaseUrlFilePath = FileUtils.workingDir + "controllerBaseUrl.txt"; + private static final String inputDataFilePath = FileUtils.workingDir + "inputData.txt"; + public static String workerId = null; public static String controllerBaseUrl = null; // BaseUrl template: "http://IP:PORT/api/" + public static void main(String[] args) { - setControllerBaseUrl(); // This may cause the Server to terminate early, in case the controllerBaseUrl cannot be found. + setInputData(); // This may cause the Server to terminate early, in case the workerId or the controllerBaseUrl cannot be found. SpringApplication.run(UrlsWorkerApplication.class, args); } + @PreDestroy public static void preDestroy() { @@ -48,39 +51,48 @@ public class UrlsWorkerApplication { } - private static void setControllerBaseUrl() + private static void setInputData() { - // Take the controllerBaseUrl from the file. + // Take the workerId and the controllerBaseUrl from the file. 
Scanner myReader = null; try { - File controllerBaseUrlFile = new File(controllerBaseUrlFilePath); - if ( !controllerBaseUrlFile.exists() ) { - String errorMsg = "controllerBaseUrlFile \"" + controllerBaseUrlFilePath + "\" does not exists!"; + File inputDataFile = new File(inputDataFilePath); + if ( !inputDataFile.exists() ) { + String errorMsg = "controllerBaseUrlFile \"" + inputDataFilePath + "\" does not exists!"; logger.error(errorMsg); System.err.println(errorMsg); System.exit(60); } - - myReader = new Scanner(controllerBaseUrlFile); - if ( !myReader.hasNextLine() ) { - String errorMsg = "The controllerBaseUrlFile is empty! No WorkerReports can be sent from this worker! Exiting.."; - logger.error(errorMsg); - System.err.println(errorMsg); - System.exit(61); + myReader = new Scanner(inputDataFile); + if ( myReader.hasNextLine() ) { + String[] data = myReader.nextLine().split(","); + if ( data.length < 2 ) { + String errorMsg = "Not all data were retrieved from file \"" + inputDataFilePath + "\"!"; + logger.error(errorMsg); + System.err.println(errorMsg); + System.exit(61); + } + workerId = data[0].trim(); + controllerBaseUrl = data[1].trim(); + if ( !controllerBaseUrl.endsWith("/") ) + controllerBaseUrl += "/"; // Make sure the whole urls will not break later. } - controllerBaseUrl = myReader.nextLine().trim(); - if ( !controllerBaseUrl.endsWith("/") ) - controllerBaseUrl += "/"; // Make sure the whole urls will not break later. + if ( (workerId == null) || (controllerBaseUrl == null) ) { + String errorMsg = "No \"workerId\" or/and \"controllerBaseUrl\" could be retrieved from the file: " + inputDataFilePath; + logger.error(errorMsg); + System.err.println(errorMsg); + System.exit(62); + } - logger.info("The controllerBaseUrl is: " + controllerBaseUrl); + logger.info("workerId: " + workerId + ", controllerBaseUrl: " + controllerBaseUrl); // It's safe and helpful to show them in the logs. 
} catch (Exception e) { - String errorMsg = "An error prevented the retrieval of the controllerBaseUrl from the file: " + controllerBaseUrlFilePath + "\n" + e.getMessage(); + String errorMsg = "An error prevented the retrieval of the workerId and the controllerBaseUrl from the file: " + inputDataFilePath + "\n" + e.getMessage(); logger.error(errorMsg); System.err.println(errorMsg); e.printStackTrace(); - System.exit(62); + System.exit(63); } finally { if ( myReader != null ) myReader.close(); diff --git a/src/main/java/eu/openaire/urls_worker/controllers/GeneralController.java b/src/main/java/eu/openaire/urls_worker/controllers/GeneralController.java index d6b0416..f4eaca2 100644 --- a/src/main/java/eu/openaire/urls_worker/controllers/GeneralController.java +++ b/src/main/java/eu/openaire/urls_worker/controllers/GeneralController.java @@ -1,5 +1,6 @@ package eu.openaire.urls_worker.controllers; +import eu.openaire.urls_worker.UrlsWorkerApplication; import eu.openaire.urls_worker.payloads.responces.WorkerResponse; import eu.openaire.urls_worker.util.AssignmentHandler; import eu.openaire.urls_worker.util.WorkerConstants; @@ -35,7 +36,7 @@ public class GeneralController { if ( AssignmentHandler.isAvailableForWork ) { logger.info("The worker is available for an assignment."); - return ResponseEntity.status(200).body(new WorkerResponse(WorkerConstants.WORKER_ID, WorkerConstants.ASSIGNMENTS_LIMIT)); + return ResponseEntity.status(200).body(new WorkerResponse(UrlsWorkerApplication.workerId, WorkerConstants.ASSIGNMENTS_LIMIT)); } else { logger.info("The worker is busy with another assignment."); diff --git a/src/main/java/eu/openaire/urls_worker/models/UrlReport.java b/src/main/java/eu/openaire/urls_worker/models/UrlReport.java index abe7a1e..7fbded6 100644 --- a/src/main/java/eu/openaire/urls_worker/models/UrlReport.java +++ b/src/main/java/eu/openaire/urls_worker/models/UrlReport.java @@ -13,8 +13,12 @@ import com.fasterxml.jackson.annotation.JsonPropertyOrder; }) public 
class UrlReport { + public enum StatusType { + accessible, non_accessible + } + @JsonProperty("status") - private String status; + private StatusType status; @JsonProperty("payload") private Payload payload; @@ -23,18 +27,18 @@ public class UrlReport { private Error error; - public UrlReport(String status, Payload payload, Error error) { + public UrlReport(StatusType status, Payload payload, Error error) { this.status = status; this.payload = payload; this.error = error; } - public String getStatus() { + public StatusType getStatus() { return this.status; } - public void setStatus(String status) { + public void setStatus(StatusType status) { this.status = status; } @@ -57,9 +61,10 @@ public class UrlReport { @Override public String toString() { return "UrlReport{" + - "status='" + status + '\'' + + "status=" + status + ", payload=" + payload + ", error=" + error + '}'; } + } diff --git a/src/main/java/eu/openaire/urls_worker/plugins/PublicationsRetrieverPlugin.java b/src/main/java/eu/openaire/urls_worker/plugins/PublicationsRetrieverPlugin.java index a9591ee..1531ea0 100644 --- a/src/main/java/eu/openaire/urls_worker/plugins/PublicationsRetrieverPlugin.java +++ b/src/main/java/eu/openaire/urls_worker/plugins/PublicationsRetrieverPlugin.java @@ -139,13 +139,14 @@ public class PublicationsRetrieverPlugin { { for ( DataToBeLogged data : FileUtils.dataToBeLoggedList ) { - String status = null, fileLocation = null, comment = data.getComment(), mimeType = null, hash = data.getHash(); + UrlReport.StatusType status = null; + String fileLocation = null, comment = data.getComment(), mimeType = null, hash = data.getHash(); Long size = data.getSize(); Error error = null; if ( data.getWasDocumentOrDatasetAccessible().equals("true") ) { - status = "accessible"; + status = UrlReport.StatusType.accessible; if ( comment.contains(UrlUtils.alreadyDownloadedByIDMessage) ) { // The file of this docUrl was already downloaded by another docUrl. 
String previousId = comment.substring(UrlUtils.alreadyDownloadedByIDMessage.length() +1); @@ -171,7 +172,7 @@ public class PublicationsRetrieverPlugin { error = new Error(null, null); // We do not want to send a "null" object, since it just adds more complicated handling in the controller.. } else { - status = "non-accessible"; + status = UrlReport.StatusType.non_accessible; if ( data.getCouldRetry().equals("true") ) error = new Error(Error.ErrorType.couldRetry, comment); else diff --git a/src/main/java/eu/openaire/urls_worker/util/AssignmentHandler.java b/src/main/java/eu/openaire/urls_worker/util/AssignmentHandler.java index f6ebe84..2dcd302 100644 --- a/src/main/java/eu/openaire/urls_worker/util/AssignmentHandler.java +++ b/src/main/java/eu/openaire/urls_worker/util/AssignmentHandler.java @@ -32,7 +32,7 @@ public class AssignmentHandler { public static AssignmentRequest requestAssignments() { - String requestUrl = UrlsWorkerApplication.controllerBaseUrl + "urls" + (askForTest ? "/test" : "") + "?workerId=" + WorkerConstants.WORKER_ID + "&workerAssignmentsLimit=" + WorkerConstants.ASSIGNMENTS_LIMIT; + String requestUrl = UrlsWorkerApplication.controllerBaseUrl + "urls" + (askForTest ? 
"/test" : "") + "?workerId=" + UrlsWorkerApplication.workerId + "&workerAssignmentsLimit=" + WorkerConstants.ASSIGNMENTS_LIMIT; logger.info("Going to request assignments from the controller-server: " + requestUrl); String json = null; @@ -129,7 +129,7 @@ public class AssignmentHandler { logger.info("Going to post the WorkerReport to the controller-server: " + postUrl); try { - ResponseEntity responseEntity = new RestTemplateBuilder().build().postForEntity(postUrl, new WorkerReport(WorkerConstants.WORKER_ID, assignmentRequestCounter, urlReports), String.class); + ResponseEntity responseEntity = new RestTemplateBuilder().build().postForEntity(postUrl, new WorkerReport(UrlsWorkerApplication.workerId, assignmentRequestCounter, urlReports), String.class); int responseCode = responseEntity.getStatusCode().value(); if ( responseCode != HttpStatus.OK.value() ) { logger.error("Connection problem with the submission of the WorkerReport of assignment_" + assignmentRequestCounter + " to the Controller. Error-code was: " + responseCode); diff --git a/src/main/java/eu/openaire/urls_worker/util/WorkerConstants.java b/src/main/java/eu/openaire/urls_worker/util/WorkerConstants.java index cc1728f..4093ca3 100644 --- a/src/main/java/eu/openaire/urls_worker/util/WorkerConstants.java +++ b/src/main/java/eu/openaire/urls_worker/util/WorkerConstants.java @@ -2,7 +2,6 @@ package eu.openaire.urls_worker.util; public interface WorkerConstants { - String WORKER_ID = "worker_1"; // This should be different for every deployment of a Worker. int ASSIGNMENTS_LIMIT = 10000; }