- Update the "installAndRun.sh":

--Ask the user to give the "workerId" and the "controllerBaseUrl".
--Make sure the "libs" directory is created, if it does not exist.
--Make sure the "unzip" package is installed.
- Change the data-type of the "UrlReport.status" to be "enum StatusType", in order to increase consistency and comparability.
- Update the guidelines in the README.
This commit is contained in:
Lampros Smyrnaios 2021-09-22 16:36:48 +03:00
parent 61597d1627
commit 2ffb44a615
8 changed files with 71 additions and 36 deletions

View File

@ -7,8 +7,9 @@ It posts the results to the controller, which in turn, puts them in a database.<
To install and run the application: To install and run the application:
- Run ```git clone``` and then ```cd UrlsWorker```. - Run ```git clone``` and then ```cd UrlsWorker```.
- Create the file ```controllerBaseUrl.txt``` which contains just one line with the controller's base api-url, for example: ```http://IP:PORT/api/```. - Create the file ```S3_minIO_credentials.txt``` , which contains just one line with the ___S3_url___, ___S3_username___, ___S3_password___, ___S3_server_region___ and the ___S3_bucket___, separated by a _comma_ ```,```.
- Execute the ```installAndRun.sh``` script.<br> - [Optional] Create the file ```inputData.txt``` , which contains just one line with the ___workerId___ and the ___controller's base api-url___, separated by a _comma_ ```,``` . For example: ```worker_1,http://IP:PORT/api/```.
- Execute the ```installAndRun.sh``` script. In case the above file (_inputData.txt_) does not exist, it will request the current ___worker's ID___ and the ___Controller's Url___, and it will create the _inputData.txt_ file.<br>
That script, installs the [PublicationsRetriever](https://github.com/LSmyrnaios/PublicationsRetriever), as a library and then compiles and runs the whole Application.<br> That script, installs the [PublicationsRetriever](https://github.com/LSmyrnaios/PublicationsRetriever), as a library and then compiles and runs the whole Application.<br>
If you want to just run the app, then run the script with the argument "1": ```./installAndRun.sh 1```.<br> If you want to just run the app, then run the script with the argument "1": ```./installAndRun.sh 1```.<br>

View File

@ -8,11 +8,27 @@ elif [[ $# -gt 1 ]]; then
echo -e "Wrong number of arguments given: ${#}\nPlease execute it like: script.sh <justInstall: 0 | 1>"; exit 1 echo -e "Wrong number of arguments given: ${#}\nPlease execute it like: script.sh <justInstall: 0 | 1>"; exit 1
fi fi
# Check if the "inputData.txt" file exists; if not, ask the user to fill it.
inputDataFile="inputData.txt"
if [[ ! -f $inputDataFile ]]; then
echo -e "The file \"$inputDataFile\" does not exist. Going to create it..\n"
echo "Give the ID of this worker:"
read -r workerId
echo -e "\nGive the baseUrl of the controller (e.g.: http://IP:PORT/api/):"
read -r controllerBaseUrl
touch $inputDataFile
echo "$workerId,$controllerBaseUrl" >> $inputDataFile
fi
gradleVersion="7.2" gradleVersion="7.2"
if [[ justInstall -eq 0 ]]; then if [[ justInstall -eq 0 ]]; then
cd libs || exit cd libs || (mkdir libs && (cd libs || exit))
git clone https://github.com/LSmyrnaios/PublicationsRetriever.git # We assume there is no previously source-code here, if so, it will be overwritten. git clone https://github.com/LSmyrnaios/PublicationsRetriever.git # We assume there is no previously source-code here, if so, it will be overwritten.
# Do not need to perform a string-replace in "build.gradle", since it automatically gets all ".jar" files. # Do not need to perform a string-replace in "build.gradle", since it automatically gets all ".jar" files.
@ -35,7 +51,7 @@ if [[ justInstall -eq 0 ]]; then
wget https://services.gradle.org/distributions/gradle-${gradleVersion}-bin.zip wget https://services.gradle.org/distributions/gradle-${gradleVersion}-bin.zip
sudo mkdir /opt/gradle sudo mkdir /opt/gradle
sudo unzip -d /opt/gradle gradle-${gradleVersion}-bin.zip sudo apt install -y unzip && sudo unzip -d /opt/gradle gradle-${gradleVersion}-bin.zip
#ls /opt/gradle/gradle-${gradleVersion} # For debugging installation #ls /opt/gradle/gradle-${gradleVersion} # For debugging installation
export PATH=$PATH:/opt/gradle/gradle-${gradleVersion}/bin export PATH=$PATH:/opt/gradle/gradle-${gradleVersion}/bin

View File

@ -20,16 +20,19 @@ public class UrlsWorkerApplication {
private static final Logger logger = LoggerFactory.getLogger(UrlsWorkerApplication.class); private static final Logger logger = LoggerFactory.getLogger(UrlsWorkerApplication.class);
private static final String controllerBaseUrlFilePath = FileUtils.workingDir + "controllerBaseUrl.txt"; private static final String inputDataFilePath = FileUtils.workingDir + "inputData.txt";
public static String workerId = null;
public static String controllerBaseUrl = null; // BaseUrl template: "http://IP:PORT/api/" public static String controllerBaseUrl = null; // BaseUrl template: "http://IP:PORT/api/"
public static void main(String[] args) { public static void main(String[] args) {
setControllerBaseUrl(); // This may cause the Server to terminate early, in case the controllerBaseUrl cannot be found. setInputData(); // This may cause the Server to terminate early, in case the workerId or the controllerBaseUrl cannot be found.
SpringApplication.run(UrlsWorkerApplication.class, args); SpringApplication.run(UrlsWorkerApplication.class, args);
} }
@PreDestroy @PreDestroy
public static void preDestroy() public static void preDestroy()
{ {
@ -48,39 +51,48 @@ public class UrlsWorkerApplication {
} }
private static void setControllerBaseUrl() private static void setInputData()
{ {
// Take the controllerBaseUrl from the file. // Take the workerId and the controllerBaseUrl from the file.
Scanner myReader = null; Scanner myReader = null;
try { try {
File controllerBaseUrlFile = new File(controllerBaseUrlFilePath); File inputDataFile = new File(inputDataFilePath);
if ( !controllerBaseUrlFile.exists() ) { if ( !inputDataFile.exists() ) {
String errorMsg = "controllerBaseUrlFile \"" + controllerBaseUrlFilePath + "\" does not exists!"; String errorMsg = "controllerBaseUrlFile \"" + inputDataFilePath + "\" does not exists!";
logger.error(errorMsg); logger.error(errorMsg);
System.err.println(errorMsg); System.err.println(errorMsg);
System.exit(60); System.exit(60);
} }
myReader = new Scanner(inputDataFile);
myReader = new Scanner(controllerBaseUrlFile); if ( myReader.hasNextLine() ) {
if ( !myReader.hasNextLine() ) { String[] data = myReader.nextLine().split(",");
String errorMsg = "The controllerBaseUrlFile is empty! No WorkerReports can be sent from this worker! Exiting.."; if ( data.length < 2 ) {
logger.error(errorMsg); String errorMsg = "Not all data were retrieved from file \"" + inputDataFilePath + "\"!";
System.err.println(errorMsg); logger.error(errorMsg);
System.exit(61); System.err.println(errorMsg);
System.exit(61);
}
workerId = data[0].trim();
controllerBaseUrl = data[1].trim();
if ( !controllerBaseUrl.endsWith("/") )
controllerBaseUrl += "/"; // Make sure the whole urls will not break later.
} }
controllerBaseUrl = myReader.nextLine().trim(); if ( (workerId == null) || (controllerBaseUrl == null) ) {
if ( !controllerBaseUrl.endsWith("/") ) String errorMsg = "No \"workerId\" or/and \"controllerBaseUrl\" could be retrieved from the file: " + inputDataFilePath;
controllerBaseUrl += "/"; // Make sure the whole urls will not break later. logger.error(errorMsg);
System.err.println(errorMsg);
System.exit(62);
}
logger.info("The controllerBaseUrl is: " + controllerBaseUrl); logger.info("workerId: " + workerId + ", controllerBaseUrl: " + controllerBaseUrl); // It's safe and helpful to show them in the logs.
} catch (Exception e) { } catch (Exception e) {
String errorMsg = "An error prevented the retrieval of the controllerBaseUrl from the file: " + controllerBaseUrlFilePath + "\n" + e.getMessage(); String errorMsg = "An error prevented the retrieval of the workerId and the controllerBaseUrl from the file: " + inputDataFilePath + "\n" + e.getMessage();
logger.error(errorMsg); logger.error(errorMsg);
System.err.println(errorMsg); System.err.println(errorMsg);
e.printStackTrace(); e.printStackTrace();
System.exit(62); System.exit(63);
} finally { } finally {
if ( myReader != null ) if ( myReader != null )
myReader.close(); myReader.close();

View File

@ -1,5 +1,6 @@
package eu.openaire.urls_worker.controllers; package eu.openaire.urls_worker.controllers;
import eu.openaire.urls_worker.UrlsWorkerApplication;
import eu.openaire.urls_worker.payloads.responces.WorkerResponse; import eu.openaire.urls_worker.payloads.responces.WorkerResponse;
import eu.openaire.urls_worker.util.AssignmentHandler; import eu.openaire.urls_worker.util.AssignmentHandler;
import eu.openaire.urls_worker.util.WorkerConstants; import eu.openaire.urls_worker.util.WorkerConstants;
@ -35,7 +36,7 @@ public class GeneralController {
if ( AssignmentHandler.isAvailableForWork ) { if ( AssignmentHandler.isAvailableForWork ) {
logger.info("The worker is available for an assignment."); logger.info("The worker is available for an assignment.");
return ResponseEntity.status(200).body(new WorkerResponse(WorkerConstants.WORKER_ID, WorkerConstants.ASSIGNMENTS_LIMIT)); return ResponseEntity.status(200).body(new WorkerResponse(UrlsWorkerApplication.workerId, WorkerConstants.ASSIGNMENTS_LIMIT));
} }
else { else {
logger.info("The worker is busy with another assignment."); logger.info("The worker is busy with another assignment.");

View File

@ -13,8 +13,12 @@ import com.fasterxml.jackson.annotation.JsonPropertyOrder;
}) })
public class UrlReport { public class UrlReport {
public enum StatusType {
accessible, non_accessible
}
@JsonProperty("status") @JsonProperty("status")
private String status; private StatusType status;
@JsonProperty("payload") @JsonProperty("payload")
private Payload payload; private Payload payload;
@ -23,18 +27,18 @@ public class UrlReport {
private Error error; private Error error;
public UrlReport(String status, Payload payload, Error error) { public UrlReport(StatusType status, Payload payload, Error error) {
this.status = status; this.status = status;
this.payload = payload; this.payload = payload;
this.error = error; this.error = error;
} }
public String getStatus() { public StatusType getStatus() {
return this.status; return this.status;
} }
public void setStatus(String status) { public void setStatus(StatusType status) {
this.status = status; this.status = status;
} }
@ -57,9 +61,10 @@ public class UrlReport {
@Override @Override
public String toString() { public String toString() {
return "UrlReport{" + return "UrlReport{" +
"status='" + status + '\'' + "status=" + status +
", payload=" + payload + ", payload=" + payload +
", error=" + error + ", error=" + error +
'}'; '}';
} }
} }

View File

@ -139,13 +139,14 @@ public class PublicationsRetrieverPlugin {
{ {
for ( DataToBeLogged data : FileUtils.dataToBeLoggedList ) for ( DataToBeLogged data : FileUtils.dataToBeLoggedList )
{ {
String status = null, fileLocation = null, comment = data.getComment(), mimeType = null, hash = data.getHash(); UrlReport.StatusType status = null;
String fileLocation = null, comment = data.getComment(), mimeType = null, hash = data.getHash();
Long size = data.getSize(); Long size = data.getSize();
Error error = null; Error error = null;
if ( data.getWasDocumentOrDatasetAccessible().equals("true") ) if ( data.getWasDocumentOrDatasetAccessible().equals("true") )
{ {
status = "accessible"; status = UrlReport.StatusType.accessible;
if ( comment.contains(UrlUtils.alreadyDownloadedByIDMessage) ) { if ( comment.contains(UrlUtils.alreadyDownloadedByIDMessage) ) {
// The file of this docUrl was already downloaded by another docUrl. // The file of this docUrl was already downloaded by another docUrl.
String previousId = comment.substring(UrlUtils.alreadyDownloadedByIDMessage.length() +1); String previousId = comment.substring(UrlUtils.alreadyDownloadedByIDMessage.length() +1);
@ -171,7 +172,7 @@ public class PublicationsRetrieverPlugin {
error = new Error(null, null); // We do not want to send a "null" object, since it just adds more complicated handling in the controller.. error = new Error(null, null); // We do not want to send a "null" object, since it just adds more complicated handling in the controller..
} }
else { else {
status = "non-accessible"; status = UrlReport.StatusType.non_accessible;
if ( data.getCouldRetry().equals("true") ) if ( data.getCouldRetry().equals("true") )
error = new Error(Error.ErrorType.couldRetry, comment); error = new Error(Error.ErrorType.couldRetry, comment);
else else

View File

@ -32,7 +32,7 @@ public class AssignmentHandler {
public static AssignmentRequest requestAssignments() public static AssignmentRequest requestAssignments()
{ {
String requestUrl = UrlsWorkerApplication.controllerBaseUrl + "urls" + (askForTest ? "/test" : "") + "?workerId=" + WorkerConstants.WORKER_ID + "&workerAssignmentsLimit=" + WorkerConstants.ASSIGNMENTS_LIMIT; String requestUrl = UrlsWorkerApplication.controllerBaseUrl + "urls" + (askForTest ? "/test" : "") + "?workerId=" + UrlsWorkerApplication.workerId + "&workerAssignmentsLimit=" + WorkerConstants.ASSIGNMENTS_LIMIT;
logger.info("Going to request assignments from the controller-server: " + requestUrl); logger.info("Going to request assignments from the controller-server: " + requestUrl);
String json = null; String json = null;
@ -129,7 +129,7 @@ public class AssignmentHandler {
logger.info("Going to post the WorkerReport to the controller-server: " + postUrl); logger.info("Going to post the WorkerReport to the controller-server: " + postUrl);
try { try {
ResponseEntity<String> responseEntity = new RestTemplateBuilder().build().postForEntity(postUrl, new WorkerReport(WorkerConstants.WORKER_ID, assignmentRequestCounter, urlReports), String.class); ResponseEntity<String> responseEntity = new RestTemplateBuilder().build().postForEntity(postUrl, new WorkerReport(UrlsWorkerApplication.workerId, assignmentRequestCounter, urlReports), String.class);
int responseCode = responseEntity.getStatusCode().value(); int responseCode = responseEntity.getStatusCode().value();
if ( responseCode != HttpStatus.OK.value() ) { if ( responseCode != HttpStatus.OK.value() ) {
logger.error("Connection problem with the submission of the WorkerReport of assignment_" + assignmentRequestCounter + " to the Controller. Error-code was: " + responseCode); logger.error("Connection problem with the submission of the WorkerReport of assignment_" + assignmentRequestCounter + " to the Controller. Error-code was: " + responseCode);

View File

@ -2,7 +2,6 @@ package eu.openaire.urls_worker.util;
public interface WorkerConstants { public interface WorkerConstants {
String WORKER_ID = "worker_1"; // This should be different for every deployment of a Worker.
int ASSIGNMENTS_LIMIT = 10000; int ASSIGNMENTS_LIMIT = 10000;
} }