- Update the "installAndRun.sh":

--Ask the user to give the "workerId" and the "controllerBaseUrl".
--Make sure the "libs" directory is created, if it does not already exist.
--Make sure the "unzip" package is installed.
- Change the data-type of the "UrlReport.status" to be "enum StatusType", in order to increase consistency and comparability.
- Update the guidelines in the README.
This commit is contained in:
Lampros Smyrnaios 2021-09-22 16:36:48 +03:00
parent 61597d1627
commit 2ffb44a615
8 changed files with 71 additions and 36 deletions

View File

@ -7,8 +7,9 @@ It posts the results to the controller, which in turn, puts them in a database.<
To install and run the application:
- Run ```git clone``` and then ```cd UrlsWorker```.
- Create the file ```controllerBaseUrl.txt``` which contains just one line with the controller's base api-url, for example: ```http://IP:PORT/api/```.
- Execute the ```installAndRun.sh``` script.<br>
- Create the file ```S3_minIO_credentials.txt``` , which contains just one line with the ___S3_url___, ___S3_username___, ___S3_password___, ___S3_server_region___ and the ___S3_bucket___, separated by a _comma_ ```,```.
- [Optional] Create the file ```inputData.txt```, which contains just one line with the ___workerId___ and the ___controller's base api-url___, separated by a _comma_ ```,```. For example: ```worker_1,http://IP:PORT/api/```.
- Execute the ```installAndRun.sh``` script. In case the above file (_inputData.txt_) does not exist, it will request the current ___worker's ID___ and the ___Controller's Url___, and it will create the _inputData.txt_ file.<br>
That script installs the [PublicationsRetriever](https://github.com/LSmyrnaios/PublicationsRetriever) as a library, and then compiles and runs the whole Application.<br>
If you want to just run the app, then run the script with the argument "1": ```./installAndRun.sh 1```.<br>

View File

@ -8,11 +8,27 @@ elif [[ $# -gt 1 ]]; then
echo -e "Wrong number of arguments given: ${#}\nPlease execute it like: script.sh <justInstall: 0 | 1>"; exit 1
fi
# Check if the "inputData.txt" file exists; if it does not, ask the user for its contents and create it.
# The file holds a single line of the form: <workerId>,<controllerBaseUrl>
inputDataFile="inputData.txt"
if [[ ! -f "$inputDataFile" ]]; then
  echo -e "The file \"$inputDataFile\" does not exist. Going to create it..\n"
  echo "Give the ID of this worker:"
  read -r workerId
  echo -e "\nGive the baseUrl of the controller (e.g.: http://IP:PORT/api/):"
  read -r controllerBaseUrl
  # Refuse to write an incomplete line: an empty answer (or a closed stdin) would otherwise produce
  # a file like ",url" or "id,", which cannot provide both values to the application.
  if [[ -z "$workerId" || -z "$controllerBaseUrl" ]]; then
    echo "Both the worker's ID and the controller's baseUrl are required. The file \"$inputDataFile\" was not created."; exit 1
  fi
  # The file is known not to exist at this point, so a plain redirect creates it with exactly one line.
  echo "$workerId,$controllerBaseUrl" > "$inputDataFile"
fi
gradleVersion="7.2"
if [[ justInstall -eq 0 ]]; then
cd libs || exit
# Enter the "libs" directory, creating it first if it does not exist.
# This must run in the current shell: inside a "( ... )" subshell, both the "cd" and the "exit" only affect the subshell,
# so the script would continue in the wrong directory.
mkdir -p libs && cd libs || exit
git clone https://github.com/LSmyrnaios/PublicationsRetriever.git # We assume there is no previously source-code here, if so, it will be overwritten.
# Do not need to perform a string-replace in "build.gradle", since it automatically gets all ".jar" files.
@ -35,7 +51,7 @@ if [[ justInstall -eq 0 ]]; then
wget https://services.gradle.org/distributions/gradle-${gradleVersion}-bin.zip
sudo mkdir /opt/gradle
sudo unzip -d /opt/gradle gradle-${gradleVersion}-bin.zip
sudo apt install -y unzip && sudo unzip -d /opt/gradle gradle-${gradleVersion}-bin.zip
#ls /opt/gradle/gradle-${gradleVersion} # For debugging installation
export PATH=$PATH:/opt/gradle/gradle-${gradleVersion}/bin

View File

@ -20,16 +20,19 @@ public class UrlsWorkerApplication {
private static final Logger logger = LoggerFactory.getLogger(UrlsWorkerApplication.class);
private static final String controllerBaseUrlFilePath = FileUtils.workingDir + "controllerBaseUrl.txt";
private static final String inputDataFilePath = FileUtils.workingDir + "inputData.txt";
public static String workerId = null;
public static String controllerBaseUrl = null; // BaseUrl template: "http://IP:PORT/api/"
public static void main(String[] args) {
setControllerBaseUrl(); // This may cause the Server to terminate early, in case the controllerBaseUrl cannot be found.
setInputData(); // This may cause the Server to terminate early, in case the workerId or the controllerBaseUrl cannot be found.
SpringApplication.run(UrlsWorkerApplication.class, args);
}
@PreDestroy
public static void preDestroy()
{
@ -48,39 +51,48 @@ public class UrlsWorkerApplication {
}
private static void setControllerBaseUrl()
private static void setInputData()
{
// Take the controllerBaseUrl from the file.
// Take the workerId and the controllerBaseUrl from the file.
Scanner myReader = null;
try {
File controllerBaseUrlFile = new File(controllerBaseUrlFilePath);
if ( !controllerBaseUrlFile.exists() ) {
String errorMsg = "controllerBaseUrlFile \"" + controllerBaseUrlFilePath + "\" does not exists!";
File inputDataFile = new File(inputDataFilePath);
if ( !inputDataFile.exists() ) {
String errorMsg = "controllerBaseUrlFile \"" + inputDataFilePath + "\" does not exists!";
logger.error(errorMsg);
System.err.println(errorMsg);
System.exit(60);
}
myReader = new Scanner(controllerBaseUrlFile);
if ( !myReader.hasNextLine() ) {
String errorMsg = "The controllerBaseUrlFile is empty! No WorkerReports can be sent from this worker! Exiting..";
logger.error(errorMsg);
System.err.println(errorMsg);
System.exit(61);
myReader = new Scanner(inputDataFile);
if ( myReader.hasNextLine() ) {
String[] data = myReader.nextLine().split(",");
if ( data.length < 2 ) {
String errorMsg = "Not all data were retrieved from file \"" + inputDataFilePath + "\"!";
logger.error(errorMsg);
System.err.println(errorMsg);
System.exit(61);
}
workerId = data[0].trim();
controllerBaseUrl = data[1].trim();
if ( !controllerBaseUrl.endsWith("/") )
controllerBaseUrl += "/"; // Make sure the whole urls will not break later.
}
controllerBaseUrl = myReader.nextLine().trim();
if ( !controllerBaseUrl.endsWith("/") )
controllerBaseUrl += "/"; // Make sure the whole urls will not break later.
if ( (workerId == null) || (controllerBaseUrl == null) ) {
String errorMsg = "No \"workerId\" or/and \"controllerBaseUrl\" could be retrieved from the file: " + inputDataFilePath;
logger.error(errorMsg);
System.err.println(errorMsg);
System.exit(62);
}
logger.info("The controllerBaseUrl is: " + controllerBaseUrl);
logger.info("workerId: " + workerId + ", controllerBaseUrl: " + controllerBaseUrl); // It's safe and helpful to show them in the logs.
} catch (Exception e) {
String errorMsg = "An error prevented the retrieval of the controllerBaseUrl from the file: " + controllerBaseUrlFilePath + "\n" + e.getMessage();
String errorMsg = "An error prevented the retrieval of the workerId and the controllerBaseUrl from the file: " + inputDataFilePath + "\n" + e.getMessage();
logger.error(errorMsg);
System.err.println(errorMsg);
e.printStackTrace();
System.exit(62);
System.exit(63);
} finally {
if ( myReader != null )
myReader.close();

View File

@ -1,5 +1,6 @@
package eu.openaire.urls_worker.controllers;
import eu.openaire.urls_worker.UrlsWorkerApplication;
import eu.openaire.urls_worker.payloads.responces.WorkerResponse;
import eu.openaire.urls_worker.util.AssignmentHandler;
import eu.openaire.urls_worker.util.WorkerConstants;
@ -35,7 +36,7 @@ public class GeneralController {
if ( AssignmentHandler.isAvailableForWork ) {
logger.info("The worker is available for an assignment.");
return ResponseEntity.status(200).body(new WorkerResponse(WorkerConstants.WORKER_ID, WorkerConstants.ASSIGNMENTS_LIMIT));
return ResponseEntity.status(200).body(new WorkerResponse(UrlsWorkerApplication.workerId, WorkerConstants.ASSIGNMENTS_LIMIT));
}
else {
logger.info("The worker is busy with another assignment.");

View File

@ -13,8 +13,12 @@ import com.fasterxml.jackson.annotation.JsonPropertyOrder;
})
public class UrlReport {
/**
 * The accessibility outcome stored in the "status" property of a {@link UrlReport}.
 * NOTE(review): the non-accessible case used to be reported as the string "non-accessible" (with a hyphen),
 * while this constant is spelled "non_accessible" (with an underscore) — confirm that the receiver of the
 * serialized report expects the new spelling.
 */
public enum StatusType {
accessible, non_accessible
}
@JsonProperty("status")
private String status;
private StatusType status;
@JsonProperty("payload")
private Payload payload;
@ -23,18 +27,18 @@ public class UrlReport {
private Error error;
public UrlReport(String status, Payload payload, Error error) {
public UrlReport(StatusType status, Payload payload, Error error) {
this.status = status;
this.payload = payload;
this.error = error;
}
public String getStatus() {
public StatusType getStatus() {
return this.status;
}
public void setStatus(String status) {
public void setStatus(StatusType status) {
this.status = status;
}
@ -57,9 +61,10 @@ public class UrlReport {
@Override
public String toString() {
return "UrlReport{" +
"status='" + status + '\'' +
"status=" + status +
", payload=" + payload +
", error=" + error +
'}';
}
}

View File

@ -139,13 +139,14 @@ public class PublicationsRetrieverPlugin {
{
for ( DataToBeLogged data : FileUtils.dataToBeLoggedList )
{
String status = null, fileLocation = null, comment = data.getComment(), mimeType = null, hash = data.getHash();
UrlReport.StatusType status = null;
String fileLocation = null, comment = data.getComment(), mimeType = null, hash = data.getHash();
Long size = data.getSize();
Error error = null;
if ( data.getWasDocumentOrDatasetAccessible().equals("true") )
{
status = "accessible";
status = UrlReport.StatusType.accessible;
if ( comment.contains(UrlUtils.alreadyDownloadedByIDMessage) ) {
// The file of this docUrl was already downloaded by another docUrl.
String previousId = comment.substring(UrlUtils.alreadyDownloadedByIDMessage.length() +1);
@ -171,7 +172,7 @@ public class PublicationsRetrieverPlugin {
error = new Error(null, null); // We do not want to send a "null" object, since it just adds more complicated handling in the controller..
}
else {
status = "non-accessible";
status = UrlReport.StatusType.non_accessible;
if ( data.getCouldRetry().equals("true") )
error = new Error(Error.ErrorType.couldRetry, comment);
else

View File

@ -32,7 +32,7 @@ public class AssignmentHandler {
public static AssignmentRequest requestAssignments()
{
String requestUrl = UrlsWorkerApplication.controllerBaseUrl + "urls" + (askForTest ? "/test" : "") + "?workerId=" + WorkerConstants.WORKER_ID + "&workerAssignmentsLimit=" + WorkerConstants.ASSIGNMENTS_LIMIT;
String requestUrl = UrlsWorkerApplication.controllerBaseUrl + "urls" + (askForTest ? "/test" : "") + "?workerId=" + UrlsWorkerApplication.workerId + "&workerAssignmentsLimit=" + WorkerConstants.ASSIGNMENTS_LIMIT;
logger.info("Going to request assignments from the controller-server: " + requestUrl);
String json = null;
@ -129,7 +129,7 @@ public class AssignmentHandler {
logger.info("Going to post the WorkerReport to the controller-server: " + postUrl);
try {
ResponseEntity<String> responseEntity = new RestTemplateBuilder().build().postForEntity(postUrl, new WorkerReport(WorkerConstants.WORKER_ID, assignmentRequestCounter, urlReports), String.class);
ResponseEntity<String> responseEntity = new RestTemplateBuilder().build().postForEntity(postUrl, new WorkerReport(UrlsWorkerApplication.workerId, assignmentRequestCounter, urlReports), String.class);
int responseCode = responseEntity.getStatusCode().value();
if ( responseCode != HttpStatus.OK.value() ) {
logger.error("Connection problem with the submission of the WorkerReport of assignment_" + assignmentRequestCounter + " to the Controller. Error-code was: " + responseCode);

View File

@ -2,7 +2,6 @@ package eu.openaire.urls_worker.util;
public interface WorkerConstants {
String WORKER_ID = "worker_1"; // This should be different for every deployment of a Worker.
int ASSIGNMENTS_LIMIT = 10000;
}