- Update the "installAndRun.sh":
--Ask the user to give the "workerId" and the "controllerBaseUrl". --Make sure the "libs" directory is created, if it does not exist. --Make sure the "unzip" package is installed. - Change the data-type of the "UrlReport.status" to be "enum StatusType", in order to increase consistency and comparability. - Update the guidelines in the README.
This commit is contained in:
parent
61597d1627
commit
2ffb44a615
|
@ -7,8 +7,9 @@ It posts the results to the controller, which in turn, puts them in a database.<
|
|||
|
||||
To install and run the application:
|
||||
- Run ```git clone``` and then ```cd UrlsWorker```.
|
||||
- Create the file ```controllerBaseUrl.txt``` which contains just one line with the controller's base api-url, for example: ```http://IP:PORT/api/```.
|
||||
- Execute the ```installAndRun.sh``` script.<br>
|
||||
- Create the file ```S3_minIO_credentials.txt``` , which contains just one line with the ___S3_url___, ___S3_username___, ___S3_password___, ___S3_server_region___ and the ___S3_bucket___, separated by a _comma_ ```,```.
|
||||
- [Optional] Create the file ```inputData.txt``` , which contains just one line with the ___workerId___ and the ___controller's base api-url___, separated by a _comma_ ```,``` . For example: ```worker_1,http://IP:PORT/api/```.
|
||||
- Execute the ```installAndRun.sh``` script. In case the above file (_inputData.txt_) does not exist, it will request the current ___worker's ID___ and the ___Controller's Url___, and it will create the _inputData.txt_ file.<br>
|
||||
|
||||
That script installs the [PublicationsRetriever](https://github.com/LSmyrnaios/PublicationsRetriever) as a library, and then compiles and runs the whole Application.<br>
|
||||
If you want to just run the app, then run the script with the argument "1": ```./installAndRun.sh 1```.<br>
|
||||
|
|
|
@ -8,11 +8,27 @@ elif [[ $# -gt 1 ]]; then
|
|||
echo -e "Wrong number of arguments given: ${#}\nPlease execute it like: script.sh <justInstall: 0 | 1>"; exit 1
|
||||
fi
|
||||
|
||||
# Check if the "inputData.txt" file exists; if not, ask the user for the data and create it.
|
||||
inputDataFile="inputData.txt"
|
||||
|
||||
if [[ ! -f $inputDataFile ]]; then
|
||||
echo -e "The file \"$inputDataFile\" does not exist. Going to create it..\n"
|
||||
|
||||
echo "Give the ID of this worker:"
|
||||
read -r workerId
|
||||
|
||||
echo -e "\nGive the baseUrl of the controller (e.g.: http://IP:PORT/api/):"
|
||||
read -r controllerBaseUrl
|
||||
|
||||
touch $inputDataFile
|
||||
echo "$workerId,$controllerBaseUrl" >> $inputDataFile
|
||||
fi
|
||||
|
||||
gradleVersion="7.2"
|
||||
|
||||
if [[ justInstall -eq 0 ]]; then
|
||||
|
||||
cd libs || exit
|
||||
cd libs || (mkdir libs && (cd libs || exit))
|
||||
git clone https://github.com/LSmyrnaios/PublicationsRetriever.git # We assume there is no previously source-code here, if so, it will be overwritten.
|
||||
|
||||
# There is no need to perform a string-replace in "build.gradle", since it automatically picks up all ".jar" files.
|
||||
|
@ -35,7 +51,7 @@ if [[ justInstall -eq 0 ]]; then
|
|||
wget https://services.gradle.org/distributions/gradle-${gradleVersion}-bin.zip
|
||||
|
||||
sudo mkdir /opt/gradle
|
||||
sudo unzip -d /opt/gradle gradle-${gradleVersion}-bin.zip
|
||||
sudo apt install -y unzip && sudo unzip -d /opt/gradle gradle-${gradleVersion}-bin.zip
|
||||
#ls /opt/gradle/gradle-${gradleVersion} # For debugging installation
|
||||
|
||||
export PATH=$PATH:/opt/gradle/gradle-${gradleVersion}/bin
|
||||
|
|
|
@ -20,16 +20,19 @@ public class UrlsWorkerApplication {
|
|||
|
||||
private static final Logger logger = LoggerFactory.getLogger(UrlsWorkerApplication.class);
|
||||
|
||||
private static final String controllerBaseUrlFilePath = FileUtils.workingDir + "controllerBaseUrl.txt";
|
||||
private static final String inputDataFilePath = FileUtils.workingDir + "inputData.txt";
|
||||
public static String workerId = null;
|
||||
public static String controllerBaseUrl = null; // BaseUrl template: "http://IP:PORT/api/"
|
||||
|
||||
|
||||
public static void main(String[] args) {
|
||||
|
||||
setControllerBaseUrl(); // This may cause the Server to terminate early, in case the controllerBaseUrl cannot be found.
|
||||
setInputData(); // This may cause the Server to terminate early, in case the workerId or the controllerBaseUrl cannot be found.
|
||||
|
||||
SpringApplication.run(UrlsWorkerApplication.class, args);
|
||||
}
|
||||
|
||||
|
||||
@PreDestroy
|
||||
public static void preDestroy()
|
||||
{
|
||||
|
@ -48,39 +51,48 @@ public class UrlsWorkerApplication {
|
|||
}
|
||||
|
||||
|
||||
private static void setControllerBaseUrl()
|
||||
private static void setInputData()
|
||||
{
|
||||
// Take the controllerBaseUrl from the file.
|
||||
// Take the workerId and the controllerBaseUrl from the file.
|
||||
Scanner myReader = null;
|
||||
try {
|
||||
File controllerBaseUrlFile = new File(controllerBaseUrlFilePath);
|
||||
if ( !controllerBaseUrlFile.exists() ) {
|
||||
String errorMsg = "controllerBaseUrlFile \"" + controllerBaseUrlFilePath + "\" does not exists!";
|
||||
File inputDataFile = new File(inputDataFilePath);
|
||||
if ( !inputDataFile.exists() ) {
|
||||
String errorMsg = "controllerBaseUrlFile \"" + inputDataFilePath + "\" does not exists!";
|
||||
logger.error(errorMsg);
|
||||
System.err.println(errorMsg);
|
||||
System.exit(60);
|
||||
}
|
||||
|
||||
myReader = new Scanner(controllerBaseUrlFile);
|
||||
if ( !myReader.hasNextLine() ) {
|
||||
String errorMsg = "The controllerBaseUrlFile is empty! No WorkerReports can be sent from this worker! Exiting..";
|
||||
logger.error(errorMsg);
|
||||
System.err.println(errorMsg);
|
||||
System.exit(61);
|
||||
myReader = new Scanner(inputDataFile);
|
||||
if ( myReader.hasNextLine() ) {
|
||||
String[] data = myReader.nextLine().split(",");
|
||||
if ( data.length < 2 ) {
|
||||
String errorMsg = "Not all data were retrieved from file \"" + inputDataFilePath + "\"!";
|
||||
logger.error(errorMsg);
|
||||
System.err.println(errorMsg);
|
||||
System.exit(61);
|
||||
}
|
||||
workerId = data[0].trim();
|
||||
controllerBaseUrl = data[1].trim();
|
||||
if ( !controllerBaseUrl.endsWith("/") )
|
||||
controllerBaseUrl += "/"; // Make sure the whole urls will not break later.
|
||||
}
|
||||
|
||||
controllerBaseUrl = myReader.nextLine().trim();
|
||||
if ( !controllerBaseUrl.endsWith("/") )
|
||||
controllerBaseUrl += "/"; // Make sure the whole urls will not break later.
|
||||
if ( (workerId == null) || (controllerBaseUrl == null) ) {
|
||||
String errorMsg = "No \"workerId\" or/and \"controllerBaseUrl\" could be retrieved from the file: " + inputDataFilePath;
|
||||
logger.error(errorMsg);
|
||||
System.err.println(errorMsg);
|
||||
System.exit(62);
|
||||
}
|
||||
|
||||
logger.info("The controllerBaseUrl is: " + controllerBaseUrl);
|
||||
logger.info("workerId: " + workerId + ", controllerBaseUrl: " + controllerBaseUrl); // It's safe and helpful to show them in the logs.
|
||||
|
||||
} catch (Exception e) {
|
||||
String errorMsg = "An error prevented the retrieval of the controllerBaseUrl from the file: " + controllerBaseUrlFilePath + "\n" + e.getMessage();
|
||||
String errorMsg = "An error prevented the retrieval of the workerId and the controllerBaseUrl from the file: " + inputDataFilePath + "\n" + e.getMessage();
|
||||
logger.error(errorMsg);
|
||||
System.err.println(errorMsg);
|
||||
e.printStackTrace();
|
||||
System.exit(62);
|
||||
System.exit(63);
|
||||
} finally {
|
||||
if ( myReader != null )
|
||||
myReader.close();
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
package eu.openaire.urls_worker.controllers;
|
||||
|
||||
import eu.openaire.urls_worker.UrlsWorkerApplication;
|
||||
import eu.openaire.urls_worker.payloads.responces.WorkerResponse;
|
||||
import eu.openaire.urls_worker.util.AssignmentHandler;
|
||||
import eu.openaire.urls_worker.util.WorkerConstants;
|
||||
|
@ -35,7 +36,7 @@ public class GeneralController {
|
|||
|
||||
if ( AssignmentHandler.isAvailableForWork ) {
|
||||
logger.info("The worker is available for an assignment.");
|
||||
return ResponseEntity.status(200).body(new WorkerResponse(WorkerConstants.WORKER_ID, WorkerConstants.ASSIGNMENTS_LIMIT));
|
||||
return ResponseEntity.status(200).body(new WorkerResponse(UrlsWorkerApplication.workerId, WorkerConstants.ASSIGNMENTS_LIMIT));
|
||||
}
|
||||
else {
|
||||
logger.info("The worker is busy with another assignment.");
|
||||
|
|
|
@ -13,8 +13,12 @@ import com.fasterxml.jackson.annotation.JsonPropertyOrder;
|
|||
})
|
||||
public class UrlReport {
|
||||
|
||||
public enum StatusType {
|
||||
accessible, non_accessible
|
||||
}
|
||||
|
||||
@JsonProperty("status")
|
||||
private String status;
|
||||
private StatusType status;
|
||||
|
||||
@JsonProperty("payload")
|
||||
private Payload payload;
|
||||
|
@ -23,18 +27,18 @@ public class UrlReport {
|
|||
private Error error;
|
||||
|
||||
|
||||
public UrlReport(String status, Payload payload, Error error) {
|
||||
public UrlReport(StatusType status, Payload payload, Error error) {
|
||||
this.status = status;
|
||||
this.payload = payload;
|
||||
this.error = error;
|
||||
}
|
||||
|
||||
|
||||
public String getStatus() {
|
||||
public StatusType getStatus() {
|
||||
return this.status;
|
||||
}
|
||||
|
||||
public void setStatus(String status) {
|
||||
public void setStatus(StatusType status) {
|
||||
this.status = status;
|
||||
}
|
||||
|
||||
|
@ -57,9 +61,10 @@ public class UrlReport {
|
|||
@Override
|
||||
public String toString() {
|
||||
return "UrlReport{" +
|
||||
"status='" + status + '\'' +
|
||||
"status=" + status +
|
||||
", payload=" + payload +
|
||||
", error=" + error +
|
||||
'}';
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -139,13 +139,14 @@ public class PublicationsRetrieverPlugin {
|
|||
{
|
||||
for ( DataToBeLogged data : FileUtils.dataToBeLoggedList )
|
||||
{
|
||||
String status = null, fileLocation = null, comment = data.getComment(), mimeType = null, hash = data.getHash();
|
||||
UrlReport.StatusType status = null;
|
||||
String fileLocation = null, comment = data.getComment(), mimeType = null, hash = data.getHash();
|
||||
Long size = data.getSize();
|
||||
Error error = null;
|
||||
|
||||
if ( data.getWasDocumentOrDatasetAccessible().equals("true") )
|
||||
{
|
||||
status = "accessible";
|
||||
status = UrlReport.StatusType.accessible;
|
||||
if ( comment.contains(UrlUtils.alreadyDownloadedByIDMessage) ) {
|
||||
// The file of this docUrl was already downloaded by another docUrl.
|
||||
String previousId = comment.substring(UrlUtils.alreadyDownloadedByIDMessage.length() +1);
|
||||
|
@ -171,7 +172,7 @@ public class PublicationsRetrieverPlugin {
|
|||
error = new Error(null, null); // We do not want to send a "null" object, since it just adds more complicated handling in the controller..
|
||||
}
|
||||
else {
|
||||
status = "non-accessible";
|
||||
status = UrlReport.StatusType.non_accessible;
|
||||
if ( data.getCouldRetry().equals("true") )
|
||||
error = new Error(Error.ErrorType.couldRetry, comment);
|
||||
else
|
||||
|
|
|
@ -32,7 +32,7 @@ public class AssignmentHandler {
|
|||
|
||||
public static AssignmentRequest requestAssignments()
|
||||
{
|
||||
String requestUrl = UrlsWorkerApplication.controllerBaseUrl + "urls" + (askForTest ? "/test" : "") + "?workerId=" + WorkerConstants.WORKER_ID + "&workerAssignmentsLimit=" + WorkerConstants.ASSIGNMENTS_LIMIT;
|
||||
String requestUrl = UrlsWorkerApplication.controllerBaseUrl + "urls" + (askForTest ? "/test" : "") + "?workerId=" + UrlsWorkerApplication.workerId + "&workerAssignmentsLimit=" + WorkerConstants.ASSIGNMENTS_LIMIT;
|
||||
logger.info("Going to request assignments from the controller-server: " + requestUrl);
|
||||
|
||||
String json = null;
|
||||
|
@ -129,7 +129,7 @@ public class AssignmentHandler {
|
|||
logger.info("Going to post the WorkerReport to the controller-server: " + postUrl);
|
||||
|
||||
try {
|
||||
ResponseEntity<String> responseEntity = new RestTemplateBuilder().build().postForEntity(postUrl, new WorkerReport(WorkerConstants.WORKER_ID, assignmentRequestCounter, urlReports), String.class);
|
||||
ResponseEntity<String> responseEntity = new RestTemplateBuilder().build().postForEntity(postUrl, new WorkerReport(UrlsWorkerApplication.workerId, assignmentRequestCounter, urlReports), String.class);
|
||||
int responseCode = responseEntity.getStatusCode().value();
|
||||
if ( responseCode != HttpStatus.OK.value() ) {
|
||||
logger.error("Connection problem with the submission of the WorkerReport of assignment_" + assignmentRequestCounter + " to the Controller. Error-code was: " + responseCode);
|
||||
|
|
|
@ -2,7 +2,6 @@ package eu.openaire.urls_worker.util;
|
|||
|
||||
public interface WorkerConstants {
|
||||
|
||||
String WORKER_ID = "worker_1"; // This should be different for every deployment of a Worker.
|
||||
int ASSIGNMENTS_LIMIT = 10000;
|
||||
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue