forked from lsmyrnaios/UrlsWorker
- Use the "Timestamp" data type instead of "Date", in order to include more information.
- Code cleanup.
This commit is contained in:
parent
20b71164d5
commit
045788c728
|
@ -2,7 +2,7 @@ package eu.openaire.urls_worker.components;
|
||||||
|
|
||||||
import eu.openaire.urls_worker.controllers.FullTextsController;
|
import eu.openaire.urls_worker.controllers.FullTextsController;
|
||||||
import eu.openaire.urls_worker.plugins.PublicationsRetrieverPlugin;
|
import eu.openaire.urls_worker.plugins.PublicationsRetrieverPlugin;
|
||||||
import eu.openaire.urls_worker.util.AssignmentHandler;
|
import eu.openaire.urls_worker.util.AssignmentsHandler;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
import org.springframework.scheduling.annotation.Scheduled;
|
import org.springframework.scheduling.annotation.Scheduled;
|
||||||
|
@ -32,8 +32,8 @@ public class ScheduledTasks {
|
||||||
|
|
||||||
@Scheduled(fixedRate = 900_000) // Every 15 mins: 900_000
|
@Scheduled(fixedRate = 900_000) // Every 15 mins: 900_000
|
||||||
public void handleNewAssignments() {
|
public void handleNewAssignments() {
|
||||||
if ( AssignmentHandler.isAvailableForWork )
|
if ( AssignmentsHandler.isAvailableForWork )
|
||||||
AssignmentHandler.handleAssignments();
|
AssignmentsHandler.handleAssignments();
|
||||||
else {
|
else {
|
||||||
//logger.debug("The worker is not available for work at the moment.."); // JUST FOR DEBUG!
|
//logger.debug("The worker is not available for work at the moment.."); // JUST FOR DEBUG!
|
||||||
}
|
}
|
||||||
|
|
|
@ -2,7 +2,7 @@ package eu.openaire.urls_worker.controllers;
|
||||||
|
|
||||||
import eu.openaire.urls_worker.UrlsWorkerApplication;
|
import eu.openaire.urls_worker.UrlsWorkerApplication;
|
||||||
import eu.openaire.urls_worker.payloads.responces.WorkerResponse;
|
import eu.openaire.urls_worker.payloads.responces.WorkerResponse;
|
||||||
import eu.openaire.urls_worker.util.AssignmentHandler;
|
import eu.openaire.urls_worker.util.AssignmentsHandler;
|
||||||
import eu.openaire.urls_worker.util.WorkerConstants;
|
import eu.openaire.urls_worker.util.WorkerConstants;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
@ -38,7 +38,7 @@ public class GeneralController {
|
||||||
|
|
||||||
logger.info("Received an \"isWorkerAvailableForWork\" request.");
|
logger.info("Received an \"isWorkerAvailableForWork\" request.");
|
||||||
|
|
||||||
if ( AssignmentHandler.isAvailableForWork ) {
|
if ( AssignmentsHandler.isAvailableForWork ) {
|
||||||
logger.info("The worker is available for an assignment.");
|
logger.info("The worker is available for an assignment.");
|
||||||
return ResponseEntity.status(200).body(new WorkerResponse(UrlsWorkerApplication.workerId, WorkerConstants.ASSIGNMENTS_LIMIT));
|
return ResponseEntity.status(200).body(new WorkerResponse(UrlsWorkerApplication.workerId, WorkerConstants.ASSIGNMENTS_LIMIT));
|
||||||
} else {
|
} else {
|
||||||
|
|
|
@ -4,7 +4,7 @@ import com.fasterxml.jackson.annotation.JsonInclude;
|
||||||
import com.fasterxml.jackson.annotation.JsonProperty;
|
import com.fasterxml.jackson.annotation.JsonProperty;
|
||||||
import com.fasterxml.jackson.annotation.JsonPropertyOrder;
|
import com.fasterxml.jackson.annotation.JsonPropertyOrder;
|
||||||
|
|
||||||
import java.util.Date;
|
import java.sql.Timestamp;
|
||||||
|
|
||||||
|
|
||||||
@JsonInclude(JsonInclude.Include.NON_NULL)
|
@JsonInclude(JsonInclude.Include.NON_NULL)
|
||||||
|
@ -13,7 +13,7 @@ import java.util.Date;
|
||||||
"original_url",
|
"original_url",
|
||||||
"datasource",
|
"datasource",
|
||||||
"workerId",
|
"workerId",
|
||||||
"date"
|
"timestamp"
|
||||||
})
|
})
|
||||||
public class Assignment {
|
public class Assignment {
|
||||||
|
|
||||||
|
@ -29,19 +29,18 @@ public class Assignment {
|
||||||
@JsonProperty("workerid")
|
@JsonProperty("workerid")
|
||||||
private String workerId;
|
private String workerId;
|
||||||
|
|
||||||
@JsonProperty("date")
|
@JsonProperty("timestamp")
|
||||||
private Date date;
|
private Timestamp timestamp;
|
||||||
|
|
||||||
|
|
||||||
public Assignment() {}
|
public Assignment() {}
|
||||||
|
|
||||||
|
public Assignment(String id, String originalUrl, Datasource datasource, String workerId, Timestamp timestamp) {
|
||||||
public Assignment(String id, String originalUrl, Datasource datasource, String workerId, Date date) {
|
|
||||||
this.id = id;
|
this.id = id;
|
||||||
this.originalUrl = originalUrl;
|
this.originalUrl = originalUrl;
|
||||||
this.datasource = datasource;
|
this.datasource = datasource;
|
||||||
this.workerId = workerId;
|
this.workerId = workerId;
|
||||||
this.date = date;
|
this.timestamp = timestamp;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -77,15 +76,14 @@ public class Assignment {
|
||||||
this.workerId = workerId;
|
this.workerId = workerId;
|
||||||
}
|
}
|
||||||
|
|
||||||
public Date getDate() {
|
public Timestamp getTimestamp() {
|
||||||
return date;
|
return timestamp;
|
||||||
}
|
}
|
||||||
|
|
||||||
public void setDate(Date date) {
|
public void setTimestamp(Timestamp timestamp) {
|
||||||
this.date = date;
|
this.timestamp = timestamp;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String toString() {
|
public String toString() {
|
||||||
return "Assignment{" +
|
return "Assignment{" +
|
||||||
|
@ -93,7 +91,7 @@ public class Assignment {
|
||||||
", originalUrl='" + originalUrl + '\'' +
|
", originalUrl='" + originalUrl + '\'' +
|
||||||
", datasource=" + datasource +
|
", datasource=" + datasource +
|
||||||
", workerId='" + workerId + '\'' +
|
", workerId='" + workerId + '\'' +
|
||||||
", date=" + date +
|
", timestamp=" + timestamp +
|
||||||
'}';
|
'}';
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -4,7 +4,7 @@ import com.fasterxml.jackson.annotation.JsonInclude;
|
||||||
import com.fasterxml.jackson.annotation.JsonProperty;
|
import com.fasterxml.jackson.annotation.JsonProperty;
|
||||||
import com.fasterxml.jackson.annotation.JsonPropertyOrder;
|
import com.fasterxml.jackson.annotation.JsonPropertyOrder;
|
||||||
|
|
||||||
import java.util.Date;
|
import java.sql.Timestamp;
|
||||||
|
|
||||||
|
|
||||||
@JsonInclude(JsonInclude.Include.NON_NULL)
|
@JsonInclude(JsonInclude.Include.NON_NULL)
|
||||||
|
@ -12,7 +12,7 @@ import java.util.Date;
|
||||||
"id",
|
"id",
|
||||||
"original_url",
|
"original_url",
|
||||||
"actual_url",
|
"actual_url",
|
||||||
"date",
|
"timestamp_acquired",
|
||||||
"mime_type",
|
"mime_type",
|
||||||
"size",
|
"size",
|
||||||
"hash",
|
"hash",
|
||||||
|
@ -30,8 +30,8 @@ public class Payload {
|
||||||
@JsonProperty("actual_url")
|
@JsonProperty("actual_url")
|
||||||
private String actual_url;
|
private String actual_url;
|
||||||
|
|
||||||
@JsonProperty("date")
|
@JsonProperty("timestamp_acquired")
|
||||||
private Date date_acquired;
|
private Timestamp timestamp_acquired;
|
||||||
|
|
||||||
@JsonProperty("mime_type")
|
@JsonProperty("mime_type")
|
||||||
private String mime_type;
|
private String mime_type;
|
||||||
|
@ -48,12 +48,13 @@ public class Payload {
|
||||||
@JsonProperty("provenance")
|
@JsonProperty("provenance")
|
||||||
private String provenance; // "crawl:<PluginName>"
|
private String provenance; // "crawl:<PluginName>"
|
||||||
|
|
||||||
|
public Payload() {}
|
||||||
|
|
||||||
public Payload(String id, String original_url, String actual_url, Date date_acquired, String mime_type, Long size, String hash, String location, String provenance) {
|
public Payload(String id, String original_url, String actual_url, Timestamp timestamp_acquired, String mime_type, Long size, String hash, String location, String provenance) {
|
||||||
this.id = id;
|
this.id = id;
|
||||||
this.original_url = original_url;
|
this.original_url = original_url;
|
||||||
this.actual_url = actual_url;
|
this.actual_url = actual_url;
|
||||||
this.date_acquired = date_acquired;
|
this.timestamp_acquired = timestamp_acquired;
|
||||||
this.mime_type = mime_type;
|
this.mime_type = mime_type;
|
||||||
this.size = size;
|
this.size = size;
|
||||||
this.hash = hash;
|
this.hash = hash;
|
||||||
|
@ -85,12 +86,12 @@ public class Payload {
|
||||||
this.actual_url = actual_url;
|
this.actual_url = actual_url;
|
||||||
}
|
}
|
||||||
|
|
||||||
public Date getDate_acquired() {
|
public Timestamp getTimestamp_acquired() {
|
||||||
return date_acquired;
|
return timestamp_acquired;
|
||||||
}
|
}
|
||||||
|
|
||||||
public void setDate_acquired(Date date_acquired) {
|
public void setTimestamp_acquired(Timestamp timestamp_acquired) {
|
||||||
this.date_acquired = date_acquired;
|
this.timestamp_acquired = timestamp_acquired;
|
||||||
}
|
}
|
||||||
|
|
||||||
public String getMime_type() {
|
public String getMime_type() {
|
||||||
|
@ -139,7 +140,7 @@ public class Payload {
|
||||||
"id='" + id + '\'' +
|
"id='" + id + '\'' +
|
||||||
", original_url='" + original_url + '\'' +
|
", original_url='" + original_url + '\'' +
|
||||||
", actual_url='" + actual_url + '\'' +
|
", actual_url='" + actual_url + '\'' +
|
||||||
", date_acquired='" + date_acquired + '\'' +
|
", timestamp_acquired='" + timestamp_acquired + '\'' +
|
||||||
", mime_type='" + mime_type + '\'' +
|
", mime_type='" + mime_type + '\'' +
|
||||||
", size='" + size + '\'' +
|
", size='" + size + '\'' +
|
||||||
", hash='" + hash + '\'' +
|
", hash='" + hash + '\'' +
|
||||||
|
|
|
@ -8,27 +8,27 @@ import java.util.List;
|
||||||
|
|
||||||
|
|
||||||
@JsonInclude(JsonInclude.Include.NON_NULL)
|
@JsonInclude(JsonInclude.Include.NON_NULL)
|
||||||
public class AssignmentRequest {
|
public class AssignmentsRequest {
|
||||||
|
|
||||||
@JsonProperty("assignmentCounter")
|
@JsonProperty("assignmentsCounter")
|
||||||
private Long assignmentCounter;
|
private Long assignmentsCounter;
|
||||||
|
|
||||||
@JsonProperty("assignments")
|
@JsonProperty("assignments")
|
||||||
private List<Assignment> assignments;
|
private List<Assignment> assignments;
|
||||||
|
|
||||||
public AssignmentRequest() { }
|
public AssignmentsRequest() { }
|
||||||
|
|
||||||
public AssignmentRequest(Long assignmentCounter, List<Assignment> assignments) {
|
public AssignmentsRequest(Long assignmentCounter, List<Assignment> assignments) {
|
||||||
this.assignmentCounter = assignmentCounter;
|
this.assignmentsCounter = assignmentCounter;
|
||||||
this.assignments = assignments;
|
this.assignments = assignments;
|
||||||
}
|
}
|
||||||
|
|
||||||
public Long getAssignmentCounter() {
|
public Long getAssignmentsCounter() {
|
||||||
return assignmentCounter;
|
return assignmentsCounter;
|
||||||
}
|
}
|
||||||
|
|
||||||
public void setAssignmentCounter(Long assignmentCounter) {
|
public void setAssignmentsCounter(Long assignmentsCounter) {
|
||||||
this.assignmentCounter = assignmentCounter;
|
this.assignmentsCounter = assignmentsCounter;
|
||||||
}
|
}
|
||||||
|
|
||||||
public List<Assignment> getAssignments() {
|
public List<Assignment> getAssignments() {
|
||||||
|
@ -41,8 +41,8 @@ public class AssignmentRequest {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String toString() {
|
public String toString() {
|
||||||
return "AssignmentRequest{" +
|
return "AssignmentsRequest{" +
|
||||||
"assignmentCounter=" + assignmentCounter +
|
"assignmentCounter=" + assignmentsCounter +
|
||||||
", assignments=" + assignments +
|
", assignments=" + assignments +
|
||||||
'}';
|
'}';
|
||||||
}
|
}
|
|
@ -14,13 +14,14 @@ import eu.openaire.urls_worker.models.Error;
|
||||||
import eu.openaire.urls_worker.models.Payload;
|
import eu.openaire.urls_worker.models.Payload;
|
||||||
import eu.openaire.urls_worker.models.UrlReport;
|
import eu.openaire.urls_worker.models.UrlReport;
|
||||||
import eu.openaire.urls_worker.services.FileStorageService;
|
import eu.openaire.urls_worker.services.FileStorageService;
|
||||||
import eu.openaire.urls_worker.util.AssignmentHandler;
|
import eu.openaire.urls_worker.util.AssignmentsHandler;
|
||||||
import eu.openaire.urls_worker.util.WorkerConstants;
|
import eu.openaire.urls_worker.util.WorkerConstants;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
import java.io.*;
|
import java.io.*;
|
||||||
import java.nio.charset.StandardCharsets;
|
import java.nio.charset.StandardCharsets;
|
||||||
|
import java.sql.Timestamp;
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
import java.util.concurrent.Callable;
|
import java.util.concurrent.Callable;
|
||||||
import java.util.concurrent.Executors;
|
import java.util.concurrent.Executors;
|
||||||
|
@ -136,7 +137,7 @@ public class PublicationsRetrieverPlugin {
|
||||||
|
|
||||||
public static void addUrlReportsToWorkerReport()
|
public static void addUrlReportsToWorkerReport()
|
||||||
{
|
{
|
||||||
Date date = new Date();
|
Timestamp timestamp = new Timestamp(System.currentTimeMillis()); // Store it here, in order to have the same for all current records.
|
||||||
|
|
||||||
for ( DataToBeLogged data : FileUtils.dataToBeLoggedList )
|
for ( DataToBeLogged data : FileUtils.dataToBeLoggedList )
|
||||||
{
|
{
|
||||||
|
@ -191,10 +192,10 @@ public class PublicationsRetrieverPlugin {
|
||||||
if ( (hash != null) && (hash.equals("null")) )
|
if ( (hash != null) && (hash.equals("null")) )
|
||||||
hash = null;
|
hash = null;
|
||||||
|
|
||||||
Payload payload = new Payload(data.getUrlId(), data.getSourceUrl(), docOrDatasetUrl, date, mimeType, size, hash, fileLocation, "crawl:PublicationsRetriever");
|
Payload payload = new Payload(data.getUrlId(), data.getSourceUrl(), docOrDatasetUrl, timestamp, mimeType, size, hash, fileLocation, "crawl:PublicationsRetriever");
|
||||||
// TODO - If support is added for other doc-formats other than "pdf", then make sure the "mime_type" is correctly specified.
|
// TODO - If support is added for other doc-formats other than "pdf", then make sure the "mime_type" is correctly specified.
|
||||||
|
|
||||||
AssignmentHandler.urlReports.add(new UrlReport(status, payload, error));
|
AssignmentsHandler.urlReports.add(new UrlReport(status, payload, error));
|
||||||
}// end-for
|
}// end-for
|
||||||
FileUtils.dataToBeLoggedList.clear(); // Empty the list, to be re-populated by the next batch / assignment.
|
FileUtils.dataToBeLoggedList.clear(); // Empty the list, to be re-populated by the next batch / assignment.
|
||||||
}
|
}
|
||||||
|
|
|
@ -5,7 +5,7 @@ import com.google.common.collect.Multimap;
|
||||||
import eu.openaire.urls_worker.UrlsWorkerApplication;
|
import eu.openaire.urls_worker.UrlsWorkerApplication;
|
||||||
import eu.openaire.urls_worker.models.Assignment;
|
import eu.openaire.urls_worker.models.Assignment;
|
||||||
import eu.openaire.urls_worker.models.UrlReport;
|
import eu.openaire.urls_worker.models.UrlReport;
|
||||||
import eu.openaire.urls_worker.payloads.requests.AssignmentRequest;
|
import eu.openaire.urls_worker.payloads.requests.AssignmentsRequest;
|
||||||
import eu.openaire.urls_worker.payloads.responces.WorkerReport;
|
import eu.openaire.urls_worker.payloads.responces.WorkerReport;
|
||||||
import eu.openaire.urls_worker.plugins.PublicationsRetrieverPlugin;
|
import eu.openaire.urls_worker.plugins.PublicationsRetrieverPlugin;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
|
@ -20,9 +20,9 @@ import java.time.Duration;
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
|
|
||||||
|
|
||||||
public class AssignmentHandler {
|
public class AssignmentsHandler {
|
||||||
|
|
||||||
private static final Logger logger = LoggerFactory.getLogger(AssignmentHandler.class);
|
private static final Logger logger = LoggerFactory.getLogger(AssignmentsHandler.class);
|
||||||
|
|
||||||
public static boolean isAvailableForWork = true;
|
public static boolean isAvailableForWork = true;
|
||||||
public static final List<UrlReport> urlReports = new ArrayList<>(WorkerConstants.ASSIGNMENTS_LIMIT);
|
public static final List<UrlReport> urlReports = new ArrayList<>(WorkerConstants.ASSIGNMENTS_LIMIT);
|
||||||
|
@ -37,14 +37,14 @@ public class AssignmentHandler {
|
||||||
|
|
||||||
public static final RestTemplate restTemplate = new RestTemplateBuilder().setConnectTimeout(requestConnectTimeoutDuration).setReadTimeout(requestReadTimeoutDuration).build();
|
public static final RestTemplate restTemplate = new RestTemplateBuilder().setConnectTimeout(requestConnectTimeoutDuration).setReadTimeout(requestReadTimeoutDuration).build();
|
||||||
|
|
||||||
public static AssignmentRequest requestAssignments()
|
public static AssignmentsRequest requestAssignments()
|
||||||
{
|
{
|
||||||
String requestUrl = UrlsWorkerApplication.controllerBaseUrl + "urls" + (askForTest ? "/test" : "") + "?workerId=" + UrlsWorkerApplication.workerId + "&workerAssignmentsLimit=" + WorkerConstants.ASSIGNMENTS_LIMIT;
|
String requestUrl = UrlsWorkerApplication.controllerBaseUrl + "urls" + (askForTest ? "/test" : "") + "?workerId=" + UrlsWorkerApplication.workerId + "&workerAssignmentsLimit=" + WorkerConstants.ASSIGNMENTS_LIMIT;
|
||||||
logger.info("Going to request assignments from the controller-server: " + requestUrl);
|
logger.info("Going to request assignments from the controller-server: " + requestUrl);
|
||||||
|
|
||||||
AssignmentRequest assignmentRequest = null;
|
AssignmentsRequest assignmentRequest = null;
|
||||||
try { // Here, the HTTP-request is executed.
|
try { // Here, the HTTP-request is executed.
|
||||||
assignmentRequest = restTemplate.getForObject(requestUrl, AssignmentRequest.class);
|
assignmentRequest = restTemplate.getForObject(requestUrl, AssignmentsRequest.class);
|
||||||
} catch (RestClientException rce) {
|
} catch (RestClientException rce) {
|
||||||
logger.error("Could not retrieve the assignments!\n" + rce.getMessage()); // It shows the response body (after Spring v.2.5.6).
|
logger.error("Could not retrieve the assignments!\n" + rce.getMessage()); // It shows the response body (after Spring v.2.5.6).
|
||||||
return null;
|
return null;
|
||||||
|
@ -57,12 +57,12 @@ public class AssignmentHandler {
|
||||||
|
|
||||||
public static void handleAssignments()
|
public static void handleAssignments()
|
||||||
{
|
{
|
||||||
AssignmentRequest assignmentRequest = requestAssignments();
|
AssignmentsRequest assignmentsRequest = requestAssignments();
|
||||||
if ( assignmentRequest == null )
|
if ( assignmentsRequest == null )
|
||||||
return;
|
return;
|
||||||
|
|
||||||
Long assignmentRequestCounter = assignmentRequest.getAssignmentCounter();
|
Long assignmentRequestCounter = assignmentsRequest.getAssignmentsCounter();
|
||||||
List<Assignment> assignments = assignmentRequest.getAssignments();
|
List<Assignment> assignments = assignmentsRequest.getAssignments();
|
||||||
if ( assignments == null ) {
|
if ( assignments == null ) {
|
||||||
logger.warn("The assignments were found to be null for assignmentRequestCounter = " + assignmentRequestCounter);
|
logger.warn("The assignments were found to be null for assignmentRequestCounter = " + assignmentRequestCounter);
|
||||||
return;
|
return;
|
Loading…
Reference in New Issue