- Delete any left-over full-texts after 36 hours.
- Upon shutting down, post a "shutdownReport" to the Controller.
This commit is contained in:
parent
903032f454
commit
9fdaa9503b
|
@ -123,6 +123,7 @@ public class AssignmentsHandler {
|
|||
return null;
|
||||
} catch (IllegalArgumentException iae) {
|
||||
logger.error("Could not retrieve the assignments, as the provided Controller's url was malformed!\n" + iae.getMessage());
|
||||
// We do not need to send a "ShutdownReport" to the Controller, since this error will appear upon the Worker's initialization and the Controller will not have any information about this Worker's existence.
|
||||
UrlsWorkerApplication.gentleAppShutdown();
|
||||
}
|
||||
//logger.debug(assignmentRequest.toString()); // DEBUG!
|
||||
|
|
|
@ -0,0 +1,46 @@
|
|||
package eu.openaire.urls_worker.components;
|
||||
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
import org.springframework.beans.factory.annotation.Value;
|
||||
import org.springframework.http.HttpStatus;
|
||||
import org.springframework.http.ResponseEntity;
|
||||
import org.springframework.stereotype.Component;
|
||||
import org.springframework.web.client.HttpServerErrorException;
|
||||
import org.springframework.web.client.RestTemplate;
|
||||
|
||||
|
||||
@Component
|
||||
public class ConnWithController {
|
||||
|
||||
private static final Logger logger = LoggerFactory.getLogger(ConnWithController.class);
|
||||
|
||||
private final String controllerBaseUrl;
|
||||
|
||||
|
||||
public ConnWithController(@Value("${info.controllerBaseUrl}") String controllerBaseUrl) {
|
||||
this.controllerBaseUrl = controllerBaseUrl;
|
||||
}
|
||||
|
||||
|
||||
public boolean postShutdownReportToController(String workerId)
|
||||
{
|
||||
logger.info("Going to \"postShutdownReportToController\".");
|
||||
try {
|
||||
ResponseEntity<String> responseEntity = new RestTemplate().postForEntity(this.controllerBaseUrl + "workerShutdownReport?workerId=" + workerId, null, String.class);
|
||||
int responseCode = responseEntity.getStatusCodeValue();
|
||||
if ( responseCode != HttpStatus.OK.value() ) {
|
||||
logger.error("HTTP-Connection problem with the submission of the \"postShutdownReportToController\"! Error-code was: " + responseCode);
|
||||
return false;
|
||||
}
|
||||
} catch (HttpServerErrorException hsee) {
|
||||
logger.error("The Controller failed to handle the \"postShutdownReportToController\": " + hsee.getMessage());
|
||||
return false;
|
||||
} catch (Exception e) {
|
||||
logger.error("Error for \"postShutdownReportToController\" to the Controller.", e);
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
}
|
|
@ -1,13 +1,21 @@
|
|||
package eu.openaire.urls_worker.components;
|
||||
|
||||
import eu.openaire.urls_worker.UrlsWorkerApplication;
|
||||
import eu.openaire.urls_worker.controllers.FullTextsController;
|
||||
import eu.openaire.urls_worker.controllers.GeneralController;
|
||||
import eu.openaire.urls_worker.services.FileStorageService;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.beans.factory.annotation.Value;
|
||||
import org.springframework.scheduling.annotation.Scheduled;
|
||||
import org.springframework.stereotype.Component;
|
||||
|
||||
import java.io.File;
|
||||
import java.util.Date;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
|
||||
@Component
|
||||
public class ScheduledTasks {
|
||||
|
@ -18,6 +26,19 @@ public class ScheduledTasks {
|
|||
@Autowired
|
||||
AssignmentsHandler assignmentsHandler;
|
||||
|
||||
@Autowired
|
||||
private FileStorageService fileStorageService;
|
||||
|
||||
@Autowired
|
||||
private ConnWithController connWithController;
|
||||
|
||||
@Value("${info.workerId}")
|
||||
private String workerId;
|
||||
|
||||
@Value("${workerReportsDirPath}")
|
||||
private String workerReportsDirPath;
|
||||
|
||||
|
||||
@Scheduled(fixedDelay = 1) // Request the next batch immediately after the last one finishes.
|
||||
public void handleNewAssignments() {
|
||||
if ( AssignmentsHandler.shouldNotRequestMore ) {
|
||||
|
@ -43,4 +64,123 @@ public class ScheduledTasks {
|
|||
assignmentsHandler.handleAssignments();
|
||||
}
|
||||
|
||||
|
||||
@Scheduled(initialDelay = 120_000, fixedDelay = 120_000) // TODO - MAKE THE NUMBER TO BE EQUAL TO 30 mins (in millis)
|
||||
public void checkIfShouldShutdown()
|
||||
{
|
||||
if ( !GeneralController.shouldShutdownWorker && !AssignmentsHandler.shouldNotRequestMore )
|
||||
return;
|
||||
|
||||
// Check if the full-texts have been delivered to the Controller.
|
||||
// In case some files have been left behind due to an error. DO not shutdown, but wait for the other scheduled task to clean the in the right time and then shutdown.
|
||||
|
||||
File fullTextsBaseDir = new File(fileStorageService.assignmentsBaseLocation);
|
||||
if ( fullTextsBaseDir.isDirectory() ) {
|
||||
File[] fulltextSubDirs = fullTextsBaseDir.listFiles(File::isDirectory);
|
||||
if ( fulltextSubDirs == null ) {
|
||||
logger.error("There was an error when getting the subDirs of \"fullTextsBaseDir\": " + fullTextsBaseDir);
|
||||
return;
|
||||
}
|
||||
if ( fulltextSubDirs.length > 0 ) {
|
||||
logger.warn("The base full-texts directory still has sub-directories with full-texts, wait for the Controller to take all the files or some time to past before they are deleted. Then the Worker will shut down.");
|
||||
return;
|
||||
} else
|
||||
logger.debug("The \"fullTextsBaseDir\" is empty. Shutting down..");
|
||||
} else
|
||||
logger.warn("The base full-texts directory was not found Shutting down..");
|
||||
|
||||
connWithController.postShutdownReportToController(workerId);
|
||||
UrlsWorkerApplication.gentleAppShutdown();
|
||||
}
|
||||
|
||||
|
||||
private static final Pattern ASSIGNMENTS_COUNTER = Pattern.compile(".*assignments_([\\d]+).*");
|
||||
|
||||
|
||||
private static final int hoursToWaitBeforeDeletion = 36;
|
||||
|
||||
@Scheduled(initialDelay = 60_000, fixedDelay = 60_000) // TODO - MAKE THE NUMBER TO BE EQUAL TO 6-HOURS (in millis)
|
||||
public void checkAndDeleteOldFiles() {
|
||||
// For any reason the Worker-report connection with the Controller may fail, but the Controller will continue requesting the full-text batches.
|
||||
|
||||
// Every X hours, check the last modification data of each "assignments_X_fulltexts" sub-directory.
|
||||
|
||||
// All sub-directories will have some files inside, as the duplicate files will not have been requested by the Controller, thus not been deleted after a batch.
|
||||
// Also, the last .zstd file will be inside.
|
||||
|
||||
// The way to know for which directory, we have a problem, is either by the amount of files or by the WorkerReport (in a separate directory).
|
||||
|
||||
// Even though we delete the full-texts batch-by-batch, some files may not have been previously deleted, since they may be duplicates of others found by previous assignments-batches
|
||||
// and thus, they may have not been requested by the Controller (and thus not deleted after transferring the batches).
|
||||
// Also, the ".tar.zstd" file of last batch will be deleted here, as well as the whole directory itself.
|
||||
|
||||
logger.debug("Going to check if any leftover full-texts exist and delete them.");
|
||||
|
||||
int usableDirsNum = 0;
|
||||
try {
|
||||
File fullTextsBaseDir = new File(fileStorageService.assignmentsBaseLocation);
|
||||
if ( fullTextsBaseDir.isDirectory() ) {
|
||||
logger.warn("The full-texts directory still exists, wait for the Controller to take all the files or some time to past before they are deleted. Then the Worker will shut down.");
|
||||
return;
|
||||
}
|
||||
|
||||
File[] fulltextSubDirs = fullTextsBaseDir.listFiles(File::isDirectory);
|
||||
if ( fulltextSubDirs == null ) {
|
||||
logger.error("There was an error when getting the subDirs of \"fullTextsBaseDir\": " + fullTextsBaseDir);
|
||||
return;
|
||||
}
|
||||
|
||||
usableDirsNum = fulltextSubDirs.length;
|
||||
if ( usableDirsNum == 0 ) {
|
||||
logger.debug("The \"fullTextsBaseDir\" is empty, so there is nothing to delete.");
|
||||
return;
|
||||
}
|
||||
|
||||
long currentTime = System.currentTimeMillis();
|
||||
|
||||
// Loop through the array and print only the directories
|
||||
for ( File subDir : fulltextSubDirs ) {
|
||||
long lastModified = subDir.lastModified();
|
||||
|
||||
if ( logger.isTraceEnabled() )
|
||||
logger.trace("The subDir \"" + subDir.getName() + "\" was last accessed in: " + new Date(lastModified));
|
||||
|
||||
// Get the difference in hours. /1000 to get seconds, /60 to get minutes and /60 to get hours.
|
||||
long elapsedHours = (currentTime - lastModified) / (1000 * 60 * 60);
|
||||
if ( elapsedHours > hoursToWaitBeforeDeletion ) {
|
||||
// Enough time has passed, the directory should be deleted immediately.
|
||||
String subDirName = subDir.getName();
|
||||
logger.warn("The subDir \"" + subDirName + "\" was accessed " + elapsedHours + " hours ago (passed the " + hoursToWaitBeforeDeletion + " hours limit) and will be deleted, along with the related WorkerReport.");
|
||||
FullTextsController.deleteDirectory(subDir);
|
||||
|
||||
// Extract the "assignmentsCounter" from subDir's name, in order to delete the right report file.
|
||||
Matcher matcher = ASSIGNMENTS_COUNTER.matcher(subDirName);
|
||||
if ( matcher.matches() ) {
|
||||
String assingmentsCounterString = matcher.group(1);
|
||||
if ( (assingmentsCounterString != null) && !assingmentsCounterString.isEmpty()) {
|
||||
if ( FullTextsController.deleteFile(this.workerReportsDirPath + this.workerId + "_assignments_" + assingmentsCounterString + "_report.json") )
|
||||
logger.warn("The subDir \"" + subDirName + "\" probably contains some failed file, since the workerReport for assignments_" + assingmentsCounterString + " was deleted only now, which means the Controller failed to successfully process the results of those assignments.");
|
||||
}
|
||||
else
|
||||
logger.error("The subDir \"" + subDirName + "\" has an invalid name! It does not contains the assignmentsCounter!");
|
||||
} else
|
||||
logger.error("The subDir \"" + subDirName + "\" has an invalid name! It could not be matched with regex: " + ASSIGNMENTS_COUNTER);
|
||||
usableDirsNum --; // Reduce the usableDirsNum even if some directories failed to be deleted, since the failed-dirs are not usable anyway.
|
||||
}
|
||||
}
|
||||
} catch (Exception e) {
|
||||
logger.error("", e);
|
||||
return;
|
||||
}
|
||||
|
||||
// After the cleanup of the remaining files, make sure we shutdown the Worker if it is desired.
|
||||
// Do this here, instead of waiting further, for the "checkIfShouldShutdown()" method to be called and shut it down.
|
||||
|
||||
if ( (GeneralController.shouldShutdownWorker || AssignmentsHandler.shouldNotRequestMore)
|
||||
&& (usableDirsNum == 0) ) { // Shutdown only if there are no "usable" directories left.
|
||||
connWithController.postShutdownReportToController(workerId);
|
||||
UrlsWorkerApplication.gentleAppShutdown();
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -10,6 +10,7 @@ import eu.openaire.publications_retriever.util.url.LoaderAndChecker;
|
|||
import eu.openaire.publications_retriever.util.url.UrlUtils;
|
||||
import eu.openaire.urls_worker.UrlsWorkerApplication;
|
||||
import eu.openaire.urls_worker.components.AssignmentsHandler;
|
||||
import eu.openaire.urls_worker.components.ConnWithController;
|
||||
import eu.openaire.urls_worker.models.Assignment;
|
||||
import eu.openaire.urls_worker.models.Error;
|
||||
import eu.openaire.urls_worker.models.Payload;
|
||||
|
@ -17,6 +18,7 @@ import eu.openaire.urls_worker.models.UrlReport;
|
|||
import eu.openaire.urls_worker.services.FileStorageService;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.beans.factory.annotation.Value;
|
||||
import org.springframework.stereotype.Component;
|
||||
|
||||
|
@ -37,6 +39,10 @@ public class PublicationsRetrieverPlugin {
|
|||
|
||||
private static final Logger logger = LoggerFactory.getLogger(PublicationsRetrieverPlugin.class);
|
||||
|
||||
@Autowired
|
||||
private ConnWithController connWithController;
|
||||
|
||||
|
||||
public static String assignmentsBasePath;
|
||||
|
||||
private static String workerId;
|
||||
|
@ -149,7 +155,8 @@ public class PublicationsRetrieverPlugin {
|
|||
|
||||
int numFailedTasks = LoaderAndChecker.invokeAllTasksAndWait(callableTasks);
|
||||
if ( numFailedTasks == -1 ) { // The unknown exception is logged inside the above method.
|
||||
System.err.println("Invoking and/or executing the callableTasks failed with the exception written in the log files!");
|
||||
System.err.println("Invoking and/or executing the callableTasks failed with the exception (which is written in the log files)!");
|
||||
connWithController.postShutdownReportToController(workerId);
|
||||
UrlsWorkerApplication.gentleAppShutdown();
|
||||
return; // Not really needed, but have it for code-readability.
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue