- When the Worker is about to shut-down, after deleting all the handled assignments' files, check for remaining full-texts in the local storage and warn the user. If no remaining files were found, then delete the parent fulltexts' directory.
- Polish the code.
This commit is contained in:
parent
6450a4b8ac
commit
90a69686cf
|
@ -2,7 +2,7 @@
|
||||||
|
|
||||||
The Worker's Application, requests assignments from the [Controller](https://code-repo.d4science.org/lsmyrnaios/UrlsController) and processes them, downloading the available full-texts.<br>
|
The Worker's Application, requests assignments from the [Controller](https://code-repo.d4science.org/lsmyrnaios/UrlsController) and processes them, downloading the available full-texts.<br>
|
||||||
Then, it posts the results to the Controller, which in turn, requests from the Worker, the full-texts which are not already found by other workers, in batches.<br>
|
Then, it posts the results to the Controller, which in turn, requests from the Worker, the full-texts which are not already found by other workers, in batches.<br>
|
||||||
The Worker responds by compressing and sending the requested files in each batch.<br>
|
The Worker responds by compressing and sending the requested files, in each batch.<br>
|
||||||
<br>
|
<br>
|
||||||
To install and run the application:
|
To install and run the application:
|
||||||
- Run ```git clone``` and then ```cd UrlsWorker```.
|
- Run ```git clone``` and then ```cd UrlsWorker```.
|
||||||
|
|
|
@ -27,6 +27,10 @@ dependencies {
|
||||||
|
|
||||||
implementation 'org.projectlombok:lombok:1.18.24'
|
implementation 'org.projectlombok:lombok:1.18.24'
|
||||||
|
|
||||||
|
// https://mvnrepository.com/artifact/commons-io/commons-io
|
||||||
|
implementation 'commons-io:commons-io:2.11.0'
|
||||||
|
|
||||||
|
|
||||||
//implementation group: 'io.jsonwebtoken', name: 'jjwt-api', version: '0.11.5' // Use this in case we use auth-tokens later on.
|
//implementation group: 'io.jsonwebtoken', name: 'jjwt-api', version: '0.11.5' // Use this in case we use auth-tokens later on.
|
||||||
|
|
||||||
// Enable the validation annotations.
|
// Enable the validation annotations.
|
||||||
|
|
|
@ -94,6 +94,7 @@ public class UrlsWorkerApplication {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
ScheduledTasks.isLastTime = true;
|
||||||
ScheduledTasks.deleteHandledAssignmentsFullTexts();
|
ScheduledTasks.deleteHandledAssignmentsFullTexts();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -13,8 +13,12 @@ import org.springframework.stereotype.Component;
|
||||||
|
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
import java.nio.file.Files;
|
||||||
|
import java.nio.file.Path;
|
||||||
|
import java.nio.file.Paths;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
|
import java.util.stream.Stream;
|
||||||
|
|
||||||
|
|
||||||
@Component
|
@Component
|
||||||
|
@ -41,10 +45,13 @@ public class ScheduledTasks {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
public static boolean isLastTime = false;
|
||||||
|
|
||||||
|
|
||||||
@Scheduled(fixedDelay = 43_200_000, initialDelay = 43_200_000) // Every 12 hours, after 12 hours from the start of this app.
|
@Scheduled(fixedDelay = 43_200_000, initialDelay = 43_200_000) // Every 12 hours, after 12 hours from the start of this app.
|
||||||
public static void deleteHandledAssignmentsFullTexts()
|
public static void deleteHandledAssignmentsFullTexts()
|
||||||
{
|
{
|
||||||
Set<Map.Entry<Long, Boolean>> entrySet = FullTextsController.assignmentsNumsHandledAndLocallyDeleted.entrySet();
|
Set<Map.Entry<Long, Boolean>> entrySet = FullTextsController.assignmentsNumsHandledAndApprovedToBeDeleted.entrySet();
|
||||||
if ( entrySet.isEmpty() )
|
if ( entrySet.isEmpty() )
|
||||||
return;
|
return;
|
||||||
|
|
||||||
|
@ -67,11 +74,36 @@ public class ScheduledTasks {
|
||||||
|
|
||||||
try {
|
try {
|
||||||
FileUtils.deleteDirectory(curDir);
|
FileUtils.deleteDirectory(curDir);
|
||||||
FullTextsController.assignmentsNumsHandledAndLocallyDeleted.put(curAssignments, true); // Set the is-handled to true.
|
FullTextsController.assignmentsNumsHandledAndApprovedToBeDeleted.put(curAssignments, true); // Set the is-handled to true.
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
logger.error("The following directory could not be deleted: " + currentAssignmentsBasePath, e);
|
logger.error("The following directory could not be deleted: " + currentAssignmentsBasePath, e);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if ( isLastTime ) { // Delete the parent "assignments" directory if no files are left behind.
|
||||||
|
// If something went wrong while delivering the full-texts to the Controller, the non-transferred files will remain in the Worker's local storage, so that the problem can be fixed later.
|
||||||
|
// So, delete the parent directory, only if it's empty!
|
||||||
|
|
||||||
|
logger.info("Going to delete the parent \"" + PublicationsRetrieverPlugin.assignmentsBasePath + "\" directory.");
|
||||||
|
|
||||||
|
boolean isAnEmptyDir = false;
|
||||||
|
try ( Stream<Path> stream = Files.list(Paths.get(PublicationsRetrieverPlugin.assignmentsBasePath)) ) {
|
||||||
|
isAnEmptyDir = ! stream.findAny().isPresent();
|
||||||
|
} catch (IOException e) {
|
||||||
|
logger.error("Could not list the contents of the parent directory: " + PublicationsRetrieverPlugin.assignmentsBasePath, e);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( isAnEmptyDir ) {
|
||||||
|
try {
|
||||||
|
FileUtils.deleteDirectory(new File(PublicationsRetrieverPlugin.assignmentsBasePath));
|
||||||
|
} catch (IOException e) {
|
||||||
|
logger.error("The following directory could not be deleted: " + PublicationsRetrieverPlugin.assignmentsBasePath, e);
|
||||||
|
}
|
||||||
|
} else
|
||||||
|
logger.warn("The parent directory \"" + PublicationsRetrieverPlugin.assignmentsBasePath + "\" was not empty! Which means there were some unhandled full-text batches!");
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -25,7 +25,7 @@ public class FullTextsController {
|
||||||
|
|
||||||
private static final Logger logger = LoggerFactory.getLogger(GeneralController.class);
|
private static final Logger logger = LoggerFactory.getLogger(GeneralController.class);
|
||||||
|
|
||||||
public static HashMap<Long, Boolean> assignmentsNumsHandledAndLocallyDeleted = new HashMap<>();
|
public static HashMap<Long, Boolean> assignmentsNumsHandledAndApprovedToBeDeleted = new HashMap<>();
|
||||||
|
|
||||||
public static String assignmentsBaseDir = null;
|
public static String assignmentsBaseDir = null;
|
||||||
|
|
||||||
|
@ -76,7 +76,7 @@ public class FullTextsController {
|
||||||
|
|
||||||
// If this is the last batch for this assignments-count, then make sure it is deleted in the next scheduled delete-operation.
|
// If this is the last batch for this assignments-count, then make sure it is deleted in the next scheduled delete-operation.
|
||||||
if ( zipBatchCounter == totalZipBatches ) {
|
if ( zipBatchCounter == totalZipBatches ) {
|
||||||
assignmentsNumsHandledAndLocallyDeleted.put(assignmentsCounter, false);
|
assignmentsNumsHandledAndApprovedToBeDeleted.put(assignmentsCounter, false);
|
||||||
logger.debug("Will return the " + ((totalZipBatches > 1) ? "last" : "only") + " batch (" + zipBatchCounter + ") of Assignments_" + assignmentsCounter + " to the Controller and these assignments will be deleted later.");
|
logger.debug("Will return the " + ((totalZipBatches > 1) ? "last" : "only") + " batch (" + zipBatchCounter + ") of Assignments_" + assignmentsCounter + " to the Controller and these assignments will be deleted later.");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -69,8 +69,8 @@ public class GeneralController {
|
||||||
@GetMapping("getHandledAssignmentsCounts")
|
@GetMapping("getHandledAssignmentsCounts")
|
||||||
public ResponseEntity<?> getHandledAssignmentsCounts()
|
public ResponseEntity<?> getHandledAssignmentsCounts()
|
||||||
{
|
{
|
||||||
List<Long> handledAssignmentsCounts = new ArrayList<>(FullTextsController.assignmentsNumsHandledAndLocallyDeleted.size()/2);
|
List<Long> handledAssignmentsCounts = new ArrayList<>(FullTextsController.assignmentsNumsHandledAndApprovedToBeDeleted.size()/2);
|
||||||
for ( Map.Entry<Long,Boolean> entry : FullTextsController.assignmentsNumsHandledAndLocallyDeleted.entrySet() )
|
for ( Map.Entry<Long,Boolean> entry : FullTextsController.assignmentsNumsHandledAndApprovedToBeDeleted.entrySet() )
|
||||||
{
|
{
|
||||||
if ( entry.getValue().equals(true) )
|
if ( entry.getValue().equals(true) )
|
||||||
handledAssignmentsCounts.add(entry.getKey());
|
handledAssignmentsCounts.add(entry.getKey());
|
||||||
|
|
|
@ -100,7 +100,7 @@ public class PublicationsRetrieverPlugin {
|
||||||
if ( (id == null) || id.isEmpty() || (url == null) || url.isEmpty() ) {
|
if ( (id == null) || id.isEmpty() || (url == null) || url.isEmpty() ) {
|
||||||
String errorMsg = "Got null or empty pair! ID=" + id + " , url=" + url;
|
String errorMsg = "Got null or empty pair! ID=" + id + " , url=" + url;
|
||||||
logger.warn(errorMsg);
|
logger.warn(errorMsg);
|
||||||
UrlUtils.logOutputData(id, url, null, "unreachable", "Discarded at loading time, due to input problems. " + errorMsg, null, true, "true", "false", "false", "false", "false", null, null);
|
UrlUtils.logOutputData(id, url, null, UrlUtils.unreachableDocOrDatasetUrlIndicator, "Discarded at loading time, due to input problems. " + errorMsg, null, true, "true", "false", "false", "false", "false", null, null);
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -112,7 +112,7 @@ public class PublicationsRetrieverPlugin {
|
||||||
String sourceUrl = urlToCheck; // Hold it here for the logging-messages.
|
String sourceUrl = urlToCheck; // Hold it here for the logging-messages.
|
||||||
if ( !sourceUrl.contains("#/") && (urlToCheck = URLCanonicalizer.getCanonicalURL(sourceUrl, null, StandardCharsets.UTF_8)) == null ) {
|
if ( !sourceUrl.contains("#/") && (urlToCheck = URLCanonicalizer.getCanonicalURL(sourceUrl, null, StandardCharsets.UTF_8)) == null ) {
|
||||||
logger.warn("Could not canonicalize url: " + sourceUrl);
|
logger.warn("Could not canonicalize url: " + sourceUrl);
|
||||||
UrlUtils.logOutputData(id, sourceUrl, null, "unreachable", "Discarded at loading time, due to canonicalization's problems.", null, true, "true", "false", "false", "false", "false", null, null);
|
UrlUtils.logOutputData(id, sourceUrl, null, UrlUtils.unreachableDocOrDatasetUrlIndicator, "Discarded at loading time, due to canonicalization's problems.", null, true, "true", "false", "false", "false", "false", null, null);
|
||||||
LoaderAndChecker.connProblematicUrls.incrementAndGet();
|
LoaderAndChecker.connProblematicUrls.incrementAndGet();
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
@ -137,7 +137,7 @@ public class PublicationsRetrieverPlugin {
|
||||||
List<String> list = LoaderAndChecker.getWasValidAndCouldRetry(e);
|
List<String> list = LoaderAndChecker.getWasValidAndCouldRetry(e);
|
||||||
String wasUrlValid = list.get(0);
|
String wasUrlValid = list.get(0);
|
||||||
String couldRetry = list.get(1);
|
String couldRetry = list.get(1);
|
||||||
UrlUtils.logOutputData(id, urlToCheck, null, "unreachable", "Discarded at loading time, due to connectivity problems.", null, true, "true", wasUrlValid, "false", "false", couldRetry, null, null);
|
UrlUtils.logOutputData(id, urlToCheck, null, UrlUtils.unreachableDocOrDatasetUrlIndicator, "Discarded at loading time, due to connectivity problems.", null, true, "true", wasUrlValid, "false", "false", couldRetry, null, null);
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
return true;
|
return true;
|
||||||
|
@ -272,7 +272,7 @@ public class PublicationsRetrieverPlugin {
|
||||||
List<String> list = LoaderAndChecker.getWasValidAndCouldRetry(e);
|
List<String> list = LoaderAndChecker.getWasValidAndCouldRetry(e);
|
||||||
String wasUrlValid = list.get(0);
|
String wasUrlValid = list.get(0);
|
||||||
String couldRetry = list.get(1);
|
String couldRetry = list.get(1);
|
||||||
UrlUtils.logOutputData(testID, urlToCheck, null, "unreachable", "Discarded at loading time, due to connectivity problems.", null, true, "true", wasUrlValid, "false", "false", couldRetry, null, null);
|
UrlUtils.logOutputData(testID, urlToCheck, null, UrlUtils.unreachableDocOrDatasetUrlIndicator, "Discarded at loading time, due to connectivity problems.", null, true, "true", wasUrlValid, "false", "false", couldRetry, null, null);
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -21,11 +21,11 @@ public class FilesZipper
|
||||||
|
|
||||||
public static File zipMultipleFilesAndGetZip(long assignmentsCounter, int zipBatchCounter, List<String> filesToZip, String baseDirectory)
|
public static File zipMultipleFilesAndGetZip(long assignmentsCounter, int zipBatchCounter, List<String> filesToZip, String baseDirectory)
|
||||||
{
|
{
|
||||||
String zipFilename = baseDirectory + "assignments_" + assignmentsCounter + "_full-texts_" + zipBatchCounter + ".zip";
|
String zipFileFullPath = baseDirectory + "assignments_" + assignmentsCounter + "_full-texts_" + zipBatchCounter + ".zip";
|
||||||
// For example: assignments_2_full-texts_4.zip | where < 4 > is referred to the 4th batch of files requested by the controller.
|
// For example: assignments_2_full-texts_4.zip | where < 4 > is referred to the 4th batch of files requested by the Controller.
|
||||||
|
|
||||||
int numZippedFiles = 0;
|
int numZippedFiles = 0;
|
||||||
File zipFile = new File(zipFilename);
|
File zipFile = new File(zipFileFullPath);
|
||||||
try ( ZipOutputStream zos = new ZipOutputStream(Files.newOutputStream(zipFile.toPath()), StandardCharsets.UTF_8) )
|
try ( ZipOutputStream zos = new ZipOutputStream(Files.newOutputStream(zipFile.toPath()), StandardCharsets.UTF_8) )
|
||||||
{
|
{
|
||||||
for ( String file : filesToZip ) {
|
for ( String file : filesToZip ) {
|
||||||
|
@ -33,10 +33,10 @@ public class FilesZipper
|
||||||
numZippedFiles ++;
|
numZippedFiles ++;
|
||||||
}
|
}
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
logger.error("Exception when creating the zip-file: " + zipFilename, e);
|
logger.error("Exception when creating the zip-file: " + zipFileFullPath, e);
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
logger.debug("Zipped " + numZippedFiles + " files for assignments_" + assignmentsCounter + ", batch_" + zipBatchCounter);
|
logger.debug("Zipped " + numZippedFiles + " (out of " + filesToZip.size() + ") files for assignments_" + assignmentsCounter + ", batch_" + zipBatchCounter);
|
||||||
return zipFile;
|
return zipFile;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -62,7 +62,7 @@ public class FilesZipper
|
||||||
} finally {
|
} finally {
|
||||||
if ( shouldCloseEntry ) {
|
if ( shouldCloseEntry ) {
|
||||||
try {
|
try {
|
||||||
zos.closeEntry(); // close the entry here (not the ZipOutputStream)
|
zos.closeEntry(); // close just the ZipEntry here (not the ZipOutputStream)
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
logger.error("", e);
|
logger.error("", e);
|
||||||
}
|
}
|
||||||
|
|
|
@ -11,6 +11,8 @@
|
||||||
|
|
||||||
# HTTP CONFIGURATION
|
# HTTP CONFIGURATION
|
||||||
server.port = 1881
|
server.port = 1881
|
||||||
|
# Set the above value to < 0 >, in order to choose a random port (it will automatically choose a new random port, if the previously chosen one is already in use).
|
||||||
|
|
||||||
|
|
||||||
# Server api path
|
# Server api path
|
||||||
server.servlet.context-path=/api
|
server.servlet.context-path=/api
|
||||||
|
|
Loading…
Reference in New Issue