diff --git a/README.md b/README.md
index ca06072..7fcc352 100644
--- a/README.md
+++ b/README.md
@@ -2,7 +2,7 @@
The Worker's Application, requests assignments from the [Controller](https://code-repo.d4science.org/lsmyrnaios/UrlsController) and processes them, downloading the available full-texts.
Then, it posts the results to the Controller, which in turn, requests from the Worker, the full-texts which are not already found by other workers, in batches.
-The Worker responds by compressing and sending the requested files in each batch.
+For each batch, the Worker responds by compressing and sending the requested files.
To install and run the application:
- Run ```git clone``` and then ```cd UrlsWorker```.
diff --git a/build.gradle b/build.gradle
index b3dd6ec..c5316e3 100644
--- a/build.gradle
+++ b/build.gradle
@@ -27,6 +27,10 @@ dependencies {
implementation 'org.projectlombok:lombok:1.18.24'
+ // https://mvnrepository.com/artifact/commons-io/commons-io
+ implementation 'commons-io:commons-io:2.11.0'
+
+
//implementation group: 'io.jsonwebtoken', name: 'jjwt-api', version: '0.11.5' // Use this in case we use auth-tokens later on.
// Enable the validation annotations.
diff --git a/src/main/java/eu/openaire/urls_worker/UrlsWorkerApplication.java b/src/main/java/eu/openaire/urls_worker/UrlsWorkerApplication.java
index 89d8226..a639ed2 100644
--- a/src/main/java/eu/openaire/urls_worker/UrlsWorkerApplication.java
+++ b/src/main/java/eu/openaire/urls_worker/UrlsWorkerApplication.java
@@ -94,6 +94,7 @@ public class UrlsWorkerApplication {
}
}
+ ScheduledTasks.isLastTime = true;
ScheduledTasks.deleteHandledAssignmentsFullTexts();
}
diff --git a/src/main/java/eu/openaire/urls_worker/components/ScheduledTasks.java b/src/main/java/eu/openaire/urls_worker/components/ScheduledTasks.java
index 5c1d8ec..1d37d11 100644
--- a/src/main/java/eu/openaire/urls_worker/components/ScheduledTasks.java
+++ b/src/main/java/eu/openaire/urls_worker/components/ScheduledTasks.java
@@ -13,8 +13,12 @@ import org.springframework.stereotype.Component;
import java.io.File;
import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
import java.util.Map;
import java.util.Set;
+import java.util.stream.Stream;
@Component
@@ -41,10 +45,13 @@ public class ScheduledTasks {
}
+ public static boolean isLastTime = false;
+
+
@Scheduled(fixedDelay = 43_200_000, initialDelay = 43_200_000) // Every 12 hours, after 12 hours from the start of this app.
public static void deleteHandledAssignmentsFullTexts()
{
- Set> entrySet = FullTextsController.assignmentsNumsHandledAndLocallyDeleted.entrySet();
+ Set> entrySet = FullTextsController.assignmentsNumsHandledAndApprovedToBeDeleted.entrySet();
if ( entrySet.isEmpty() )
return;
@@ -67,11 +74,36 @@ public class ScheduledTasks {
try {
FileUtils.deleteDirectory(curDir);
- FullTextsController.assignmentsNumsHandledAndLocallyDeleted.put(curAssignments, true); // Set the is-handled to true.
+ FullTextsController.assignmentsNumsHandledAndApprovedToBeDeleted.put(curAssignments, true); // Set the is-handled to true.
} catch (IOException e) {
logger.error("The following directory could not be deleted: " + currentAssignmentsBasePath, e);
}
}
+
+ if ( isLastTime ) { // Delete the parent "assignments" directory if no files are left behind.
+ // In case something went wrong while delivering the full-texts to the Controller, the non-transferred files will remain in the Worker's local storage, for a future fix.
+ // So, delete the parent directory, only if it's empty!
+
+ logger.info("Going to delete the parent \"" + PublicationsRetrieverPlugin.assignmentsBasePath + "\" directory.");
+
+ boolean isAnEmptyDir = false;
+ try ( Stream stream = Files.list(Paths.get(PublicationsRetrieverPlugin.assignmentsBasePath)) ) {
+ isAnEmptyDir = ! stream.findAny().isPresent();
+ } catch (IOException e) {
+ logger.error("Could not list the contents of the parent directory: " + PublicationsRetrieverPlugin.assignmentsBasePath, e);
+ return;
+ }
+
+ if ( isAnEmptyDir ) {
+ try {
+ FileUtils.deleteDirectory(new File(PublicationsRetrieverPlugin.assignmentsBasePath));
+ } catch (IOException e) {
+ logger.error("The following directory could not be deleted: " + PublicationsRetrieverPlugin.assignmentsBasePath, e);
+ }
+ } else
+ logger.warn("The parent directory \"" + PublicationsRetrieverPlugin.assignmentsBasePath + "\" was not empty! Which means there were some unhandled full-text batches!");
+ }
+
}
}
diff --git a/src/main/java/eu/openaire/urls_worker/controllers/FullTextsController.java b/src/main/java/eu/openaire/urls_worker/controllers/FullTextsController.java
index 3ce1bbb..cd89c28 100644
--- a/src/main/java/eu/openaire/urls_worker/controllers/FullTextsController.java
+++ b/src/main/java/eu/openaire/urls_worker/controllers/FullTextsController.java
@@ -25,7 +25,7 @@ public class FullTextsController {
private static final Logger logger = LoggerFactory.getLogger(GeneralController.class);
- public static HashMap assignmentsNumsHandledAndLocallyDeleted = new HashMap<>();
+ public static HashMap assignmentsNumsHandledAndApprovedToBeDeleted = new HashMap<>();
public static String assignmentsBaseDir = null;
@@ -76,7 +76,7 @@ public class FullTextsController {
// If this is the last batch for this assignments-count, then make sure it is deleted in the next scheduled delete-operation.
if ( zipBatchCounter == totalZipBatches ) {
- assignmentsNumsHandledAndLocallyDeleted.put(assignmentsCounter, false);
+ assignmentsNumsHandledAndApprovedToBeDeleted.put(assignmentsCounter, false);
logger.debug("Will return the " + ((totalZipBatches > 1) ? "last" : "only") + " batch (" + zipBatchCounter + ") of Assignments_" + assignmentsCounter + " to the Controller and these assignments will be deleted later.");
}
diff --git a/src/main/java/eu/openaire/urls_worker/controllers/GeneralController.java b/src/main/java/eu/openaire/urls_worker/controllers/GeneralController.java
index e962a95..bf7d4f5 100644
--- a/src/main/java/eu/openaire/urls_worker/controllers/GeneralController.java
+++ b/src/main/java/eu/openaire/urls_worker/controllers/GeneralController.java
@@ -69,8 +69,8 @@ public class GeneralController {
@GetMapping("getHandledAssignmentsCounts")
public ResponseEntity> getHandledAssignmentsCounts()
{
- List handledAssignmentsCounts = new ArrayList<>(FullTextsController.assignmentsNumsHandledAndLocallyDeleted.size()/2);
- for ( Map.Entry entry : FullTextsController.assignmentsNumsHandledAndLocallyDeleted.entrySet() )
+ List handledAssignmentsCounts = new ArrayList<>(FullTextsController.assignmentsNumsHandledAndApprovedToBeDeleted.size()/2);
+ for ( Map.Entry entry : FullTextsController.assignmentsNumsHandledAndApprovedToBeDeleted.entrySet() )
{
if ( entry.getValue().equals(true) )
handledAssignmentsCounts.add(entry.getKey());
diff --git a/src/main/java/eu/openaire/urls_worker/plugins/PublicationsRetrieverPlugin.java b/src/main/java/eu/openaire/urls_worker/plugins/PublicationsRetrieverPlugin.java
index 8bdfc4e..de3704b 100644
--- a/src/main/java/eu/openaire/urls_worker/plugins/PublicationsRetrieverPlugin.java
+++ b/src/main/java/eu/openaire/urls_worker/plugins/PublicationsRetrieverPlugin.java
@@ -100,7 +100,7 @@ public class PublicationsRetrieverPlugin {
if ( (id == null) || id.isEmpty() || (url == null) || url.isEmpty() ) {
String errorMsg = "Got null or empty pair! ID=" + id + " , url=" + url;
logger.warn(errorMsg);
- UrlUtils.logOutputData(id, url, null, "unreachable", "Discarded at loading time, due to input problems. " + errorMsg, null, true, "true", "false", "false", "false", "false", null, null);
+ UrlUtils.logOutputData(id, url, null, UrlUtils.unreachableDocOrDatasetUrlIndicator, "Discarded at loading time, due to input problems. " + errorMsg, null, true, "true", "false", "false", "false", "false", null, null);
return false;
}
@@ -112,7 +112,7 @@ public class PublicationsRetrieverPlugin {
String sourceUrl = urlToCheck; // Hold it here for the logging-messages.
if ( !sourceUrl.contains("#/") && (urlToCheck = URLCanonicalizer.getCanonicalURL(sourceUrl, null, StandardCharsets.UTF_8)) == null ) {
logger.warn("Could not canonicalize url: " + sourceUrl);
- UrlUtils.logOutputData(id, sourceUrl, null, "unreachable", "Discarded at loading time, due to canonicalization's problems.", null, true, "true", "false", "false", "false", "false", null, null);
+ UrlUtils.logOutputData(id, sourceUrl, null, UrlUtils.unreachableDocOrDatasetUrlIndicator, "Discarded at loading time, due to canonicalization's problems.", null, true, "true", "false", "false", "false", "false", null, null);
LoaderAndChecker.connProblematicUrls.incrementAndGet();
return false;
}
@@ -137,7 +137,7 @@ public class PublicationsRetrieverPlugin {
List list = LoaderAndChecker.getWasValidAndCouldRetry(e);
String wasUrlValid = list.get(0);
String couldRetry = list.get(1);
- UrlUtils.logOutputData(id, urlToCheck, null, "unreachable", "Discarded at loading time, due to connectivity problems.", null, true, "true", wasUrlValid, "false", "false", couldRetry, null, null);
+ UrlUtils.logOutputData(id, urlToCheck, null, UrlUtils.unreachableDocOrDatasetUrlIndicator, "Discarded at loading time, due to connectivity problems.", null, true, "true", wasUrlValid, "false", "false", couldRetry, null, null);
return false;
}
return true;
@@ -272,7 +272,7 @@ public class PublicationsRetrieverPlugin {
List list = LoaderAndChecker.getWasValidAndCouldRetry(e);
String wasUrlValid = list.get(0);
String couldRetry = list.get(1);
- UrlUtils.logOutputData(testID, urlToCheck, null, "unreachable", "Discarded at loading time, due to connectivity problems.", null, true, "true", wasUrlValid, "false", "false", couldRetry, null, null);
+ UrlUtils.logOutputData(testID, urlToCheck, null, UrlUtils.unreachableDocOrDatasetUrlIndicator, "Discarded at loading time, due to connectivity problems.", null, true, "true", wasUrlValid, "false", "false", couldRetry, null, null);
return false;
}
}
diff --git a/src/main/java/eu/openaire/urls_worker/util/FilesZipper.java b/src/main/java/eu/openaire/urls_worker/util/FilesZipper.java
index b824d1b..2d24e46 100644
--- a/src/main/java/eu/openaire/urls_worker/util/FilesZipper.java
+++ b/src/main/java/eu/openaire/urls_worker/util/FilesZipper.java
@@ -21,11 +21,11 @@ public class FilesZipper
public static File zipMultipleFilesAndGetZip(long assignmentsCounter, int zipBatchCounter, List filesToZip, String baseDirectory)
{
- String zipFilename = baseDirectory + "assignments_" + assignmentsCounter + "_full-texts_" + zipBatchCounter + ".zip";
- // For example: assignments_2_full-texts_4.zip | where < 4 > is referred to the 4th batch of files requested by the controller.
+ String zipFileFullPath = baseDirectory + "assignments_" + assignmentsCounter + "_full-texts_" + zipBatchCounter + ".zip";
+ // For example: assignments_2_full-texts_4.zip | where < 4 > refers to the 4th batch of files requested by the Controller.
int numZippedFiles = 0;
- File zipFile = new File(zipFilename);
+ File zipFile = new File(zipFileFullPath);
try ( ZipOutputStream zos = new ZipOutputStream(Files.newOutputStream(zipFile.toPath()), StandardCharsets.UTF_8) )
{
for ( String file : filesToZip ) {
@@ -33,10 +33,10 @@ public class FilesZipper
numZippedFiles ++;
}
} catch (Exception e) {
- logger.error("Exception when creating the zip-file: " + zipFilename, e);
+ logger.error("Exception when creating the zip-file: " + zipFileFullPath, e);
return null;
}
- logger.debug("Zipped " + numZippedFiles + " files for assignments_" + assignmentsCounter + ", batch_" + zipBatchCounter);
+ logger.debug("Zipped " + numZippedFiles + " (out of " + filesToZip.size() + ") files for assignments_" + assignmentsCounter + ", batch_" + zipBatchCounter);
return zipFile;
}
@@ -62,7 +62,7 @@ public class FilesZipper
} finally {
if ( shouldCloseEntry ) {
try {
- zos.closeEntry(); // close the entry here (not the ZipOutputStream)
+ zos.closeEntry(); // close just the ZipEntry here (not the ZipOutputStream)
} catch (IOException e) {
logger.error("", e);
}
diff --git a/src/main/resources/application.properties b/src/main/resources/application.properties
index edb61fd..9ddd43b 100644
--- a/src/main/resources/application.properties
+++ b/src/main/resources/application.properties
@@ -11,6 +11,8 @@
# HTTP CONFIGURATION
server.port = 1881
+# Set the above value to < 0 >, in order to choose a random port (it will automatically choose a new random port, if the previously chosen one is already in use).
+
# Server api path
server.servlet.context-path=/api