diff --git a/src/main/java/eu/openaire/urls_worker/components/ScheduledTasks.java b/src/main/java/eu/openaire/urls_worker/components/ScheduledTasks.java index bcf5de5..917dc9e 100644 --- a/src/main/java/eu/openaire/urls_worker/components/ScheduledTasks.java +++ b/src/main/java/eu/openaire/urls_worker/components/ScheduledTasks.java @@ -197,7 +197,7 @@ public class ScheduledTasks { } } } catch (Exception e) { - logger.error("", e); + logger.error("Failed to check and delete leftover fulltext files!", e); return; } diff --git a/src/main/java/eu/openaire/urls_worker/controllers/FullTextsController.java b/src/main/java/eu/openaire/urls_worker/controllers/FullTextsController.java index d9270d0..7d9e632 100644 --- a/src/main/java/eu/openaire/urls_worker/controllers/FullTextsController.java +++ b/src/main/java/eu/openaire/urls_worker/controllers/FullTextsController.java @@ -28,7 +28,7 @@ import java.util.List; @RequestMapping("full-texts/") public class FullTextsController { - private static final Logger logger = LoggerFactory.getLogger(GeneralController.class); + private static final Logger logger = LoggerFactory.getLogger(FullTextsController.class); @Autowired private FileStorageService fileStorageService; @@ -68,8 +68,9 @@ public class FullTextsController { File zstdFile = FilesCompressor.compressMultipleFilesIntoOne(assignmentsCounter, batchCounter, fileNamesWithExtensions, currentAssignmentsBaseFullTextsPath); if ( zstdFile == null ) { - // The failed files (including the ".tar"), have already been deleted. - String errorMsg = "Failed to create the zstd file for \"batchCounter\"-" + batchCounter; + // The failed files (including the ".tar" and ".zstd"), have already been deleted. + deleteDirectory(new File(currentAssignmentsBaseFullTextsPath)); // Delete this assignments' directory. 
+ String errorMsg = "Failed to compress the full-text files for batch_" + batchCounter + ", assignments_" + assignmentsCounter; logger.error(errorMsg); return ResponseEntity.internalServerError().body(errorMsg); } diff --git a/src/main/java/eu/openaire/urls_worker/models/TarFileResult.java b/src/main/java/eu/openaire/urls_worker/models/TarFileResult.java new file mode 100644 index 0000000..72d39c8 --- /dev/null +++ b/src/main/java/eu/openaire/urls_worker/models/TarFileResult.java @@ -0,0 +1,23 @@ +package eu.openaire.urls_worker.models; + +import java.io.File; + +/** Immutable pair of a tar archive file and the number of files that were tarred into it. */ +public class TarFileResult { + + private final File tarFile; + private final int numTarredFiles; + + public TarFileResult(File tarFile, int numTarredFiles) { + this.tarFile = tarFile; + this.numTarredFiles = numTarredFiles; + } + + public File getTarFile() { + return tarFile; + } + + public int getNumTarredFiles() { + return numTarredFiles; + } +} diff --git a/src/main/java/eu/openaire/urls_worker/util/FilesCompressor.java b/src/main/java/eu/openaire/urls_worker/util/FilesCompressor.java index 5eb9971..95e754d 100644 --- a/src/main/java/eu/openaire/urls_worker/util/FilesCompressor.java +++ b/src/main/java/eu/openaire/urls_worker/util/FilesCompressor.java @@ -1,6 +1,7 @@ package eu.openaire.urls_worker.util; import eu.openaire.urls_worker.controllers.FullTextsController; +import eu.openaire.urls_worker.models.TarFileResult; import org.apache.commons.compress.archivers.tar.TarArchiveEntry; import org.apache.commons.compress.archivers.tar.TarArchiveOutputStream; import org.apache.commons.compress.compressors.zstandard.ZstdCompressorOutputStream; @@ -28,8 +29,11 @@ public class FilesCompressor { { // For example: assignments_2_full-texts_4.tar.zstd | where < 4 > is referred to the 4th batch of files requested by the Controller. 
File tarFile; + int numTarredFiles = 0; try { - tarFile = getTarArchiveWithFullTexts(filesToCompress, baseDirectory, assignmentsCounter, tarBatchCounter); + TarFileResult tarFileResult = getTarArchiveWithFullTexts(filesToCompress, baseDirectory, assignmentsCounter, tarBatchCounter); + tarFile = tarFileResult.getTarFile(); + numTarredFiles = tarFileResult.getNumTarredFiles(); } catch (Exception e) { logger.error("Exception when creating the tar-file for assignments_" + assignmentsCounter, e); return null; @@ -54,12 +58,15 @@ public class FilesCompressor { zOut.write(readByte); } catch (Exception e) { logger.error("Exception when compressing the tar-archive: " + tarFilePath, e); + // The ".zstd" file may have been partially created. It will be deleted, along with this assignments' directory, by the caller. return null; } finally { FullTextsController.deleteFile(tarFilePath); } - logger.debug("Finished archiving and compressing the full-texts of assignments_" + assignmentsCounter + ", batch_" + tarBatchCounter); + // At this point, the compressed files are the exact files included inside the tar archive, so the possible "missing-files" case will already have arisen, previously. + int totalFiles = filesToCompress.size(); + logger.debug("Finished archiving and compressing " + ((numTarredFiles == totalFiles) ? ("all " + totalFiles) : (numTarredFiles + " out of " + totalFiles)) + " full-texts of assignments_" + assignmentsCounter + ", batch_" + tarBatchCounter); return zStandardFile; } @@ -67,7 +74,7 @@ public class FilesCompressor { /** * This method adds the requested full-text file into a TAR archive, which later will be compressed. 
* */ - private static File getTarArchiveWithFullTexts(List filesToTar, String baseDir, long assignmentsCounter, int tarBatchCounter) throws Exception + private static TarFileResult getTarArchiveWithFullTexts(List filesToTar, String baseDir, long assignmentsCounter, int tarBatchCounter) throws Exception { String tarFileFullPath = baseDir + "assignments_" + assignmentsCounter + "_full-texts_" + tarBatchCounter + ".tar"; // For example: assignments_2_full-texts_4.tar.zstd | where < 4 > is referred to the 4th batch of files requested by the Controller. @@ -90,7 +97,7 @@ public class FilesCompressor { logger.warn("The number of \"numTarredFiles\" (" + numTarredFiles + ") is different from the number of files requested to be tarred (" + filesToTar.size() + "), for assignments_" + assignmentsCounter + ", batch_" + tarBatchCounter); // Still, some files may have been tarred, so we move on. It's up to the Controller, to handle such case. - return tarFile; + return new TarFileResult(tarFile, numTarredFiles); }