package eu.openaire.urls_worker.controllers; import eu.openaire.urls_worker.components.plugins.PublicationsRetrieverPlugin; import eu.openaire.urls_worker.services.FileStorageService; import eu.openaire.urls_worker.util.FilesCompressor; import org.apache.commons.io.FileDeleteStrategy; import org.apache.commons.io.FileUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.core.io.InputStreamResource; import org.springframework.http.HttpHeaders; import org.springframework.http.MediaType; import org.springframework.http.ResponseEntity; import org.springframework.web.bind.annotation.GetMapping; import org.springframework.web.bind.annotation.PathVariable; import org.springframework.web.bind.annotation.RequestMapping; import org.springframework.web.bind.annotation.RestController; import java.io.BufferedInputStream; import java.io.File; import java.io.IOException; import java.nio.file.Files; import java.nio.file.Paths; import java.util.List; @RestController @RequestMapping("full-texts/") public class FullTextsController { private static final Logger logger = LoggerFactory.getLogger(GeneralController.class); @Autowired private FileStorageService fileStorageService; @GetMapping("getFullTexts/{assignmentsCounter:[\\d]+}/{totalBatches:[\\d]+}/{batchCounter:[\\d]+}/{fileNamesWithExtensions}") public Object getFullTexts(@PathVariable long assignmentsCounter, @PathVariable int totalBatches, @PathVariable int batchCounter, @PathVariable List fileNamesWithExtensions) { int fileNamesListNum = fileNamesWithExtensions.size(); if ( (fileNamesListNum == 1) && (fileNamesWithExtensions.get(0).isEmpty()) ) { // In case the last "/" in the url was given (without any files following), then this list will not be empty, but have one empty item instead. // In case the url does not end in "/", then Spring will automatically return an "HTTP-BadRequest". String errorMsg = "An empty \"fileNamesWithExtensions\" list was given from assignments_" + assignmentsCounter + ", for batch_" + batchCounter; logger.error(errorMsg); return ResponseEntity.badRequest().body(errorMsg); } if ( totalBatches == 0 ) { String errorMsg = "The given \"totalBatches\" (" + totalBatches + ") was < 0 >!"; logger.error(errorMsg); return ResponseEntity.badRequest().body(errorMsg); } else if ( batchCounter > totalBatches ) { String errorMsg = "The given \"batchCounter\" (" + batchCounter + ") is greater than the \"totalBatches\" (" + totalBatches + ")!"; logger.error(errorMsg); return ResponseEntity.badRequest().body(errorMsg); } logger.info("Received a \"getFullTexts\" request for returning a \".tar.zstd\" file, containing " + fileNamesListNum + " full-texts, from assignments_" + assignmentsCounter + ", for batch_" + batchCounter + " (out of " + totalBatches + ")."); String currentAssignmentsBaseFullTextsPath = fileStorageService.assignmentsBaseLocation + "assignments_" + assignmentsCounter + "_fullTexts" + File.separator; if ( ! (new File(currentAssignmentsBaseFullTextsPath).isDirectory()) ) { String errorMsg = "The base directory for assignments_" + assignmentsCounter + " was not found: " + currentAssignmentsBaseFullTextsPath; logger.error(errorMsg); return ResponseEntity.badRequest().body(errorMsg); } File zstdFile = FilesCompressor.compressMultipleFilesIntoOne(assignmentsCounter, batchCounter, fileNamesWithExtensions, currentAssignmentsBaseFullTextsPath); if ( zstdFile == null ) { // The failed files (including the ".tar"), have already been deleted. String errorMsg = "Failed to create the zstd file for \"batchCounter\"-" + batchCounter; logger.error(errorMsg); return ResponseEntity.internalServerError().body(errorMsg); } if ( batchCounter == totalBatches ) logger.debug("Will return the " + ((totalBatches > 1) ? "last" : "only one") + " batch (" + batchCounter + ") of assignments_" + assignmentsCounter + " to the Controller."); String zstdName = zstdFile.getName(); String zstdTarFileFullPath = currentAssignmentsBaseFullTextsPath + zstdName; try { return ResponseEntity.ok() .contentType(MediaType.APPLICATION_OCTET_STREAM) .header(HttpHeaders.CONTENT_DISPOSITION, "inline; filename=\"" + zstdName + "\"") .body(new InputStreamResource(new BufferedInputStream(Files.newInputStream(Paths.get(zstdTarFileFullPath)), FilesCompressor.bufferSize))); } catch (Exception e) { String errorMsg = "Could not load the FileInputStream of the zstd-tar-file \"" + zstdTarFileFullPath + "\"!"; logger.error(errorMsg, e); return ResponseEntity.internalServerError().body(errorMsg); } finally { // The ".tar.zstd" file of this batch, for which we pass a steam to the Controller, will be deleted by the next batch or in the end of these assignments. // Now we will delete the zstd file of the previous assignments. int previousBatchCounter = (batchCounter -1); if ( previousBatchCounter >= 1 ) deleteFile(currentAssignmentsBaseFullTextsPath + "assignments_" + assignmentsCounter + "_full-texts_" + previousBatchCounter + ".tar.zstd"); } } @GetMapping("getFullText/{assignmentsCounter:[\\d]+}/{fileNameWithExtension:[\\w_:]+.[\\w]{2,10}}") public ResponseEntity getFullText(@PathVariable long assignmentsCounter, @PathVariable String fileNameWithExtension) { logger.info("Received a \"getFullText\" request."); String fullTextFileFullPath = fileStorageService.assignmentsBaseLocation + "assignments_" + assignmentsCounter + "_fullTexts" + File.separator + fileNameWithExtension; File file = new File(fullTextFileFullPath); if ( !file.isFile() ) { logger.error("The file \"" + fullTextFileFullPath + "\" does not exist!"); return ResponseEntity.notFound().build(); } try { return ResponseEntity.ok() .contentType(MediaType.APPLICATION_OCTET_STREAM) .header(HttpHeaders.CONTENT_DISPOSITION, "inline; filename=\"" + file.getName() + "\"") .body(new InputStreamResource(new BufferedInputStream(Files.newInputStream(Paths.get(fullTextFileFullPath)), FilesCompressor.bufferSize))); } catch (Exception e) { String errorMsg = "Could not load the FileInputStream of the full-text-file \"" + fullTextFileFullPath + "\"!"; logger.error(errorMsg, e); return ResponseEntity.internalServerError().body(errorMsg); } } public static boolean deleteAssignmentsDirectory(long curAssignments, File dir) { if ( dir == null ) { String directoryPath = PublicationsRetrieverPlugin.assignmentsBasePath; if ( curAssignments != -1 ) { directoryPath += "assignments_" + curAssignments + "_fullTexts"; logger.debug("Going to delete the files inside the directory of assignments_" + curAssignments); } else logger.debug("Going to delete the parent directory: " + directoryPath); dir = new File(directoryPath); } return deleteDirectory(dir); } public static boolean deleteDirectory(File directory) { try { FileUtils.deleteDirectory(directory); return true; } catch (IOException e) { logger.error("The following directory could not be deleted: " + directory.getPath(), e); return false; } catch (IllegalArgumentException iae) { logger.error("This directory does not exist: " + directory.getPath()); return false; } } public static boolean deleteFile(String fileFullPathString) { try { FileDeleteStrategy.FORCE.delete(new File(fileFullPathString)); } catch (IOException e) { logger.error("Error when deleting the file: " + fileFullPathString); return false; } return true; } }