2021-11-26 16:04:31 +01:00
package eu.openaire.urls_worker.controllers ;
2023-01-25 17:33:49 +01:00
import eu.openaire.urls_worker.components.plugins.PublicationsRetrieverPlugin ;
2021-11-26 16:04:31 +01:00
import eu.openaire.urls_worker.services.FileStorageService ;
2023-01-09 14:48:30 +01:00
import eu.openaire.urls_worker.util.FilesCompressor ;
2023-01-18 15:55:59 +01:00
import org.apache.commons.io.FileDeleteStrategy ;
2022-12-07 11:29:05 +01:00
import org.apache.commons.io.FileUtils ;
2021-11-26 16:04:31 +01:00
import org.slf4j.Logger ;
import org.slf4j.LoggerFactory ;
2023-03-07 15:21:32 +01:00
import org.springframework.beans.factory.annotation.Autowired ;
2021-12-17 07:24:09 +01:00
import org.springframework.core.io.InputStreamResource ;
2021-11-26 16:04:31 +01:00
import org.springframework.http.HttpHeaders ;
import org.springframework.http.MediaType ;
import org.springframework.http.ResponseEntity ;
import org.springframework.web.bind.annotation.GetMapping ;
import org.springframework.web.bind.annotation.PathVariable ;
import org.springframework.web.bind.annotation.RequestMapping ;
import org.springframework.web.bind.annotation.RestController ;
import java.io.File ;
2022-12-07 11:29:05 +01:00
import java.io.IOException ;
2022-09-12 15:48:44 +02:00
import java.nio.file.Files ;
import java.nio.file.Paths ;
2021-11-26 16:04:31 +01:00
import java.util.List ;
@RestController
@RequestMapping ( " full-texts/ " )
public class FullTextsController {
private static final Logger logger = LoggerFactory . getLogger ( GeneralController . class ) ;
2023-03-07 15:21:32 +01:00
@Autowired
private FileStorageService fileStorageService ;
2021-11-26 16:04:31 +01:00
2023-02-16 13:23:47 +01:00
@GetMapping ( " getFullTexts/{assignmentsCounter:[ \\ d]+}/{totalBatches:[ \\ d]+}/{batchCounter:[ \\ d]+}/{fileNamesWithExtensions} " )
public Object getFullTexts ( @PathVariable long assignmentsCounter , @PathVariable int totalBatches , @PathVariable int batchCounter , @PathVariable List < String > fileNamesWithExtensions ) {
2023-01-09 14:48:30 +01:00
int fileNamesListNum = fileNamesWithExtensions . size ( ) ;
if ( ( fileNamesListNum = = 1 ) & & ( fileNamesWithExtensions . get ( 0 ) . length ( ) = = 0 ) ) { // In case the last "/" in the url was given (without any files following), then this list will not be empty, but have one empty item instead.
// In case the url does not end in "/", then Spring will automatically return an "HTTP-BadRequest".
String errorMsg = " An empty \" fileNamesWithExtensions \" list was given from assignments_ " + assignmentsCounter + " , for batch_ " + batchCounter ;
logger . error ( errorMsg ) ;
return ResponseEntity . badRequest ( ) . body ( errorMsg ) ;
}
if ( totalBatches = = 0 ) {
String errorMsg = " The given \" totalBatches \" ( " + totalBatches + " ) was < 0 >! " ;
logger . error ( errorMsg ) ;
return ResponseEntity . badRequest ( ) . body ( errorMsg ) ;
}
else if ( batchCounter > totalBatches ) {
String errorMsg = " The given \" batchCounter \" ( " + batchCounter + " ) is greater than the \" totalBatches \" ( " + totalBatches + " )! " ;
logger . error ( errorMsg ) ;
return ResponseEntity . badRequest ( ) . body ( errorMsg ) ;
}
2023-02-16 13:23:47 +01:00
logger . info ( " Received a \" getFullTexts \" request for returning a tar-file containing " + fileNamesListNum + " full-texts, from assignments_ " + assignmentsCounter + " , for batch_ " + batchCounter + " (out of " + totalBatches + " ). " ) ;
2023-01-09 14:48:30 +01:00
2023-03-07 15:21:32 +01:00
String currentAssignmentsBaseFullTextsPath = fileStorageService . assignmentsBaseLocation + " assignments_ " + assignmentsCounter + " _fullTexts " + File . separator ;
2023-01-09 14:48:30 +01:00
if ( ! ( new File ( currentAssignmentsBaseFullTextsPath ) . isDirectory ( ) ) ) {
String errorMsg = " The base directory for assignments_ " + assignmentsCounter + " was not found: " + currentAssignmentsBaseFullTextsPath ;
logger . error ( errorMsg ) ;
return ResponseEntity . badRequest ( ) . body ( errorMsg ) ;
}
File zstdFile = FilesCompressor . compressMultipleFilesIntoOne ( assignmentsCounter , batchCounter , fileNamesWithExtensions , currentAssignmentsBaseFullTextsPath ) ;
if ( zstdFile = = null ) {
String errorMsg = " Failed to create the zstd file for \" batchCounter \" - " + batchCounter ;
logger . error ( errorMsg ) ;
return ResponseEntity . internalServerError ( ) . body ( errorMsg ) ;
2023-01-18 15:55:59 +01:00
// The related files will be deleted later, upon completing the Worker-report process, in "AssignmentsHandler.postWorkerReport()".
2023-01-09 14:48:30 +01:00
}
if ( batchCounter = = totalBatches )
logger . debug ( " Will return the " + ( ( totalBatches > 1 ) ? " last " : " only one " ) + " batch ( " + batchCounter + " ) of Assignments_ " + assignmentsCounter + " to the Controller. " ) ;
String zstdName = zstdFile . getName ( ) ;
String zstdTarFileFullPath = currentAssignmentsBaseFullTextsPath + zstdName ;
try {
return ResponseEntity . ok ( )
. contentType ( MediaType . APPLICATION_OCTET_STREAM )
. header ( HttpHeaders . CONTENT_DISPOSITION , " inline; filename= \" " + zstdName + " \" " )
. body ( new InputStreamResource ( Files . newInputStream ( Paths . get ( zstdTarFileFullPath ) ) ) ) ;
} catch ( Exception e ) {
String errorMsg = " Could not load the FileInputStream of the zstd-tar-file \" " + zstdTarFileFullPath + " \" ! " ;
logger . error ( errorMsg , e ) ;
return ResponseEntity . internalServerError ( ) . body ( errorMsg ) ;
2023-01-18 15:55:59 +01:00
} finally {
// In some cases, the full-texts might be too large and their total number too,
// so if we leave them be, and wait for all batches to finish, we may get a "java.io.IOException: No space left on device" error.
deleteFulltextBatchFiles ( currentAssignmentsBaseFullTextsPath , assignmentsCounter , batchCounter , fileNamesWithExtensions ) ;
// The ".tar.zstd" file of this batch, for which we pass a steam to the Controller, will be deleted after the next batch, or after all batches are transferred and handles by the Controller.
2023-01-09 14:48:30 +01:00
}
// The related fulltext and (zstd-)tar files will be deleted in "AssignmentsHandler.postWorkerReport()", after the Controller has finished transferring them. They will be deleted even in case of a Controller-error.
// In case of an error and file-deletion, the related id-url records will just be re-processed in the future by some (maybe different) Worker.
}
2021-12-17 07:24:09 +01:00
@GetMapping ( " getFullText/{assignmentsCounter:[ \\ d]+}/{fileNameWithExtension:[ \\ w_:]+.[ \\ w]{2,10}} " )
public ResponseEntity < ? > getFullText ( @PathVariable long assignmentsCounter , @PathVariable String fileNameWithExtension ) {
2021-11-26 16:04:31 +01:00
logger . info ( " Received a \" getFullText \" request. " ) ;
2023-03-07 15:21:32 +01:00
String fullTextFileFullPath = fileStorageService . assignmentsBaseLocation + " assignments_ " + assignmentsCounter + " _fullTexts " + File . separator + fileNameWithExtension ;
2021-12-13 14:29:03 +01:00
File file = new File ( fullTextFileFullPath ) ;
2021-11-26 16:04:31 +01:00
if ( ! file . isFile ( ) ) {
2021-12-13 14:29:03 +01:00
logger . error ( " The file \" " + fullTextFileFullPath + " \" does not exist! " ) ;
2021-11-26 16:04:31 +01:00
return ResponseEntity . notFound ( ) . build ( ) ;
}
2021-12-17 07:24:09 +01:00
try {
return ResponseEntity . ok ( )
. contentType ( MediaType . APPLICATION_OCTET_STREAM )
. header ( HttpHeaders . CONTENT_DISPOSITION , " inline; filename= \" " + file . getName ( ) + " \" " )
2022-09-12 15:48:44 +02:00
. body ( new InputStreamResource ( Files . newInputStream ( Paths . get ( fullTextFileFullPath ) ) ) ) ;
2021-12-17 07:24:09 +01:00
} catch ( Exception e ) {
String errorMsg = " Could not load the FileInputStream of the full-text-file \" " + fullTextFileFullPath + " \" ! " ;
logger . error ( errorMsg , e ) ;
return ResponseEntity . internalServerError ( ) . body ( errorMsg ) ;
2021-11-26 16:04:31 +01:00
}
}
2022-12-07 11:29:05 +01:00
public static boolean deleteDirectory ( long curAssignments )
{
String directoryPath = PublicationsRetrieverPlugin . assignmentsBasePath ;
if ( curAssignments ! = - 1 ) {
directoryPath + = " assignments_ " + curAssignments + " _fullTexts " + File . separator ;
logger . debug ( " Going to delete the files inside the directory of assignments_ " + curAssignments ) ;
} else
logger . debug ( " Going to delete the parent directory: " + directoryPath ) ;
try {
FileUtils . deleteDirectory ( new File ( directoryPath ) ) ;
return true ;
} catch ( IOException e ) {
logger . error ( " The following directory could not be deleted: " + directoryPath , e ) ;
return false ;
} catch ( IllegalArgumentException iae ) {
logger . error ( " This assignments-dir does not exist: " + directoryPath ) ;
return false ;
}
}
2023-01-18 15:55:59 +01:00
public static void deleteFulltextBatchFiles ( String assignmentsBatchDir , long assignmentsCounter , long fulltextsBatch , List < String > filenames )
{
// We will delete all the files related to the given fulltexts-batch, along with the created tar and zstd files.
for ( String fileName : filenames )
deleteFile ( assignmentsBatchDir + fileName ) ;
// Now let's delete the ".tar" and ".tar.zstd" files as well.
String partialNonBatchFileName = assignmentsBatchDir + " assignments_ " + assignmentsCounter + " _full-texts_ " ;
deleteFile ( partialNonBatchFileName + fulltextsBatch + " .tar " ) ;
// The ".tar.zstd" file of this batch will be deleted by the next batch or in the end of these assignments.
// Now we will delete the zstd file of the previous assignments.
if ( fulltextsBatch > = 2 )
deleteFile ( partialNonBatchFileName + ( fulltextsBatch - 1 ) + " .tar.zstd " ) ;
// We do not use a fulltexts-batch directory, since even if it makes the deletion faster, it will make the full-texts delivery to the controller slower,
// since we will need to move the requested full-texts to that directory before continuing with tarring and compressing the files and sending them over to the Controller.
// Also, we cannot pre-create such directories, since it will add complexity in the download process and also some of the full-texts may not be requested by the Controller (because of duplicates).
}
public static boolean deleteFile ( String fileFullPathString )
{
File currentFile = new File ( fileFullPathString ) ;
try {
FileDeleteStrategy . FORCE . delete ( currentFile ) ;
} catch ( IOException e ) {
logger . error ( " Error when deleting the file: " + fileFullPathString ) ;
return false ;
}
return true ;
}
2021-11-26 16:04:31 +01:00
}