From c46c8c448af3d58ff038cfe46c3b67ff0773bf01 Mon Sep 17 00:00:00 2001 From: LSmyrnaios Date: Fri, 17 Dec 2021 08:24:09 +0200 Subject: [PATCH] - Upgrade the zip-file delivery by using the "InputStreamResource". This way is more reliable, have better performance and uses less memory. - Use the "InputStreamResource" also in "get(single)FullText"-endpoint, in order to avoid loading a big full-text file in memory. - Decrease the system-reserved memory by 128 MB. - Fix path-variable regexes for "getFullText"-endpoint. - Optimize imports. - Code cleanup. --- installAndRun.sh | 2 +- .../components/ScheduledTasks.java | 3 +- .../controllers/FullTextsController.java | 62 ++++++++----------- .../controllers/GeneralController.java | 5 +- .../plugins/PublicationsRetrieverPlugin.java | 6 +- .../services/FileStorageService.java | 46 +++----------- .../urls_worker/util/AssignmentsHandler.java | 8 ++- 7 files changed, 48 insertions(+), 84 deletions(-) diff --git a/installAndRun.sh b/installAndRun.sh index e7d7cb6..07c6fae 100755 --- a/installAndRun.sh +++ b/installAndRun.sh @@ -65,7 +65,7 @@ if [[ justInstall -eq 0 ]]; then # Update the max-heap-size based on the machine's physical memory. machine_memory_mb=$(grep MemTotal /proc/meminfo | awk '{print $2}' | xargs -I {} echo "scale=4; {}/1024" | bc) # It returns the size in MB. - max_heap_size_mb=$(echo "($machine_memory_mb - 1024)/1" | bc) # Leave 1024 MB to the system (the "()/1" is used to take the floor value). + max_heap_size_mb=$(echo "($machine_memory_mb - 896)/1" | bc) # Leave 896 MB to the system (the "()/1" is used to take the floor value). # Now, we replace the "-Xmx" parameter inside the "./build.gradle" file, with "-Xmx${max_heap_size}m" echo -e "\n\nThe max-heap-size (-Xmx) will be set to: ${max_heap_size_mb}m\n\n" sed -i "s/'-Xmx[0-9]\+[gm]'/'-Xmx${max_heap_size_mb}m'/g" ./build.gradle diff --git a/src/main/java/eu/openaire/urls_worker/components/ScheduledTasks.java b/src/main/java/eu/openaire/urls_worker/components/ScheduledTasks.java index b17ce66..5091e5d 100644 --- a/src/main/java/eu/openaire/urls_worker/components/ScheduledTasks.java +++ b/src/main/java/eu/openaire/urls_worker/components/ScheduledTasks.java @@ -3,6 +3,7 @@ package eu.openaire.urls_worker.components; import eu.openaire.urls_worker.controllers.FullTextsController; import eu.openaire.urls_worker.plugins.PublicationsRetrieverPlugin; import eu.openaire.urls_worker.util.AssignmentsHandler; +import org.apache.commons.io.FileUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.springframework.scheduling.annotation.Scheduled; @@ -15,8 +16,6 @@ import java.util.Date; import java.util.Map; import java.util.Set; -import org.apache.commons.io.FileUtils; - @Component public class ScheduledTasks { diff --git a/src/main/java/eu/openaire/urls_worker/controllers/FullTextsController.java b/src/main/java/eu/openaire/urls_worker/controllers/FullTextsController.java index 1fcf4fd..bbc7e3a 100644 --- a/src/main/java/eu/openaire/urls_worker/controllers/FullTextsController.java +++ b/src/main/java/eu/openaire/urls_worker/controllers/FullTextsController.java @@ -4,19 +4,17 @@ import eu.openaire.urls_worker.services.FileStorageService; import eu.openaire.urls_worker.util.FilesZipper; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import org.springframework.core.io.Resource; +import org.springframework.core.io.InputStreamResource; import org.springframework.http.HttpHeaders; -import org.springframework.http.HttpStatus; import org.springframework.http.MediaType; import org.springframework.http.ResponseEntity; import org.springframework.web.bind.annotation.GetMapping; import org.springframework.web.bind.annotation.PathVariable; import org.springframework.web.bind.annotation.RequestMapping; import org.springframework.web.bind.annotation.RestController; -import org.springframework.web.servlet.mvc.method.annotation.StreamingResponseBody; -import javax.servlet.http.HttpServletRequest; import java.io.File; +import java.io.FileInputStream; import java.util.HashMap; import java.util.List; @@ -26,21 +24,18 @@ public class FullTextsController { private static final Logger logger = LoggerFactory.getLogger(GeneralController.class); - private final FileStorageService fileStorageService; - public static HashMap assignmentsNumsHandledAndLocallyDeleted = new HashMap<>(); public static String assignmentsBaseDir = null; - public FullTextsController(FileStorageService fileStorageService) { - this.fileStorageService = fileStorageService; + public FullTextsController() { assignmentsBaseDir = FileStorageService.assignmentsLocation.toString() + File.separator; } @GetMapping("getFullTexts/{assignmentsCounter:[\\d]+}/{totalZipBatches:[\\d]+}/{zipBatchCounter:[\\d]+}/{fileNamesWithExtensions}") - public ResponseEntity getMultipleFullTexts(@PathVariable long assignmentsCounter, @PathVariable int totalZipBatches, @PathVariable int zipBatchCounter, @PathVariable List fileNamesWithExtensions, HttpServletRequest request) { + public Object getMultipleFullTexts(@PathVariable long assignmentsCounter, @PathVariable int totalZipBatches, @PathVariable int zipBatchCounter, @PathVariable List fileNamesWithExtensions) { int fileNamesListNum = fileNamesWithExtensions.size(); if ( (fileNamesListNum == 1) && (fileNamesWithExtensions.get(0).length() == 0) ) { // In case the last "/" in the url was given, then this list will not be empty, but have one empty item instead. @@ -73,28 +68,29 @@ public class FullTextsController { return ResponseEntity.internalServerError().body(errorMsg); } - String zipName = zipFile.getName(); - String zipFileFullPath = currentAssignmentsBaseFullTextsPath + zipName; - StreamingResponseBody streamingResponseBody = this.fileStorageService.loadFileAsAStream(zipFileFullPath); - if ( streamingResponseBody == null ) - return ResponseEntity.status(HttpStatus.NOT_FOUND).body("Could not load zip-file: " + zipFileFullPath); - // If this is the last batch for this assignments-count, then make sure it is deleted in the next scheduled delete-operation. if ( zipBatchCounter == totalZipBatches ) { assignmentsNumsHandledAndLocallyDeleted.put(assignmentsCounter, false); logger.debug("Will return the last batch (" + zipBatchCounter + ") of Assignments_" + assignmentsCounter + " to the Controller and these assignments will be deleted later."); } - logger.info("Sending the zip file \"" + zipFileFullPath + "\"."); - return ResponseEntity.ok() - // Do not set any contentType here, Spring will handle it itself (otherwise it fails with "application/zip" and "application/octet-stream"). - .header(HttpHeaders.CONTENT_DISPOSITION, "inline; filename=\"" + zipName + "\"") - .body(streamingResponseBody); + String zipName = zipFile.getName(); + String zipFileFullPath = currentAssignmentsBaseFullTextsPath + zipName; + try { + return ResponseEntity.ok() + .contentType(MediaType.APPLICATION_OCTET_STREAM) + .header(HttpHeaders.CONTENT_DISPOSITION, "inline; filename=\"" + zipName + "\"") + .body(new InputStreamResource(new FileInputStream(zipFileFullPath))); + } catch (Exception e) { + String errorMsg = "Could not load the FileInputStream of the zip-file \"" + zipFileFullPath + "\"!"; + logger.error(errorMsg, e); + return ResponseEntity.internalServerError().body(errorMsg); + } } - @GetMapping("getFullText/{assignmentsCounter:[\\d]+]}/{fileNameWithExtension:[\\w]+.[\\w]{2,10}}") - public ResponseEntity getFullText(@PathVariable long assignmentsCounter, @PathVariable String fileNameWithExtension, HttpServletRequest request) { + @GetMapping("getFullText/{assignmentsCounter:[\\d]+}/{fileNameWithExtension:[\\w_:]+.[\\w]{2,10}}") + public ResponseEntity getFullText(@PathVariable long assignmentsCounter, @PathVariable String fileNameWithExtension) { logger.info("Received a \"getFullText\" request."); String fullTextFileFullPath = assignmentsBaseDir + "assignments_" + assignmentsCounter + "_fullTexts" + File.separator + fileNameWithExtension; @@ -104,20 +100,16 @@ public class FullTextsController { return ResponseEntity.notFound().build(); } - Resource resource = this.fileStorageService.loadFileAsResource(fullTextFileFullPath); - if ( resource == null ) - return ResponseEntity.internalServerError().body("Could not load file: " + fullTextFileFullPath); - - String contentType = null; - contentType = request.getServletContext().getMimeType(fullTextFileFullPath); - if ( contentType == null ) { - contentType = "application/octet-stream"; + try { + return ResponseEntity.ok() + .contentType(MediaType.APPLICATION_OCTET_STREAM) + .header(HttpHeaders.CONTENT_DISPOSITION, "inline; filename=\"" + file.getName() + "\"") + .body(new InputStreamResource(new FileInputStream(fullTextFileFullPath))); + } catch (Exception e) { + String errorMsg = "Could not load the FileInputStream of the full-text-file \"" + fullTextFileFullPath + "\"!"; + logger.error(errorMsg, e); + return ResponseEntity.internalServerError().body(errorMsg); } - - return ResponseEntity.ok() - .contentType(MediaType.parseMediaType(contentType)) - .header(HttpHeaders.CONTENT_DISPOSITION, "inline; filename=\"" + resource.getFilename() + "\"") - .body(resource); } } diff --git a/src/main/java/eu/openaire/urls_worker/controllers/GeneralController.java b/src/main/java/eu/openaire/urls_worker/controllers/GeneralController.java index e600191..3459d11 100644 --- a/src/main/java/eu/openaire/urls_worker/controllers/GeneralController.java +++ b/src/main/java/eu/openaire/urls_worker/controllers/GeneralController.java @@ -3,12 +3,13 @@ package eu.openaire.urls_worker.controllers; import eu.openaire.urls_worker.UrlsWorkerApplication; import eu.openaire.urls_worker.payloads.responces.WorkerResponse; import eu.openaire.urls_worker.util.AssignmentsHandler; -import eu.openaire.urls_worker.util.WorkerConstants; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.springframework.http.HttpStatus; import org.springframework.http.ResponseEntity; -import org.springframework.web.bind.annotation.*; +import org.springframework.web.bind.annotation.GetMapping; +import org.springframework.web.bind.annotation.RequestMapping; +import org.springframework.web.bind.annotation.RestController; import java.util.ArrayList; import java.util.List; diff --git a/src/main/java/eu/openaire/urls_worker/plugins/PublicationsRetrieverPlugin.java b/src/main/java/eu/openaire/urls_worker/plugins/PublicationsRetrieverPlugin.java index d57d55f..aea0be8 100644 --- a/src/main/java/eu/openaire/urls_worker/plugins/PublicationsRetrieverPlugin.java +++ b/src/main/java/eu/openaire/urls_worker/plugins/PublicationsRetrieverPlugin.java @@ -18,10 +18,12 @@ import eu.openaire.urls_worker.util.AssignmentsHandler; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.io.*; +import java.io.File; import java.nio.charset.StandardCharsets; import java.sql.Timestamp; -import java.util.*; +import java.util.ArrayList; +import java.util.Collection; +import java.util.List; import java.util.concurrent.Callable; import java.util.concurrent.Executors; diff --git a/src/main/java/eu/openaire/urls_worker/services/FileStorageService.java b/src/main/java/eu/openaire/urls_worker/services/FileStorageService.java index 410daf8..c0cf815 100644 --- a/src/main/java/eu/openaire/urls_worker/services/FileStorageService.java +++ b/src/main/java/eu/openaire/urls_worker/services/FileStorageService.java @@ -7,12 +7,14 @@ import org.springframework.beans.factory.annotation.Autowired; import org.springframework.core.io.Resource; import org.springframework.core.io.UrlResource; import org.springframework.stereotype.Service; -import org.springframework.web.servlet.mvc.method.annotation.StreamingResponseBody; -import java.io.*; -import java.nio.file.*; +import java.io.File; +import java.io.FileReader; +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; import java.util.Properties; -import java.util.concurrent.atomic.AtomicReference; @Service @@ -36,7 +38,7 @@ public class FileStorageService { System.exit(-10); } catch (IOException ioe) { logger.error("I/O error when reading the properties file!", ioe); - System.exit(-10); + System.exit(-11); } } @@ -51,40 +53,6 @@ public class FileStorageService { } - private static final int bufferSize = 10485760; // 10 MB - public StreamingResponseBody loadFileAsAStream(String fullFileName) - { - StreamingResponseBody streamingResponseBody = null; - AtomicReference fileInputStream = new AtomicReference<>(); - try { - streamingResponseBody = response -> { // This does not need to be explicitly closed. - fileInputStream.set(new FileInputStream(fullFileName)); - int bytesRead; - byte[] byteBuffer = new byte[bufferSize]; - FileInputStream fInS = fileInputStream.get(); - while ( (bytesRead = fInS.read(byteBuffer, 0, bufferSize)) > 0 ) - response.write(byteBuffer, 0, bytesRead); - }; - return streamingResponseBody; - } catch (Exception e) { - if ( e instanceof FileNotFoundException ) - logger.error("File \"" + fullFileName + "\" is not a file!"); - else - logger.error(e.getMessage(), e); - return null; - } finally { - FileInputStream fInS = fileInputStream.get(); - if ( fInS != null ) { - try { - fInS.close(); - } catch (IOException ex) { - logger.error("", ex); - } - } - } - } - - public Resource loadFileAsResource(String fullFileName) { try { Path filePath = assignmentsLocation.resolve(fullFileName).normalize(); diff --git a/src/main/java/eu/openaire/urls_worker/util/AssignmentsHandler.java b/src/main/java/eu/openaire/urls_worker/util/AssignmentsHandler.java index 9693f68..16bf88a 100644 --- a/src/main/java/eu/openaire/urls_worker/util/AssignmentsHandler.java +++ b/src/main/java/eu/openaire/urls_worker/util/AssignmentsHandler.java @@ -17,7 +17,9 @@ import org.springframework.web.client.RestClientException; import org.springframework.web.client.RestTemplate; import java.time.Duration; -import java.util.*; +import java.util.ArrayList; +import java.util.List; +import java.util.Set; public class AssignmentsHandler { @@ -27,7 +29,6 @@ public class AssignmentsHandler { public static boolean isAvailableForWork = true; public static List urlReports = null; private static final int expectedDatasourcesPerRequest = 1400; // Per 10_000 assignments. - private static int expectedAssignmentsPerDatasource = 0; public static Multimap assignmentsForPlugins = null; private static final boolean askForTest = false; // Enable this only for testing. @@ -41,10 +42,11 @@ public class AssignmentsHandler { public AssignmentsHandler() { urlReports = new ArrayList<>(UrlsWorkerApplication.maxAssignmentsLimitPerBatch); - expectedAssignmentsPerDatasource = (UrlsWorkerApplication.maxAssignmentsLimitPerBatch / expectedDatasourcesPerRequest); + int expectedAssignmentsPerDatasource = (UrlsWorkerApplication.maxAssignmentsLimitPerBatch / expectedDatasourcesPerRequest); assignmentsForPlugins = HashMultimap.create(expectedDatasourcesPerRequest, expectedAssignmentsPerDatasource); } + public static AssignmentsRequest requestAssignments() { String requestUrl = UrlsWorkerApplication.controllerBaseUrl + "urls" + (askForTest ? "/test" : "") + "?workerId=" + UrlsWorkerApplication.workerId + "&workerAssignmentsLimit=" + UrlsWorkerApplication.maxAssignmentsLimitPerBatch;