- Optimize the "FilesZipper.zipMultipleFilesAndGetZip()" and "FilesZipper.zipAFile()" methods.
- Improve the "getMultipleFullTexts"-endpoint: check if the "fileNamesWithExtensions"-list is empty, and check if the baseDir for the fullTexts of a given assignments-counter is missing. - Optimize the "PublicationsRetrieverPlugin.processAssignments()" method. - Set a max-size limit on the amount of space the logs can use. Above that size, the oldest logs will be deleted. - Show the heap size at startup. - Update Gradle. - Code cleanup.
This commit is contained in:
parent
212f8f377d
commit
018326eedd
|
@ -1,5 +1,5 @@
|
|||
distributionBase=GRADLE_USER_HOME
|
||||
distributionPath=wrapper/dists
|
||||
distributionUrl=https\://services.gradle.org/distributions/gradle-7.3-bin.zip
|
||||
distributionUrl=https\://services.gradle.org/distributions/gradle-7.3.1-bin.zip
|
||||
zipStoreBase=GRADLE_USER_HOME
|
||||
zipStorePath=wrapper/dists
|
||||
|
|
|
@ -25,7 +25,7 @@ if [[ ! -f $inputDataFile ]]; then
|
|||
echo -e "\n\n"
|
||||
fi
|
||||
|
||||
gradleVersion="7.3"
|
||||
gradleVersion="7.3.1"
|
||||
|
||||
if [[ justInstall -eq 0 ]]; then
|
||||
|
||||
|
|
|
@ -43,6 +43,11 @@ public class UrlsWorkerApplication {
|
|||
new PublicationsRetrieverPlugin();
|
||||
|
||||
SpringApplication.run(UrlsWorkerApplication.class, args);
|
||||
|
||||
Runtime javaRuntime = Runtime.getRuntime();
|
||||
logger.debug("HeapSize: " + javaRuntime.totalMemory());
|
||||
logger.debug("HeapMaxSize: " + javaRuntime.maxMemory());
|
||||
logger.debug("HeapFreeSize: " + javaRuntime.freeMemory());
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -42,10 +42,24 @@ public class FullTextsController {
|
|||
@GetMapping("getFullTexts/{assignmentsCounter:[\\d]+}/{totalZipBatches:[\\d]+}/{zipBatchCounter:[\\d]+}/{fileNamesWithExtensions}")
|
||||
public ResponseEntity<?> getMultipleFullTexts(@PathVariable long assignmentsCounter, @PathVariable int totalZipBatches, @PathVariable int zipBatchCounter, @PathVariable List<String> fileNamesWithExtensions, HttpServletRequest request) {
|
||||
|
||||
logger.info("Received a \"getMultipleFullTexts\" request for returning a zip-file containing " + fileNamesWithExtensions.size() + " full-texts, from assignments-" + assignmentsCounter + ", for batch-" + zipBatchCounter);
|
||||
int fileNamesListNum = fileNamesWithExtensions.size();
|
||||
if ( (fileNamesListNum == 1) && (fileNamesWithExtensions.get(0).length() == 0) ) { // In case the last "/" in the url was given, then this list will not be empty, but have one empty item instead.
|
||||
// In case the url does not end in "/", then Spring will automatically return an "HTTP-BadRequest".
|
||||
String errorMsg = "An empty \"fileNamesWithExtensions\" list was given from assignments_" + assignmentsCounter + ", for batch_" + zipBatchCounter;
|
||||
logger.warn(errorMsg);
|
||||
return ResponseEntity.badRequest().body(errorMsg);
|
||||
}
|
||||
|
||||
logger.info("Received a \"getMultipleFullTexts\" request for returning a zip-file containing " + fileNamesListNum + " full-texts, from assignments_" + assignmentsCounter + ", for batch_" + zipBatchCounter);
|
||||
|
||||
String currentAssignmentsBaseFullTextsPath = assignmentsBaseDir + "assignments_" + assignmentsCounter + "_fullTexts" + File.separator;
|
||||
|
||||
if ( ! (new File(currentAssignmentsBaseFullTextsPath).isDirectory()) ) {
|
||||
String errorMsg = "The base directory for assignments_" + assignmentsCounter + " was not found: " + currentAssignmentsBaseFullTextsPath;
|
||||
logger.error(errorMsg);
|
||||
return ResponseEntity.badRequest().body(errorMsg);
|
||||
}
|
||||
|
||||
File zipFile = FilesZipper.zipMultipleFilesAndGetZip(assignmentsCounter, zipBatchCounter, fileNamesWithExtensions, currentAssignmentsBaseFullTextsPath);
|
||||
if ( zipFile == null ) {
|
||||
String errorMsg = "Failed to create the zip file for \"zipBatchCounter\"-" + zipBatchCounter;
|
||||
|
|
|
@ -59,23 +59,25 @@ public class PublicationsRetrieverPlugin {
|
|||
|
||||
private static final List<Callable<Boolean>> callableTasks = new ArrayList<>(FileUtils.jsonBatchSize);
|
||||
|
||||
public static void processAssignments(Long assignmentRequestCounter, Collection<Assignment> assignments) throws RuntimeException, FileNotFoundException
|
||||
public static void processAssignments(Long assignmentRequestCounter, Collection<Assignment> assignments) throws RuntimeException
|
||||
{
|
||||
FileUtils.storeDocFilesDir = assignmentsBasePath + "assignments_" + assignmentRequestCounter + "_fullTexts" + File.separator; // It needs the last separator, because of how the docFiles are named and stored.
|
||||
|
||||
File curAssignmentsDirs = new File(FileUtils.storeDocFilesDir);
|
||||
if ( !curAssignmentsDirs.exists() ) {
|
||||
if ( !curAssignmentsDirs.mkdirs() ) { // Create the directories.
|
||||
String workingDir = System.getProperty("user.dir") + File.separator;
|
||||
logger.error("Could not create the \"assignments_fullTexts directories\": \"" + FileUtils.storeDocFilesDir + "\". Using the \"workingDir\" instead (" + workingDir + ").");
|
||||
FileUtils.storeDocFilesDir = assignmentsBasePath = workingDir;
|
||||
try {
|
||||
if ( !curAssignmentsDirs.exists() ) {
|
||||
if ( !curAssignmentsDirs.mkdirs() ) { // Create the directories.
|
||||
String workingDir = System.getProperty("user.dir") + File.separator;
|
||||
logger.error("Could not create the \"assignments_fullTexts directories\": \"" + FileUtils.storeDocFilesDir + "\". Using the \"workingDir\" instead (" + workingDir + ").");
|
||||
FileUtils.storeDocFilesDir = assignmentsBasePath = workingDir;
|
||||
}
|
||||
}
|
||||
} catch (Exception e) {
|
||||
String errorMsg = "Failed to create the full-texts directory for assignments_" + assignmentRequestCounter;
|
||||
logger.error(errorMsg, e);
|
||||
throw new RuntimeException(errorMsg + ": " + e.getMessage());
|
||||
}
|
||||
|
||||
int tasksNumber = assignments.size();
|
||||
int batchCount = 0;
|
||||
int tasksCount = 0;
|
||||
|
||||
// Start loading and checking urls.
|
||||
for ( Assignment assignment : assignments )
|
||||
{
|
||||
|
@ -121,17 +123,13 @@ public class PublicationsRetrieverPlugin {
|
|||
}
|
||||
return true;
|
||||
});
|
||||
}
|
||||
|
||||
// Invoke the tasks every time we reach the "jsonBatchSize" tasks, or we are at the end of the list.
|
||||
tasksCount ++;
|
||||
if ( (tasksCount == FileUtils.jsonBatchSize) || (tasksCount == tasksNumber) )
|
||||
{
|
||||
logger.info("Batch counter: " + (++batchCount) + " | progress: " + PublicationsRetriever.df.format((batchCount * tasksCount) * 100.0 / tasksNumber) + "% | every batch contains " + FileUtils.jsonBatchSize + " id-url pairs.");
|
||||
LoaderAndChecker.invokeAllTasksAndWait(callableTasks);
|
||||
addUrlReportsToWorkerReport();
|
||||
callableTasks.clear(); // Reset the thread-tasks-list for the next batch.
|
||||
}
|
||||
}// end tasks-for-loop
|
||||
int numFailedTasks = LoaderAndChecker.invokeAllTasksAndWait(callableTasks);
|
||||
if ( numFailedTasks > 0 )
|
||||
logger.warn(numFailedTasks + " tasks failed!");
|
||||
addUrlReportsToWorkerReport();
|
||||
callableTasks.clear(); // Reset the thread-tasks-list for the next batch.
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -103,8 +103,8 @@ public class AssignmentsHandler {
|
|||
try {
|
||||
PublicationsRetrieverPlugin.processAssignments(assignmentRequestCounter, assignmentsForPlugins.values());
|
||||
} catch (Exception e) {
|
||||
logger.error(e.getMessage(), e);
|
||||
}
|
||||
logger.error("Exception when processing the assignments_" + assignmentRequestCounter, e);
|
||||
} // In this case, we will either have an empty WorkerReport or a half-filled one. Either way, we want to report back to the Controller.
|
||||
|
||||
if ( askForTest ) {
|
||||
logger.debug("UrlReports:"); // DEBUG!
|
||||
|
@ -115,9 +115,10 @@ public class AssignmentsHandler {
|
|||
postWorkerReport(assignmentRequestCounter);
|
||||
|
||||
isAvailableForWork = true; // State this after posting, to avoid breaking the "UrlReports" in the current or the next run.
|
||||
// Also, since the worker has limited resources, it's better to finish sending the full-texts first and then request a new batch of assignments.
|
||||
|
||||
// Note: Cannot call this method here recursively, because if it runs 100s of times, the call stack may overflow.
|
||||
// The scheduler will handle calling it every half an hour, in case the Worker is available for work..
|
||||
// The scheduler will handle calling it every 15 mins, in case the Worker is available for work..
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -4,6 +4,7 @@ import org.slf4j.Logger;
|
|||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import java.io.*;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.util.List;
|
||||
import java.util.zip.ZipEntry;
|
||||
import java.util.zip.ZipOutputStream;
|
||||
|
@ -16,47 +17,35 @@ public class FilesZipper
|
|||
|
||||
public static File zipMultipleFilesAndGetZip(long assignmentsCounter, int zipBatchCounter, List<String> filesToZip, String baseDirectory)
|
||||
{
|
||||
File zipFile = null;
|
||||
ZipOutputStream zos = null;
|
||||
try {
|
||||
String zipFilename = baseDirectory + "assignments_" + assignmentsCounter + "_full-texts_" + zipBatchCounter + ".zip";
|
||||
// For example: assignments_2_full-texts_4.zip | where < 4 > is referred to the 4th batch of files requested by the controller.
|
||||
zipFile = new File(zipFilename);
|
||||
zos = new ZipOutputStream(new FileOutputStream(zipFile));
|
||||
String zipFilename = baseDirectory + "assignments_" + assignmentsCounter + "_full-texts_" + zipBatchCounter + ".zip";
|
||||
// For example: assignments_2_full-texts_4.zip | where < 4 > is referred to the 4th batch of files requested by the controller.
|
||||
|
||||
// Iterate over the given full-texts and add them to the zip.
|
||||
for ( String file : filesToZip )
|
||||
{
|
||||
File zipFile = new File(zipFilename);
|
||||
try ( ZipOutputStream zos = new ZipOutputStream(new FileOutputStream(zipFile), StandardCharsets.UTF_8) )
|
||||
{
|
||||
for ( String file : filesToZip ) {
|
||||
zipAFile(file, zos, baseDirectory);
|
||||
}
|
||||
} catch (Exception e) {
|
||||
logger.error("", e);
|
||||
logger.error("Exception when creating the zip-file: " + zipFilename, e);
|
||||
return null;
|
||||
} finally {
|
||||
try {
|
||||
if ( zos != null )
|
||||
zos.close();
|
||||
} catch (IOException e) {
|
||||
logger.error(e.getMessage(), e);
|
||||
}
|
||||
}
|
||||
return zipFile;
|
||||
}
|
||||
|
||||
|
||||
private static boolean zipAFile(String fileName, ZipOutputStream zos, String baseDir)
|
||||
private static final int BUFFER_SIZE = 3145728; // 3MB (average fullText-size)
|
||||
private static final byte[] dataBuffer = new byte[BUFFER_SIZE];
|
||||
|
||||
// This method is "synchronized" to avoid any future problems with shared-buffer, if the requests are asynchronous.
|
||||
private static synchronized boolean zipAFile(String fileName, ZipOutputStream zos, String baseDir)
|
||||
{
|
||||
final int BUFFER = 1048576; // 1 MB
|
||||
byte[] data = new byte[BUFFER];
|
||||
BufferedInputStream bis = null;
|
||||
String fullFileName = baseDir + fileName;
|
||||
try {
|
||||
FileInputStream fis = new FileInputStream(fullFileName);
|
||||
bis = new BufferedInputStream(fis, BUFFER);
|
||||
try ( BufferedInputStream bis = new BufferedInputStream(new FileInputStream(fullFileName), BUFFER_SIZE) ) {
|
||||
zos.putNextEntry(new ZipEntry(fileName));
|
||||
int count;
|
||||
while ( (count = bis.read(data, 0, BUFFER)) != -1 ) {
|
||||
zos.write(data, 0, count);
|
||||
while ( (count = bis.read(dataBuffer, 0, BUFFER_SIZE)) != -1 ) {
|
||||
zos.write(dataBuffer, 0, count);
|
||||
}
|
||||
zos.closeEntry(); // close the entry here (not the ZipOutputStream)
|
||||
} catch (FileNotFoundException fnfe) {
|
||||
|
@ -66,13 +55,6 @@ public class FilesZipper
|
|||
if ( ! e.getMessage().contains("duplicate") )
|
||||
logger.error("Error zipping file: " + fullFileName, e);
|
||||
return false;
|
||||
} finally {
|
||||
try {
|
||||
if ( bis != null )
|
||||
bis.close();
|
||||
} catch (IOException e) {
|
||||
logger.error(e.getMessage(), e);
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
|
|
@ -1,10 +1,12 @@
|
|||
<configuration debug="false">
|
||||
|
||||
<appender name="File" class="ch.qos.logback.core.rolling.RollingFileAppender">
|
||||
<appender name="RollingFile" class="ch.qos.logback.core.rolling.RollingFileAppender">
|
||||
<file>logs/UrlsWorker.log</file>
|
||||
|
||||
<rollingPolicy class="ch.qos.logback.core.rolling.FixedWindowRollingPolicy">
|
||||
<fileNamePattern>logs/UrlsWorker.%i.log.zip</fileNamePattern>
|
||||
<minIndex>1</minIndex>
|
||||
<maxIndex>20</maxIndex>
|
||||
</rollingPolicy>
|
||||
|
||||
<triggeringPolicy class="ch.qos.logback.core.rolling.SizeBasedTriggeringPolicy">
|
||||
|
@ -24,7 +26,7 @@
|
|||
</appender>
|
||||
|
||||
<root level="debug">
|
||||
<appender-ref ref="File" />
|
||||
<appender-ref ref="RollingFile" />
|
||||
</root>
|
||||
|
||||
</configuration>
|
Loading…
Reference in New Issue