- Optimize the "FilesZipper.zipMultipleFilesAndGetZip()" and "FilesZipper.zipAFile()" methods.

- Improve the "getMultipleFullTexts" endpoint: return a bad-request response if the "fileNamesWithExtensions" list is empty, or if the base directory for the full-texts of the given assignments-counter is missing.
- Optimize the "PublicationsRetrieverPlugin.processAssignments()" method.
- Set a maximum limit on the amount of disk space the logs can use. Once that limit is exceeded, the oldest logs are deleted.
- Log the heap size (total, max and free) at application startup.
- Update Gradle.
- Code cleanup.
This commit is contained in:
Lampros Smyrnaios 2021-12-03 04:09:40 +02:00
parent 212f8f377d
commit 018326eedd
8 changed files with 64 additions and 62 deletions

View File

@ -1,5 +1,5 @@
distributionBase=GRADLE_USER_HOME
distributionPath=wrapper/dists
distributionUrl=https\://services.gradle.org/distributions/gradle-7.3-bin.zip
distributionUrl=https\://services.gradle.org/distributions/gradle-7.3.1-bin.zip
zipStoreBase=GRADLE_USER_HOME
zipStorePath=wrapper/dists

View File

@ -25,7 +25,7 @@ if [[ ! -f $inputDataFile ]]; then
echo -e "\n\n"
fi
gradleVersion="7.3"
gradleVersion="7.3.1"
if [[ justInstall -eq 0 ]]; then

View File

@ -43,6 +43,11 @@ public class UrlsWorkerApplication {
new PublicationsRetrieverPlugin();
SpringApplication.run(UrlsWorkerApplication.class, args);
Runtime javaRuntime = Runtime.getRuntime();
logger.debug("HeapSize: " + javaRuntime.totalMemory());
logger.debug("HeapMaxSize: " + javaRuntime.maxMemory());
logger.debug("HeapFreeSize: " + javaRuntime.freeMemory());
}

View File

@ -42,10 +42,24 @@ public class FullTextsController {
@GetMapping("getFullTexts/{assignmentsCounter:[\\d]+}/{totalZipBatches:[\\d]+}/{zipBatchCounter:[\\d]+}/{fileNamesWithExtensions}")
public ResponseEntity<?> getMultipleFullTexts(@PathVariable long assignmentsCounter, @PathVariable int totalZipBatches, @PathVariable int zipBatchCounter, @PathVariable List<String> fileNamesWithExtensions, HttpServletRequest request) {
logger.info("Received a \"getMultipleFullTexts\" request for returning a zip-file containing " + fileNamesWithExtensions.size() + " full-texts, from assignments-" + assignmentsCounter + ", for batch-" + zipBatchCounter);
int fileNamesListNum = fileNamesWithExtensions.size();
if ( (fileNamesListNum == 1) && (fileNamesWithExtensions.get(0).length() == 0) ) { // In case the last "/" in the url was given, then this list will not be empty, but have one empty item instead.
// In case the url does not end in "/", then Spring will automatically return an "HTTP-BadRequest".
String errorMsg = "An empty \"fileNamesWithExtensions\" list was given from assignments_" + assignmentsCounter + ", for batch_" + zipBatchCounter;
logger.warn(errorMsg);
return ResponseEntity.badRequest().body(errorMsg);
}
logger.info("Received a \"getMultipleFullTexts\" request for returning a zip-file containing " + fileNamesListNum + " full-texts, from assignments_" + assignmentsCounter + ", for batch_" + zipBatchCounter);
String currentAssignmentsBaseFullTextsPath = assignmentsBaseDir + "assignments_" + assignmentsCounter + "_fullTexts" + File.separator;
if ( ! (new File(currentAssignmentsBaseFullTextsPath).isDirectory()) ) {
String errorMsg = "The base directory for assignments_" + assignmentsCounter + " was not found: " + currentAssignmentsBaseFullTextsPath;
logger.error(errorMsg);
return ResponseEntity.badRequest().body(errorMsg);
}
File zipFile = FilesZipper.zipMultipleFilesAndGetZip(assignmentsCounter, zipBatchCounter, fileNamesWithExtensions, currentAssignmentsBaseFullTextsPath);
if ( zipFile == null ) {
String errorMsg = "Failed to create the zip file for \"zipBatchCounter\"-" + zipBatchCounter;

View File

@ -59,23 +59,25 @@ public class PublicationsRetrieverPlugin {
private static final List<Callable<Boolean>> callableTasks = new ArrayList<>(FileUtils.jsonBatchSize);
public static void processAssignments(Long assignmentRequestCounter, Collection<Assignment> assignments) throws RuntimeException, FileNotFoundException
public static void processAssignments(Long assignmentRequestCounter, Collection<Assignment> assignments) throws RuntimeException
{
FileUtils.storeDocFilesDir = assignmentsBasePath + "assignments_" + assignmentRequestCounter + "_fullTexts" + File.separator; // It needs the last separator, because of how the docFiles are named and stored.
File curAssignmentsDirs = new File(FileUtils.storeDocFilesDir);
if ( !curAssignmentsDirs.exists() ) {
if ( !curAssignmentsDirs.mkdirs() ) { // Create the directories.
String workingDir = System.getProperty("user.dir") + File.separator;
logger.error("Could not create the \"assignments_fullTexts directories\": \"" + FileUtils.storeDocFilesDir + "\". Using the \"workingDir\" instead (" + workingDir + ").");
FileUtils.storeDocFilesDir = assignmentsBasePath = workingDir;
try {
if ( !curAssignmentsDirs.exists() ) {
if ( !curAssignmentsDirs.mkdirs() ) { // Create the directories.
String workingDir = System.getProperty("user.dir") + File.separator;
logger.error("Could not create the \"assignments_fullTexts directories\": \"" + FileUtils.storeDocFilesDir + "\". Using the \"workingDir\" instead (" + workingDir + ").");
FileUtils.storeDocFilesDir = assignmentsBasePath = workingDir;
}
}
} catch (Exception e) {
String errorMsg = "Failed to create the full-texts directory for assignments_" + assignmentRequestCounter;
logger.error(errorMsg, e);
throw new RuntimeException(errorMsg + ": " + e.getMessage());
}
int tasksNumber = assignments.size();
int batchCount = 0;
int tasksCount = 0;
// Start loading and checking urls.
for ( Assignment assignment : assignments )
{
@ -121,17 +123,13 @@ public class PublicationsRetrieverPlugin {
}
return true;
});
}
// Invoke the tasks every time we reach the "jsonBatchSize" tasks, or we are at the end of the list.
tasksCount ++;
if ( (tasksCount == FileUtils.jsonBatchSize) || (tasksCount == tasksNumber) )
{
logger.info("Batch counter: " + (++batchCount) + " | progress: " + PublicationsRetriever.df.format((batchCount * tasksCount) * 100.0 / tasksNumber) + "% | every batch contains " + FileUtils.jsonBatchSize + " id-url pairs.");
LoaderAndChecker.invokeAllTasksAndWait(callableTasks);
addUrlReportsToWorkerReport();
callableTasks.clear(); // Reset the thread-tasks-list for the next batch.
}
}// end tasks-for-loop
int numFailedTasks = LoaderAndChecker.invokeAllTasksAndWait(callableTasks);
if ( numFailedTasks > 0 )
logger.warn(numFailedTasks + " tasks failed!");
addUrlReportsToWorkerReport();
callableTasks.clear(); // Reset the thread-tasks-list for the next batch.
}

View File

@ -103,8 +103,8 @@ public class AssignmentsHandler {
try {
PublicationsRetrieverPlugin.processAssignments(assignmentRequestCounter, assignmentsForPlugins.values());
} catch (Exception e) {
logger.error(e.getMessage(), e);
}
logger.error("Exception when processing the assignments_" + assignmentRequestCounter, e);
} // In this case, we will either have an empty WorkerReport or a half-filled one. Either way, we want to report back to the Controller.
if ( askForTest ) {
logger.debug("UrlReports:"); // DEBUG!
@ -115,9 +115,10 @@ public class AssignmentsHandler {
postWorkerReport(assignmentRequestCounter);
isAvailableForWork = true; // State this after posting, to avoid breaking the "UrlReports" in the current or the next run.
// Also, since the worker has limited resources, it's better to finish sending the full-texts first and then request a new batch of assignments.
// Note: Cannot call this method here retrospectively, as if it runs 100s of times, the memory-stack may break..
// The scheduler will handle calling it every half an hour, in case the Worker is available for work..
// The scheduler will handle calling it every 15 mins, in case the Worker is available for work..
}

View File

@ -4,6 +4,7 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.*;
import java.nio.charset.StandardCharsets;
import java.util.List;
import java.util.zip.ZipEntry;
import java.util.zip.ZipOutputStream;
@ -16,47 +17,35 @@ public class FilesZipper
public static File zipMultipleFilesAndGetZip(long assignmentsCounter, int zipBatchCounter, List<String> filesToZip, String baseDirectory)
{
File zipFile = null;
ZipOutputStream zos = null;
try {
String zipFilename = baseDirectory + "assignments_" + assignmentsCounter + "_full-texts_" + zipBatchCounter + ".zip";
// For example: assignments_2_full-texts_4.zip | where < 4 > is referred to the 4th batch of files requested by the controller.
zipFile = new File(zipFilename);
zos = new ZipOutputStream(new FileOutputStream(zipFile));
String zipFilename = baseDirectory + "assignments_" + assignmentsCounter + "_full-texts_" + zipBatchCounter + ".zip";
// For example: assignments_2_full-texts_4.zip | where < 4 > is referred to the 4th batch of files requested by the controller.
// Iterate over the given full-texts and add them to the zip.
for ( String file : filesToZip )
{
File zipFile = new File(zipFilename);
try ( ZipOutputStream zos = new ZipOutputStream(new FileOutputStream(zipFile), StandardCharsets.UTF_8) )
{
for ( String file : filesToZip ) {
zipAFile(file, zos, baseDirectory);
}
} catch (Exception e) {
logger.error("", e);
logger.error("Exception when creating the zip-file: " + zipFilename, e);
return null;
} finally {
try {
if ( zos != null )
zos.close();
} catch (IOException e) {
logger.error(e.getMessage(), e);
}
}
return zipFile;
}
private static boolean zipAFile(String fileName, ZipOutputStream zos, String baseDir)
private static final int BUFFER_SIZE = 3145728; // 3MB (average fullText-size)
private static final byte[] dataBuffer = new byte[BUFFER_SIZE];
// This method is "synchronized" to avoid any future problems with shared-buffer, if the requests are asynchronous.
private static synchronized boolean zipAFile(String fileName, ZipOutputStream zos, String baseDir)
{
final int BUFFER = 1048576; // 1 MB
byte[] data = new byte[BUFFER];
BufferedInputStream bis = null;
String fullFileName = baseDir + fileName;
try {
FileInputStream fis = new FileInputStream(fullFileName);
bis = new BufferedInputStream(fis, BUFFER);
try ( BufferedInputStream bis = new BufferedInputStream(new FileInputStream(fullFileName), BUFFER_SIZE) ) {
zos.putNextEntry(new ZipEntry(fileName));
int count;
while ( (count = bis.read(data, 0, BUFFER)) != -1 ) {
zos.write(data, 0, count);
while ( (count = bis.read(dataBuffer, 0, BUFFER_SIZE)) != -1 ) {
zos.write(dataBuffer, 0, count);
}
zos.closeEntry(); // close the entry here (not the ZipOutputStream)
} catch (FileNotFoundException fnfe) {
@ -66,13 +55,6 @@ public class FilesZipper
if ( ! e.getMessage().contains("duplicate") )
logger.error("Error zipping file: " + fullFileName, e);
return false;
} finally {
try {
if ( bis != null )
bis.close();
} catch (IOException e) {
logger.error(e.getMessage(), e);
}
}
return true;
}

View File

@ -1,10 +1,12 @@
<configuration debug="false">
<appender name="File" class="ch.qos.logback.core.rolling.RollingFileAppender">
<appender name="RollingFile" class="ch.qos.logback.core.rolling.RollingFileAppender">
<file>logs/UrlsWorker.log</file>
<rollingPolicy class="ch.qos.logback.core.rolling.FixedWindowRollingPolicy">
<fileNamePattern>logs/UrlsWorker.%i.log.zip</fileNamePattern>
<minIndex>1</minIndex>
<maxIndex>20</maxIndex>
</rollingPolicy>
<triggeringPolicy class="ch.qos.logback.core.rolling.SizeBasedTriggeringPolicy">
@ -24,7 +26,7 @@
</appender>
<root level="debug">
<appender-ref ref="File" />
<appender-ref ref="RollingFile" />
</root>
</configuration>