- Set "ConnSupportUtils.shouldBlockMost5XXDomains" to "false" and call the "LoaderAndChecker.setCouldRetryRegex()" method. Together, these changes make sure that, for HTTP-5XX errors, only the 511-domains get blocked and only the 511-urls get labeled with "noRetry".

- Improve performance and reduce memory consumption by calling the "ConnSupportUtils.setKnownMimeTypes()" method only once, in the constructor, instead of on every "processAssignments()" call.
- Code cleanup.
This commit is contained in:
Lampros Smyrnaios 2021-11-30 06:57:51 +02:00
parent 6355b3e397
commit 212f8f377d
2 changed files with 12 additions and 11 deletions

View File

@ -47,7 +47,7 @@ if [[ justInstall -eq 0 ]]; then
# Delete the directory with the source-code.
cd ../ && rm -rf PublicationsRetriever
# Clean and (re)build and run the project.
# Clean, (re)build and run the project.
cd ../
if [[ ! -d /opt/gradle/gradle-${gradleVersion} ]]; then

View File

@ -33,28 +33,30 @@ public class PublicationsRetrieverPlugin {
public static String assignmentsBasePath;
static {
public PublicationsRetrieverPlugin() {
// Specify some configurations
LoaderAndChecker.retrieveDocuments = true;
LoaderAndChecker.retrieveDatasets = false;
ConnSupportUtils.setKnownMimeTypes();
FileUtils.shouldDownloadDocFiles = true;
FileUtils.docFileNameType = FileUtils.DocFileNameType.idName;
PublicationsRetriever.targetUrlType = "docUrl";
FileUtils.jsonBatchSize = WorkerConstants.ASSIGNMENTS_LIMIT;
assignmentsBasePath = FileStorageService.assignmentsLocation.toString();
if ( !assignmentsBasePath.endsWith(File.separator) )
assignmentsBasePath += File.separator;
ConnSupportUtils.shouldBlockMost5XXDomains = false;
LoaderAndChecker.setCouldRetryRegex();
PublicationsRetriever.threadsMultiplier = 6;
int workerThreadsCount = Runtime.getRuntime().availableProcessors() * PublicationsRetriever.threadsMultiplier;
logger.info("Use " + workerThreadsCount + " worker-threads.");
PublicationsRetriever.executor = Executors.newFixedThreadPool(workerThreadsCount);
}
public PublicationsRetrieverPlugin() {
assignmentsBasePath = FileStorageService.assignmentsLocation.toString();
if ( !assignmentsBasePath.endsWith(File.separator) )
assignmentsBasePath += File.separator;
}
private static final List<Callable<Boolean>> callableTasks = new ArrayList<>(FileUtils.jsonBatchSize);
public static void processAssignments(Long assignmentRequestCounter, Collection<Assignment> assignments) throws RuntimeException, FileNotFoundException
@ -66,11 +68,10 @@ public class PublicationsRetrieverPlugin {
if ( !curAssignmentsDirs.mkdirs() ) { // Create the directories.
String workingDir = System.getProperty("user.dir") + File.separator;
logger.error("Could not create the \"assignments_fullTexts directories\": \"" + FileUtils.storeDocFilesDir + "\". Using the \"workingDir\" instead (" + workingDir + ").");
FileUtils.storeDocFilesDir = assignmentsBasePath = workingDir;
FileUtils.storeDocFilesDir = assignmentsBasePath = workingDir;
}
}
ConnSupportUtils.setKnownMimeTypes();
int tasksNumber = assignments.size();
int batchCount = 0;
int tasksCount = 0;