- Reduce memory consumption in the long run by clearing some underlying data structures after a threshold of handled id-url pairs is reached.
- Update Gradle.
This commit is contained in:
parent
4cadaf98fc
commit
3d1faf4a8a
|
@ -1,5 +1,5 @@
|
||||||
distributionBase=GRADLE_USER_HOME
|
distributionBase=GRADLE_USER_HOME
|
||||||
distributionPath=wrapper/dists
|
distributionPath=wrapper/dists
|
||||||
distributionUrl=https\://services.gradle.org/distributions/gradle-7.3.3-bin.zip
|
distributionUrl=https\://services.gradle.org/distributions/gradle-7.4-bin.zip
|
||||||
zipStoreBase=GRADLE_USER_HOME
|
zipStoreBase=GRADLE_USER_HOME
|
||||||
zipStorePath=wrapper/dists
|
zipStorePath=wrapper/dists
|
||||||
|
|
|
@ -31,7 +31,7 @@ if [[ ! -f $inputDataFile ]]; then
|
||||||
echo -e "\n\n"
|
echo -e "\n\n"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
gradleVersion="7.3.3"
|
gradleVersion="7.4"
|
||||||
|
|
||||||
if [[ justInstall -eq 0 ]]; then
|
if [[ justInstall -eq 0 ]]; then
|
||||||
|
|
||||||
|
|
|
@ -6,6 +6,7 @@ import eu.openaire.publications_retriever.util.file.FileUtils;
|
||||||
import eu.openaire.publications_retriever.util.http.ConnSupportUtils;
|
import eu.openaire.publications_retriever.util.http.ConnSupportUtils;
|
||||||
import eu.openaire.publications_retriever.util.http.HttpConnUtils;
|
import eu.openaire.publications_retriever.util.http.HttpConnUtils;
|
||||||
import eu.openaire.publications_retriever.util.url.DataToBeLogged;
|
import eu.openaire.publications_retriever.util.url.DataToBeLogged;
|
||||||
|
import eu.openaire.publications_retriever.util.url.GenericUtils;
|
||||||
import eu.openaire.publications_retriever.util.url.LoaderAndChecker;
|
import eu.openaire.publications_retriever.util.url.LoaderAndChecker;
|
||||||
import eu.openaire.publications_retriever.util.url.UrlUtils;
|
import eu.openaire.publications_retriever.util.url.UrlUtils;
|
||||||
import eu.openaire.urls_worker.UrlsWorkerApplication;
|
import eu.openaire.urls_worker.UrlsWorkerApplication;
|
||||||
|
@ -34,6 +35,9 @@ public class PublicationsRetrieverPlugin {
|
||||||
|
|
||||||
public static String assignmentsBasePath;
|
public static String assignmentsBasePath;
|
||||||
|
|
||||||
|
public static final long idUrlsToHandleBeforeClearingDomainAndPathTrackingData = 10_000_000;
|
||||||
|
public static final long idUrlsToHandleBeforeClearingDuplicateUrlsData = 1_000_000;
|
||||||
|
|
||||||
|
|
||||||
public PublicationsRetrieverPlugin() {
|
public PublicationsRetrieverPlugin() {
|
||||||
// Specify some configurations
|
// Specify some configurations
|
||||||
|
@ -147,6 +151,15 @@ public class PublicationsRetrieverPlugin {
|
||||||
|
|
||||||
UrlUtils.docOrDatasetUrlsWithIDs.clear(); // This HashTable is useful only for a single assignments-batch.
|
UrlUtils.docOrDatasetUrlsWithIDs.clear(); // This HashTable is useful only for a single assignments-batch.
|
||||||
// In the next batch, the previously stored files might have been already uploaded by the Controller and deleted by the worker. Also, they will be stored in a different directory anyway.
|
// In the next batch, the previously stored files might have been already uploaded by the Controller and deleted by the worker. Also, they will be stored in a different directory anyway.
|
||||||
|
|
||||||
|
// Every time we reach a "limit" of handled id-url pairs, clear some data-structures of the underlying "PublicationsRetriever" program.
|
||||||
|
// This helps reduce the memory consumption over a period of weeks or months, and also gives a 2nd chance to some domains which may have been blocked due to connectivity issues, but which may be fine again after a month.
|
||||||
|
long idUrlPairsHandled = (assignmentRequestCounter * UrlsWorkerApplication.maxAssignmentsLimitPerBatch);
|
||||||
|
if ( idUrlPairsHandled >= idUrlsToHandleBeforeClearingDuplicateUrlsData )
|
||||||
|
UrlUtils.duplicateUrls.clear();
|
||||||
|
|
||||||
|
if ( idUrlPairsHandled >= idUrlsToHandleBeforeClearingDomainAndPathTrackingData )
|
||||||
|
GenericUtils.clearDomainAndPathTrackingData();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -179,7 +192,7 @@ public class PublicationsRetrieverPlugin {
|
||||||
fileLocation = data_2.getComment();
|
fileLocation = data_2.getComment();
|
||||||
size = data_2.getSize();
|
size = data_2.getSize();
|
||||||
hash = data_2.getHash();
|
hash = data_2.getHash();
|
||||||
mimeType = "application/pdf"; // TODO - If support is added for other doc-formats other than "pdf", then make sure the "mime_type" is correctly specified.
|
mimeType = "application/pdf"; // TODO - If support is added for other doc-formats other than "pdf", then make sure the "mime_type" is assigned correctly.
|
||||||
foundAlreadyDownloadedFullText = true;
|
foundAlreadyDownloadedFullText = true;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue