diff --git a/build.gradle b/build.gradle index 76d0ca6..e9c664c 100644 --- a/build.gradle +++ b/build.gradle @@ -8,7 +8,6 @@ group = 'eu.openaire.urls_worker' version = '1.0.0-SNAPSHOT' sourceCompatibility = '1.8' - repositories { mavenCentral() flatDir { @@ -16,7 +15,6 @@ repositories { } } - dependencies { runtimeOnly "org.springframework.boot:spring-boot-devtools" diff --git a/gradle/wrapper/gradle-wrapper.properties b/gradle/wrapper/gradle-wrapper.properties index 84d1f85..d2880ba 100644 --- a/gradle/wrapper/gradle-wrapper.properties +++ b/gradle/wrapper/gradle-wrapper.properties @@ -1,5 +1,5 @@ distributionBase=GRADLE_USER_HOME distributionPath=wrapper/dists -distributionUrl=https\://services.gradle.org/distributions/gradle-7.3.1-bin.zip +distributionUrl=https\://services.gradle.org/distributions/gradle-7.3.2-bin.zip zipStoreBase=GRADLE_USER_HOME zipStorePath=wrapper/dists diff --git a/installAndRun.sh b/installAndRun.sh index 60ffd25..4d9500c 100755 --- a/installAndRun.sh +++ b/installAndRun.sh @@ -28,7 +28,7 @@ if [[ ! -f $inputDataFile ]]; then echo -e "\n\n" fi -gradleVersion="7.3.1" +gradleVersion="7.3.2" if [[ justInstall -eq 0 ]]; then @@ -63,6 +63,13 @@ if [[ justInstall -eq 0 ]]; then export PATH=$PATH:/opt/gradle/gradle-${gradleVersion}/bin + # Update the max-heap-size based on the machine's physical memory. + machine_memory_mb=$(grep MemTotal /proc/meminfo | awk '{print $2}' | xargs -I {} echo "scale=4; {}/1024" | bc) # It returns the size in MB. + max_heap_size_mb=$(echo "($machine_memory_mb - 768)/1" | bc) # Leave 768 MB to the system (the "()/1" is used to take the floor value). 
+ # Now, we replace the "-Xmx" parameter inside the "./build.gradle" file, with "-Xmx${max_heap_size_mb}m" + echo -e "\n\nThe max-heap-size (-Xmx) will be set to: ${max_heap_size_mb}m\n\n" + sed -i "s/'-Xmx[0-9]\+[gm]'/'-Xmx${max_heap_size_mb}m'/g" ./build.gradle + gradle wrapper --gradle-version=${gradleVersion} --distribution-type=bin #gradle tasks # For debugging installation diff --git a/src/main/java/eu/openaire/urls_worker/plugins/PublicationsRetrieverPlugin.java b/src/main/java/eu/openaire/urls_worker/plugins/PublicationsRetrieverPlugin.java index 3b8d8ec..d57d55f 100644 --- a/src/main/java/eu/openaire/urls_worker/plugins/PublicationsRetrieverPlugin.java +++ b/src/main/java/eu/openaire/urls_worker/plugins/PublicationsRetrieverPlugin.java @@ -2,7 +2,6 @@ package eu.openaire.urls_worker.plugins; import edu.uci.ics.crawler4j.url.URLCanonicalizer; import eu.openaire.publications_retriever.PublicationsRetriever; -import eu.openaire.publications_retriever.exceptions.DocFileNotRetrievedException; import eu.openaire.publications_retriever.util.file.FileUtils; import eu.openaire.publications_retriever.util.http.ConnSupportUtils; import eu.openaire.publications_retriever.util.http.HttpConnUtils; @@ -136,8 +135,6 @@ public class PublicationsRetrieverPlugin { } - private static final String DocFileNotRetrievedExceptionName = DocFileNotRetrievedException.class.getSimpleName(); // Keep it here for easily spot if the exception changes inside the PublicationsRetriever library. - public static void addUrlReportsToWorkerReport() { Timestamp timestamp = new Timestamp(System.currentTimeMillis()); // Store it here, in order to have the same for all current records. @@ -149,7 +146,7 @@ public class PublicationsRetrieverPlugin { Long size = data.getSize(); Error error = null; - if ( data.getWasDocumentOrDatasetAccessible().equals("true") ) + if ( "true".equals(data.getWasDocumentOrDatasetAccessible()) ) // The reversed order defends against a potential NPE. 
{ status = UrlReport.StatusType.accessible; if ( comment.contains(UrlUtils.alreadyDownloadedByIDMessage) ) { @@ -168,15 +165,18 @@ public class PublicationsRetrieverPlugin { } // TODO - The case where the "twin-ID" is not found, should "never" happen. But should we check? How to handle if that is the case..? } - else if ( ! comment.contains(DocFileNotRetrievedExceptionName) ) { // If it was downloaded without an error. + else if ( ! comment.equals(HttpConnUtils.docFileNotRetrievedMessage) ) { // If it was downloaded without an error. fileLocation = comment; // This is the full-file-path. mimeType = "application/pdf"; - } - error = new Error(null, null); // We do not want to send a "null" object, since it just adds more complicated handling in the controller.. + } else // Else the file was not retrieved, so all file-related data are kept "null". + error = new Error(Error.ErrorType.couldRetry, comment); // We can still try to download it in the future. + + if ( error == null ) // If the file was retrieved, at any time. + error = new Error(Error.ErrorType.couldRetry, null); // We do not want to send a "null" object, since it just adds more complicated handling in the controller.. } else { status = UrlReport.StatusType.non_accessible; - if ( data.getCouldRetry().equals("true") ) + if ( "true".equals(data.getCouldRetry()) ) error = new Error(Error.ErrorType.couldRetry, comment); else error = new Error(Error.ErrorType.noRetry, comment);