- Calculate and set the max heap size with respect to the system resources, in "installAndRun.sh".
- Fix the "Error"-members not being set correctly when the docUrl was found, but the full-text was not retrieved.
- Set a "couldRetry"-indication in the "Error"-class when the full-text was retrieved, since, in general, a retry would give the same successful result.
- Update the "docFileNotRetrieved"-check to use the standardized string.
- Eliminate some possible NPEs.
- Update Gradle to version 7.3.2.
This commit is contained in:
parent
0db35a83e7
commit
82d69f3bf5
|
@ -8,7 +8,6 @@ group = 'eu.openaire.urls_worker'
|
||||||
version = '1.0.0-SNAPSHOT'
|
version = '1.0.0-SNAPSHOT'
|
||||||
sourceCompatibility = '1.8'
|
sourceCompatibility = '1.8'
|
||||||
|
|
||||||
|
|
||||||
repositories {
|
repositories {
|
||||||
mavenCentral()
|
mavenCentral()
|
||||||
flatDir {
|
flatDir {
|
||||||
|
@ -16,7 +15,6 @@ repositories {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
dependencies {
|
dependencies {
|
||||||
runtimeOnly "org.springframework.boot:spring-boot-devtools"
|
runtimeOnly "org.springframework.boot:spring-boot-devtools"
|
||||||
|
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
distributionBase=GRADLE_USER_HOME
|
distributionBase=GRADLE_USER_HOME
|
||||||
distributionPath=wrapper/dists
|
distributionPath=wrapper/dists
|
||||||
distributionUrl=https\://services.gradle.org/distributions/gradle-7.3.1-bin.zip
|
distributionUrl=https\://services.gradle.org/distributions/gradle-7.3.2-bin.zip
|
||||||
zipStoreBase=GRADLE_USER_HOME
|
zipStoreBase=GRADLE_USER_HOME
|
||||||
zipStorePath=wrapper/dists
|
zipStorePath=wrapper/dists
|
||||||
|
|
|
@ -28,7 +28,7 @@ if [[ ! -f $inputDataFile ]]; then
|
||||||
echo -e "\n\n"
|
echo -e "\n\n"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
gradleVersion="7.3.1"
|
gradleVersion="7.3.2"
|
||||||
|
|
||||||
if [[ justInstall -eq 0 ]]; then
|
if [[ justInstall -eq 0 ]]; then
|
||||||
|
|
||||||
|
@ -63,6 +63,13 @@ if [[ justInstall -eq 0 ]]; then
|
||||||
|
|
||||||
export PATH=$PATH:/opt/gradle/gradle-${gradleVersion}/bin
|
export PATH=$PATH:/opt/gradle/gradle-${gradleVersion}/bin
|
||||||
|
|
||||||
|
# Update the max-heap-size based on the machine's physical memory.
|
||||||
|
machine_memory_mb=$(grep MemTotal /proc/meminfo | awk '{print $2}' | xargs -I {} echo "scale=4; {}/1024" | bc) # It returns the size in MB.
|
||||||
|
max_heap_size_mb=$(echo "($machine_memory_mb - 768)/1" | bc) # Leave 768 MB to the system (the "()/1" is used to take the floor value).
|
||||||
|
# Now, we replace the "-Xmx" parameter inside the "./build.gradle" file with "-Xmx${max_heap_size_mb}m".
|
||||||
|
echo -e "\n\nThe max-heap-size (-Xmx) will be set to: ${max_heap_size_mb}m\n\n"
|
||||||
|
sed -i "s/'-Xmx[0-9]\+[gm]'/'-Xmx${max_heap_size_mb}m'/g" ./build.gradle
|
||||||
|
|
||||||
gradle wrapper --gradle-version=${gradleVersion} --distribution-type=bin
|
gradle wrapper --gradle-version=${gradleVersion} --distribution-type=bin
|
||||||
|
|
||||||
#gradle tasks # For debugging installation
|
#gradle tasks # For debugging installation
|
||||||
|
|
|
@ -2,7 +2,6 @@ package eu.openaire.urls_worker.plugins;
|
||||||
|
|
||||||
import edu.uci.ics.crawler4j.url.URLCanonicalizer;
|
import edu.uci.ics.crawler4j.url.URLCanonicalizer;
|
||||||
import eu.openaire.publications_retriever.PublicationsRetriever;
|
import eu.openaire.publications_retriever.PublicationsRetriever;
|
||||||
import eu.openaire.publications_retriever.exceptions.DocFileNotRetrievedException;
|
|
||||||
import eu.openaire.publications_retriever.util.file.FileUtils;
|
import eu.openaire.publications_retriever.util.file.FileUtils;
|
||||||
import eu.openaire.publications_retriever.util.http.ConnSupportUtils;
|
import eu.openaire.publications_retriever.util.http.ConnSupportUtils;
|
||||||
import eu.openaire.publications_retriever.util.http.HttpConnUtils;
|
import eu.openaire.publications_retriever.util.http.HttpConnUtils;
|
||||||
|
@ -136,8 +135,6 @@ public class PublicationsRetrieverPlugin {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
private static final String DocFileNotRetrievedExceptionName = DocFileNotRetrievedException.class.getSimpleName(); // Keep it here for easily spot if the exception changes inside the PublicationsRetriever library.
|
|
||||||
|
|
||||||
public static void addUrlReportsToWorkerReport()
|
public static void addUrlReportsToWorkerReport()
|
||||||
{
|
{
|
||||||
Timestamp timestamp = new Timestamp(System.currentTimeMillis()); // Store it here, in order to have the same for all current records.
|
Timestamp timestamp = new Timestamp(System.currentTimeMillis()); // Store it here, in order to have the same for all current records.
|
||||||
|
@ -149,7 +146,7 @@ public class PublicationsRetrieverPlugin {
|
||||||
Long size = data.getSize();
|
Long size = data.getSize();
|
||||||
Error error = null;
|
Error error = null;
|
||||||
|
|
||||||
if ( data.getWasDocumentOrDatasetAccessible().equals("true") )
|
if ( "true".equals(data.getWasDocumentOrDatasetAccessible()) ) // The reversed order defends against a potential NPE.
|
||||||
{
|
{
|
||||||
status = UrlReport.StatusType.accessible;
|
status = UrlReport.StatusType.accessible;
|
||||||
if ( comment.contains(UrlUtils.alreadyDownloadedByIDMessage) ) {
|
if ( comment.contains(UrlUtils.alreadyDownloadedByIDMessage) ) {
|
||||||
|
@ -168,15 +165,18 @@ public class PublicationsRetrieverPlugin {
|
||||||
}
|
}
|
||||||
// TODO - The case where the "twin-ID" is not found, should "never" happen. But should we check? How to handle if that is the case..?
|
// TODO - The case where the "twin-ID" is not found, should "never" happen. But should we check? How to handle if that is the case..?
|
||||||
}
|
}
|
||||||
else if ( ! comment.contains(DocFileNotRetrievedExceptionName) ) { // If it was downloaded without an error.
|
else if ( ! comment.equals(HttpConnUtils.docFileNotRetrievedMessage) ) { // If it was downloaded without an error.
|
||||||
fileLocation = comment; // This is the full-file-path.
|
fileLocation = comment; // This is the full-file-path.
|
||||||
mimeType = "application/pdf";
|
mimeType = "application/pdf";
|
||||||
}
|
} else // Else the file was not retrieved, so all file-related data are kept "null".
|
||||||
error = new Error(null, null); // We do not want to send a "null" object, since it just adds more complicated handling in the controller..
|
error = new Error(Error.ErrorType.couldRetry, comment); // We can still try to download it in the future.
|
||||||
|
|
||||||
|
if ( error == null ) // If the file was retrieved, in any time.
|
||||||
|
error = new Error(Error.ErrorType.couldRetry, null); // We do not want to send a "null" object, since it just adds more complicated handling in the controller..
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
status = UrlReport.StatusType.non_accessible;
|
status = UrlReport.StatusType.non_accessible;
|
||||||
if ( data.getCouldRetry().equals("true") )
|
if ( "true".equals(data.getCouldRetry()) )
|
||||||
error = new Error(Error.ErrorType.couldRetry, comment);
|
error = new Error(Error.ErrorType.couldRetry, comment);
|
||||||
else
|
else
|
||||||
error = new Error(Error.ErrorType.noRetry, comment);
|
error = new Error(Error.ErrorType.noRetry, comment);
|
||||||
|
|
Loading…
Reference in New Issue