Use the newer API from the "PublicationsRetriever" software.

This commit is contained in:
Lampros Smyrnaios 2025-01-11 14:07:03 +02:00
parent b70ae3ed58
commit b4391fa599
1 changed file with 7 additions and 5 deletions

View File

@ -1,6 +1,7 @@
package eu.openaire.urls_worker.components.plugins; package eu.openaire.urls_worker.components.plugins;
import eu.openaire.publications_retriever.PublicationsRetriever; import eu.openaire.publications_retriever.PublicationsRetriever;
import eu.openaire.publications_retriever.models.IdUrlMimeTypeTriple;
import eu.openaire.publications_retriever.util.args.ArgsUtils; import eu.openaire.publications_retriever.util.args.ArgsUtils;
import eu.openaire.publications_retriever.util.file.FileUtils; import eu.openaire.publications_retriever.util.file.FileUtils;
import eu.openaire.publications_retriever.util.http.ConnSupportUtils; import eu.openaire.publications_retriever.util.http.ConnSupportUtils;
@ -110,7 +111,7 @@ public class PublicationsRetrieverPlugin {
if ( (id == null) || id.isEmpty() || (url == null) || url.isEmpty() ) { if ( (id == null) || id.isEmpty() || (url == null) || url.isEmpty() ) {
String errorMsg = "Got null or empty pair! ID=" + id + " , url=" + url; String errorMsg = "Got null or empty pair! ID=" + id + " , url=" + url;
logger.warn(errorMsg); logger.warn(errorMsg);
UrlUtils.addOutputData(id, url, null, UrlUtils.unreachableDocOrDatasetUrlIndicator, "Discarded at loading time, due to input problems. " + errorMsg, null, true, "true", "false", "false", "false", "false", null, null); UrlUtils.addOutputData(id, url, null, UrlUtils.unreachableDocOrDatasetUrlIndicator, "Discarded at loading time, due to input problems. " + errorMsg, null, true, "true", "false", "false", "false", "false", null, null, null);
return false; return false;
} }
@ -121,13 +122,14 @@ public class PublicationsRetrieverPlugin {
String sourceUrl = urlToCheck; // Hold it here for the logging-messages. String sourceUrl = urlToCheck; // Hold it here for the logging-messages.
if ( (urlToCheck = LoaderAndChecker.basicURLNormalizer.filter(sourceUrl)) == null ) { if ( (urlToCheck = LoaderAndChecker.basicURLNormalizer.filter(sourceUrl)) == null ) {
logger.warn("Could not normalize url: " + sourceUrl); logger.warn("Could not normalize url: " + sourceUrl);
UrlUtils.addOutputData(id, sourceUrl, null, UrlUtils.unreachableDocOrDatasetUrlIndicator, "Discarded at loading time, due to canonicalization's problems.", null, true, "true", "false", "false", "false", "false", null, null); UrlUtils.addOutputData(id, sourceUrl, null, UrlUtils.unreachableDocOrDatasetUrlIndicator, "Discarded at loading time, due to canonicalization's problems.", null, true, "true", "false", "false", "false", "false", null, null, null);
LoaderAndChecker.connProblematicUrls.incrementAndGet(); LoaderAndChecker.connProblematicUrls.incrementAndGet();
return false; return false;
} }
if ( UrlUtils.docOrDatasetUrlsWithIDs.containsKey(url) ) { // If we got into an already-found docUrl, log it and return. IdUrlMimeTypeTriple originalIdUrlMimeTypeTriple = UrlUtils.docOrDatasetUrlsWithIDs.get(url);
ConnSupportUtils.handleReCrossedDocUrl(id, url, url, url, true); if ( originalIdUrlMimeTypeTriple != null ) { // If we got into an already-found docUrl, log it and return.
ConnSupportUtils.handleReCrossedDocUrl(id, url, url, url, originalIdUrlMimeTypeTriple, true);
return true; return true;
} }
@ -147,7 +149,7 @@ public class PublicationsRetrieverPlugin {
String wasUrlValid = list.get(0); String wasUrlValid = list.get(0);
String couldRetry = list.get(1); String couldRetry = list.get(1);
String errorMsg = "Discarded at loading time, as " + list.get(2); String errorMsg = "Discarded at loading time, as " + list.get(2);
UrlUtils.addOutputData(id, urlToCheck, null, UrlUtils.unreachableDocOrDatasetUrlIndicator, errorMsg, null, true, "true", wasUrlValid, "false", "false", couldRetry, null, null); UrlUtils.addOutputData(id, urlToCheck, null, UrlUtils.unreachableDocOrDatasetUrlIndicator, errorMsg, null, true, "true", wasUrlValid, "false", "false", couldRetry, null, null, null);
return false; return false;
} }
return true; return true;