diff --git a/src/main/java/eu/openaire/urls_worker/components/plugins/PublicationsRetrieverPlugin.java b/src/main/java/eu/openaire/urls_worker/components/plugins/PublicationsRetrieverPlugin.java index a3e8de3..6cd9bd1 100644 --- a/src/main/java/eu/openaire/urls_worker/components/plugins/PublicationsRetrieverPlugin.java +++ b/src/main/java/eu/openaire/urls_worker/components/plugins/PublicationsRetrieverPlugin.java @@ -1,6 +1,7 @@ package eu.openaire.urls_worker.components.plugins; import eu.openaire.publications_retriever.PublicationsRetriever; +import eu.openaire.publications_retriever.models.IdUrlMimeTypeTriple; import eu.openaire.publications_retriever.util.args.ArgsUtils; import eu.openaire.publications_retriever.util.file.FileUtils; import eu.openaire.publications_retriever.util.http.ConnSupportUtils; @@ -110,7 +111,7 @@ public class PublicationsRetrieverPlugin { if ( (id == null) || id.isEmpty() || (url == null) || url.isEmpty() ) { String errorMsg = "Got null or empty pair! ID=" + id + " , url=" + url; logger.warn(errorMsg); - UrlUtils.addOutputData(id, url, null, UrlUtils.unreachableDocOrDatasetUrlIndicator, "Discarded at loading time, due to input problems. " + errorMsg, null, true, "true", "false", "false", "false", "false", null, null); + UrlUtils.addOutputData(id, url, null, UrlUtils.unreachableDocOrDatasetUrlIndicator, "Discarded at loading time, due to input problems. " + errorMsg, null, true, "true", "false", "false", "false", "false", null, null, null); return false; } @@ -121,13 +122,14 @@ public class PublicationsRetrieverPlugin { String sourceUrl = urlToCheck; // Hold it here for the logging-messages. if ( (urlToCheck = LoaderAndChecker.basicURLNormalizer.filter(sourceUrl)) == null ) { logger.warn("Could not normalize url: " + sourceUrl); - UrlUtils.addOutputData(id, sourceUrl, null, UrlUtils.unreachableDocOrDatasetUrlIndicator, "Discarded at loading time, due to canonicalization's problems.", null, true, "true", "false", "false", "false", "false", null, null); + UrlUtils.addOutputData(id, sourceUrl, null, UrlUtils.unreachableDocOrDatasetUrlIndicator, "Discarded at loading time, due to canonicalization's problems.", null, true, "true", "false", "false", "false", "false", null, null, null); LoaderAndChecker.connProblematicUrls.incrementAndGet(); return false; } - if ( UrlUtils.docOrDatasetUrlsWithIDs.containsKey(url) ) { // If we got into an already-found docUrl, log it and return. - ConnSupportUtils.handleReCrossedDocUrl(id, url, url, url, true); + IdUrlMimeTypeTriple originalIdUrlMimeTypeTriple = UrlUtils.docOrDatasetUrlsWithIDs.get(url); + if ( originalIdUrlMimeTypeTriple != null ) { // If we got into an already-found docUrl, log it and return. + ConnSupportUtils.handleReCrossedDocUrl(id, url, url, url, originalIdUrlMimeTypeTriple, true); return true; } @@ -147,7 +149,7 @@ public class PublicationsRetrieverPlugin { String wasUrlValid = list.get(0); String couldRetry = list.get(1); String errorMsg = "Discarded at loading time, as " + list.get(2); - UrlUtils.addOutputData(id, urlToCheck, null, UrlUtils.unreachableDocOrDatasetUrlIndicator, errorMsg, null, true, "true", wasUrlValid, "false", "false", couldRetry, null, null); + UrlUtils.addOutputData(id, urlToCheck, null, UrlUtils.unreachableDocOrDatasetUrlIndicator, errorMsg, null, true, "true", wasUrlValid, "false", "false", couldRetry, null, null, null); return false; } return true;