From 19a9bddab139a14b4f10f37560ed3dcd447c1926 Mon Sep 17 00:00:00 2001 From: Miriam Baglioni Date: Tue, 10 Dec 2024 16:26:24 +0100 Subject: [PATCH] [gtr2 plugin] changed to try not to die if one publication link points to the website of the project --- .../eu/dnetlib/dhp/collection/orcid/ORCIDExtractor.java | 2 +- .../collection/plugin/gtr2/Gtr2PublicationsIterator.java | 6 ++++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/orcid/ORCIDExtractor.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/orcid/ORCIDExtractor.java index 1adad104e3..8172456bb6 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/orcid/ORCIDExtractor.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/orcid/ORCIDExtractor.java @@ -160,7 +160,7 @@ public class ORCIDExtractor extends Thread { } } finally { for (SequenceFile.Writer k : fileMap.values()) { - log.info("Thread {}: Completed processed {} items", id, extractedItem); + log.info("Thread {}: Completed processed {} items", id, extractedItem); k.hflush(); k.close(); } diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/gtr2/Gtr2PublicationsIterator.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/gtr2/Gtr2PublicationsIterator.java index add920ed79..4b24d3a035 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/gtr2/Gtr2PublicationsIterator.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/gtr2/Gtr2PublicationsIterator.java @@ -152,6 +152,12 @@ public class Gtr2PublicationsIterator implements Iterator { } catch (final Throwable e) { log.error("Error dowloading url: {}, attempt = {}", cleanUrl, attempt, e); + if(attempt == -1) + try{ + DocumentHelper.parseText(""); + }catch(Throwable t){ + throw new RuntimeException(); + } if 
(attempt >= MAX_ATTEMPTS) { throw new RuntimeException("Error downloading url: " + cleanUrl, e); }