From ebf53a1616b256de90aa62255d5c4e2b13d34237 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Wed, 15 Sep 2021 16:10:37 +0200 Subject: [PATCH 1/3] added cleaning for relation fields: subRelType & relClass according to dedicated vocabs --- .../dhp/oa/graph/clean/CleaningRuleMap.java | 31 ++++++++--- .../clean/GraphCleaningFunctionsTest.java | 55 ++++++++++++------- .../dnetlib/dhp/oa/graph/clean/relation.json | 10 ++++ .../dnetlib/dhp/oa/graph/clean/synonyms.txt | 12 +++- .../eu/dnetlib/dhp/oa/graph/clean/terms.txt | 39 ++++++++++++- pom.xml | 2 +- 6 files changed, 117 insertions(+), 32 deletions(-) create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/relation.json diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/CleaningRuleMap.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/CleaningRuleMap.java index 95aa749b24..7a35832892 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/CleaningRuleMap.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/CleaningRuleMap.java @@ -12,6 +12,7 @@ import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.oaf.AccessRight; import eu.dnetlib.dhp.schema.oaf.Country; import eu.dnetlib.dhp.schema.oaf.Qualifier; +import eu.dnetlib.dhp.schema.oaf.Relation; public class CleaningRuleMap extends HashMap, SerializableConsumer> implements Serializable { @@ -24,17 +25,31 @@ public class CleaningRuleMap extends HashMap, SerializableConsumer cleanQualifier(vocabularies, (Qualifier) o)); mapping.put(AccessRight.class, o -> cleanQualifier(vocabularies, (AccessRight) o)); - mapping.put(Country.class, o -> { - final Country c = (Country) o; - if (StringUtils.isBlank(c.getSchemeid())) { - c.setSchemeid(ModelConstants.DNET_COUNTRY_TYPE); - c.setSchemename(ModelConstants.DNET_COUNTRY_TYPE); - } - cleanQualifier(vocabularies, c); - }); + mapping.put(Country.class, o -> cleanCountry(vocabularies, (Country) o)); + mapping.put(Relation.class, o -> cleanRelation(vocabularies, (Relation) o)); return mapping; } + private static void cleanRelation(VocabularyGroup vocabularies, Relation r) { + if (vocabularies.vocabularyExists(ModelConstants.DNET_RELATION_SUBRELTYPE)) { + Qualifier newValue = vocabularies.lookup(ModelConstants.DNET_RELATION_SUBRELTYPE, r.getSubRelType()); + r.setSubRelType(newValue.getClassid()); + } + if (vocabularies.vocabularyExists(ModelConstants.DNET_RELATION_RELCLASS)) { + Qualifier newValue = vocabularies.lookup(ModelConstants.DNET_RELATION_RELCLASS, r.getRelClass()); + r.setRelClass(newValue.getClassid()); + } + } + + private static void cleanCountry(VocabularyGroup vocabularies, Country o) { + final Country c = o; + if (StringUtils.isBlank(c.getSchemeid())) { + c.setSchemeid(ModelConstants.DNET_COUNTRY_TYPE); + c.setSchemename(ModelConstants.DNET_COUNTRY_TYPE); + } + cleanQualifier(vocabularies, c); + } + private static void cleanQualifier(VocabularyGroup vocabularies, Q q) { if (vocabularies.vocabularyExists(q.getSchemeid())) { Qualifier newValue = vocabularies.lookup(q.getSchemeid(), q.getClassid()); diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/GraphCleaningFunctionsTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/GraphCleaningFunctionsTest.java index edcd72ab43..42d9f226cd 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/GraphCleaningFunctionsTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/GraphCleaningFunctionsTest.java @@ -5,6 +5,7 @@ import static org.junit.jupiter.api.Assertions.*; import static org.mockito.Mockito.lenient; import java.io.IOException; +import java.util.Collection; import java.util.List; import java.util.Set; import java.util.stream.Stream; @@ -16,12 +17,12 @@ import org.junit.jupiter.api.extension.ExtendWith; import org.mockito.Mock; import org.mockito.junit.jupiter.MockitoExtension; +import com.fasterxml.jackson.databind.DeserializationFeature; import com.fasterxml.jackson.databind.ObjectMapper; import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup; import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.oaf.*; -import eu.dnetlib.dhp.schema.oaf.utils.CleaningFunctions; import eu.dnetlib.dhp.schema.oaf.utils.GraphCleaningFunctions; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; @@ -29,7 +30,8 @@ import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; @ExtendWith(MockitoExtension.class) public class GraphCleaningFunctionsTest { - public static final ObjectMapper MAPPER = new ObjectMapper(); + public static final ObjectMapper MAPPER = new ObjectMapper() + .configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false); @Mock private ISLookUpService isLookUpService; @@ -49,6 +51,23 @@ public class GraphCleaningFunctionsTest { mapping = CleaningRuleMap.create(vocabularies); } + @Test + void testCleanRelations() throws Exception { + + List lines = IOUtils + .readLines(getClass().getResourceAsStream("/eu/dnetlib/dhp/oa/graph/clean/relation.json")); + for (String json : lines) { + Relation r_in = MAPPER.readValue(json, Relation.class); + assertNotNull(r_in); + + assertFalse(vocabularies.getTerms(ModelConstants.DNET_RELATION_RELCLASS).contains(r_in.getRelClass())); + + Relation r_out = OafCleaner.apply(r_in, mapping); + assertTrue(vocabularies.getTerms(ModelConstants.DNET_RELATION_RELCLASS).contains(r_out.getRelClass())); + assertTrue(vocabularies.getTerms(ModelConstants.DNET_RELATION_SUBRELTYPE).contains(r_out.getSubRelType())); + } + } + @Test void testCleaning() throws Exception { @@ -87,7 +106,7 @@ public class GraphCleaningFunctionsTest { p_out .getPid() .stream() - .map(p -> p.getQualifier()) + .map(StructuredProperty::getQualifier) .allMatch(q -> pidTerms.contains(q.getClassid()))); List poi = p_out.getInstance(); @@ -101,8 +120,8 @@ public class GraphCleaningFunctionsTest { assertEquals(2, poii.getPid().size()); assertTrue( - poii.getPid().stream().filter(s -> s.getValue().equals("10.1007/s109090161569x")).findFirst().isPresent()); - assertTrue(poii.getPid().stream().filter(s -> s.getValue().equals("10.1008/abcd")).findFirst().isPresent()); + poii.getPid().stream().anyMatch(s -> s.getValue().equals("10.1007/s109090161569x"))); + assertTrue(poii.getPid().stream().anyMatch(s -> s.getValue().equals("10.1008/abcd"))); assertNotNull(poii.getAlternateIdentifier()); assertEquals(2, poii.getAlternateIdentifier().size()); @@ -111,16 +130,12 @@ public class GraphCleaningFunctionsTest { poii .getAlternateIdentifier() .stream() - .filter(s -> s.getValue().equals("10.1007/s109090161569x")) - .findFirst() - .isPresent()); + .anyMatch(s -> s.getValue().equals("10.1007/s109090161569x"))); assertTrue( poii .getAlternateIdentifier() .stream() - .filter(s -> s.getValue().equals("10.1009/qwerty")) - .findFirst() - .isPresent()); + .anyMatch(s -> s.getValue().equals("10.1009/qwerty"))); Publication p_cleaned = GraphCleaningFunctions.cleanup(p_out); @@ -142,8 +157,8 @@ public class GraphCleaningFunctionsTest { assertEquals(2, pcii.getPid().size()); assertTrue( - pcii.getPid().stream().filter(s -> s.getValue().equals("10.1007/s109090161569x")).findFirst().isPresent()); - assertTrue(pcii.getPid().stream().filter(s -> s.getValue().equals("10.1008/abcd")).findFirst().isPresent()); + pcii.getPid().stream().anyMatch(s -> s.getValue().equals("10.1007/s109090161569x"))); + assertTrue(pcii.getPid().stream().anyMatch(s -> s.getValue().equals("10.1008/abcd"))); assertNotNull(pcii.getAlternateIdentifier()); assertEquals(1, pcii.getAlternateIdentifier().size()); @@ -151,9 +166,7 @@ public class GraphCleaningFunctionsTest { pcii .getAlternateIdentifier() .stream() - .filter(s -> s.getValue().equals("10.1009/qwerty")) - .findFirst() - .isPresent()); + .anyMatch(s -> s.getValue().equals("10.1009/qwerty"))); getAuthorPids(p_cleaned).forEach(pid -> { System.out @@ -172,17 +185,17 @@ public class GraphCleaningFunctionsTest { return pub .getAuthor() .stream() - .map(a -> a.getPid()) - .flatMap(p -> p.stream()) - .map(s -> s.getQualifier()); + .map(Author::getPid) + .flatMap(Collection::stream) + .map(StructuredProperty::getQualifier); } private Stream getAuthorPids(Result pub) { return pub .getAuthor() .stream() - .map(a -> a.getPid()) - .flatMap(p -> p.stream()); + .map(Author::getPid) + .flatMap(Collection::stream); } private List vocs() throws IOException { diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/relation.json b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/relation.json new file mode 100644 index 0000000000..97764de009 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/relation.json @@ -0,0 +1,10 @@ +{"relType":"resultResult","subRelType":"citation","relClass":"cites","source":"50|4ScienceCRIS::f66f1bd369679b5b077dcdf006089556","target":"50|openaire____::007a4870b31056f89b768cf508e1538e"} +{"relType":"resultResult","subRelType":"citation","relClass":"isCitedBy","source":"50|openaire____::007a4870b31056f89b768cf508e1538e","target":"50|4ScienceCRIS::f66f1bd369679b5b077dcdf006089556"} +{"relType":"resultResult","subRelType":"supplement","relClass":"isSupplementTo","source":"50|openaire____::007a4870b31056f89b768cf508e1538e","target":"50|4ScienceCRIS::f66f1bd369679b5b077dcdf006089556"} +{"relType":"resultResult","subRelType":"supplement","relClass":"isSupplementedBy","source":"50|openaire____::007a4870b31056f89b768cf508e1538e","target":"50|4ScienceCRIS::f66f1bd369679b5b077dcdf006089556"} +{"relType":"resultResult","subRelType":"part","relClass":"isPartOf","source":"50|openaire____::007a4870b31056f89b768cf508e1538e","target":"50|4ScienceCRIS::f66f1bd369679b5b077dcdf006089556"} +{"relType":"resultResult","subRelType":"part","relClass":"hasPart","source":"50|openaire____::007a4870b31056f89b768cf508e1538e","target":"50|4ScienceCRIS::f66f1bd369679b5b077dcdf006089556"} +{"relType":"resultResult","subRelType":"review","relClass":"isReviewedBy","source":"50|openaire____::007a4870b31056f89b768cf508e1538e","target":"50|4ScienceCRIS::f66f1bd369679b5b077dcdf006089556"} +{"relType":"resultResult","subRelType":"review","relClass":"reviews","source":"50|openaire____::007a4870b31056f89b768cf508e1538e","target":"50|4ScienceCRIS::f66f1bd369679b5b077dcdf006089556"} +{"relType":"resultResult","subRelType":"relationship","relClass":"isRelatedTo","source":"50|openaire____::007a4870b31056f89b768cf508e1538e","target":"50|4ScienceCRIS::f66f1bd369679b5b077dcdf006089556"} +{"relType":"resultResult","subRelType":"publicationDataset","relClass":"isRelatedTo","source":"50|openaire____::007a4870b31056f89b768cf508e1538e","target":"50|4ScienceCRIS::f66f1bd369679b5b077dcdf006089556"} \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/synonyms.txt b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/synonyms.txt index 729296522b..79dc7cd2d4 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/synonyms.txt +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/synonyms.txt @@ -1231,4 +1231,14 @@ dnet:review_levels @=@ 0001 @=@ 印刷物/電子媒体-学術雑誌論文(査 dnet:review_levels @=@ 0001 @=@ 印刷物/電子媒体-紀要論文(査読有り) dnet:review_levels @=@ 0001 @=@ 印刷物/電子媒体-雑誌記事(査読有り) dnet:review_levels @=@ 0001 @=@ 原著論文(査読有り) -dnet:review_levels @=@ 0001 @=@ 査読論文 \ No newline at end of file +dnet:review_levels @=@ 0001 @=@ 査読論文 +dnet:relation_relClass @=@ Cites @=@ cites +dnet:relation_relClass @=@ IsCitedBy @=@ isCitedBy +dnet:relation_relClass @=@ HasPart @=@ hasPart +dnet:relation_relClass @=@ IsPartOf @=@ isPartOf +dnet:relation_relClass @=@ IsReviewedBy @=@ isReviewedBy +dnet:relation_relClass @=@ Reviews @=@ reviews +dnet:relation_relClass @=@ IsSupplementTo @=@ isSupplementTo +dnet:relation_relClass @=@ IsSupplementedBy @=@ isSupplementedBy +dnet:relation_relClass @=@ IsRelatedTo @=@ isRelatedTo +dnet:relation_subRelType @=@ relationship @=@ publicationDataset \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/terms.txt b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/terms.txt index ba47aaf5c8..bb1e5fbf98 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/terms.txt +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/terms.txt @@ -1079,4 +1079,41 @@ dnet:topic_types @=@ dnet:topic_types @=@ ENRICH/MISSING/DATASET/IS_SUPPLEMENTED dnet:topic_types @=@ dnet:topic_types @=@ ENRICH/MISSING/AUTHOR/ORCID @=@ An Open Researcher and Contributor ID (ORCID) that can be associated to an author of your publications dnet:review_levels @=@ dnet:review_levels @=@ 0000 @=@ Unknown dnet:review_levels @=@ dnet:review_levels @=@ 0002 @=@ nonPeerReviewed -dnet:review_levels @=@ dnet:review_levels @=@ 0001 @=@ peerReviewed \ No newline at end of file +dnet:review_levels @=@ dnet:review_levels @=@ 0001 @=@ peerReviewed +dnet:relation_relClass @=@ dnet:relation_relClass @=@ Cites @=@ Cites +dnet:relation_relClass @=@ dnet:relation_relClass @=@ IsCitedBy @=@ IsCitedBy +dnet:relation_relClass @=@ dnet:relation_relClass @=@ HasPart @=@ HasPart +dnet:relation_relClass @=@ dnet:relation_relClass @=@ IsPartOf @=@ IsPartOf +dnet:relation_relClass @=@ dnet:relation_relClass @=@ IsReviewedBy @=@ IsReviewedBy +dnet:relation_relClass @=@ dnet:relation_relClass @=@ Reviews @=@ Reviews +dnet:relation_relClass @=@ dnet:relation_relClass @=@ IsSupplementTo @=@ IsSupplementTo +dnet:relation_relClass @=@ dnet:relation_relClass @=@ IsSupplementedBy @=@ IsSupplementedBy +dnet:relation_relClass @=@ dnet:relation_relClass @=@ IsRelatedTo @=@ IsRelatedTo +dnet:relation_relClass @=@ dnet:relation_relClass @=@ Compiles @=@ Compiles +dnet:relation_relClass @=@ dnet:relation_relClass @=@ Continues @=@ Continues +dnet:relation_relClass @=@ dnet:relation_relClass @=@ Documents @=@ Documents +dnet:relation_relClass @=@ dnet:relation_relClass @=@ HasAmongTopNSimilarDocuments @=@ HasAmongTopNSimilarDocuments +dnet:relation_relClass @=@ dnet:relation_relClass @=@ HasVersion @=@ HasVersion +dnet:relation_relClass @=@ dnet:relation_relClass @=@ IsAmongTopNSimilarDocuments @=@ IsAmongTopNSimilarDocuments +dnet:relation_relClass @=@ dnet:relation_relClass @=@ IsCompiledBy @=@ IsCompiledBy +dnet:relation_relClass @=@ dnet:relation_relClass @=@ IsContinuedBy @=@ IsContinuedBy +dnet:relation_relClass @=@ dnet:relation_relClass @=@ IsDerivedFrom @=@ IsDerivedFrom +dnet:relation_relClass @=@ dnet:relation_relClass @=@ IsDocumentedBy @=@ IsDocumentedBy +dnet:relation_relClass @=@ dnet:relation_relClass @=@ IsNewVersionOf @=@ IsNewVersionOf +dnet:relation_relClass @=@ dnet:relation_relClass @=@ IsObsoletedBy @=@ IsObsoletedBy +dnet:relation_relClass @=@ dnet:relation_relClass @=@ IsOriginalFormOf @=@ IsOriginalFormOf +dnet:relation_relClass @=@ dnet:relation_relClass @=@ IsPreviousVersionOf @=@ IsPreviousVersionOf +dnet:relation_relClass @=@ dnet:relation_relClass @=@ IsSourceOf @=@ IsSourceOf +dnet:relation_relClass @=@ dnet:relation_relClass @=@ IsVariantFormOf @=@ IsVariantFormOf +dnet:relation_subRelType @=@ dnet:relation_subRelType @=@ affiliation @=@ affiliation +dnet:relation_subRelType @=@ dnet:relation_subRelType @=@ citation @=@ citation +dnet:relation_subRelType @=@ dnet:relation_subRelType @=@ dedup @=@ dedup +dnet:relation_subRelType @=@ dnet:relation_subRelType @=@ outcome @=@ outcome +dnet:relation_subRelType @=@ dnet:relation_subRelType @=@ part @=@ part +dnet:relation_subRelType @=@ dnet:relation_subRelType @=@ participation @=@ participation +dnet:relation_subRelType @=@ dnet:relation_subRelType @=@ provision @=@ provision +dnet:relation_subRelType @=@ dnet:relation_subRelType @=@ relationship @=@ relationship +dnet:relation_subRelType @=@ dnet:relation_subRelType @=@ review @=@ review +dnet:relation_subRelType @=@ dnet:relation_subRelType @=@ similarity @=@ similarity +dnet:relation_subRelType @=@ dnet:relation_subRelType @=@ supplement @=@ supplement +dnet:relation_subRelType @=@ dnet:relation_subRelType @=@ version @=@ version \ No newline at end of file diff --git a/pom.xml b/pom.xml index 99525ef85a..61b0ad8733 100644 --- a/pom.xml +++ b/pom.xml @@ -753,7 +753,7 @@ 3.3.3 3.4.2 [2.12,3.0) - [2.7.17] + [2.7.18] [4.0.3] [6.0.5] [3.1.6] From 663b1556d7adc0b9f8a94f75d99c299e216b08a1 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Wed, 15 Sep 2021 16:40:25 +0200 Subject: [PATCH 2/3] manually integrating PR#140 https://code-repo.d4science.org/D-Net/dnet-hadoop/pulls/140 --- .../main/java/eu/dnetlib/dhp/common/Constants.java | 6 ++++++ .../dhp/common/collection/HttpConnector2.java | 14 +++++++++++++- .../collection/plugin/oai/OaiCollectorPlugin.java | 4 ++-- 3 files changed, 21 insertions(+), 3 deletions(-) diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/Constants.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/Constants.java index 8fab94e92f..a62a0ac799 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/Constants.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/Constants.java @@ -52,4 +52,10 @@ public class Constants { public static final String CONTENT_INVALIDRECORDS = "InvalidRecords"; public static final String CONTENT_TRANSFORMEDRECORDS = "transformedItems"; + // IETF Draft and used by Repositories like ZENODO , not included in APACHE HTTP java packages + // see https://ietf-wg-httpapi.github.io/ratelimit-headers/draft-ietf-httpapi-ratelimit-headers.html + public static final String HTTPHEADER_IETF_DRAFT_RATELIMIT_LIMIT = "X-RateLimit-Limit"; + public static final String HTTPHEADER_IETF_DRAFT_RATELIMIT_REMAINING = "X-RateLimit-Remaining"; + public static final String HTTPHEADER_IETF_DRAFT_RATELIMIT_RESET = "X-RateLimit-Reset"; + } diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/collection/HttpConnector2.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/collection/HttpConnector2.java index 724f5f0e1d..dd46ab1f4b 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/collection/HttpConnector2.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/collection/HttpConnector2.java @@ -15,12 +15,13 @@ import org.apache.http.HttpHeaders; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import eu.dnetlib.dhp.common.Constants; import eu.dnetlib.dhp.common.aggregation.AggregatorReport; /** * Migrated from https://svn.driver.research-infrastructures.eu/driver/dnet45/modules/dnet-modular-collector-service/trunk/src/main/java/eu/dnetlib/data/collector/plugins/HttpConnector.java * - * @author jochen, michele, andrea, alessia, claudio + * @author jochen, michele, andrea, alessia, claudio, andreas */ public class HttpConnector2 { @@ -112,6 +113,17 @@ public class HttpConnector2 { } int retryAfter = obtainRetryAfter(urlConn.getHeaderFields()); + String rateLimit = urlConn.getHeaderField(Constants.HTTPHEADER_IETF_DRAFT_RATELIMIT_LIMIT); + String rateRemaining = urlConn.getHeaderField(Constants.HTTPHEADER_IETF_DRAFT_RATELIMIT_REMAINING); + + if ((rateLimit != null) && (rateRemaining != null) && (Integer.parseInt(rateRemaining) < 2)) { + if (retryAfter > 0) { + backoffAndSleep(retryAfter); + } else { + backoffAndSleep(1000); + } + } + if (is2xx(urlConn.getResponseCode())) { input = urlConn.getInputStream(); responseType = urlConn.getContentType(); diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/oai/OaiCollectorPlugin.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/oai/OaiCollectorPlugin.java index 878e286e0a..2d04b25742 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/oai/OaiCollectorPlugin.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/oai/OaiCollectorPlugin.java @@ -66,11 +66,11 @@ public class OaiCollectorPlugin implements CollectorPlugin { } if (fromDate != null && !fromDate.matches(DATE_REGEX) && !fromDate.matches(UTC_DATETIME_REGEX)) { - throw new CollectorException("Invalid date (YYYY-MM-DD): " + fromDate); + throw new CollectorException("Invalid date (YYYY-MM-DD or YYYY-MM-DDT00:00:00Z): " + fromDate); } if (untilDate != null && !untilDate.matches(DATE_REGEX) && !untilDate.matches(UTC_DATETIME_REGEX)) { - throw new CollectorException("Invalid date (YYYY-MM-DD): " + untilDate); + throw new CollectorException("Invalid date (YYYY-MM-DD or YYYY-MM-DDT00:00:00Z): " + untilDate); } final Iterator> iters = sets From e9ccdf853f128a144fb193546553fa1bdcd10399 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 15 Sep 2021 18:44:54 +0200 Subject: [PATCH 3/3] related to https://code-repo.d4science.org/D-Net/dnet-hadoop/issues/132 --- .../java/eu/dnetlib/dhp/PropagationConstant.java | 15 ++++++++------- .../SparkOrcidToResultFromSemRelJob.java | 5 +++-- ...SparkResultToCommunityFromOrganizationJob.java | 4 +++- .../SparkResultToCommunityThroughSemRelJob.java | 4 +++- 4 files changed, 17 insertions(+), 11 deletions(-) diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/PropagationConstant.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/PropagationConstant.java index 0b4a80b2de..0d7c74475e 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/PropagationConstant.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/PropagationConstant.java @@ -68,27 +68,28 @@ public class PropagationConstant { getDataInfo( PROPAGATION_DATA_INFO_TYPE, PROPAGATION_COUNTRY_INSTREPO_CLASS_ID, - PROPAGATION_COUNTRY_INSTREPO_CLASS_NAME)); + PROPAGATION_COUNTRY_INSTREPO_CLASS_NAME, + ModelConstants.DNET_PROVENANCE_ACTIONS)); return nc; } public static DataInfo getDataInfo( - String inference_provenance, String inference_class_id, String inference_class_name) { + String inference_provenance, String inference_class_id, String inference_class_name, String qualifierSchema) { DataInfo di = new DataInfo(); di.setInferred(true); di.setDeletedbyinference(false); di.setTrust("0.85"); di.setInferenceprovenance(inference_provenance); - di.setProvenanceaction(getQualifier(inference_class_id, inference_class_name)); + di.setProvenanceaction(getQualifier(inference_class_id, inference_class_name, qualifierSchema)); return di; } - public static Qualifier getQualifier(String inference_class_id, String inference_class_name) { + public static Qualifier getQualifier(String inference_class_id, String inference_class_name, String qualifierSchema) { Qualifier pa = new Qualifier(); pa.setClassid(inference_class_id); pa.setClassname(inference_class_name); - pa.setSchemeid(ModelConstants.DNET_PID_TYPES); - pa.setSchemename(ModelConstants.DNET_PID_TYPES); + pa.setSchemeid(qualifierSchema); + pa.setSchemename(qualifierSchema); return pa; } @@ -107,7 +108,7 @@ public class PropagationConstant { r.setRelClass(rel_class); r.setRelType(rel_type); r.setSubRelType(subrel_type); - r.setDataInfo(getDataInfo(inference_provenance, inference_class_id, inference_class_name)); + r.setDataInfo(getDataInfo(inference_provenance, inference_class_id, inference_class_name, ModelConstants.DNET_PROVENANCE_ACTIONS)); return r; } diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkOrcidToResultFromSemRelJob.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkOrcidToResultFromSemRelJob.java index 40faef7f33..68949b9004 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkOrcidToResultFromSemRelJob.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkOrcidToResultFromSemRelJob.java @@ -173,13 +173,14 @@ public class SparkOrcidToResultFromSemRelJob { if (toaddpid) { StructuredProperty p = new StructuredProperty(); p.setValue(autoritative_author.getOrcid()); - p.setQualifier(getQualifier(ModelConstants.ORCID_PENDING, ModelConstants.ORCID_CLASSNAME)); + p.setQualifier(getQualifier(ModelConstants.ORCID_PENDING, ModelConstants.ORCID_CLASSNAME, ModelConstants.DNET_PID_TYPES)); p .setDataInfo( getDataInfo( PROPAGATION_DATA_INFO_TYPE, PROPAGATION_ORCID_TO_RESULT_FROM_SEM_REL_CLASS_ID, - PROPAGATION_ORCID_TO_RESULT_FROM_SEM_REL_CLASS_NAME)); + PROPAGATION_ORCID_TO_RESULT_FROM_SEM_REL_CLASS_NAME, + ModelConstants.DNET_PROVENANCE_ACTIONS)); Optional> authorPid = Optional.ofNullable(author.getPid()); if (authorPid.isPresent()) { diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/SparkResultToCommunityFromOrganizationJob.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/SparkResultToCommunityFromOrganizationJob.java index cb80a90ca9..1289ff644f 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/SparkResultToCommunityFromOrganizationJob.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/SparkResultToCommunityFromOrganizationJob.java @@ -10,6 +10,7 @@ import java.util.List; import java.util.Optional; import java.util.stream.Collectors; +import eu.dnetlib.dhp.schema.common.ModelConstants; import org.apache.commons.io.IOUtils; import org.apache.spark.SparkConf; import org.apache.spark.api.java.function.MapFunction; @@ -128,7 +129,8 @@ public class SparkResultToCommunityFromOrganizationJob { getDataInfo( PROPAGATION_DATA_INFO_TYPE, PROPAGATION_RESULT_COMMUNITY_ORGANIZATION_CLASS_ID, - PROPAGATION_RESULT_COMMUNITY_ORGANIZATION_CLASS_NAME))); + PROPAGATION_RESULT_COMMUNITY_ORGANIZATION_CLASS_NAME, + ModelConstants.DNET_PROVENANCE_ACTIONS))); propagatedContexts.add(newContext); } } diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/SparkResultToCommunityThroughSemRelJob.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/SparkResultToCommunityThroughSemRelJob.java index 3690351fb8..7f76ead94b 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/SparkResultToCommunityThroughSemRelJob.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/SparkResultToCommunityThroughSemRelJob.java @@ -7,6 +7,7 @@ import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkHiveSession; import java.util.*; import java.util.stream.Collectors; +import eu.dnetlib.dhp.schema.common.ModelConstants; import org.apache.commons.io.IOUtils; import org.apache.spark.SparkConf; import org.apache.spark.api.java.function.MapFunction; @@ -124,7 +125,8 @@ public class SparkResultToCommunityThroughSemRelJob { getDataInfo( PROPAGATION_DATA_INFO_TYPE, PROPAGATION_RESULT_COMMUNITY_SEMREL_CLASS_ID, - PROPAGATION_RESULT_COMMUNITY_SEMREL_CLASS_NAME))); + PROPAGATION_RESULT_COMMUNITY_SEMREL_CLASS_NAME, + ModelConstants.DNET_PROVENANCE_ACTIONS))); return newContext; } return null;