From 99cfb027facae6727a7dc8f6c5f3e353dce324c4 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Fri, 23 Apr 2021 17:09:36 +0200 Subject: [PATCH 01/16] making ODF record parsing namespace unaware (#6629) --- .../dhp/oa/graph/raw/OdfToOafMapper.java | 81 +++++++++++-------- 1 file changed, 46 insertions(+), 35 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java index 683b37630..c2c2cb645 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java @@ -40,19 +40,20 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper { @Override protected List prepareTitles(final Document doc, final DataInfo info) { - return prepareListStructProps(doc, "//datacite:title", MAIN_TITLE_QUALIFIER, info); + return prepareListStructProps( + doc, "//*[local-name()='titles']/*[local-name()='title']", MAIN_TITLE_QUALIFIER, info); } @Override protected List prepareAuthors(final Document doc, final DataInfo info) { final List res = new ArrayList<>(); int pos = 1; - for (final Object o : doc.selectNodes("//datacite:creator")) { + for (final Object o : doc.selectNodes("//*[local-name()='creator']")) { final Node n = (Node) o; final Author author = new Author(); - final String fullname = n.valueOf("./datacite:creatorName"); - final String name = n.valueOf("./datacite:givenName"); - final String surname = n.valueOf("./datacite:familyName"); + final String fullname = n.valueOf("./*[local-name()='creatorName']"); + final String name = n.valueOf("./*[local-name()='givenName']"); + final String surname = n.valueOf("./*[local-name()='familyName']"); if (StringUtils.isNotBlank(fullname) || StringUtils.isNotBlank(name) || StringUtils.isNotBlank(surname)) { author.setFullname(fullname); @@ 
-74,7 +75,7 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper { author.setFullname(String.format("%s, %s", author.getSurname(), author.getName())); } - author.setAffiliation(prepareListFields(n, "./datacite:affiliation", info)); + author.setAffiliation(prepareListFields(n, "./*[local-name()='affiliation']", info)); author.setPid(preparePids(n, info)); author.setRank(pos++); res.add(author); @@ -85,7 +86,7 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper { private List preparePids(final Node n, final DataInfo info) { final List res = new ArrayList<>(); - for (final Object o : n.selectNodes("./datacite:nameIdentifier")) { + for (final Object o : n.selectNodes("./*[local-name()='nameIdentifier']")) { final String id = ((Node) o).getText(); final String type = ((Node) o) @@ -128,23 +129,25 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper { .setProcessingchargecurrency(field(doc.valueOf("//oaf:processingchargeamount/@currency"), info)); final Set url = new HashSet<>(); - for (final Object o : doc.selectNodes("//datacite:alternateIdentifier[@alternateIdentifierType='URL']")) { + for (final Object o : doc + .selectNodes("//*[local-name()='alternateIdentifier' and ./@alternateIdentifierType='URL']")) { url.add(((Node) o).getText().trim()); } for (final Object o : doc - .selectNodes("//datacite:alternateIdentifier[@alternateIdentifierType='landingPage']")) { + .selectNodes("//*[local-name()='alternateIdentifier' and ./@alternateIdentifierType='landingPage']")) { url.add(((Node) o).getText().trim()); } - for (final Object o : doc.selectNodes("//datacite:identifier[@identifierType='URL']")) { + for (final Object o : doc.selectNodes("//*[local-name()='identifier' and ./@identifierType='URL']")) { url.add(((Node) o).getText().trim()); } - for (final Object o : doc.selectNodes("//datacite:identifier[@identifierType='landingPage']")) { + for (final Object o : doc.selectNodes("//*[local-name()='identifier' and 
./@identifierType='landingPage']")) { url.add(((Node) o).getText().trim()); } - for (final Object o : doc.selectNodes("//datacite:alternateIdentifier[@alternateIdentifierType='DOI']")) { + for (final Object o : doc + .selectNodes("//*[local-name()='alternateIdentifier' and ./@alternateIdentifierType='DOI']")) { url.add(HTTP_DX_DOI_PREIFX + ((Node) o).getText().trim()); } - for (final Object o : doc.selectNodes("//datacite:identifier[@identifierType='DOI']")) { + for (final Object o : doc.selectNodes("//*[local-name()='identifier' and ./@identifierType='DOI']")) { url.add(HTTP_DX_DOI_PREIFX + ((Node) o).getText().trim()); } if (!url.isEmpty()) { @@ -162,7 +165,7 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper { @Override protected List prepareRelevantDates(final Document doc, final DataInfo info) { final List res = new ArrayList<>(); - for (final Object o : doc.selectNodes("//datacite:date")) { + for (final Object o : doc.selectNodes("//*[local-name()='date']")) { final String dateType = ((Node) o).valueOf("@dateType"); if (StringUtils.isBlank(dateType) || (!dateType.equalsIgnoreCase("Accepted") @@ -192,32 +195,32 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper { @Override protected List> prepareContributors(final Document doc, final DataInfo info) { - return prepareListFields(doc, "//datacite:contributorName", info); + return prepareListFields(doc, "//*[local-name()='contributorName']", info); } @Override protected List> prepareFormats(final Document doc, final DataInfo info) { - return prepareListFields(doc, "//datacite:format", info); + return prepareListFields(doc, "//*[local-name()='format']", info); } @Override protected Field preparePublisher(final Document doc, final DataInfo info) { - return prepareField(doc, "//datacite:publisher", info); + return prepareField(doc, "//*[local-name()='publisher']", info); } @Override protected List> prepareDescriptions(final Document doc, final DataInfo info) { - return 
prepareListFields(doc, "//datacite:description[@descriptionType='Abstract']", info); + return prepareListFields(doc, "//*[local-name()='description' and ./@descriptionType='Abstract']", info); } @Override protected List prepareSubjects(final Document doc, final DataInfo info) { - return prepareListStructProps(doc, "//datacite:subject", info); + return prepareListStructProps(doc, "//*[local-name()='subject']", info); } @Override protected Qualifier prepareLanguages(final Document doc) { - return prepareQualifier(doc, "//datacite:language", DNET_LANGUAGES); + return prepareQualifier(doc, "//*[local-name()='language']", DNET_LANGUAGES); } @Override @@ -232,7 +235,9 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper { final Document doc, final DataInfo info) { return prepareListFields( - doc, "//datacite:contributor[@contributorType='ContactGroup']/datacite:contributorName", info); + doc, + "//*[local-name()='contributor' and ./@contributorType='ContactGroup']/*[local-name()='contributorName']", + info); } @Override @@ -240,12 +245,14 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper { final Document doc, final DataInfo info) { return prepareListFields( - doc, "//datacite:contributor[@contributorType='ContactPerson']/datacite:contributorName", info); + doc, + "//*[local-name()='contributor' and ./@contributorType='ContactPerson']/*[local-name()='contributorName']", + info); } @Override protected Qualifier prepareSoftwareProgrammingLanguage(final Document doc, final DataInfo info) { - return prepareQualifier(doc, "//datacite:format", "dnet:programming_languages"); + return prepareQualifier(doc, "//*[local-name()='format']", "dnet:programming_languages"); } @Override @@ -267,7 +274,9 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper { final Document doc, final DataInfo info) { return prepareListFields( - doc, "//datacite:relatedIdentifier[@relatedIdentifierType='URL' and @relationType='IsDocumentedBy']", info); + doc, + 
"//*[local-name()='relatedIdentifier' and ./@relatedIdentifierType='URL' and @relationType='IsDocumentedBy']", + info); } // DATASETS @@ -276,11 +285,11 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper { protected List prepareDatasetGeoLocations(final Document doc, final DataInfo info) { final List res = new ArrayList<>(); - for (final Object o : doc.selectNodes("//datacite:geoLocation")) { + for (final Object o : doc.selectNodes("//*[local-name()='geoLocation']")) { final GeoLocation loc = new GeoLocation(); - loc.setBox(((Node) o).valueOf("./datacite:geoLocationBox")); - loc.setPlace(((Node) o).valueOf("./datacite:geoLocationPlace")); - loc.setPoint(((Node) o).valueOf("./datacite:geoLocationPoint")); + loc.setBox(((Node) o).valueOf("./*[local-name()='geoLocationBox']")); + loc.setPlace(((Node) o).valueOf("./*[local-name()='geoLocationPlace']")); + loc.setPoint(((Node) o).valueOf("./*[local-name()='geoLocationPoint']")); res.add(loc); } return res; @@ -297,17 +306,17 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper { protected Field prepareDatasetLastMetadataUpdate( final Document doc, final DataInfo info) { - return prepareField(doc, "//datacite:date[@dateType='Updated']", info); + return prepareField(doc, "//*[local-name()='date' and ./@dateType='Updated']", info); } @Override protected Field prepareDatasetVersion(final Document doc, final DataInfo info) { - return prepareField(doc, "//datacite:version", info); + return prepareField(doc, "//*[local-name()='version']", info); } @Override protected Field prepareDatasetSize(final Document doc, final DataInfo info) { - return prepareField(doc, "//datacite:size", info); + return prepareField(doc, "//*[local-name()='size']", info); } @Override @@ -317,7 +326,7 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper { @Override protected Field prepareDatasetStorageDate(final Document doc, final DataInfo info) { - return prepareField(doc, 
"//datacite:date[@dateType='Issued']", info); + return prepareField(doc, "//*[local-name()='date' and ./@dateType='Issued']", info); } @Override @@ -331,7 +340,8 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper { final List res = new ArrayList<>(); - for (final Object o : doc.selectNodes("//datacite:relatedIdentifier[@relatedIdentifierType='OPENAIRE']")) { + for (final Object o : doc + .selectNodes("//*[local-name()='relatedIdentifier' and ./@relatedIdentifierType='OPENAIRE']")) { final String originalId = ((Node) o).getText(); @@ -385,13 +395,14 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper { res .addAll( prepareListStructPropsWithValidQualifier( - doc, "//datacite:identifier[@identifierType != 'URL' and @identifierType != 'landingPage']", + doc, + "//*[local-name()='identifier' and ./@identifierType != 'URL' and ./@identifierType != 'landingPage']", "@identifierType", DNET_PID_TYPES, info)); res .addAll( prepareListStructPropsWithValidQualifier( doc, - "//datacite:alternateIdentifier[@alternateIdentifierType != 'URL' and @alternateIdentifierType != 'landingPage']", + "//*[local-name()='alternateIdentifier' and ./@alternateIdentifierType != 'URL' and ./@alternateIdentifierType != 'landingPage']", "@alternateIdentifierType", DNET_PID_TYPES, info)); return Lists.newArrayList(res); } From 7ed107be53757d6920a67d82d6a8e386bb88186d Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Fri, 23 Apr 2021 17:52:36 +0200 Subject: [PATCH 02/16] depending on external dhp-schemas module --- dhp-common/pom.xml | 1 - dhp-schemas/README.md | 11 - dhp-schemas/pom.xml | 73 --- .../dhp/schema/action/AtomicAction.java | 40 -- .../action/AtomicActionDeserializer.java | 32 -- .../dnetlib/dhp/schema/common/EntityType.java | 21 - .../dhp/schema/common/LicenseComparator.java | 69 --- .../dhp/schema/common/MainEntityType.java | 7 - .../dhp/schema/common/ModelConstants.java | 130 ----- .../dhp/schema/common/ModelSupport.java | 501 ------------------ 
.../dhp/schema/common/RelationInverse.java | 46 -- .../eu/dnetlib/dhp/schema/dump/oaf/APC.java | 29 - .../dhp/schema/dump/oaf/AccessRight.java | 31 -- .../dnetlib/dhp/schema/dump/oaf/Author.java | 73 --- .../dhp/schema/dump/oaf/Container.java | 136 ----- .../dhp/schema/dump/oaf/ControlledField.java | 38 -- .../dnetlib/dhp/schema/dump/oaf/Country.java | 37 -- .../dnetlib/dhp/schema/dump/oaf/Funder.java | 36 -- .../dhp/schema/dump/oaf/GeoLocation.java | 53 -- .../dnetlib/dhp/schema/dump/oaf/Instance.java | 81 --- .../dnetlib/dhp/schema/dump/oaf/KeyValue.java | 48 -- .../eu/dnetlib/dhp/schema/dump/oaf/Pid.java | 45 -- .../dnetlib/dhp/schema/dump/oaf/Project.java | 51 -- .../dhp/schema/dump/oaf/Provenance.java | 41 -- .../dhp/schema/dump/oaf/Qualifier.java | 42 -- .../dnetlib/dhp/schema/dump/oaf/Result.java | 379 ------------- .../dnetlib/dhp/schema/dump/oaf/Subject.java | 34 -- .../dump/oaf/community/CommunityInstance.java | 36 -- .../dump/oaf/community/CommunityResult.java | 63 --- .../schema/dump/oaf/community/Context.java | 40 -- .../dhp/schema/dump/oaf/community/Funder.java | 23 - .../schema/dump/oaf/community/Project.java | 47 -- .../dhp/schema/dump/oaf/graph/Constants.java | 21 - .../dhp/schema/dump/oaf/graph/Datasource.java | 316 ----------- .../dhp/schema/dump/oaf/graph/Funder.java | 22 - .../dhp/schema/dump/oaf/graph/Fundings.java | 35 -- .../dhp/schema/dump/oaf/graph/Granted.java | 55 -- .../schema/dump/oaf/graph/GraphResult.java | 24 - .../dump/oaf/graph/H2020Classification.java | 82 --- .../dhp/schema/dump/oaf/graph/Node.java | 38 -- .../schema/dump/oaf/graph/Organization.java | 86 --- .../dhp/schema/dump/oaf/graph/Programme.java | 36 -- .../dhp/schema/dump/oaf/graph/Project.java | 192 ------- .../dhp/schema/dump/oaf/graph/RelType.java | 39 -- .../dhp/schema/dump/oaf/graph/Relation.java | 67 --- .../dump/oaf/graph/ResearchCommunity.java | 20 - .../dump/oaf/graph/ResearchInitiative.java | 75 --- .../eu/dnetlib/dhp/schema/oaf/Author.java | 89 ---- 
.../eu/dnetlib/dhp/schema/oaf/Context.java | 46 -- .../eu/dnetlib/dhp/schema/oaf/Country.java | 34 -- .../eu/dnetlib/dhp/schema/oaf/DataInfo.java | 85 --- .../eu/dnetlib/dhp/schema/oaf/Dataset.java | 116 ---- .../eu/dnetlib/dhp/schema/oaf/Datasource.java | 472 ----------------- .../dhp/schema/oaf/ExternalReference.java | 119 ----- .../eu/dnetlib/dhp/schema/oaf/ExtraInfo.java | 77 --- .../java/eu/dnetlib/dhp/schema/oaf/Field.java | 45 -- .../dnetlib/dhp/schema/oaf/GeoLocation.java | 76 --- .../dhp/schema/oaf/H2020Classification.java | 88 --- .../dhp/schema/oaf/H2020Programme.java | 44 -- .../eu/dnetlib/dhp/schema/oaf/Instance.java | 152 ------ .../eu/dnetlib/dhp/schema/oaf/Journal.java | 167 ------ .../eu/dnetlib/dhp/schema/oaf/KeyValue.java | 74 --- .../eu/dnetlib/dhp/schema/oaf/Measure.java | 59 --- .../dnetlib/dhp/schema/oaf/OAIProvenance.java | 33 -- .../java/eu/dnetlib/dhp/schema/oaf/Oaf.java | 102 ---- .../eu/dnetlib/dhp/schema/oaf/OafEntity.java | 126 ----- .../dnetlib/dhp/schema/oaf/Organization.java | 214 -------- .../dhp/schema/oaf/OriginDescription.java | 88 --- .../dhp/schema/oaf/OtherResearchProduct.java | 60 --- .../eu/dnetlib/dhp/schema/oaf/Project.java | 358 ------------- .../dnetlib/dhp/schema/oaf/Publication.java | 39 -- .../eu/dnetlib/dhp/schema/oaf/Qualifier.java | 87 --- .../eu/dnetlib/dhp/schema/oaf/Relation.java | 165 ------ .../eu/dnetlib/dhp/schema/oaf/Result.java | 351 ------------ .../eu/dnetlib/dhp/schema/oaf/Software.java | 80 --- .../dhp/schema/oaf/StructuredProperty.java | 60 --- .../dnetlib/dhp/schema/orcid/AuthorData.java | 72 --- .../eu/dnetlib/dhp/schema/orcid/OrcidDOI.java | 25 - .../dhp/schema/scholexplorer/DLIDataset.java | 89 ---- .../schema/scholexplorer/DLIPublication.java | 87 --- .../dhp/schema/scholexplorer/DLIUnknown.java | 132 ----- .../dhp/schema/scholexplorer/OafUtils.scala | 90 ---- .../schema/scholexplorer/ProvenaceInfo.java | 47 -- .../dhp/schema/action/AtomicActionTest.java | 40 -- 
.../dhp/schema/common/ModelSupportTest.java | 37 -- .../dnetlib/dhp/schema/oaf/MeasureTest.java | 57 -- .../eu/dnetlib/dhp/schema/oaf/MergeTest.java | 138 ----- .../dhp/schema/scholexplorer/DLItest.java | 83 --- dhp-workflows/dhp-actionmanager/pom.xml | 1 - dhp-workflows/dhp-aggregation/pom.xml | 1 - dhp-workflows/dhp-blacklist/pom.xml | 1 - dhp-workflows/dhp-broker-events/pom.xml | 1 - dhp-workflows/dhp-dedup-openaire/pom.xml | 1 - dhp-workflows/dhp-dedup-scholexplorer/pom.xml | 1 - dhp-workflows/dhp-doiboost/pom.xml | 1 - dhp-workflows/dhp-enrichment/pom.xml | 1 - dhp-workflows/dhp-graph-mapper/pom.xml | 1 - .../dhp-graph-provision-scholexplorer/pom.xml | 1 - dhp-workflows/dhp-graph-provision/pom.xml | 1 - pom.xml | 7 +- 100 files changed, 6 insertions(+), 7817 deletions(-) delete mode 100644 dhp-schemas/README.md delete mode 100644 dhp-schemas/pom.xml delete mode 100644 dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/action/AtomicAction.java delete mode 100644 dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/action/AtomicActionDeserializer.java delete mode 100644 dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/common/EntityType.java delete mode 100644 dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/common/LicenseComparator.java delete mode 100644 dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/common/MainEntityType.java delete mode 100644 dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/common/ModelConstants.java delete mode 100644 dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/common/ModelSupport.java delete mode 100644 dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/common/RelationInverse.java delete mode 100644 dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/APC.java delete mode 100644 dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/AccessRight.java delete mode 100644 dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/Author.java delete mode 100644 dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/Container.java delete mode 
100644 dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/ControlledField.java delete mode 100644 dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/Country.java delete mode 100644 dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/Funder.java delete mode 100644 dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/GeoLocation.java delete mode 100644 dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/Instance.java delete mode 100644 dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/KeyValue.java delete mode 100644 dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/Pid.java delete mode 100644 dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/Project.java delete mode 100644 dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/Provenance.java delete mode 100644 dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/Qualifier.java delete mode 100644 dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/Result.java delete mode 100644 dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/Subject.java delete mode 100644 dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/community/CommunityInstance.java delete mode 100644 dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/community/CommunityResult.java delete mode 100644 dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/community/Context.java delete mode 100644 dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/community/Funder.java delete mode 100644 dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/community/Project.java delete mode 100644 dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/graph/Constants.java delete mode 100644 dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/graph/Datasource.java delete mode 100644 dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/graph/Funder.java delete mode 100644 dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/graph/Fundings.java delete mode 100644 
dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/graph/Granted.java delete mode 100644 dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/graph/GraphResult.java delete mode 100644 dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/graph/H2020Classification.java delete mode 100644 dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/graph/Node.java delete mode 100644 dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/graph/Organization.java delete mode 100644 dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/graph/Programme.java delete mode 100644 dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/graph/Project.java delete mode 100644 dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/graph/RelType.java delete mode 100644 dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/graph/Relation.java delete mode 100644 dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/graph/ResearchCommunity.java delete mode 100644 dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/graph/ResearchInitiative.java delete mode 100644 dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Author.java delete mode 100644 dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Context.java delete mode 100644 dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Country.java delete mode 100644 dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/DataInfo.java delete mode 100644 dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Dataset.java delete mode 100644 dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Datasource.java delete mode 100644 dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/ExternalReference.java delete mode 100644 dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/ExtraInfo.java delete mode 100644 dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Field.java delete mode 100644 dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/GeoLocation.java delete mode 100644 
dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/H2020Classification.java delete mode 100644 dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/H2020Programme.java delete mode 100644 dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Instance.java delete mode 100644 dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Journal.java delete mode 100644 dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/KeyValue.java delete mode 100644 dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Measure.java delete mode 100644 dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/OAIProvenance.java delete mode 100644 dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Oaf.java delete mode 100644 dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/OafEntity.java delete mode 100644 dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Organization.java delete mode 100644 dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/OriginDescription.java delete mode 100644 dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/OtherResearchProduct.java delete mode 100644 dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Project.java delete mode 100644 dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Publication.java delete mode 100644 dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Qualifier.java delete mode 100644 dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Relation.java delete mode 100644 dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Result.java delete mode 100644 dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Software.java delete mode 100644 dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/StructuredProperty.java delete mode 100644 dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/orcid/AuthorData.java delete mode 100644 dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/orcid/OrcidDOI.java delete mode 100644 dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/scholexplorer/DLIDataset.java delete mode 100644 
dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/scholexplorer/DLIPublication.java delete mode 100644 dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/scholexplorer/DLIUnknown.java delete mode 100644 dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/scholexplorer/OafUtils.scala delete mode 100644 dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/scholexplorer/ProvenaceInfo.java delete mode 100644 dhp-schemas/src/test/java/eu/dnetlib/dhp/schema/action/AtomicActionTest.java delete mode 100644 dhp-schemas/src/test/java/eu/dnetlib/dhp/schema/common/ModelSupportTest.java delete mode 100644 dhp-schemas/src/test/java/eu/dnetlib/dhp/schema/oaf/MeasureTest.java delete mode 100644 dhp-schemas/src/test/java/eu/dnetlib/dhp/schema/oaf/MergeTest.java delete mode 100644 dhp-schemas/src/test/java/eu/dnetlib/dhp/schema/scholexplorer/DLItest.java diff --git a/dhp-common/pom.xml b/dhp-common/pom.xml index b295bc1f1..c4c8aeb61 100644 --- a/dhp-common/pom.xml +++ b/dhp-common/pom.xml @@ -101,7 +101,6 @@ eu.dnetlib.dhp dhp-schemas - ${project.version} diff --git a/dhp-schemas/README.md b/dhp-schemas/README.md deleted file mode 100644 index 7431cda42..000000000 --- a/dhp-schemas/README.md +++ /dev/null @@ -1,11 +0,0 @@ -Description of the project --------------------------- -This project defines **object schemas** of the OpenAIRE main entities and the relationships that intercur among them. -Namely it defines the model for - -- **research product (result)** which subclasses in publication, dataset, other research product, software -- **data source** object describing the data provider (institutional repository, aggregators, cris systems) -- **organization** research bodies managing a data source or participating to a research project -- **project** research project - -Te serialization of such objects (data store files) are used to pass data between workflow nodes in the processing pipeline. 
diff --git a/dhp-schemas/pom.xml b/dhp-schemas/pom.xml deleted file mode 100644 index 73efeabb4..000000000 --- a/dhp-schemas/pom.xml +++ /dev/null @@ -1,73 +0,0 @@ - - - 4.0.0 - - - eu.dnetlib.dhp - dhp - 1.2.4-SNAPSHOT - ../pom.xml - - - dhp-schemas - jar - - This module contains common schema classes meant to be used across the dnet-hadoop submodules - - - - - net.alchim31.maven - scala-maven-plugin - 4.0.1 - - - scala-compile-first - initialize - - add-source - compile - - - - scala-test-compile - process-test-resources - - testCompile - - - - - ${scala.version} - - - - - - - - - - commons-io - commons-io - - - - org.apache.commons - commons-lang3 - - - - com.fasterxml.jackson.core - jackson-databind - - - - com.google.guava - guava - - - - - - diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/action/AtomicAction.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/action/AtomicAction.java deleted file mode 100644 index 84b22c81c..000000000 --- a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/action/AtomicAction.java +++ /dev/null @@ -1,40 +0,0 @@ - -package eu.dnetlib.dhp.schema.action; - -import java.io.Serializable; - -import com.fasterxml.jackson.databind.annotation.JsonDeserialize; - -import eu.dnetlib.dhp.schema.oaf.Oaf; - -@JsonDeserialize(using = AtomicActionDeserializer.class) -public class AtomicAction implements Serializable { - - private Class clazz; - - private T payload; - - public AtomicAction() { - } - - public AtomicAction(Class clazz, T payload) { - this.clazz = clazz; - this.payload = payload; - } - - public Class getClazz() { - return clazz; - } - - public void setClazz(Class clazz) { - this.clazz = clazz; - } - - public T getPayload() { - return payload; - } - - public void setPayload(T payload) { - this.payload = payload; - } -} diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/action/AtomicActionDeserializer.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/action/AtomicActionDeserializer.java deleted file 
mode 100644 index 7b88e9c7e..000000000 --- a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/action/AtomicActionDeserializer.java +++ /dev/null @@ -1,32 +0,0 @@ - -package eu.dnetlib.dhp.schema.action; - -import java.io.IOException; - -import com.fasterxml.jackson.core.JsonParser; -import com.fasterxml.jackson.core.JsonProcessingException; -import com.fasterxml.jackson.databind.DeserializationContext; -import com.fasterxml.jackson.databind.JsonDeserializer; -import com.fasterxml.jackson.databind.JsonNode; -import com.fasterxml.jackson.databind.ObjectMapper; - -import eu.dnetlib.dhp.schema.oaf.Oaf; - -public class AtomicActionDeserializer extends JsonDeserializer { - - @Override - public Object deserialize(JsonParser jp, DeserializationContext ctxt) - throws IOException { - JsonNode node = jp.getCodec().readTree(jp); - String classTag = node.get("clazz").asText(); - JsonNode payload = node.get("payload"); - ObjectMapper mapper = new ObjectMapper(); - - try { - final Class clazz = Class.forName(classTag); - return new AtomicAction(clazz, (Oaf) mapper.readValue(payload.toString(), clazz)); - } catch (ClassNotFoundException e) { - throw new IOException(e); - } - } -} diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/common/EntityType.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/common/EntityType.java deleted file mode 100644 index 54f30cf33..000000000 --- a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/common/EntityType.java +++ /dev/null @@ -1,21 +0,0 @@ - -package eu.dnetlib.dhp.schema.common; - -import eu.dnetlib.dhp.schema.oaf.OafEntity; - -/** Actual entity types in the Graph */ -public enum EntityType { - publication, dataset, otherresearchproduct, software, datasource, organization, project; - - /** - * Resolves the EntityType, given the relative class name - * - * @param clazz the given class name - * @param actual OafEntity subclass - * @return the EntityType associated to the given class - */ - public static EntityType fromClass(Class 
clazz) { - - return EntityType.valueOf(clazz.getSimpleName().toLowerCase()); - } -} diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/common/LicenseComparator.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/common/LicenseComparator.java deleted file mode 100644 index db523ad1a..000000000 --- a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/common/LicenseComparator.java +++ /dev/null @@ -1,69 +0,0 @@ - -package eu.dnetlib.dhp.schema.common; - -import java.util.Comparator; - -import eu.dnetlib.dhp.schema.oaf.Qualifier; - -public class LicenseComparator implements Comparator { - - @Override - public int compare(Qualifier left, Qualifier right) { - - if (left == null && right == null) - return 0; - if (left == null) - return 1; - if (right == null) - return -1; - - String lClass = left.getClassid(); - String rClass = right.getClassid(); - - if (lClass.equals(rClass)) - return 0; - - if (lClass.equals("OPEN SOURCE")) - return -1; - if (rClass.equals("OPEN SOURCE")) - return 1; - - if (lClass.equals("OPEN")) - return -1; - if (rClass.equals("OPEN")) - return 1; - - if (lClass.equals("6MONTHS")) - return -1; - if (rClass.equals("6MONTHS")) - return 1; - - if (lClass.equals("12MONTHS")) - return -1; - if (rClass.equals("12MONTHS")) - return 1; - - if (lClass.equals("EMBARGO")) - return -1; - if (rClass.equals("EMBARGO")) - return 1; - - if (lClass.equals("RESTRICTED")) - return -1; - if (rClass.equals("RESTRICTED")) - return 1; - - if (lClass.equals("CLOSED")) - return -1; - if (rClass.equals("CLOSED")) - return 1; - - if (lClass.equals("UNKNOWN")) - return -1; - if (rClass.equals("UNKNOWN")) - return 1; - - // Else (but unlikely), lexicographical ordering will do. 
- return lClass.compareTo(rClass); - } -} diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/common/MainEntityType.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/common/MainEntityType.java deleted file mode 100644 index cda8ba484..000000000 --- a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/common/MainEntityType.java +++ /dev/null @@ -1,7 +0,0 @@ - -package eu.dnetlib.dhp.schema.common; - -/** Main entity types in the Graph */ -public enum MainEntityType { - result, datasource, organization, project -} diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/common/ModelConstants.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/common/ModelConstants.java deleted file mode 100644 index bff92ecf9..000000000 --- a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/common/ModelConstants.java +++ /dev/null @@ -1,130 +0,0 @@ - -package eu.dnetlib.dhp.schema.common; - -import eu.dnetlib.dhp.schema.oaf.DataInfo; -import eu.dnetlib.dhp.schema.oaf.KeyValue; -import eu.dnetlib.dhp.schema.oaf.Qualifier; - -public class ModelConstants { - - public static final String ORCID = "orcid"; - public static final String ORCID_PENDING = "orcid_pending"; - public static final String ORCID_CLASSNAME = "Open Researcher and Contributor ID"; - - public static final String DNET_SUBJECT_TYPOLOGIES = "dnet:subject_classification_typologies"; - public static final String DNET_RESULT_TYPOLOGIES = "dnet:result_typologies"; - public static final String DNET_PUBLICATION_RESOURCE = "dnet:publication_resource"; - public static final String DNET_ACCESS_MODES = "dnet:access_modes"; - public static final String DNET_LANGUAGES = "dnet:languages"; - public static final String DNET_PID_TYPES = "dnet:pid_types"; - public static final String DNET_DATA_CITE_DATE = "dnet:dataCite_date"; - public static final String DNET_DATA_CITE_RESOURCE = "dnet:dataCite_resource"; - public static final String DNET_PROVENANCE_ACTIONS = "dnet:provenanceActions"; - public static final String 
DNET_COUNTRY_TYPE = "dnet:countries"; - public static final String DNET_REVIEW_LEVELS = "dnet:review_levels"; - - public static final String SYSIMPORT_CROSSWALK_REPOSITORY = "sysimport:crosswalk:repository"; - public static final String SYSIMPORT_CROSSWALK_ENTITYREGISTRY = "sysimport:crosswalk:entityregistry"; - public static final String USER_CLAIM = "user:claim"; - - public static final String DATASET_RESULTTYPE_CLASSID = "dataset"; - public static final String PUBLICATION_RESULTTYPE_CLASSID = "publication"; - public static final String SOFTWARE_RESULTTYPE_CLASSID = "software"; - public static final String ORP_RESULTTYPE_CLASSID = "other"; - - public static final String RESULT_RESULT = "resultResult"; - /** - * @deprecated Use {@link ModelConstants#RELATIONSHIP} instead. - */ - @Deprecated - public static final String PUBLICATION_DATASET = "publicationDataset"; - public static final String IS_RELATED_TO = "isRelatedTo"; - public static final String SUPPLEMENT = "supplement"; - public static final String IS_SUPPLEMENT_TO = "isSupplementTo"; - public static final String IS_SUPPLEMENTED_BY = "isSupplementedBy"; - public static final String PART = "part"; - public static final String IS_PART_OF = "isPartOf"; - public static final String HAS_PARTS = "hasParts"; - public static final String RELATIONSHIP = "relationship"; - public static final String CITATION = "citation"; - public static final String CITES = "cites"; - public static final String IS_CITED_BY = "isCitedBy"; - public static final String REVIEW = "review"; - public static final String REVIEWS = "reviews"; - public static final String IS_REVIEWED_BY = "isReviewedBy"; - - public static final String RESULT_PROJECT = "resultProject"; - public static final String OUTCOME = "outcome"; - public static final String IS_PRODUCED_BY = "isProducedBy"; - public static final String PRODUCES = "produces"; - - public static final String DATASOURCE_ORGANIZATION = "datasourceOrganization"; - public static final String 
PROVISION = "provision"; - public static final String IS_PROVIDED_BY = "isProvidedBy"; - public static final String PROVIDES = "provides"; - - public static final String PROJECT_ORGANIZATION = "projectOrganization"; - public static final String PARTICIPATION = "participation"; - public static final String HAS_PARTICIPANT = "hasParticipant"; - public static final String IS_PARTICIPANT = "isParticipant"; - - public static final String RESULT_ORGANIZATION = "resultOrganization"; - public static final String AFFILIATION = "affiliation"; - public static final String IS_AUTHOR_INSTITUTION_OF = "isAuthorInstitutionOf"; - public static final String HAS_AUTHOR_INSTITUTION = "hasAuthorInstitution"; - - public static final String MERGES = "merges"; - - public static final String UNKNOWN = "UNKNOWN"; - public static final String NOT_AVAILABLE = "not available"; - - public static final Qualifier PUBLICATION_DEFAULT_RESULTTYPE = qualifier( - PUBLICATION_RESULTTYPE_CLASSID, PUBLICATION_RESULTTYPE_CLASSID, - DNET_RESULT_TYPOLOGIES, DNET_RESULT_TYPOLOGIES); - - public static final Qualifier DATASET_DEFAULT_RESULTTYPE = qualifier( - DATASET_RESULTTYPE_CLASSID, DATASET_RESULTTYPE_CLASSID, - DNET_RESULT_TYPOLOGIES, DNET_RESULT_TYPOLOGIES); - - public static final Qualifier SOFTWARE_DEFAULT_RESULTTYPE = qualifier( - SOFTWARE_RESULTTYPE_CLASSID, SOFTWARE_RESULTTYPE_CLASSID, - DNET_RESULT_TYPOLOGIES, DNET_RESULT_TYPOLOGIES); - - public static final Qualifier ORP_DEFAULT_RESULTTYPE = qualifier( - ORP_RESULTTYPE_CLASSID, ORP_RESULTTYPE_CLASSID, - DNET_RESULT_TYPOLOGIES, DNET_RESULT_TYPOLOGIES); - - public static final Qualifier REPOSITORY_PROVENANCE_ACTIONS = qualifier( - SYSIMPORT_CROSSWALK_REPOSITORY, SYSIMPORT_CROSSWALK_REPOSITORY, - DNET_PROVENANCE_ACTIONS, DNET_PROVENANCE_ACTIONS); - - public static final Qualifier ENTITYREGISTRY_PROVENANCE_ACTION = qualifier( - SYSIMPORT_CROSSWALK_ENTITYREGISTRY, SYSIMPORT_CROSSWALK_ENTITYREGISTRY, - DNET_PROVENANCE_ACTIONS, DNET_PROVENANCE_ACTIONS); 
- - public static final KeyValue UNKNOWN_REPOSITORY = keyValue( - "10|openaire____::55045bd2a65019fd8e6741a755395c8c", "Unknown Repository"); - - public static final Qualifier UNKNOWN_COUNTRY = qualifier(UNKNOWN, "Unknown", DNET_COUNTRY_TYPE, DNET_COUNTRY_TYPE); - - private static Qualifier qualifier( - final String classid, - final String classname, - final String schemeid, - final String schemename) { - final Qualifier q = new Qualifier(); - q.setClassid(classid); - q.setClassname(classname); - q.setSchemeid(schemeid); - q.setSchemename(schemename); - return q; - } - - private static KeyValue keyValue(String key, String value) { - KeyValue kv = new KeyValue(); - kv.setKey(key); - kv.setValue(value); - kv.setDataInfo(new DataInfo()); - return kv; - } -} diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/common/ModelSupport.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/common/ModelSupport.java deleted file mode 100644 index a92e11b5a..000000000 --- a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/common/ModelSupport.java +++ /dev/null @@ -1,501 +0,0 @@ - -package eu.dnetlib.dhp.schema.common; - -import static com.google.common.base.Preconditions.checkArgument; - -import java.text.ParseException; -import java.text.SimpleDateFormat; -import java.time.Instant; -import java.time.format.DateTimeFormatter; -import java.time.temporal.TemporalAccessor; -import java.util.Date; -import java.util.Map; -import java.util.Objects; -import java.util.Optional; -import java.util.function.Function; - -import org.apache.commons.lang3.StringUtils; - -import com.google.common.collect.Maps; - -import eu.dnetlib.dhp.schema.oaf.*; - -/** Oaf model utility methods. 
*/ -public class ModelSupport { - - /** Defines the mapping between the actual entity type and the main entity type */ - private static Map entityMapping = Maps.newHashMap(); - - static { - entityMapping.put(EntityType.publication, MainEntityType.result); - entityMapping.put(EntityType.dataset, MainEntityType.result); - entityMapping.put(EntityType.otherresearchproduct, MainEntityType.result); - entityMapping.put(EntityType.software, MainEntityType.result); - entityMapping.put(EntityType.datasource, MainEntityType.datasource); - entityMapping.put(EntityType.organization, MainEntityType.organization); - entityMapping.put(EntityType.project, MainEntityType.project); - } - - /** - * Defines the mapping between the actual entity types and the relative classes implementing them - */ - public static final Map entityTypes = Maps.newHashMap(); - - static { - entityTypes.put(EntityType.datasource, Datasource.class); - entityTypes.put(EntityType.organization, Organization.class); - entityTypes.put(EntityType.project, Project.class); - entityTypes.put(EntityType.dataset, Dataset.class); - entityTypes.put(EntityType.otherresearchproduct, OtherResearchProduct.class); - entityTypes.put(EntityType.software, Software.class); - entityTypes.put(EntityType.publication, Publication.class); - } - - public static final Map oafTypes = Maps.newHashMap(); - - static { - oafTypes.put("datasource", Datasource.class); - oafTypes.put("organization", Organization.class); - oafTypes.put("project", Project.class); - oafTypes.put("dataset", Dataset.class); - oafTypes.put("otherresearchproduct", OtherResearchProduct.class); - oafTypes.put("software", Software.class); - oafTypes.put("publication", Publication.class); - oafTypes.put("relation", Relation.class); - } - - public static final Map idPrefixMap = Maps.newHashMap(); - - static { - idPrefixMap.put(Datasource.class, "10"); - idPrefixMap.put(Organization.class, "20"); - idPrefixMap.put(Project.class, "40"); - idPrefixMap.put(Dataset.class, 
"50"); - idPrefixMap.put(OtherResearchProduct.class, "50"); - idPrefixMap.put(Software.class, "50"); - idPrefixMap.put(Publication.class, "50"); - } - - public static final Map entityIdPrefix = Maps.newHashMap(); - - static { - entityIdPrefix.put("datasource", "10"); - entityIdPrefix.put("organization", "20"); - entityIdPrefix.put("project", "40"); - entityIdPrefix.put("result", "50"); - } - - public static final Map idPrefixEntity = Maps.newHashMap(); - - static { - idPrefixEntity.put("10", "datasource"); - idPrefixEntity.put("20", "organization"); - idPrefixEntity.put("40", "project"); - idPrefixEntity.put("50", "result"); - } - - public static final Map relationInverseMap = Maps.newHashMap(); - - static { - relationInverseMap - .put( - "personResult_authorship_isAuthorOf", new RelationInverse() - .setRelation("isAuthorOf") - .setInverse("hasAuthor") - .setRelType("personResult") - .setSubReltype("authorship")); - relationInverseMap - .put( - "personResult_authorship_hasAuthor", new RelationInverse() - .setInverse("isAuthorOf") - .setRelation("hasAuthor") - .setRelType("personResult") - .setSubReltype("authorship")); - relationInverseMap - .put( - "projectOrganization_participation_isParticipant", new RelationInverse() - .setRelation("isParticipant") - .setInverse("hasParticipant") - .setRelType("projectOrganization") - .setSubReltype("participation")); - relationInverseMap - .put( - "projectOrganization_participation_hasParticipant", new RelationInverse() - .setInverse("isParticipant") - .setRelation("hasParticipant") - .setRelType("projectOrganization") - .setSubReltype("participation")); - relationInverseMap - .put( - "resultOrganization_affiliation_hasAuthorInstitution", new RelationInverse() - .setRelation("hasAuthorInstitution") - .setInverse("isAuthorInstitutionOf") - .setRelType("resultOrganization") - .setSubReltype("affiliation")); - relationInverseMap - .put( - "resultOrganization_affiliation_isAuthorInstitutionOf", new RelationInverse() - 
.setInverse("hasAuthorInstitution") - .setRelation("isAuthorInstitutionOf") - .setRelType("resultOrganization") - .setSubReltype("affiliation")); - relationInverseMap - .put( - "organizationOrganization_dedup_merges", new RelationInverse() - .setRelation("merges") - .setInverse("isMergedIn") - .setRelType("organizationOrganization") - .setSubReltype("dedup")); - relationInverseMap - .put( - "organizationOrganization_dedup_isMergedIn", new RelationInverse() - .setInverse("merges") - .setRelation("isMergedIn") - .setRelType("organizationOrganization") - .setSubReltype("dedup")); - relationInverseMap - .put( - "organizationOrganization_dedupSimilarity_isSimilarTo", new RelationInverse() - .setInverse("isSimilarTo") - .setRelation("isSimilarTo") - .setRelType("organizationOrganization") - .setSubReltype("dedupSimilarity")); - - relationInverseMap - .put( - "resultProject_outcome_isProducedBy", new RelationInverse() - .setRelation("isProducedBy") - .setInverse("produces") - .setRelType("resultProject") - .setSubReltype("outcome")); - relationInverseMap - .put( - "resultProject_outcome_produces", new RelationInverse() - .setInverse("isProducedBy") - .setRelation("produces") - .setRelType("resultProject") - .setSubReltype("outcome")); - relationInverseMap - .put( - "projectPerson_contactPerson_isContact", new RelationInverse() - .setRelation("isContact") - .setInverse("hasContact") - .setRelType("projectPerson") - .setSubReltype("contactPerson")); - relationInverseMap - .put( - "projectPerson_contactPerson_hasContact", new RelationInverse() - .setInverse("isContact") - .setRelation("hasContact") - .setRelType("personPerson") - .setSubReltype("coAuthorship")); - relationInverseMap - .put( - "personPerson_coAuthorship_isCoauthorOf", new RelationInverse() - .setInverse("isCoAuthorOf") - .setRelation("isCoAuthorOf") - .setRelType("personPerson") - .setSubReltype("coAuthorship")); - relationInverseMap - .put( - "personPerson_dedup_merges", new RelationInverse() - 
.setInverse("isMergedIn") - .setRelation("merges") - .setRelType("personPerson") - .setSubReltype("dedup")); - relationInverseMap - .put( - "personPerson_dedup_isMergedIn", new RelationInverse() - .setInverse("merges") - .setRelation("isMergedIn") - .setRelType("personPerson") - .setSubReltype("dedup")); - relationInverseMap - .put( - "personPerson_dedupSimilarity_isSimilarTo", new RelationInverse() - .setInverse("isSimilarTo") - .setRelation("isSimilarTo") - .setRelType("personPerson") - .setSubReltype("dedupSimilarity")); - relationInverseMap - .put( - "datasourceOrganization_provision_isProvidedBy", new RelationInverse() - .setInverse("provides") - .setRelation("isProvidedBy") - .setRelType("datasourceOrganization") - .setSubReltype("provision")); - relationInverseMap - .put( - "datasourceOrganization_provision_provides", new RelationInverse() - .setInverse("isProvidedBy") - .setRelation("provides") - .setRelType("datasourceOrganization") - .setSubReltype("provision")); - relationInverseMap - .put( - "resultResult_similarity_hasAmongTopNSimilarDocuments", new RelationInverse() - .setInverse("isAmongTopNSimilarDocuments") - .setRelation("hasAmongTopNSimilarDocuments") - .setRelType("resultResult") - .setSubReltype("similarity")); - relationInverseMap - .put( - "resultResult_similarity_isAmongTopNSimilarDocuments", new RelationInverse() - .setInverse("hasAmongTopNSimilarDocuments") - .setRelation("isAmongTopNSimilarDocuments") - .setRelType("resultResult") - .setSubReltype("similarity")); - relationInverseMap - .put( - "resultResult_relationship_isRelatedTo", new RelationInverse() - .setInverse("isRelatedTo") - .setRelation("isRelatedTo") - .setRelType("resultResult") - .setSubReltype("relationship")); - relationInverseMap - .put( - "resultResult_similarity_isAmongTopNSimilarDocuments", new RelationInverse() - .setInverse("hasAmongTopNSimilarDocuments") - .setRelation("isAmongTopNSimilarDocuments") - .setRelType("resultResult") - .setSubReltype("similarity")); - 
relationInverseMap - .put( - "resultResult_supplement_isSupplementTo", new RelationInverse() - .setInverse("isSupplementedBy") - .setRelation("isSupplementTo") - .setRelType("resultResult") - .setSubReltype("supplement")); - relationInverseMap - .put( - "resultResult_supplement_isSupplementedBy", new RelationInverse() - .setInverse("isSupplementTo") - .setRelation("isSupplementedBy") - .setRelType("resultResult") - .setSubReltype("supplement")); - relationInverseMap - .put( - "resultResult_part_isPartOf", new RelationInverse() - .setInverse("hasPart") - .setRelation("isPartOf") - .setRelType("resultResult") - .setSubReltype("part")); - relationInverseMap - .put( - "resultResult_part_hasPart", new RelationInverse() - .setInverse("isPartOf") - .setRelation("hasPart") - .setRelType("resultResult") - .setSubReltype("part")); - relationInverseMap - .put( - "resultResult_dedup_merges", new RelationInverse() - .setInverse("isMergedIn") - .setRelation("merges") - .setRelType("resultResult") - .setSubReltype("dedup")); - relationInverseMap - .put( - "resultResult_dedup_isMergedIn", new RelationInverse() - .setInverse("merges") - .setRelation("isMergedIn") - .setRelType("resultResult") - .setSubReltype("dedup")); - relationInverseMap - .put( - "resultResult_dedupSimilarity_isSimilarTo", new RelationInverse() - .setInverse("isSimilarTo") - .setRelation("isSimilarTo") - .setRelType("resultResult") - .setSubReltype("dedupSimilarity")); - - } - - private static final String schemeTemplate = "dnet:%s_%s_relations"; - - private ModelSupport() { - } - - public static String getIdPrefix(Class clazz) { - return idPrefixMap.get(clazz); - } - - /** - * Checks subclass-superclass relationship. 
- * - * @param subClazzObject Subclass object instance - * @param superClazzObject Superclass object instance - * @param Subclass type - * @param Superclass type - * @return True if X is a subclass of Y - */ - public static Boolean isSubClass( - X subClazzObject, Y superClazzObject) { - return isSubClass(subClazzObject.getClass(), superClazzObject.getClass()); - } - - /** - * Checks subclass-superclass relationship. - * - * @param subClazzObject Subclass object instance - * @param superClazz Superclass class - * @param Subclass type - * @param Superclass type - * @return True if X is a subclass of Y - */ - public static Boolean isSubClass( - X subClazzObject, Class superClazz) { - return isSubClass(subClazzObject.getClass(), superClazz); - } - - /** - * Checks subclass-superclass relationship. - * - * @param subClazz Subclass class - * @param superClazz Superclass class - * @param Subclass type - * @param Superclass type - * @return True if X is a subclass of Y - */ - public static Boolean isSubClass( - Class subClazz, Class superClazz) { - return superClazz.isAssignableFrom(subClazz); - } - - /** - * Lists all the OAF model classes - * - * @param - * @return - */ - public static Class[] getOafModelClasses() { - return new Class[] { - Author.class, - Context.class, - Country.class, - DataInfo.class, - Dataset.class, - Datasource.class, - ExternalReference.class, - ExtraInfo.class, - Field.class, - GeoLocation.class, - Instance.class, - Journal.class, - KeyValue.class, - Oaf.class, - OafEntity.class, - OAIProvenance.class, - Organization.class, - OriginDescription.class, - OtherResearchProduct.class, - Project.class, - Publication.class, - Qualifier.class, - Relation.class, - Result.class, - Software.class, - StructuredProperty.class - }; - } - - public static String getMainType(final EntityType type) { - return entityMapping.get(type).name(); - } - - public static boolean isResult(EntityType type) { - return MainEntityType.result.name().equals(getMainType(type)); - 
} - - public static String getScheme(final String sourceType, final String targetType) { - return String - .format( - schemeTemplate, - entityMapping.get(EntityType.valueOf(sourceType)).name(), - entityMapping.get(EntityType.valueOf(targetType)).name()); - } - - public static String tableIdentifier(String dbName, String tableName) { - - checkArgument(StringUtils.isNotBlank(dbName), "DB name cannot be empty"); - checkArgument(StringUtils.isNotBlank(tableName), "table name cannot be empty"); - - return String.format("%s.%s", dbName, tableName); - } - - public static String tableIdentifier(String dbName, Class clazz) { - - checkArgument(Objects.nonNull(clazz), "clazz is needed to derive the table name, thus cannot be null"); - - return tableIdentifier(dbName, clazz.getSimpleName().toLowerCase()); - } - - public static Function idFn() { - return x -> { - if (isSubClass(x, Relation.class)) { - return idFnForRelation(x); - } - return idFnForOafEntity(x); - }; - } - - private static String idFnForRelation(T t) { - Relation r = (Relation) t; - return Optional - .ofNullable(r.getSource()) - .map( - source -> Optional - .ofNullable(r.getTarget()) - .map( - target -> Optional - .ofNullable(r.getRelType()) - .map( - relType -> Optional - .ofNullable(r.getSubRelType()) - .map( - subRelType -> Optional - .ofNullable(r.getRelClass()) - .map( - relClass -> String - .join( - source, - target, - relType, - subRelType, - relClass)) - .orElse( - String - .join( - source, - target, - relType, - subRelType))) - .orElse(String.join(source, target, relType))) - .orElse(String.join(source, target))) - .orElse(source)) - .orElse(null); - } - - private static String idFnForOafEntity(T t) { - return ((OafEntity) t).getId(); - } - - public static String oldest(String dateA, String dateB) throws ParseException { - - if (StringUtils.isBlank(dateA)) { - return dateB; - } - if (StringUtils.isBlank(dateB)) { - return dateA; - } - if (StringUtils.isNotBlank(dateA) && StringUtils.isNotBlank(dateB)) { 
- - final Date a = Date.from(Instant.from(DateTimeFormatter.ISO_INSTANT.parse(dateA))); - final Date b = Date.from(Instant.from(DateTimeFormatter.ISO_INSTANT.parse(dateB))); - - return a.before(b) ? dateA : dateB; - } else { - return null; - } - } -} diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/common/RelationInverse.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/common/RelationInverse.java deleted file mode 100644 index 4757c637e..000000000 --- a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/common/RelationInverse.java +++ /dev/null @@ -1,46 +0,0 @@ - -package eu.dnetlib.dhp.schema.common; - -public class RelationInverse { - private String relation; - private String inverse; - private String relType; - private String subReltype; - - public String getRelType() { - return relType; - } - - public RelationInverse setRelType(String relType) { - this.relType = relType; - return this; - } - - public String getSubReltype() { - return subReltype; - } - - public RelationInverse setSubReltype(String subReltype) { - this.subReltype = subReltype; - return this; - } - - public String getRelation() { - return relation; - } - - public RelationInverse setRelation(String relation) { - this.relation = relation; - return this; - } - - public String getInverse() { - return inverse; - } - - public RelationInverse setInverse(String inverse) { - this.inverse = inverse; - return this; - } - -} diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/APC.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/APC.java deleted file mode 100644 index 7f5dcb397..000000000 --- a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/APC.java +++ /dev/null @@ -1,29 +0,0 @@ - -package eu.dnetlib.dhp.schema.dump.oaf; - -import java.io.Serializable; - -/** - * Used to refer to the Article Processing Charge information. Not dumped in this release. 
It contains two parameters: - - * currency of type String to store the currency of the APC - amount of type String to stores the charged amount - */ -public class APC implements Serializable { - private String currency; - private String amount; - - public String getCurrency() { - return currency; - } - - public void setCurrency(String currency) { - this.currency = currency; - } - - public String getAmount() { - return amount; - } - - public void setAmount(String amount) { - this.amount = amount; - } -} diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/AccessRight.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/AccessRight.java deleted file mode 100644 index f28c544f6..000000000 --- a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/AccessRight.java +++ /dev/null @@ -1,31 +0,0 @@ - -package eu.dnetlib.dhp.schema.dump.oaf; - -/** - * AccessRight. Used to represent the result access rights. It extends the eu.dnet.lib.dhp.schema.dump.oaf.Qualifier - * element with a parameter scheme of type String to store the scheme. Values for this element are found against the - * COAR access right scheme. The classid of the element accessright in eu.dnetlib.dhp.schema.oaf.Result is used to get - * the COAR corresponding code whose value will be used to set the code parameter. The COAR label corresponding to the - * COAR code will be used to set the label parameter. 
The scheme value will always be the one referring to the COAR - * access right scheme - */ -public class AccessRight extends Qualifier { - - private String scheme; - - public String getScheme() { - return scheme; - } - - public void setScheme(String scheme) { - this.scheme = scheme; - } - - public static AccessRight newInstance(String code, String label, String scheme) { - AccessRight ar = new AccessRight(); - ar.setCode(code); - ar.setLabel(label); - ar.setScheme(scheme); - return ar; - } -} diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/Author.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/Author.java deleted file mode 100644 index 34920bcf7..000000000 --- a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/Author.java +++ /dev/null @@ -1,73 +0,0 @@ - -package eu.dnetlib.dhp.schema.dump.oaf; - -import java.io.Serializable; -import java.util.List; - -/** - * Used to represent the generic author of the result. It has six parameters: - name of type String to store the given - * name of the author. The value for this parameter corresponds to eu.dnetlib.dhp.schema.oaf.Author name - surname of - * type String to store the family name of the author. The value for this parameter corresponds to - * eu.dnetlib.dhp.schema.oaf.Author surname - fullname of type String to store the fullname of the author. The value for - * this parameter corresponds to eu.dnetlib.dhp.schema.oaf.Author fullname - rank of type Integer to store the rank on - * the author in the result's authors list. The value for this parameter corresponds to eu.dnetlib.dhp.schema.oaf.Author - * rank - pid of type eu.dnetlib.dhp.schema.dump.oaf.Pid to store the persistent identifier for the author. For the - * moment only ORCID identifiers will be dumped. 
- The id element is instantiated by using the following values in the - * eu.dnetlib.dhp.schema.oaf.Result pid: * Qualifier.classid for scheme * value for value - The provenance element is - * instantiated only if the dataInfo is set for the pid in the result to be dumped. The provenance element is - * instantiated by using the following values in the eu.dnetlib.dhp.schema.oaf.Result pid: * - * dataInfo.provenanceaction.classname for provenance * dataInfo.trust for trust - */ -public class Author implements Serializable { - - private String fullname; - - private String name; - - private String surname; - - private Integer rank; - - private Pid pid; - - public String getFullname() { - return fullname; - } - - public void setFullname(String fullname) { - this.fullname = fullname; - } - - public String getName() { - return name; - } - - public void setName(String name) { - this.name = name; - } - - public String getSurname() { - return surname; - } - - public void setSurname(String surname) { - this.surname = surname; - } - - public Integer getRank() { - return rank; - } - - public void setRank(Integer rank) { - this.rank = rank; - } - - public Pid getPid() { - return pid; - } - - public void setPid(Pid pid) { - this.pid = pid; - } - -} diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/Container.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/Container.java deleted file mode 100644 index 8699528ca..000000000 --- a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/Container.java +++ /dev/null @@ -1,136 +0,0 @@ - -package eu.dnetlib.dhp.schema.dump.oaf; - -import java.io.Serializable; -import java.util.Objects; - -/** - * To store information about the conference or journal where the result has been presented or published. It contains - * eleven parameters: - name of type String to store the name of the journal or conference. 
It corresponds to the - * parameter name of eu.dnetlib.dhp.schema.oaf.Journal - issnPrinted ot type String to store the journal printed issn. - * It corresponds to the parameter issnPrinted of eu.dnetlib.dhp.schema.oaf.Journal - issnOnline of type String to store - * the journal online issn. It corresponds to the parameter issnOnline of eu.dnetlib.dhp.schema.oaf.Journal - - * issnLinking of type String to store the journal linking issn. It corresponds to the parameter issnLinking of - * eu.dnetlib.dhp.schema.oaf.Journal - ep of type String to store the end page. It corresponds to the parameter ep of - * eu.dnetlib.dhp.schema.oaf.Journal - iss of type String to store the journal issue. It corresponds to the parameter - * iss of eu.dnetlib.dhp.schema.oaf.Journal - sp of type String to store the start page. It corresponds to the parameter - * sp of eu.dnetlib.dhp.schema.oaf.Journal - vol of type String to store the Volume. It corresponds to the parameter vol - * of eu.dnetlib.dhp.schema.oaf.Journal - edition of type String to store the edition of the journal or conference - * proceeding. It corresponds to the parameter edition of eu.dnetlib.dhp.schema.oaf.Journal - conferenceplace of type - * String to store the place of the conference. It corresponds to the parameter conferenceplace of - * eu.dnetlib.dhp.schema.oaf.Journal - conferencedate of type String to store the date of the conference. 
It corresponds - * to the parameter conferencedate of eu.dnetlib.dhp.schema.oaf.Journal - */ -public class Container implements Serializable { - - private String name; - - private String issnPrinted; - - private String issnOnline; - - private String issnLinking; - - private String ep; - - private String iss; - - private String sp; - - private String vol; - - private String edition; - - private String conferenceplace; - - private String conferencedate; - - public String getName() { - return name; - } - - public void setName(String name) { - this.name = name; - } - - public String getIssnPrinted() { - return issnPrinted; - } - - public void setIssnPrinted(String issnPrinted) { - this.issnPrinted = issnPrinted; - } - - public String getIssnOnline() { - return issnOnline; - } - - public void setIssnOnline(String issnOnline) { - this.issnOnline = issnOnline; - } - - public String getIssnLinking() { - return issnLinking; - } - - public void setIssnLinking(String issnLinking) { - this.issnLinking = issnLinking; - } - - public String getEp() { - return ep; - } - - public void setEp(String ep) { - this.ep = ep; - } - - public String getIss() { - return iss; - } - - public void setIss(String iss) { - this.iss = iss; - } - - public String getSp() { - return sp; - } - - public void setSp(String sp) { - this.sp = sp; - } - - public String getVol() { - return vol; - } - - public void setVol(String vol) { - this.vol = vol; - } - - public String getEdition() { - return edition; - } - - public void setEdition(String edition) { - this.edition = edition; - } - - public String getConferenceplace() { - return conferenceplace; - } - - public void setConferenceplace(String conferenceplace) { - this.conferenceplace = conferenceplace; - } - - public String getConferencedate() { - return conferencedate; - } - - public void setConferencedate(String conferencedate) { - this.conferencedate = conferencedate; - } - -} diff --git 
a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/ControlledField.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/ControlledField.java deleted file mode 100644 index cad7b8b5c..000000000 --- a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/ControlledField.java +++ /dev/null @@ -1,38 +0,0 @@ - -package eu.dnetlib.dhp.schema.dump.oaf; - -import java.io.Serializable; - -/** - * To represent the information described by a scheme and a value in that scheme (i.e. pid). It has two parameters: - - * scheme of type String to store the scheme - value of type String to store the value in that scheme - */ -public class ControlledField implements Serializable { - private String scheme; - private String value; - - public String getScheme() { - return scheme; - } - - public void setScheme(String scheme) { - this.scheme = scheme; - } - - public String getValue() { - return value; - } - - public void setValue(String value) { - this.value = value; - } - - public static ControlledField newInstance(String scheme, String value) { - ControlledField cf = new ControlledField(); - - cf.setScheme(scheme); - cf.setValue(value); - - return cf; - } -} diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/Country.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/Country.java deleted file mode 100644 index 3ab4d90fe..000000000 --- a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/Country.java +++ /dev/null @@ -1,37 +0,0 @@ - -package eu.dnetlib.dhp.schema.dump.oaf; - -/** - * Represents the country associated to this result. It extends eu.dnetlib.dhp.schema.dump.oaf.Qualifier with a - * provenance parameter of type eu.dnetlib.dhp.schema.dumo.oaf.Provenance. The country in not mapped if its value in the - * result reprensented in the internal format is Unknown. 
The value for this element correspond to: - code corresponds - * to the classid of eu.dnetlib.dhp.schema.oaf.Country - label corresponds to the classname of - * eu.dnetlib.dhp.schema.oaf.Country - provenance set only if the dataInfo associated to the Country of the result to be - * dumped is not null. In this case : - provenance corresponds to dataInfo.provenanceaction.classid (to be modified with - * datainfo.provenanceaction.classname) - trust corresponds to dataInfo.trust - */ -public class Country extends Qualifier { - - private Provenance provenance; - - public Provenance getProvenance() { - return provenance; - } - - public void setProvenance(Provenance provenance) { - this.provenance = provenance; - } - - public static Country newInstance(String code, String label, Provenance provenance) { - Country c = new Country(); - c.setProvenance(provenance); - c.setCode(code); - c.setLabel(label); - return c; - } - - public static Country newInstance(String code, String label, String provenance, String trust) { - return newInstance(code, label, Provenance.newInstance(provenance, trust)); - } - -} diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/Funder.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/Funder.java deleted file mode 100644 index 16cab22cc..000000000 --- a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/Funder.java +++ /dev/null @@ -1,36 +0,0 @@ - -package eu.dnetlib.dhp.schema.dump.oaf; - -import java.io.Serializable; - -public class Funder implements Serializable { - private String shortName; - - private String name; - - private String jurisdiction; - - public String getJurisdiction() { - return jurisdiction; - } - - public void setJurisdiction(String jurisdiction) { - this.jurisdiction = jurisdiction; - } - - public String getShortName() { - return shortName; - } - - public void setShortName(String shortName) { - this.shortName = shortName; - } - - public String getName() { - return name; - } - - public void 
setName(String name) { - this.name = name; - } -} diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/GeoLocation.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/GeoLocation.java deleted file mode 100644 index 6bd891bbd..000000000 --- a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/GeoLocation.java +++ /dev/null @@ -1,53 +0,0 @@ - -package eu.dnetlib.dhp.schema.dump.oaf; - -import java.io.Serializable; - -import org.apache.commons.lang3.StringUtils; - -import com.fasterxml.jackson.annotation.JsonIgnore; - -/** - * Represents the geolocation information. It has three parameters: - point of type String to store the point - * information. It corresponds to eu.dnetlib.dhp.schema.oaf.GeoLocation point - box ot type String to store the box - * information. It corresponds to eu.dnetlib.dhp.schema.oaf.GeoLocation box - place of type String to store the place - * information. It corresponds to eu.dnetlib.dhp.schema.oaf.GeoLocation place - */ -public class GeoLocation implements Serializable { - - private String point; - - private String box; - - private String place; - - public String getPoint() { - return point; - } - - public void setPoint(String point) { - this.point = point; - } - - public String getBox() { - return box; - } - - public void setBox(String box) { - this.box = box; - } - - public String getPlace() { - return place; - } - - public void setPlace(String place) { - this.place = place; - } - - @JsonIgnore - public boolean isBlank() { - return StringUtils.isBlank(point) && StringUtils.isBlank(box) && StringUtils.isBlank(place); - } - -} diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/Instance.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/Instance.java deleted file mode 100644 index edc6f28f5..000000000 --- a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/Instance.java +++ /dev/null @@ -1,81 +0,0 @@ - -package eu.dnetlib.dhp.schema.dump.oaf; - -import 
java.io.Serializable; -import java.util.List; - -/** - * Represents the manifestations (i.e. different versions) of the result. For example: the pre-print and the published - * versions are two manifestations of the same research result. It has the following parameters: - license of type - * String to store the license applied to the instance. It corresponds to the value of the licence in the instance to be - * dumped - accessright of type eu.dnetlib.dhp.schema.dump.oaf.AccessRight to store the accessright of the instance. - - * type of type String to store the type of the instance as defined in the corresponding dnet vocabulary - * (dnet:pubication_resource). It corresponds to the instancetype.classname of the instance to be mapped - url of type - * List list of locations where the instance is accessible. It corresponds to url of the instance to be dumped - - * publicationdate of type String to store the publication date of the instance ;// dateofacceptance; - refereed of type - * String to store information abour tthe review status of the instance. Possible values are 'Unknown', - * 'nonPeerReviewed', 'peerReviewed'. 
It corresponds to refereed.classname of the instance to be dumped - */ -public class Instance implements Serializable { - - private String license; - - private AccessRight accessright; - - private String type; - - private List url; - - private String publicationdate;// dateofacceptance; - - private String refereed; // peer-review status - - public String getLicense() { - return license; - } - - public void setLicense(String license) { - this.license = license; - } - - public AccessRight getAccessright() { - return accessright; - } - - public void setAccessright(AccessRight accessright) { - this.accessright = accessright; - } - - public String getType() { - return type; - } - - public void setType(String type) { - this.type = type; - } - - public List getUrl() { - return url; - } - - public void setUrl(List url) { - this.url = url; - } - - public String getPublicationdate() { - return publicationdate; - } - - public void setPublicationdate(String publicationdate) { - this.publicationdate = publicationdate; - } - - public String getRefereed() { - return refereed; - } - - public void setRefereed(String refereed) { - this.refereed = refereed; - } - -} diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/KeyValue.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/KeyValue.java deleted file mode 100644 index 849aa4d3c..000000000 --- a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/KeyValue.java +++ /dev/null @@ -1,48 +0,0 @@ - -package eu.dnetlib.dhp.schema.dump.oaf; - -import java.io.Serializable; - -import org.apache.commons.lang3.StringUtils; - -import com.fasterxml.jackson.annotation.JsonIgnore; - -/** - * To represent the information described by a key and a value. 
It has two parameters: - key to store the key (generally - * the OpenAIRE id for some entity) - value to store the value (generally the OpenAIRE name for the key) - */ -public class KeyValue implements Serializable { - - private String key; - - private String value; - - public String getKey() { - return key; - } - - public void setKey(String key) { - this.key = key; - } - - public String getValue() { - return value; - } - - public void setValue(String value) { - this.value = value; - } - - public static KeyValue newInstance(String key, String value) { - KeyValue inst = new KeyValue(); - inst.key = key; - inst.value = value; - return inst; - } - - @JsonIgnore - public boolean isBlank() { - return StringUtils.isBlank(key) && StringUtils.isBlank(value); - } - -} diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/Pid.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/Pid.java deleted file mode 100644 index 786ddb1d7..000000000 --- a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/Pid.java +++ /dev/null @@ -1,45 +0,0 @@ - -package eu.dnetlib.dhp.schema.dump.oaf; - -import java.io.Serializable; - -/** - * To represent the generic persistent identifier. It has two parameters: - id of type - * eu.dnetlib.dhp.schema.dump.oaf.ControlledField to store the scheme and value of the Persistent Identifier. 
- - * provenance of type eu.dnetlib.dhp.schema.dump.oaf.Provenance to store the provenance and trust of the information - */ -public class Pid implements Serializable { - private ControlledField id; - private Provenance provenance; - - public ControlledField getId() { - return id; - } - - public void setId(ControlledField pid) { - this.id = pid; - } - - public Provenance getProvenance() { - return provenance; - } - - public void setProvenance(Provenance provenance) { - this.provenance = provenance; - } - - public static Pid newInstance(ControlledField pid, Provenance provenance) { - Pid p = new Pid(); - p.id = pid; - p.provenance = provenance; - - return p; - } - - public static Pid newInstance(ControlledField pid) { - Pid p = new Pid(); - p.id = pid; - - return p; - } -} diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/Project.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/Project.java deleted file mode 100644 index 00cd7a0fb..000000000 --- a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/Project.java +++ /dev/null @@ -1,51 +0,0 @@ - -package eu.dnetlib.dhp.schema.dump.oaf; - -import java.io.Serializable; - -/** - * This class to store the common information about the project that will be dumped for community and for the whole - * graph - private String id to store the id of the project (OpenAIRE id) - private String code to store the grant - * agreement of the project - private String acronym to store the acronym of the project - private String title to store - * the tile of the project - */ -public class Project implements Serializable { - protected String id;// OpenAIRE id - protected String code; - - protected String acronym; - - protected String title; - - public String getId() { - return id; - } - - public void setId(String id) { - this.id = id; - } - - public String getCode() { - return code; - } - - public void setCode(String code) { - this.code = code; - } - - public String getAcronym() { - return acronym; - 
} - - public void setAcronym(String acronym) { - this.acronym = acronym; - } - - public String getTitle() { - return title; - } - - public void setTitle(String title) { - this.title = title; - } -} diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/Provenance.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/Provenance.java deleted file mode 100644 index 28fb3aaa6..000000000 --- a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/Provenance.java +++ /dev/null @@ -1,41 +0,0 @@ - -package eu.dnetlib.dhp.schema.dump.oaf; - -import java.io.Serializable; - -/** - * Indicates the process that produced (or provided) the information, and the trust associated to the information. It - * has two parameters: - provenance of type String to store the provenance of the information, - trust of type String to - * store the trust associated to the information - */ -public class Provenance implements Serializable { - private String provenance; - private String trust; - - public String getProvenance() { - return provenance; - } - - public void setProvenance(String provenance) { - this.provenance = provenance; - } - - public String getTrust() { - return trust; - } - - public void setTrust(String trust) { - this.trust = trust; - } - - public static Provenance newInstance(String provenance, String trust) { - Provenance p = new Provenance(); - p.provenance = provenance; - p.trust = trust; - return p; - } - - public String toString() { - return provenance + trust; - } -} diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/Qualifier.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/Qualifier.java deleted file mode 100644 index 348c22b31..000000000 --- a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/Qualifier.java +++ /dev/null @@ -1,42 +0,0 @@ - -package eu.dnetlib.dhp.schema.dump.oaf; - -import java.io.Serializable; - -import org.apache.commons.lang3.StringUtils; - -import 
com.fasterxml.jackson.annotation.JsonIgnore; - -/** - * To represent the information described by a code and a value It has two parameters: - code to store the code - * (generally the classid of the eu.dnetlib.dhp.schema.oaf.Qualifier element) - label to store the label (generally the - * classname of the eu.dnetlib.dhp.schema.oaf.Qualifier element - */ -public class Qualifier implements Serializable { - - private String code; // the classid in the Qualifier - private String label; // the classname in the Qualifier - - public String getCode() { - return code; - } - - public void setCode(String code) { - this.code = code; - } - - public String getLabel() { - return label; - } - - public void setLabel(String label) { - this.label = label; - } - - public static Qualifier newInstance(String code, String value) { - Qualifier qualifier = new Qualifier(); - qualifier.setCode(code); - qualifier.setLabel(value); - return qualifier; - } -} diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/Result.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/Result.java deleted file mode 100644 index 88ab2c334..000000000 --- a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/Result.java +++ /dev/null @@ -1,379 +0,0 @@ - -package eu.dnetlib.dhp.schema.dump.oaf; - -import java.io.Serializable; -import java.util.List; - -import eu.dnetlib.dhp.schema.dump.oaf.community.Project; - -/** - * To represent the dumped result. It will be extended in the dump for Research Communities - Research - * Initiative/Infrastructures. It has the following parameters: - author of type - * List to describe the authors of a result. For each author in the result - * represented in the internal model one author in the esternal model is produced. - type of type String to represent - * the category of the result. Possible values are publication, dataset, software, other. 
It corresponds to - * resulttype.classname of the dumped result - language of type eu.dnetlib.dhp.schema.dump.oaf.Qualifier to store - * information about the language of the result. It is dumped as - code corresponds to language.classid - value - * corresponds to language.classname - country of type List to store the country - * list to which the result is associated. For each country in the result respresented in the internal model one country - * in the external model is produces - subjects of type List to store the subjects for - * the result. For each subject in the result represented in the internal model one subject in the external model is - * produced - maintitle of type String to store the main title of the result. It corresponds to the value of the first - * title in the resul to be dumped having classid equals to "main title" - subtitle of type String to store the subtitle - * of the result. It corresponds to the value of the first title in the resul to be dumped having classid equals to - * "subtitle" - description of type List to store the description of the result. It corresponds to the list of - * description.value in the result represented in the internal model - publicationdate of type String to store the - * pubblication date. It corresponds to dateofacceptance.value in the result represented in the internal model - - * publisher of type String to store information about the publisher. It corresponds to publisher.value of the result - * represented in the intrenal model - embargoenddate of type String to store the embargo end date. It corresponds to - * embargoenddate.value of the result represented in the internal model - source of type List See definition of - * Dublin Core field dc:source. 
It corresponds to the list of source.value in the result represented in the internal - * model - format of type List It corresponds to the list of format.value in the result represented in the - * internal model - contributor of type List to represent contributors for this result. It corresponds to the - * list of contributor.value in the result represented in the internal model - coverage of type String. It corresponds - * to the list of coverage.value in the result represented in the internal model - bestaccessright of type - * eu.dnetlib.dhp.schema.dump.oaf.AccessRight to store informatin about the openest access right associated to the - * manifestations of this research results. It corresponds to the same parameter in the result represented in the - * internal model - container of type eu.dnetlib.dhp.schema/dump.oaf.Container (only for result of type publication). It - * corresponds to the parameter journal of the result represented in the internal model - documentationUrl of type - * List (only for results of type software) to store the URLs to the software documentation. It corresponds to - * the list of documentationUrl.value of the result represented in the internal model - codeRepositoryUrl of type String - * (only for results of type software) to store the URL to the repository with the source code. It corresponds to - * codeRepositoryUrl.value of the result represented in the internal model - programmingLanguage of type String (only - * for results of type software) to store the programming language. It corresponds to programmingLanguaga.classid of the - * result represented in the internal model - contactperson of type List (only for results of type other) to - * store the contact person for this result. It corresponds to the list of contactperson.value of the result represented - * in the internal model - contactgroup of type List (only for results of type other) to store the information - * for the contact group. 
It corresponds to the list of contactgroup.value of the result represented in the internal - * model - tool of type List (only fro results of type other) to store information about tool useful for the - * interpretation and/or re-used of the research product. It corresponds to the list of tool.value in the result - * represented in the internal modelt - size of type String (only for results of type dataset) to store the size of the - * dataset. It corresponds to size.value in the result represented in the internal model - version of type String (only - * for results of type dataset) to store the version. It corresponds to version.value of the result represented in the - * internal model - geolocation fo type List (only for results of type - * dataset) to store geolocation information. For each geolocation element in the result represented in the internal - * model a GeoLocation in the external model il produced - id of type String to store the OpenAIRE id of the result. It - * corresponds to the id of the result represented in the internal model - originalId of type List to store the - * original ids of the result. It corresponds to the originalId of the result represented in the internal model - pid of - * type List to store the persistent identifiers for the result. For - * each pid in the results represented in the internal model one pid in the external model is produced. The value - * correspondence is: - scheme corresponds to pid.qualifier.classid of the result represented in the internal model - - * value corresponds to the pid.value of the result represented in the internal model - dateofcollection of type String - * to store information about the time OpenAIRE collected the record. It corresponds to dateofcollection of the result - * represented in the internal model - lasteupdatetimestamp of type String to store the timestamp of the last update of - * the record. 
It corresponds to lastupdatetimestamp of the resord represented in the internal model - */ -public class Result implements Serializable { - - private List author; - - // resulttype allows subclassing results into publications | datasets | software - private String type; // resulttype - - // common fields - private Qualifier language; - - private List country; - - private List subjects; - - private String maintitle; - - private String subtitle; - - private List description; - - private String publicationdate; // dateofacceptance; - - private String publisher; - - private String embargoenddate; - - private List source; - - private List format; - - private List contributor; - - private List coverage; - - private AccessRight bestaccessright; - - private Container container;// Journal - - private List documentationUrl; // software - - private String codeRepositoryUrl; // software - - private String programmingLanguage; // software - - private List contactperson; // orp - - private List contactgroup; // orp - - private List tool; // orp - - private String size; // dataset - - private String version; // dataset - - private List geolocation; // dataset - - private String id; - - private List originalId; - - private List pid; - - private String dateofcollection; - - private Long lastupdatetimestamp; - - public Long getLastupdatetimestamp() { - return lastupdatetimestamp; - } - - public void setLastupdatetimestamp(Long lastupdatetimestamp) { - this.lastupdatetimestamp = lastupdatetimestamp; - } - - public String getId() { - return id; - } - - public void setId(String id) { - this.id = id; - } - - public List getOriginalId() { - return originalId; - } - - public void setOriginalId(List originalId) { - this.originalId = originalId; - } - - public List getPid() { - return pid; - } - - public void setPid(List pid) { - this.pid = pid; - } - - public String getDateofcollection() { - return dateofcollection; - } - - public void setDateofcollection(String dateofcollection) { - 
this.dateofcollection = dateofcollection; - } - - public List getAuthor() { - return author; - } - - public String getType() { - return type; - } - - public void setType(String type) { - this.type = type; - } - - public Container getContainer() { - return container; - } - - public void setContainer(Container container) { - this.container = container; - } - - public void setAuthor(List author) { - this.author = author; - } - - public Qualifier getLanguage() { - return language; - } - - public void setLanguage(Qualifier language) { - this.language = language; - } - - public List getCountry() { - return country; - } - - public void setCountry(List country) { - this.country = country; - } - - public List getSubjects() { - return subjects; - } - - public void setSubjects(List subjects) { - this.subjects = subjects; - } - - public String getMaintitle() { - return maintitle; - } - - public void setMaintitle(String maintitle) { - this.maintitle = maintitle; - } - - public String getSubtitle() { - return subtitle; - } - - public void setSubtitle(String subtitle) { - this.subtitle = subtitle; - } - - public List getDescription() { - return description; - } - - public void setDescription(List description) { - this.description = description; - } - - public String getPublicationdate() { - return publicationdate; - } - - public void setPublicationdate(String publicationdate) { - this.publicationdate = publicationdate; - } - - public String getPublisher() { - return publisher; - } - - public void setPublisher(String publisher) { - this.publisher = publisher; - } - - public String getEmbargoenddate() { - return embargoenddate; - } - - public void setEmbargoenddate(String embargoenddate) { - this.embargoenddate = embargoenddate; - } - - public List getSource() { - return source; - } - - public void setSource(List source) { - this.source = source; - } - - public List getFormat() { - return format; - } - - public void setFormat(List format) { - this.format = format; - } - - public 
List getContributor() { - return contributor; - } - - public void setContributor(List contributor) { - this.contributor = contributor; - } - - public List getCoverage() { - return coverage; - } - - public void setCoverage(List coverage) { - this.coverage = coverage; - } - - public AccessRight getBestaccessright() { - return bestaccessright; - } - - public void setBestaccessright(AccessRight bestaccessright) { - this.bestaccessright = bestaccessright; - } - - public List getDocumentationUrl() { - return documentationUrl; - } - - public void setDocumentationUrl(List documentationUrl) { - this.documentationUrl = documentationUrl; - } - - public String getCodeRepositoryUrl() { - return codeRepositoryUrl; - } - - public void setCodeRepositoryUrl(String codeRepositoryUrl) { - this.codeRepositoryUrl = codeRepositoryUrl; - } - - public String getProgrammingLanguage() { - return programmingLanguage; - } - - public void setProgrammingLanguage(String programmingLanguage) { - this.programmingLanguage = programmingLanguage; - } - - public List getContactperson() { - return contactperson; - } - - public void setContactperson(List contactperson) { - this.contactperson = contactperson; - } - - public List getContactgroup() { - return contactgroup; - } - - public void setContactgroup(List contactgroup) { - this.contactgroup = contactgroup; - } - - public List getTool() { - return tool; - } - - public void setTool(List tool) { - this.tool = tool; - } - - public String getSize() { - return size; - } - - public void setSize(String size) { - this.size = size; - } - - public String getVersion() { - return version; - } - - public void setVersion(String version) { - this.version = version; - } - - public List getGeolocation() { - return geolocation; - } - - public void setGeolocation(List geolocation) { - this.geolocation = geolocation; - } -} diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/Subject.java 
b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/Subject.java deleted file mode 100644 index 5c4bbef3c..000000000 --- a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/Subject.java +++ /dev/null @@ -1,34 +0,0 @@ - -package eu.dnetlib.dhp.schema.dump.oaf; - -import java.io.Serializable; - -/** - * To represent keywords associated to the result. It has two parameters: - subject of type - * eu.dnetlib.dhp.schema.dump.oaf.ControlledField to describe the subject. It mapped as: - schema it corresponds to - * qualifier.classid of the dumped subject - value it corresponds to the subject value - provenance of type - * eu.dnetlib.dhp.schema.dump.oaf.Provenance to represent the provenance of the subject. It is dumped only if dataInfo - * is not null. In this case: - provenance corresponds to dataInfo.provenanceaction.classname - trust corresponds to - * dataInfo.trust - */ -public class Subject implements Serializable { - private ControlledField subject; - private Provenance provenance; - - public ControlledField getSubject() { - return subject; - } - - public void setSubject(ControlledField subject) { - this.subject = subject; - } - - public Provenance getProvenance() { - return provenance; - } - - public void setProvenance(Provenance provenance) { - this.provenance = provenance; - } - -} diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/community/CommunityInstance.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/community/CommunityInstance.java deleted file mode 100644 index 6a605d742..000000000 --- a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/community/CommunityInstance.java +++ /dev/null @@ -1,36 +0,0 @@ - -package eu.dnetlib.dhp.schema.dump.oaf.community; - -import eu.dnetlib.dhp.schema.dump.oaf.Instance; -import eu.dnetlib.dhp.schema.dump.oaf.KeyValue; - -/** - * It extends eu.dnetlib.dhp.dump.oaf.Instance with values related to the community dump. 
In the Result dump this - * information is not present because it is dumped as a set of relations between the result and the datasource. - - * hostedby of type eu.dnetlib.dhp.schema.dump.oaf.KeyValue to store the information about the source from which the - * instance can be viewed or downloaded. It is mapped against the hostedby parameter of the instance to be dumped and - - * key corresponds to hostedby.key - value corresponds to hostedby.value - collectedfrom of type - * eu.dnetlib.dhp.schema.dump.oaf.KeyValue to store the information about the source from which the instance has been - * collected. It is mapped against the collectedfrom parameter of the instance to be dumped and - key corresponds to - * collectedfrom.key - value corresponds to collectedfrom.value - */ -public class CommunityInstance extends Instance { - private KeyValue hostedby; - private KeyValue collectedfrom; - - public KeyValue getHostedby() { - return hostedby; - } - - public void setHostedby(KeyValue hostedby) { - this.hostedby = hostedby; - } - - public KeyValue getCollectedfrom() { - return collectedfrom; - } - - public void setCollectedfrom(KeyValue collectedfrom) { - this.collectedfrom = collectedfrom; - } -} diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/community/CommunityResult.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/community/CommunityResult.java deleted file mode 100644 index 690a53706..000000000 --- a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/community/CommunityResult.java +++ /dev/null @@ -1,63 +0,0 @@ - -package eu.dnetlib.dhp.schema.dump.oaf.community; - -import java.util.List; - -import eu.dnetlib.dhp.schema.dump.oaf.KeyValue; -import eu.dnetlib.dhp.schema.dump.oaf.Result; - -/** - * extends eu.dnetlib.dhp.schema.dump.oaf.Result with the following parameters: - projects of type - * List to store the list of projects related to the result. 
The - * information is added after the result is mapped to the external model - context of type - * List to store information about the RC RI related to the result. - * For each context in the result represented in the internal model one context in the external model is produced - - * collectedfrom of type List to store information about the sources from which - * the record has been collected. For each collectedfrom in the result represented in the internal model one - * collectedfrom in the external model is produced - instance of type - * List to store all the instances associated to the result. - * It corresponds to the same parameter in the result represented in the internal model - */ -public class CommunityResult extends Result { - - private List projects; - - private List context; - - protected List collectedfrom; - - private List instance; - - public List getInstance() { - return instance; - } - - public void setInstance(List instance) { - this.instance = instance; - } - - public List getCollectedfrom() { - return collectedfrom; - } - - public void setCollectedfrom(List collectedfrom) { - this.collectedfrom = collectedfrom; - } - - public List getProjects() { - return projects; - } - - public void setProjects(List projects) { - this.projects = projects; - } - - public List getContext() { - return context; - } - - public void setContext(List context) { - this.context = context; - } - -} diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/community/Context.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/community/Context.java deleted file mode 100644 index 3ad692b30..000000000 --- a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/community/Context.java +++ /dev/null @@ -1,40 +0,0 @@ - -package eu.dnetlib.dhp.schema.dump.oaf.community; - -import java.util.List; -import java.util.Objects; - -import eu.dnetlib.dhp.schema.dump.oaf.Provenance; -import eu.dnetlib.dhp.schema.dump.oaf.Qualifier; - -/** - * Reference to a 
relevant research infrastructure, initiative or community (RI/RC) among those collaborating with - * OpenAIRE. It extend eu.dnetlib.dhp.shema.dump.oaf.Qualifier with a parameter provenance of type - * List to store the provenances of the association between the result and - * the RC/RI. The values for this element correspond to: - code: it corresponds to the id of the context in the result - * to be mapped. If the context id refers to a RC/RI and contains '::' only the part of the id before the first "::" - * will be used as value for code - label it corresponds to the label associated to the id. The information id taken - * from the profile of the RC/RI - provenance it is set only if the dataInfo associated to the contenxt element of the - * result to be dumped is not null. For each dataInfo one instance of type eu.dnetlib.dhp.schema.dump.oaf.Provenance is - * instantiated if the element datainfo.provenanceaction is not null. In this case - provenance corresponds to - * dataInfo.provenanceaction.classname - trust corresponds to dataInfo.trust - */ -public class Context extends Qualifier { - private List provenance; - - public List getProvenance() { - return provenance; - } - - public void setProvenance(List provenance) { - this.provenance = provenance; - } - - @Override - public int hashCode() { - String provenance = new String(); - this.provenance.forEach(p -> provenance.concat(p.toString())); - return Objects.hash(getCode(), getLabel(), provenance); - } - -} diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/community/Funder.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/community/Funder.java deleted file mode 100644 index adb41634a..000000000 --- a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/community/Funder.java +++ /dev/null @@ -1,23 +0,0 @@ - -package eu.dnetlib.dhp.schema.dump.oaf.community; - -import java.io.Serializable; - -/** - * To store information about the funder funding the project related to the 
result. It has the following parameters: - - * shortName of type String to store the funder short name (e.c. AKA). - name of type String to store the funder name - * (e.c. Akademy of Finland) - fundingStream of type String to store the funding stream - jurisdiction of type String to - * store the jurisdiction of the funder - */ -public class Funder extends eu.dnetlib.dhp.schema.dump.oaf.Funder { - - private String fundingStream; - - public String getFundingStream() { - return fundingStream; - } - - public void setFundingStream(String fundingStream) { - this.fundingStream = fundingStream; - } -} diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/community/Project.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/community/Project.java deleted file mode 100644 index 030b565be..000000000 --- a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/community/Project.java +++ /dev/null @@ -1,47 +0,0 @@ - -package eu.dnetlib.dhp.schema.dump.oaf.community; - -import java.io.Serializable; - -import eu.dnetlib.dhp.schema.dump.oaf.Provenance; - -/** - * To store information about the project related to the result. This information is not directly mapped from the result - * represented in the internal model because it is not there. The mapped result will be enriched with project - * information derived by relation between results and projects. Project extends eu.dnetlib.dhp.schema.dump.oaf.Project - * with the following parameters: - funder of type eu.dnetlib.dhp.schema.dump.oaf.community.Funder to store information - * about the funder funding the project - provenance of type eu.dnetlib.dhp.schema.dump.oaf.Provenance to store - * information about the. 
provenance of the association between the result and the project - */ -public class Project extends eu.dnetlib.dhp.schema.dump.oaf.Project { - - private Funder funder; - - private Provenance provenance; - - public Provenance getProvenance() { - return provenance; - } - - public void setProvenance(Provenance provenance) { - this.provenance = provenance; - } - - public Funder getFunder() { - return funder; - } - - public void setFunder(Funder funders) { - this.funder = funders; - } - - public static Project newInstance(String id, String code, String acronym, String title, Funder funder) { - Project project = new Project(); - project.setAcronym(acronym); - project.setCode(code); - project.setFunder(funder); - project.setId(id); - project.setTitle(title); - return project; - } -} diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/graph/Constants.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/graph/Constants.java deleted file mode 100644 index 35cc60c1c..000000000 --- a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/graph/Constants.java +++ /dev/null @@ -1,21 +0,0 @@ - -package eu.dnetlib.dhp.schema.dump.oaf.graph; - -import java.io.Serializable; - -public class Constants implements Serializable { - // collectedFrom va con isProvidedBy -> becco da ModelSupport - - public static final String HOSTED_BY = "isHostedBy"; - public static final String HOSTS = "hosts"; - - // community result uso isrelatedto - - public static final String RESULT_ENTITY = "result"; - public static final String DATASOURCE_ENTITY = "datasource"; - public static final String CONTEXT_ENTITY = "context"; - - public static final String CONTEXT_ID = "60"; - public static final String CONTEXT_NS_PREFIX = "context____"; - -} diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/graph/Datasource.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/graph/Datasource.java deleted file mode 100644 index 6b2b7b1ab..000000000 --- 
a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/graph/Datasource.java +++ /dev/null @@ -1,316 +0,0 @@ - -package eu.dnetlib.dhp.schema.dump.oaf.graph; - -import java.io.Serializable; -import java.util.List; - -import eu.dnetlib.dhp.schema.dump.oaf.Container; -import eu.dnetlib.dhp.schema.dump.oaf.ControlledField; -import eu.dnetlib.dhp.schema.dump.oaf.KeyValue; - -/** - * To store information about the datasource OpenAIRE collects information from. It contains the following parameters: - - * id of type String to store the OpenAIRE id for the datasource. It corresponds to the parameter id of the datasource - * represented in the internal model - originalId of type List to store the list of original ids associated to - * the datasource. It corresponds to the parameter originalId of the datasource represented in the internal model. The - * null values are filtered out - pid of type List to store the - * persistent identifiers for the datasource. For each pid in the datasource represented in the internal model one pid - * in the external model is produced as : - schema corresponds to pid.qualifier.classid of the datasource represented in - * the internal model - value corresponds to pid.value of the datasource represented in the internal model - - * datasourceType of type eu.dnetlib.dhp.schema.dump.oaf.ControlledField to store the datasource type (e.g. - * pubsrepository::institutional, Institutional Repository) as in the dnet vocabulary dnet:datasource_typologies. It - * corresponds to datasourcetype of the datasource represented in the internal model and : - code corresponds to - * datasourcetype.classid - value corresponds to datasourcetype.classname - openairecompatibility of type String to - * store information about the OpenAIRE compatibility of the ingested results (which guidelines they are compliant to). 
- * It corresponds to openairecompatibility.classname of the datasource represented in the internal model - officialname - * of type Sgtring to store the official name of the datasource. It correspond to officialname.value of the datasource - * represented in the internal model - englishname of type String to store the English name of the datasource. It - * corresponds to englishname.value of the datasource represented in the internal model - websiteurl of type String to - * store the URL of the website of the datasource. It corresponds to websiteurl.value of the datasource represented in - * the internal model - logourl of type String to store the URL of the logo for the datasource. It corresponds to - * logourl.value of the datasource represented in the internal model - dateofvalidation of type String to store the data - * of validation against the guidelines for the datasource records. It corresponds to dateofvalidation.value of the - * datasource represented in the internal model - description of type String to store the description for the - * datasource. 
It corresponds to description.value of the datasource represented in the internal model - */ -public class Datasource implements Serializable { - - private String id; // string - - private List originalId; // list string - - private List pid; // list - - private ControlledField datasourcetype; // value - - private String openairecompatibility; // value - - private String officialname; // string - - private String englishname; // string - - private String websiteurl; // string - - private String logourl; // string - - private String dateofvalidation; // string - - private String description; // description - - private List subjects; // List - - // opendoar specific fields (od*) - - private List languages; // odlanguages List - - private List contenttypes; // odcontent types List - - // re3data fields - private String releasestartdate; // string - - private String releaseenddate; // string - - private String missionstatementurl; // string - - // {open, restricted or closed} - private String accessrights; // databaseaccesstype string - - // {open, restricted or closed} - private String uploadrights; // datauploadtype string - - // {feeRequired, registration, other} - private String databaseaccessrestriction; // string - - // {feeRequired, registration, other} - private String datauploadrestriction; // string - - private Boolean versioning; // boolean - - private String citationguidelineurl; // string - - // {yes, no, uknown} - - private String pidsystems; // string - - private String certificates; // string - - private List policies; // - - private Container journal; // issn etc del Journal - - public String getId() { - return id; - } - - public void setId(String id) { - this.id = id; - } - - public List getOriginalId() { - return originalId; - } - - public void setOriginalId(List originalId) { - this.originalId = originalId; - } - - public List getPid() { - return pid; - } - - public void setPid(List pid) { - this.pid = pid; - } - - public ControlledField 
getDatasourcetype() { - return datasourcetype; - } - - public void setDatasourcetype(ControlledField datasourcetype) { - this.datasourcetype = datasourcetype; - } - - public String getOpenairecompatibility() { - return openairecompatibility; - } - - public void setOpenairecompatibility(String openairecompatibility) { - this.openairecompatibility = openairecompatibility; - } - - public String getOfficialname() { - return officialname; - } - - public void setOfficialname(String officialname) { - this.officialname = officialname; - } - - public String getEnglishname() { - return englishname; - } - - public void setEnglishname(String englishname) { - this.englishname = englishname; - } - - public String getWebsiteurl() { - return websiteurl; - } - - public void setWebsiteurl(String websiteurl) { - this.websiteurl = websiteurl; - } - - public String getLogourl() { - return logourl; - } - - public void setLogourl(String logourl) { - this.logourl = logourl; - } - - public String getDateofvalidation() { - return dateofvalidation; - } - - public void setDateofvalidation(String dateofvalidation) { - this.dateofvalidation = dateofvalidation; - } - - public String getDescription() { - return description; - } - - public void setDescription(String description) { - this.description = description; - } - - public List getSubjects() { - return subjects; - } - - public void setSubjects(List subjects) { - this.subjects = subjects; - } - - public List getLanguages() { - return languages; - } - - public void setLanguages(List languages) { - this.languages = languages; - } - - public List getContenttypes() { - return contenttypes; - } - - public void setContenttypes(List contenttypes) { - this.contenttypes = contenttypes; - } - - public String getReleasestartdate() { - return releasestartdate; - } - - public void setReleasestartdate(String releasestartdate) { - this.releasestartdate = releasestartdate; - } - - public String getReleaseenddate() { - return releaseenddate; - } - - public 
void setReleaseenddate(String releaseenddate) { - this.releaseenddate = releaseenddate; - } - - public String getMissionstatementurl() { - return missionstatementurl; - } - - public void setMissionstatementurl(String missionstatementurl) { - this.missionstatementurl = missionstatementurl; - } - - public String getAccessrights() { - return accessrights; - } - - public void setAccessrights(String accessrights) { - this.accessrights = accessrights; - } - - public String getUploadrights() { - return uploadrights; - } - - public void setUploadrights(String uploadrights) { - this.uploadrights = uploadrights; - } - - public String getDatabaseaccessrestriction() { - return databaseaccessrestriction; - } - - public void setDatabaseaccessrestriction(String databaseaccessrestriction) { - this.databaseaccessrestriction = databaseaccessrestriction; - } - - public String getDatauploadrestriction() { - return datauploadrestriction; - } - - public void setDatauploadrestriction(String datauploadrestriction) { - this.datauploadrestriction = datauploadrestriction; - } - - public Boolean getVersioning() { - return versioning; - } - - public void setVersioning(Boolean versioning) { - this.versioning = versioning; - } - - public String getCitationguidelineurl() { - return citationguidelineurl; - } - - public void setCitationguidelineurl(String citationguidelineurl) { - this.citationguidelineurl = citationguidelineurl; - } - - public String getPidsystems() { - return pidsystems; - } - - public void setPidsystems(String pidsystems) { - this.pidsystems = pidsystems; - } - - public String getCertificates() { - return certificates; - } - - public void setCertificates(String certificates) { - this.certificates = certificates; - } - - public List getPolicies() { - return policies; - } - - public void setPolicies(List policiesr3) { - this.policies = policiesr3; - } - - public Container getJournal() { - return journal; - } - - public void setJournal(Container journal) { - this.journal = journal; 
- } -} diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/graph/Funder.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/graph/Funder.java deleted file mode 100644 index 3b298c81d..000000000 --- a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/graph/Funder.java +++ /dev/null @@ -1,22 +0,0 @@ - -package eu.dnetlib.dhp.schema.dump.oaf.graph; - -import java.io.Serializable; - -/** - * To store information about the funder funding the project related to the result. It extends - * eu.dnetlib.dhp.schema.dump.oaf.Funder with the following parameter: - - private - * eu.dnetdlib.dhp.schema.dump.oaf.graph.Fundings funding_stream to store the fundingstream - */ -public class Funder extends eu.dnetlib.dhp.schema.dump.oaf.Funder { - - private Fundings funding_stream; - - public Fundings getFunding_stream() { - return funding_stream; - } - - public void setFunding_stream(Fundings funding_stream) { - this.funding_stream = funding_stream; - } -} diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/graph/Fundings.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/graph/Fundings.java deleted file mode 100644 index a74c34778..000000000 --- a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/graph/Fundings.java +++ /dev/null @@ -1,35 +0,0 @@ - -package eu.dnetlib.dhp.schema.dump.oaf.graph; - -import java.io.Serializable; - -/** - * To store inforamtion about the funding stream. It has two parameters: - private String id to store the id of the - * fundings stream. The id is created by appending the shortname of the funder to the name of each level in the xml - * representing the fundng stream. For example: if the funder is the European Commission, the funding level 0 name is - * FP7, the funding level 1 name is SP3 and the funding level 2 name is PEOPLE then the id will be: EC::FP7::SP3::PEOPLE - * - private String description to describe the funding stream. 
It is created by concatenating the description of each - * funding level so for the example above the description would be: SEVENTH FRAMEWORK PROGRAMME - SP3-People - - * Marie-Curie Actions - */ -public class Fundings implements Serializable { - - private String id; - private String description; - - public String getId() { - return id; - } - - public void setId(String id) { - this.id = id; - } - - public String getDescription() { - return description; - } - - public void setDescription(String description) { - this.description = description; - } -} diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/graph/Granted.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/graph/Granted.java deleted file mode 100644 index 94ace55aa..000000000 --- a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/graph/Granted.java +++ /dev/null @@ -1,55 +0,0 @@ - -package eu.dnetlib.dhp.schema.dump.oaf.graph; - -import java.io.Serializable; -import java.util.Optional; - -/** - * To describe the funded amount. 
It has the following parameters: - private String currency to store the currency of - * the fund - private float totalcost to store the total cost of the project - private float fundedamount to store the - * funded amount by the funder - */ -public class Granted implements Serializable { - private String currency; - private float totalcost; - private float fundedamount; - - public String getCurrency() { - return currency; - } - - public void setCurrency(String currency) { - this.currency = currency; - } - - public float getTotalcost() { - return totalcost; - } - - public void setTotalcost(float totalcost) { - this.totalcost = totalcost; - } - - public float getFundedamount() { - return fundedamount; - } - - public void setFundedamount(float fundedamount) { - this.fundedamount = fundedamount; - } - - public static Granted newInstance(String currency, float totalcost, float fundedamount) { - Granted granted = new Granted(); - granted.currency = currency; - granted.totalcost = totalcost; - granted.fundedamount = fundedamount; - return granted; - } - - public static Granted newInstance(String currency, float fundedamount) { - Granted granted = new Granted(); - granted.currency = currency; - granted.fundedamount = fundedamount; - return granted; - } -} diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/graph/GraphResult.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/graph/GraphResult.java deleted file mode 100644 index 1675f9ec5..000000000 --- a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/graph/GraphResult.java +++ /dev/null @@ -1,24 +0,0 @@ - -package eu.dnetlib.dhp.schema.dump.oaf.graph; - -import java.util.List; - -import eu.dnetlib.dhp.schema.dump.oaf.Instance; -import eu.dnetlib.dhp.schema.dump.oaf.Result; - -/** - * It extends the eu.dnetlib.dhp.schema.dump.oaf.Result with - instance of type - * List to store all the instances associated to the result. 
It corresponds to - * the same parameter in the result represented in the internal model - */ -public class GraphResult extends Result { - private List instance; - - public List getInstance() { - return instance; - } - - public void setInstance(List instance) { - this.instance = instance; - } -} diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/graph/H2020Classification.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/graph/H2020Classification.java deleted file mode 100644 index 4a61663b8..000000000 --- a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/graph/H2020Classification.java +++ /dev/null @@ -1,82 +0,0 @@ - -package eu.dnetlib.dhp.schema.dump.oaf.graph; - -import java.io.Serializable; - -/** - * To store information about the classification for the project. The classification depends on the programme. For example - * H2020-EU.3.4.5.3 can be classified as - * H2020-EU.3. => Societal Challenges (level1) - * H2020-EU.3.4. => Transport (level2) - * H2020-EU.3.4.5. => CLEANSKY2 (level3) - * H2020-EU.3.4.5.3. => IADP Fast Rotorcraft (level4) - * - * We decided to explicitly represent up to three levels in the classification. 
- * - * H2020Classification has the following parameters: - * - private Programme programme to store the information about the programme related to this classification - * - private String level1 to store the information about the level 1 of the classification (Priority or Pillar of the EC) - * - private String level2 to store the information about the level2 af the classification (Objectives (?)) - * - private String level3 to store the information about the level3 of the classification - * - private String classification to store the entire classification related to the programme - */ -public class H2020Classification implements Serializable { - private Programme programme; - - private String level1; - private String level2; - private String level3; - - private String classification; - - public Programme getProgramme() { - return programme; - } - - public void setProgramme(Programme programme) { - this.programme = programme; - } - - public String getLevel1() { - return level1; - } - - public void setLevel1(String level1) { - this.level1 = level1; - } - - public String getLevel2() { - return level2; - } - - public void setLevel2(String level2) { - this.level2 = level2; - } - - public String getLevel3() { - return level3; - } - - public void setLevel3(String level3) { - this.level3 = level3; - } - - public String getClassification() { - return classification; - } - - public void setClassification(String classification) { - this.classification = classification; - } - - public static H2020Classification newInstance(String programme_code, String programme_description, String level1, - String level2, String level3, String classification) { - H2020Classification h2020classification = new H2020Classification(); - h2020classification.programme = Programme.newInstance(programme_code, programme_description); - h2020classification.level1 = level1; - h2020classification.level2 = level2; - h2020classification.level3 = level3; - h2020classification.classification = 
classification; - return h2020classification; - } -} diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/graph/Node.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/graph/Node.java deleted file mode 100644 index 00f1a29bc..000000000 --- a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/graph/Node.java +++ /dev/null @@ -1,38 +0,0 @@ - -package eu.dnetlib.dhp.schema.dump.oaf.graph; - -import java.io.Serializable; - -/** - * To represent the generic node in a relation. It has the following parameters: - private String id the openaire id of - * the entity in the relation - private String type the type of the entity in the relation. Consider the generic - * relation between a Result R and a Project P, the node representing R will have as id the id of R and as type result, - * while the node representing the project will have as id the id of the project and as type project - */ -public class Node implements Serializable { - private String id; - private String type; - - public String getId() { - return id; - } - - public void setId(String id) { - this.id = id; - } - - public String getType() { - return type; - } - - public void setType(String type) { - this.type = type; - } - - public static Node newInstance(String id, String type) { - Node node = new Node(); - node.id = id; - node.type = type; - return node; - } -} diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/graph/Organization.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/graph/Organization.java deleted file mode 100644 index 2407c9cfc..000000000 --- a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/graph/Organization.java +++ /dev/null @@ -1,86 +0,0 @@ - -package eu.dnetlib.dhp.schema.dump.oaf.graph; - -import java.io.Serializable; -import java.util.List; - -import eu.dnetlib.dhp.schema.dump.oaf.ControlledField; -import eu.dnetlib.dhp.schema.dump.oaf.Country; -import eu.dnetlib.dhp.schema.dump.oaf.KeyValue; -import 
eu.dnetlib.dhp.schema.dump.oaf.Qualifier; -import eu.dnetlib.dhp.schema.dump.oaf.community.Project; - -/** - * To represent the generic organizaiton. It has the following parameters: - private String legalshortname to store the - * legalshortname of the organizaiton - private String legalname to store the legal name of the organization - private - * String websiteurl to store the websiteurl of the organization - private List alternativenames to store the - * alternative names of the organization - private Qualifier country to store the country of the organization - private - * String id to store the id of the organization - private List pid to store the list of pids for the - * organization - */ -public class Organization implements Serializable { - private String legalshortname; - private String legalname; - private String websiteurl; - private List alternativenames; - private Qualifier country; - private String id; - private List pid; - - public String getLegalshortname() { - return legalshortname; - } - - public void setLegalshortname(String legalshortname) { - this.legalshortname = legalshortname; - } - - public String getLegalname() { - return legalname; - } - - public void setLegalname(String legalname) { - this.legalname = legalname; - } - - public String getWebsiteurl() { - return websiteurl; - } - - public void setWebsiteurl(String websiteurl) { - this.websiteurl = websiteurl; - } - - public List getAlternativenames() { - return alternativenames; - } - - public void setAlternativenames(List alternativenames) { - this.alternativenames = alternativenames; - } - - public Qualifier getCountry() { - return country; - } - - public void setCountry(Qualifier country) { - this.country = country; - } - - public String getId() { - return id; - } - - public void setId(String id) { - this.id = id; - } - - public List getPid() { - return pid; - } - - public void setPid(List pid) { - this.pid = pid; - } - -} diff --git 
a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/graph/Programme.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/graph/Programme.java deleted file mode 100644 index 9892790e3..000000000 --- a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/graph/Programme.java +++ /dev/null @@ -1,36 +0,0 @@ - -package eu.dnetlib.dhp.schema.dump.oaf.graph; - -import java.io.Serializable; - -/** - * To store information about the ec programme for the project. It has the following parameters: - private String code - * to store the code of the programme - private String description to store the description of the programme - */ -public class Programme implements Serializable { - private String code; - private String description; - - public String getCode() { - return code; - } - - public void setCode(String code) { - this.code = code; - } - - public String getDescription() { - return description; - } - - public void setDescription(String description) { - this.description = description; - } - - public static Programme newInstance(String code, String description) { - Programme p = new Programme(); - p.code = code; - p.description = description; - return p; - } -} diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/graph/Project.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/graph/Project.java deleted file mode 100644 index 612be9d25..000000000 --- a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/graph/Project.java +++ /dev/null @@ -1,192 +0,0 @@ - -package eu.dnetlib.dhp.schema.dump.oaf.graph; - -import java.io.Serializable; -import java.util.List; - -/** - * This is the class representing the Project in the model used for the dumps of the whole graph. At the moment the dump - * of the Projects differs from the other dumps because we do not create relations between Funders (Organization) and - * Projects but we put the information about the Funder within the Project representation. 
We also removed the - * collected from element from the Project. No relation between the Project and the Datasource entity from which it is - * collected will be created. We will never create relations between Project and Datasource. In case some relation will - * be extracted from the Project they will refer the Funder and will be of type ( organization -> funds -> project, - * project -> isFundedBy -> organization) We also removed the duration parameter because the most of times it is set to - * 0. It has the following parameters: - * - private String id to store the id of the project (OpenAIRE id) - * - private String websiteurl to store the websiteurl of the project - * - private String code to store the grant agreement of the project - * - private String acronym to store the acronym of the project - * - private String title to store the tile of the project - * - private String startdate to store the start date - * - private String enddate to store the end date - * - private String callidentifier to store the call indentifier - * - private String keywords to store the keywords - * - private boolean openaccessmandateforpublications to store if the project must accomplish to the open access mandate - * for publications. This value will be set to true if one of the field in the project represented in the internal model - * is set to true - * - private boolean openaccessmandatefordataset to store if the project must accomplish to the open access mandate for - * dataset. 
It is set to the value in the corresponding filed of the project represented in the internal model - * - private List subject to store the list of subjects of the project - * - private List funding to store the list of funder of the project - * - private String summary to store the summary of the project - * - private Granted granted to store the granted amount - * - private List h2020programme to store the list of programmes the project is related to - */ - -public class Project implements Serializable { - private String id; - - private String websiteurl; - private String code; - private String acronym; - private String title; - private String startdate; - - private String enddate; - - private String callidentifier; - - private String keywords; - - private boolean openaccessmandateforpublications; - - private boolean openaccessmandatefordataset; - private List subject; - - private List funding; - - private String summary; - - private Granted granted; - - private List h2020programme; - - public String getId() { - return id; - } - - public void setId(String id) { - this.id = id; - } - - public String getWebsiteurl() { - return websiteurl; - } - - public void setWebsiteurl(String websiteurl) { - this.websiteurl = websiteurl; - } - - public String getCode() { - return code; - } - - public void setCode(String code) { - this.code = code; - } - - public String getAcronym() { - return acronym; - } - - public void setAcronym(String acronym) { - this.acronym = acronym; - } - - public String getTitle() { - return title; - } - - public void setTitle(String title) { - this.title = title; - } - - public String getStartdate() { - return startdate; - } - - public void setStartdate(String startdate) { - this.startdate = startdate; - } - - public String getEnddate() { - return enddate; - } - - public void setEnddate(String enddate) { - this.enddate = enddate; - } - - public String getCallidentifier() { - return callidentifier; - } - - public void setCallidentifier(String 
callidentifier) { - this.callidentifier = callidentifier; - } - - public String getKeywords() { - return keywords; - } - - public void setKeywords(String keywords) { - this.keywords = keywords; - } - - public boolean isOpenaccessmandateforpublications() { - return openaccessmandateforpublications; - } - - public void setOpenaccessmandateforpublications(boolean openaccessmandateforpublications) { - this.openaccessmandateforpublications = openaccessmandateforpublications; - } - - public boolean isOpenaccessmandatefordataset() { - return openaccessmandatefordataset; - } - - public void setOpenaccessmandatefordataset(boolean openaccessmandatefordataset) { - this.openaccessmandatefordataset = openaccessmandatefordataset; - } - - public List getSubject() { - return subject; - } - - public void setSubject(List subject) { - this.subject = subject; - } - - public List getFunding() { - return funding; - } - - public void setFunding(List funding) { - this.funding = funding; - } - - public String getSummary() { - return summary; - } - - public void setSummary(String summary) { - this.summary = summary; - } - - public Granted getGranted() { - return granted; - } - - public void setGranted(Granted granted) { - this.granted = granted; - } - - public List getH2020programme() { - return h2020programme; - } - - public void setH2020programme(List h2020programme) { - this.h2020programme = h2020programme; - } -} diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/graph/RelType.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/graph/RelType.java deleted file mode 100644 index 629b30ee4..000000000 --- a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/graph/RelType.java +++ /dev/null @@ -1,39 +0,0 @@ - -package eu.dnetlib.dhp.schema.dump.oaf.graph; - -import java.io.Serializable; - -/** - * To represent the semantics of the generic relation between two entities. 
It has the following parameters: - private - * String name to store the semantics of the relation (i.e. isAuthorInstitutionOf). It corresponds to the relclass - * parameter in the relation represented in the internal model represented in the internal model - private String type - * to store the type of the relation (i.e. affiliation). It corresponds to the subreltype parameter of the relation - * represented in theinternal model - */ -public class RelType implements Serializable { - private String name; // relclass - private String type; // subreltype - - public String getName() { - return name; - } - - public void setName(String name) { - this.name = name; - } - - public String getType() { - return type; - } - - public void setType(String type) { - this.type = type; - } - - public static RelType newInstance(String name, String type) { - RelType rel = new RelType(); - rel.name = name; - rel.type = type; - return rel; - } -} diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/graph/Relation.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/graph/Relation.java deleted file mode 100644 index e2b126e63..000000000 --- a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/graph/Relation.java +++ /dev/null @@ -1,67 +0,0 @@ - -package eu.dnetlib.dhp.schema.dump.oaf.graph; - -import java.io.Serializable; -import java.util.Objects; - -import eu.dnetlib.dhp.schema.dump.oaf.Provenance; - -/** - * To represent the gereric relation between two entities. 
It has the following parameters: - private Node source to - * represent the entity source of the relation - private Node target to represent the entity target of the relation - - * private RelType reltype to represent the semantics of the relation - private Provenance provenance to represent the - * provenance of the relation - */ -public class Relation implements Serializable { - private Node source; - private Node target; - private RelType reltype; - private Provenance provenance; - - public Node getSource() { - return source; - } - - public void setSource(Node source) { - this.source = source; - } - - public Node getTarget() { - return target; - } - - public void setTarget(Node target) { - this.target = target; - } - - public RelType getReltype() { - return reltype; - } - - public void setReltype(RelType reltype) { - this.reltype = reltype; - } - - public Provenance getProvenance() { - return provenance; - } - - public void setProvenance(Provenance provenance) { - this.provenance = provenance; - } - - @Override - public int hashCode() { - - return Objects.hash(source.getId(), target.getId(), reltype.getType() + ":" + reltype.getName()); - } - - public static Relation newInstance(Node source, Node target, RelType reltype, Provenance provenance) { - Relation relation = new Relation(); - relation.source = source; - relation.target = target; - relation.reltype = reltype; - relation.provenance = provenance; - return relation; - } -} diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/graph/ResearchCommunity.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/graph/ResearchCommunity.java deleted file mode 100644 index 026042ce9..000000000 --- a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/graph/ResearchCommunity.java +++ /dev/null @@ -1,20 +0,0 @@ - -package eu.dnetlib.dhp.schema.dump.oaf.graph; - -import java.util.List; - -/** - * To represent RC entities. 
It extends eu.dnetlib.dhp.dump.oaf.grap.ResearchInitiative by adding the parameter subject - * to store the list of subjects related to the community - */ -public class ResearchCommunity extends ResearchInitiative { - private List subject; - - public List getSubject() { - return subject; - } - - public void setSubject(List subject) { - this.subject = subject; - } -} diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/graph/ResearchInitiative.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/graph/ResearchInitiative.java deleted file mode 100644 index ad4ad8877..000000000 --- a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/graph/ResearchInitiative.java +++ /dev/null @@ -1,75 +0,0 @@ - -package eu.dnetlib.dhp.schema.dump.oaf.graph; - -import java.io.Serializable; - -/** - * To represent entity of type RC/RI. It has the following parameters, which are mostly derived by the profile - * - private - * String id to store the openaire id for the entity. Is has as code 00 and will be created as - * 00|context_____::md5(originalId) private - * String originalId to store the id of the context as provided in the profile - * (i.e. 
mes) - * - private String name to store the name of the context (got from the label attribute in the context - * definition) - * - private String type to store the type of the context (i.e.: research initiative or research community) - * - private String description to store the description of the context as given in the profile - * -private String - * zenodo_community to store the zenodo community associated to the context (main zenodo community) - */ -public class ResearchInitiative implements Serializable { - private String id; // openaireId - private String originalId; // context id - private String name; // context name - private String type; // context type: research initiative or research community - private String description; - private String zenodo_community; - - public String getZenodo_community() { - return zenodo_community; - } - - public void setZenodo_community(String zenodo_community) { - this.zenodo_community = zenodo_community; - } - - public String getType() { - return type; - } - - public void setType(String type) { - this.type = type; - } - - public String getId() { - return id; - } - - public void setId(String id) { - this.id = id; - } - - public String getName() { - return name; - } - - public void setName(String label) { - this.name = label; - } - - public String getOriginalId() { - return originalId; - } - - public void setOriginalId(String originalId) { - this.originalId = originalId; - } - - public String getDescription() { - return description; - } - - public void setDescription(String description) { - this.description = description; - } -} diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Author.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Author.java deleted file mode 100644 index 231fb1e60..000000000 --- a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Author.java +++ /dev/null @@ -1,89 +0,0 @@ - -package eu.dnetlib.dhp.schema.oaf; - -import java.io.Serializable; -import java.util.*; - -public class 
Author implements Serializable { - - private String fullname; - - private String name; - - private String surname; - - private Integer rank; - - private List pid; - - private List> affiliation; - - public String getFullname() { - return fullname; - } - - public void setFullname(String fullname) { - this.fullname = fullname; - } - - public String getName() { - return name; - } - - public void setName(String name) { - this.name = name; - } - - public String getSurname() { - return surname; - } - - public void setSurname(String surname) { - this.surname = surname; - } - - public Integer getRank() { - return rank; - } - - public void setRank(Integer rank) { - this.rank = rank; - } - - public List getPid() { - return pid; - } - - public void setPid(List pid) { - this.pid = pid; - } - - public List> getAffiliation() { - return affiliation; - } - - public void setAffiliation(List> affiliation) { - this.affiliation = affiliation; - } - - @Override - public boolean equals(Object o) { - if (this == o) - return true; - if (o == null || getClass() != o.getClass()) - return false; - Author author = (Author) o; - return Objects.equals(fullname, author.fullname) - && Objects.equals(name, author.name) - && Objects.equals(surname, author.surname) - && Objects.equals(rank, author.rank) - && Objects.equals(pid, author.pid) - && Objects.equals(affiliation, author.affiliation); - } - - @Override - public int hashCode() { - return Objects.hash(fullname, name, surname, rank, pid, affiliation); - } - -} diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Context.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Context.java deleted file mode 100644 index 57912c463..000000000 --- a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Context.java +++ /dev/null @@ -1,46 +0,0 @@ - -package eu.dnetlib.dhp.schema.oaf; - -import java.io.Serializable; -import java.util.List; - -public class Context implements Serializable { - private String id; - - private List dataInfo; - - 
public String getId() { - return id; - } - - public void setId(String id) { - this.id = id; - } - - public List getDataInfo() { - return dataInfo; - } - - public void setDataInfo(List dataInfo) { - this.dataInfo = dataInfo; - } - - @Override - public int hashCode() { - return id == null ? 0 : id.hashCode(); - } - - @Override - public boolean equals(Object obj) { - if (this == obj) - return true; - if (obj == null) - return false; - if (getClass() != obj.getClass()) - return false; - - Context other = (Context) obj; - - return id.equals(other.getId()); - } -} diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Country.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Country.java deleted file mode 100644 index e25fdcade..000000000 --- a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Country.java +++ /dev/null @@ -1,34 +0,0 @@ - -package eu.dnetlib.dhp.schema.oaf; - -import java.util.Objects; - -public class Country extends Qualifier { - - private DataInfo dataInfo; - - public DataInfo getDataInfo() { - return dataInfo; - } - - public void setDataInfo(DataInfo dataInfo) { - this.dataInfo = dataInfo; - } - - @Override - public boolean equals(Object o) { - if (this == o) - return true; - if (o == null || getClass() != o.getClass()) - return false; - if (!super.equals(o)) - return false; - Country country = (Country) o; - return Objects.equals(dataInfo, country.dataInfo); - } - - @Override - public int hashCode() { - return Objects.hash(super.hashCode(), dataInfo); - } -} diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/DataInfo.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/DataInfo.java deleted file mode 100644 index 9d572ee30..000000000 --- a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/DataInfo.java +++ /dev/null @@ -1,85 +0,0 @@ - -package eu.dnetlib.dhp.schema.oaf; - -import java.io.Serializable; -import java.util.Objects; - -public class DataInfo implements Serializable { - - private Boolean invisible 
= false; - private Boolean inferred; - private Boolean deletedbyinference = false; - private String trust; - private String inferenceprovenance; - private Qualifier provenanceaction; - - public Boolean getInvisible() { - return invisible; - } - - public void setInvisible(Boolean invisible) { - this.invisible = invisible; - } - - public Boolean getInferred() { - return inferred; - } - - public void setInferred(Boolean inferred) { - this.inferred = inferred; - } - - public Boolean getDeletedbyinference() { - return deletedbyinference; - } - - public void setDeletedbyinference(Boolean deletedbyinference) { - this.deletedbyinference = deletedbyinference; - } - - public String getTrust() { - return trust; - } - - public void setTrust(String trust) { - this.trust = trust; - } - - public String getInferenceprovenance() { - return inferenceprovenance; - } - - public void setInferenceprovenance(String inferenceprovenance) { - this.inferenceprovenance = inferenceprovenance; - } - - public Qualifier getProvenanceaction() { - return provenanceaction; - } - - public void setProvenanceaction(Qualifier provenanceaction) { - this.provenanceaction = provenanceaction; - } - - @Override - public boolean equals(Object o) { - if (this == o) - return true; - if (o == null || getClass() != o.getClass()) - return false; - DataInfo dataInfo = (DataInfo) o; - return Objects.equals(invisible, dataInfo.invisible) - && Objects.equals(inferred, dataInfo.inferred) - && Objects.equals(deletedbyinference, dataInfo.deletedbyinference) - && Objects.equals(trust, dataInfo.trust) - && Objects.equals(inferenceprovenance, dataInfo.inferenceprovenance) - && Objects.equals(provenanceaction, dataInfo.provenanceaction); - } - - @Override - public int hashCode() { - return Objects - .hash( - invisible, inferred, deletedbyinference, trust, inferenceprovenance, provenanceaction); - } -} diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Dataset.java 
b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Dataset.java deleted file mode 100644 index b5587c6b7..000000000 --- a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Dataset.java +++ /dev/null @@ -1,116 +0,0 @@ - -package eu.dnetlib.dhp.schema.oaf; - -import java.io.Serializable; -import java.util.List; - -import eu.dnetlib.dhp.schema.common.ModelConstants; - -public class Dataset extends Result implements Serializable { - - private Field storagedate; - - // candidate for removal - private Field device; - - private Field size; - - private Field version; - - private Field lastmetadataupdate; - - private Field metadataversionnumber; - - private List geolocation; - - public Dataset() { - setResulttype(ModelConstants.DATASET_DEFAULT_RESULTTYPE); - } - - public Field getStoragedate() { - return storagedate; - } - - public void setStoragedate(Field storagedate) { - this.storagedate = storagedate; - } - - public Field getDevice() { - return device; - } - - public void setDevice(Field device) { - this.device = device; - } - - public Field getSize() { - return size; - } - - public void setSize(Field size) { - this.size = size; - } - - public Field getVersion() { - return version; - } - - public void setVersion(Field version) { - this.version = version; - } - - public Field getLastmetadataupdate() { - return lastmetadataupdate; - } - - public void setLastmetadataupdate(Field lastmetadataupdate) { - this.lastmetadataupdate = lastmetadataupdate; - } - - public Field getMetadataversionnumber() { - return metadataversionnumber; - } - - public void setMetadataversionnumber(Field metadataversionnumber) { - this.metadataversionnumber = metadataversionnumber; - } - - public List getGeolocation() { - return geolocation; - } - - public void setGeolocation(List geolocation) { - this.geolocation = geolocation; - } - - @Override - public void mergeFrom(OafEntity e) { - super.mergeFrom(e); - - if (!Dataset.class.isAssignableFrom(e.getClass())) { - return; - } - - final Dataset 
d = (Dataset) e; - - storagedate = d.getStoragedate() != null && compareTrust(this, e) < 0 ? d.getStoragedate() : storagedate; - - device = d.getDevice() != null && compareTrust(this, e) < 0 ? d.getDevice() : device; - - size = d.getSize() != null && compareTrust(this, e) < 0 ? d.getSize() : size; - - version = d.getVersion() != null && compareTrust(this, e) < 0 ? d.getVersion() : version; - - lastmetadataupdate = d.getLastmetadataupdate() != null && compareTrust(this, e) < 0 - ? d.getLastmetadataupdate() - : lastmetadataupdate; - - metadataversionnumber = d.getMetadataversionnumber() != null && compareTrust(this, e) < 0 - ? d.getMetadataversionnumber() - : metadataversionnumber; - - geolocation = mergeLists(geolocation, d.getGeolocation()); - - mergeOAFDataInfo(d); - } -} diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Datasource.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Datasource.java deleted file mode 100644 index 721798206..000000000 --- a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Datasource.java +++ /dev/null @@ -1,472 +0,0 @@ - -package eu.dnetlib.dhp.schema.oaf; - -import java.io.Serializable; -import java.util.List; - -public class Datasource extends OafEntity implements Serializable { - - private Qualifier datasourcetype; - - private Qualifier openairecompatibility; - - private Field officialname; - - private Field englishname; - - private Field websiteurl; - - private Field logourl; - - private Field contactemail; - - private Field namespaceprefix; - - private Field latitude; - - private Field longitude; - - private Field dateofvalidation; - - private Field description; - - private List subjects; - - // opendoar specific fields (od*) - private Field odnumberofitems; - - private Field odnumberofitemsdate; - - private Field odpolicies; - - private List> odlanguages; - - private List> odcontenttypes; - - private List> accessinfopackage; - - // re3data fields - private Field releasestartdate; - - private Field 
releaseenddate; - - private Field missionstatementurl; - - private Field dataprovider; - - private Field serviceprovider; - - // {open, restricted or closed} - private Field databaseaccesstype; - - // {open, restricted or closed} - private Field datauploadtype; - - // {feeRequired, registration, other} - private Field databaseaccessrestriction; - - // {feeRequired, registration, other} - private Field datauploadrestriction; - - private Field versioning; - - private Field citationguidelineurl; - - // {yes, no, uknown} - private Field qualitymanagementkind; - - private Field pidsystems; - - private Field certificates; - - private List policies; - - private Journal journal; - - public Qualifier getDatasourcetype() { - return datasourcetype; - } - - public void setDatasourcetype(Qualifier datasourcetype) { - this.datasourcetype = datasourcetype; - } - - public Qualifier getOpenairecompatibility() { - return openairecompatibility; - } - - public void setOpenairecompatibility(Qualifier openairecompatibility) { - this.openairecompatibility = openairecompatibility; - } - - public Field getOfficialname() { - return officialname; - } - - public void setOfficialname(Field officialname) { - this.officialname = officialname; - } - - public Field getEnglishname() { - return englishname; - } - - public void setEnglishname(Field englishname) { - this.englishname = englishname; - } - - public Field getWebsiteurl() { - return websiteurl; - } - - public void setWebsiteurl(Field websiteurl) { - this.websiteurl = websiteurl; - } - - public Field getLogourl() { - return logourl; - } - - public void setLogourl(Field logourl) { - this.logourl = logourl; - } - - public Field getContactemail() { - return contactemail; - } - - public void setContactemail(Field contactemail) { - this.contactemail = contactemail; - } - - public Field getNamespaceprefix() { - return namespaceprefix; - } - - public void setNamespaceprefix(Field namespaceprefix) { - this.namespaceprefix = namespaceprefix; - } - - 
public Field getLatitude() { - return latitude; - } - - public void setLatitude(Field latitude) { - this.latitude = latitude; - } - - public Field getLongitude() { - return longitude; - } - - public void setLongitude(Field longitude) { - this.longitude = longitude; - } - - public Field getDateofvalidation() { - return dateofvalidation; - } - - public void setDateofvalidation(Field dateofvalidation) { - this.dateofvalidation = dateofvalidation; - } - - public Field getDescription() { - return description; - } - - public void setDescription(Field description) { - this.description = description; - } - - public List getSubjects() { - return subjects; - } - - public void setSubjects(List subjects) { - this.subjects = subjects; - } - - public Field getOdnumberofitems() { - return odnumberofitems; - } - - public void setOdnumberofitems(Field odnumberofitems) { - this.odnumberofitems = odnumberofitems; - } - - public Field getOdnumberofitemsdate() { - return odnumberofitemsdate; - } - - public void setOdnumberofitemsdate(Field odnumberofitemsdate) { - this.odnumberofitemsdate = odnumberofitemsdate; - } - - public Field getOdpolicies() { - return odpolicies; - } - - public void setOdpolicies(Field odpolicies) { - this.odpolicies = odpolicies; - } - - public List> getOdlanguages() { - return odlanguages; - } - - public void setOdlanguages(List> odlanguages) { - this.odlanguages = odlanguages; - } - - public List> getOdcontenttypes() { - return odcontenttypes; - } - - public void setOdcontenttypes(List> odcontenttypes) { - this.odcontenttypes = odcontenttypes; - } - - public List> getAccessinfopackage() { - return accessinfopackage; - } - - public void setAccessinfopackage(List> accessinfopackage) { - this.accessinfopackage = accessinfopackage; - } - - public Field getReleasestartdate() { - return releasestartdate; - } - - public void setReleasestartdate(Field releasestartdate) { - this.releasestartdate = releasestartdate; - } - - public Field getReleaseenddate() { - return 
releaseenddate; - } - - public void setReleaseenddate(Field releaseenddate) { - this.releaseenddate = releaseenddate; - } - - public Field getMissionstatementurl() { - return missionstatementurl; - } - - public void setMissionstatementurl(Field missionstatementurl) { - this.missionstatementurl = missionstatementurl; - } - - public Field getDataprovider() { - return dataprovider; - } - - public void setDataprovider(Field dataprovider) { - this.dataprovider = dataprovider; - } - - public Field getServiceprovider() { - return serviceprovider; - } - - public void setServiceprovider(Field serviceprovider) { - this.serviceprovider = serviceprovider; - } - - public Field getDatabaseaccesstype() { - return databaseaccesstype; - } - - public void setDatabaseaccesstype(Field databaseaccesstype) { - this.databaseaccesstype = databaseaccesstype; - } - - public Field getDatauploadtype() { - return datauploadtype; - } - - public void setDatauploadtype(Field datauploadtype) { - this.datauploadtype = datauploadtype; - } - - public Field getDatabaseaccessrestriction() { - return databaseaccessrestriction; - } - - public void setDatabaseaccessrestriction(Field databaseaccessrestriction) { - this.databaseaccessrestriction = databaseaccessrestriction; - } - - public Field getDatauploadrestriction() { - return datauploadrestriction; - } - - public void setDatauploadrestriction(Field datauploadrestriction) { - this.datauploadrestriction = datauploadrestriction; - } - - public Field getVersioning() { - return versioning; - } - - public void setVersioning(Field versioning) { - this.versioning = versioning; - } - - public Field getCitationguidelineurl() { - return citationguidelineurl; - } - - public void setCitationguidelineurl(Field citationguidelineurl) { - this.citationguidelineurl = citationguidelineurl; - } - - public Field getQualitymanagementkind() { - return qualitymanagementkind; - } - - public void setQualitymanagementkind(Field qualitymanagementkind) { - 
this.qualitymanagementkind = qualitymanagementkind; - } - - public Field getPidsystems() { - return pidsystems; - } - - public void setPidsystems(Field pidsystems) { - this.pidsystems = pidsystems; - } - - public Field getCertificates() { - return certificates; - } - - public void setCertificates(Field certificates) { - this.certificates = certificates; - } - - public List getPolicies() { - return policies; - } - - public void setPolicies(List policies) { - this.policies = policies; - } - - public Journal getJournal() { - return journal; - } - - public void setJournal(Journal journal) { - this.journal = journal; - } - - @Override - public void mergeFrom(OafEntity e) { - super.mergeFrom(e); - - if (!Datasource.class.isAssignableFrom(e.getClass())) { - return; - } - - Datasource d = (Datasource) e; - - datasourcetype = d.getDatasourcetype() != null && compareTrust(this, e) < 0 - ? d.getDatasourcetype() - : datasourcetype; - openairecompatibility = d.getOpenairecompatibility() != null && compareTrust(this, e) < 0 - ? d.getOpenairecompatibility() - : openairecompatibility; - officialname = d.getOfficialname() != null && compareTrust(this, e) < 0 - ? d.getOfficialname() - : officialname; - englishname = d.getEnglishname() != null && compareTrust(this, e) < 0 ? d.getEnglishname() : officialname; - websiteurl = d.getWebsiteurl() != null && compareTrust(this, e) < 0 ? d.getWebsiteurl() : websiteurl; - logourl = d.getLogourl() != null && compareTrust(this, e) < 0 ? d.getLogourl() : getLogourl(); - contactemail = d.getContactemail() != null && compareTrust(this, e) < 0 - ? d.getContactemail() - : contactemail; - namespaceprefix = d.getNamespaceprefix() != null && compareTrust(this, e) < 0 - ? d.getNamespaceprefix() - : namespaceprefix; - latitude = d.getLatitude() != null && compareTrust(this, e) < 0 ? d.getLatitude() : latitude; - longitude = d.getLongitude() != null && compareTrust(this, e) < 0 ? 
d.getLongitude() : longitude; - dateofvalidation = d.getDateofvalidation() != null && compareTrust(this, e) < 0 - ? d.getDateofvalidation() - : dateofvalidation; - description = d.getDescription() != null && compareTrust(this, e) < 0 ? d.getDescription() : description; - subjects = mergeLists(subjects, d.getSubjects()); - - // opendoar specific fields (od*) - odnumberofitems = d.getOdnumberofitems() != null && compareTrust(this, e) < 0 - ? d.getOdnumberofitems() - : odnumberofitems; - odnumberofitemsdate = d.getOdnumberofitemsdate() != null && compareTrust(this, e) < 0 - ? d.getOdnumberofitemsdate() - : odnumberofitemsdate; - odpolicies = d.getOdpolicies() != null && compareTrust(this, e) < 0 ? d.getOdpolicies() : odpolicies; - odlanguages = mergeLists(odlanguages, d.getOdlanguages()); - odcontenttypes = mergeLists(odcontenttypes, d.getOdcontenttypes()); - accessinfopackage = mergeLists(accessinfopackage, d.getAccessinfopackage()); - - // re3data fields - releasestartdate = d.getReleasestartdate() != null && compareTrust(this, e) < 0 - ? d.getReleasestartdate() - : releasestartdate; - releaseenddate = d.getReleaseenddate() != null && compareTrust(this, e) < 0 - ? d.getReleaseenddate() - : releaseenddate; - missionstatementurl = d.getMissionstatementurl() != null && compareTrust(this, e) < 0 - ? d.getMissionstatementurl() - : missionstatementurl; - dataprovider = d.getDataprovider() != null && compareTrust(this, e) < 0 - ? d.getDataprovider() - : dataprovider; - serviceprovider = d.getServiceprovider() != null && compareTrust(this, e) < 0 - ? d.getServiceprovider() - : serviceprovider; - - // {open, restricted or closed} - databaseaccesstype = d.getDatabaseaccesstype() != null && compareTrust(this, e) < 0 - ? d.getDatabaseaccesstype() - : databaseaccesstype; - - // {open, restricted or closed} - datauploadtype = d.getDatauploadtype() != null && compareTrust(this, e) < 0 - ? 
d.getDatauploadtype() - : datauploadtype; - - // {feeRequired, registration, other} - databaseaccessrestriction = d.getDatabaseaccessrestriction() != null && compareTrust(this, e) < 0 - ? d.getDatabaseaccessrestriction() - : databaseaccessrestriction; - - // {feeRequired, registration, other} - datauploadrestriction = d.getDatauploadrestriction() != null && compareTrust(this, e) < 0 - ? d.getDatauploadrestriction() - : datauploadrestriction; - - versioning = d.getVersioning() != null && compareTrust(this, e) < 0 ? d.getVersioning() : versioning; - citationguidelineurl = d.getCitationguidelineurl() != null && compareTrust(this, e) < 0 - ? d.getCitationguidelineurl() - : citationguidelineurl; - - // {yes, no, unknown} - qualitymanagementkind = d.getQualitymanagementkind() != null && compareTrust(this, e) < 0 - ? d.getQualitymanagementkind() - : qualitymanagementkind; - pidsystems = d.getPidsystems() != null && compareTrust(this, e) < 0 ? d.getPidsystems() : pidsystems; - - certificates = d.getCertificates() != null && compareTrust(this, e) < 0 - ? d.getCertificates() - : certificates; - - policies = mergeLists(policies, d.getPolicies()); - - journal = d.getJournal() != null && compareTrust(this, e) < 0 ? d.getJournal() : journal; - - mergeOAFDataInfo(e); - } -} diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/ExternalReference.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/ExternalReference.java deleted file mode 100644 index d509b954e..000000000 --- a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/ExternalReference.java +++ /dev/null @@ -1,119 +0,0 @@ - -package eu.dnetlib.dhp.schema.oaf; - -import java.io.Serializable; -import java.util.Objects; - -public class ExternalReference implements Serializable { - // source - private String sitename; - - // title - private String label; - - // text() - private String url; - - // ?? not mapped yet ?? 
- private String description; - - // type - private Qualifier qualifier; - - // site internal identifier - private String refidentifier; - - // maps the oaf:reference/@query attribute - private String query; - - // ExternalReferences might be also inferred - private DataInfo dataInfo; - - public String getSitename() { - return sitename; - } - - public void setSitename(String sitename) { - this.sitename = sitename; - } - - public String getLabel() { - return label; - } - - public void setLabel(String label) { - this.label = label; - } - - public String getUrl() { - return url; - } - - public void setUrl(String url) { - this.url = url; - } - - public String getDescription() { - return description; - } - - public void setDescription(String description) { - this.description = description; - } - - public Qualifier getQualifier() { - return qualifier; - } - - public void setQualifier(Qualifier qualifier) { - this.qualifier = qualifier; - } - - public String getRefidentifier() { - return refidentifier; - } - - public void setRefidentifier(String refidentifier) { - this.refidentifier = refidentifier; - } - - public String getQuery() { - return query; - } - - public void setQuery(String query) { - this.query = query; - } - - public DataInfo getDataInfo() { - return dataInfo; - } - - public void setDataInfo(DataInfo dataInfo) { - this.dataInfo = dataInfo; - } - - @Override - public boolean equals(Object o) { - if (this == o) - return true; - if (o == null || getClass() != o.getClass()) - return false; - ExternalReference that = (ExternalReference) o; - return Objects.equals(sitename, that.sitename) - && Objects.equals(label, that.label) - && Objects.equals(url, that.url) - && Objects.equals(description, that.description) - && Objects.equals(qualifier, that.qualifier) - && Objects.equals(refidentifier, that.refidentifier) - && Objects.equals(query, that.query) - && Objects.equals(dataInfo, that.dataInfo); - } - - @Override - public int hashCode() { - return Objects - .hash( - 
sitename, label, url, description, qualifier, refidentifier, query, dataInfo); - } -} diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/ExtraInfo.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/ExtraInfo.java deleted file mode 100644 index 3682cc2aa..000000000 --- a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/ExtraInfo.java +++ /dev/null @@ -1,77 +0,0 @@ - -package eu.dnetlib.dhp.schema.oaf; - -import java.io.Serializable; -import java.util.Objects; - -public class ExtraInfo implements Serializable { - private String name; - - private String typology; - - private String provenance; - - private String trust; - - // json containing a Citation or Statistics - private String value; - - public String getName() { - return name; - } - - public void setName(String name) { - this.name = name; - } - - public String getTypology() { - return typology; - } - - public void setTypology(String typology) { - this.typology = typology; - } - - public String getProvenance() { - return provenance; - } - - public void setProvenance(String provenance) { - this.provenance = provenance; - } - - public String getTrust() { - return trust; - } - - public void setTrust(String trust) { - this.trust = trust; - } - - public String getValue() { - return value; - } - - public void setValue(String value) { - this.value = value; - } - - @Override - public boolean equals(Object o) { - if (this == o) - return true; - if (o == null || getClass() != o.getClass()) - return false; - ExtraInfo extraInfo = (ExtraInfo) o; - return Objects.equals(name, extraInfo.name) - && Objects.equals(typology, extraInfo.typology) - && Objects.equals(provenance, extraInfo.provenance) - && Objects.equals(trust, extraInfo.trust) - && Objects.equals(value, extraInfo.value); - } - - @Override - public int hashCode() { - return Objects.hash(name, typology, provenance, trust, value); - } -} diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Field.java 
b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Field.java deleted file mode 100644 index 8358bc4b3..000000000 --- a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Field.java +++ /dev/null @@ -1,45 +0,0 @@ - -package eu.dnetlib.dhp.schema.oaf; - -import java.io.Serializable; -import java.util.Objects; - -public class Field implements Serializable { - - private T value; - - private DataInfo dataInfo; - - public T getValue() { - return value; - } - - public void setValue(T value) { - this.value = value; - } - - public DataInfo getDataInfo() { - return dataInfo; - } - - public void setDataInfo(DataInfo dataInfo) { - this.dataInfo = dataInfo; - } - - @Override - public int hashCode() { - return getValue() == null ? 0 : getValue().hashCode(); - } - - @Override - public boolean equals(Object obj) { - if (this == obj) - return true; - if (obj == null) - return false; - if (getClass() != obj.getClass()) - return false; - Field other = (Field) obj; - return Objects.equals(getValue(), other.getValue()); - } -} diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/GeoLocation.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/GeoLocation.java deleted file mode 100644 index 7ed313a59..000000000 --- a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/GeoLocation.java +++ /dev/null @@ -1,76 +0,0 @@ - -package eu.dnetlib.dhp.schema.oaf; - -import java.io.Serializable; - -import org.apache.commons.lang3.StringUtils; - -import com.fasterxml.jackson.annotation.JsonIgnore; - -public class GeoLocation implements Serializable { - - private String point; - - private String box; - - private String place; - - public String getPoint() { - return point; - } - - public void setPoint(String point) { - this.point = point; - } - - public String getBox() { - return box; - } - - public void setBox(String box) { - this.box = box; - } - - public String getPlace() { - return place; - } - - public void setPlace(String place) { - this.place = place; - } - - @JsonIgnore 
- public boolean isBlank() { - return StringUtils.isBlank(point) && StringUtils.isBlank(box) && StringUtils.isBlank(place); - } - - public String toComparableString() { - return isBlank() - ? "" - : String - .format( - "%s::%s%s", - point != null ? point.toLowerCase() : "", - box != null ? box.toLowerCase() : "", - place != null ? place.toLowerCase() : ""); - } - - @Override - public int hashCode() { - return toComparableString().hashCode(); - } - - @Override - public boolean equals(Object obj) { - if (this == obj) - return true; - if (obj == null) - return false; - if (getClass() != obj.getClass()) - return false; - - GeoLocation other = (GeoLocation) obj; - - return toComparableString().equals(other.toComparableString()); - } -} diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/H2020Classification.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/H2020Classification.java deleted file mode 100644 index 219bdc00d..000000000 --- a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/H2020Classification.java +++ /dev/null @@ -1,88 +0,0 @@ - -package eu.dnetlib.dhp.schema.oaf; - -import java.io.Serializable; -import java.util.Objects; - -/** - * To store information about the classification for the project. The classification depends on the programme. For example - * H2020-EU.3.4.5.3 can be classified as - * H2020-EU.3. => Societal Challenges (level1) - * H2020-EU.3.4. => Transport (level2) - * H2020-EU.3.4.5. => CLEANSKY2 (level3) - * H2020-EU.3.4.5.3. => IADP Fast Rotorcraft (level4) - * - * We decided to explicitly represent up to three levels in the classification. 
- * - * H2020Classification has the following parameters: - * - private Programme programme to store the information about the programme related to this classification - * - private String level1 to store the information about the level 1 of the classification (Priority or Pillar of the EC) - * - private String level2 to store the information about the level2 af the classification (Objectives (?)) - * - private String level3 to store the information about the level3 of the classification - * - private String classification to store the entire classification related to the programme - */ - -public class H2020Classification implements Serializable { - private H2020Programme h2020Programme; - private String level1; - private String level2; - private String level3; - - private String classification; - - public H2020Programme getH2020Programme() { - return h2020Programme; - } - - public void setH2020Programme(H2020Programme h2020Programme) { - this.h2020Programme = h2020Programme; - } - - public String getLevel1() { - return level1; - } - - public void setLevel1(String level1) { - this.level1 = level1; - } - - public String getLevel2() { - return level2; - } - - public void setLevel2(String level2) { - this.level2 = level2; - } - - public String getLevel3() { - return level3; - } - - public void setLevel3(String level3) { - this.level3 = level3; - } - - public String getClassification() { - return classification; - } - - public void setClassification(String classification) { - this.classification = classification; - } - - @Override - public boolean equals(Object o) { - if (this == o) - return true; - if (o == null || getClass() != o.getClass()) - return false; - - H2020Classification h2020classification = (H2020Classification) o; - - return Objects.equals(level1, h2020classification.level1) && - Objects.equals(level2, h2020classification.level2) && - Objects.equals(level3, h2020classification.level3) && - Objects.equals(classification, 
h2020classification.classification) && - h2020Programme.equals(h2020classification.h2020Programme); - } -} diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/H2020Programme.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/H2020Programme.java deleted file mode 100644 index 101d46d35..000000000 --- a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/H2020Programme.java +++ /dev/null @@ -1,44 +0,0 @@ - -package eu.dnetlib.dhp.schema.oaf; - -import java.io.Serializable; -import java.util.Objects; - -/** - * To store information about the ec programme for the project. It has the following parameters: - * - private String code to store the code of the programme - * - private String description to store the description of the programme - */ - -public class H2020Programme implements Serializable { - private String code; - private String description; - - public String getCode() { - return code; - } - - public void setCode(String code) { - this.code = code; - } - - public String getDescription() { - return description; - } - - public void setDescription(String description) { - this.description = description; - } - - @Override - public boolean equals(Object o) { - if (this == o) - return true; - if (o == null || getClass() != o.getClass()) - return false; - - H2020Programme h2020Programme = (H2020Programme) o; - return Objects.equals(code, h2020Programme.code); - } - -} diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Instance.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Instance.java deleted file mode 100644 index 29d495261..000000000 --- a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Instance.java +++ /dev/null @@ -1,152 +0,0 @@ - -package eu.dnetlib.dhp.schema.oaf; - -import java.io.Serializable; -import java.util.List; - -public class Instance implements Serializable { - - private Field license; - - private Qualifier accessright; - - private Qualifier instancetype; - - private KeyValue hostedby; - - private 
List url; - - // other research products specifc - private String distributionlocation; - - private KeyValue collectedfrom; - - private Field dateofacceptance; - - // ( article | book ) processing charges. Defined here to cope with possible wrongly typed - // results - private Field processingchargeamount; - - // currency - alphabetic code describe in ISO-4217. Defined here to cope with possible wrongly - // typed results - private Field processingchargecurrency; - - private Qualifier refereed; // peer-review status - - public Field getLicense() { - return license; - } - - public void setLicense(Field license) { - this.license = license; - } - - public Qualifier getAccessright() { - return accessright; - } - - public void setAccessright(Qualifier accessright) { - this.accessright = accessright; - } - - public Qualifier getInstancetype() { - return instancetype; - } - - public void setInstancetype(Qualifier instancetype) { - this.instancetype = instancetype; - } - - public KeyValue getHostedby() { - return hostedby; - } - - public void setHostedby(KeyValue hostedby) { - this.hostedby = hostedby; - } - - public List getUrl() { - return url; - } - - public void setUrl(List url) { - this.url = url; - } - - public String getDistributionlocation() { - return distributionlocation; - } - - public void setDistributionlocation(String distributionlocation) { - this.distributionlocation = distributionlocation; - } - - public KeyValue getCollectedfrom() { - return collectedfrom; - } - - public void setCollectedfrom(KeyValue collectedfrom) { - this.collectedfrom = collectedfrom; - } - - public Field getDateofacceptance() { - return dateofacceptance; - } - - public void setDateofacceptance(Field dateofacceptance) { - this.dateofacceptance = dateofacceptance; - } - - public Field getProcessingchargeamount() { - return processingchargeamount; - } - - public void setProcessingchargeamount(Field processingchargeamount) { - this.processingchargeamount = processingchargeamount; - } - - 
public Field getProcessingchargecurrency() { - return processingchargecurrency; - } - - public void setProcessingchargecurrency(Field processingchargecurrency) { - this.processingchargecurrency = processingchargecurrency; - } - - public Qualifier getRefereed() { - return refereed; - } - - public void setRefereed(Qualifier refereed) { - this.refereed = refereed; - } - - public String toComparableString() { - return String - .format( - "%s::%s::%s::%s", - hostedby != null && hostedby.getKey() != null ? hostedby.getKey().toLowerCase() : "", - accessright != null && accessright.getClassid() != null ? accessright.getClassid() : "", - instancetype != null && instancetype.getClassid() != null ? instancetype.getClassid() : "", - url != null ? url : ""); - } - - @Override - public int hashCode() { - return toComparableString().hashCode(); - } - - @Override - public boolean equals(Object obj) { - if (this == obj) - return true; - if (obj == null) - return false; - if (getClass() != obj.getClass()) - return false; - - Instance other = (Instance) obj; - - return toComparableString().equals(other.toComparableString()); - } -} diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Journal.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Journal.java deleted file mode 100644 index 7a375e28b..000000000 --- a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Journal.java +++ /dev/null @@ -1,167 +0,0 @@ - -package eu.dnetlib.dhp.schema.oaf; - -import java.io.Serializable; -import java.util.Objects; - -public class Journal implements Serializable { - - private String name; - - private String issnPrinted; - - private String issnOnline; - - private String issnLinking; - - private String ep; - - private String iss; - - private String sp; - - private String vol; - - private String edition; - - private String conferenceplace; - - private String conferencedate; - - private DataInfo dataInfo; - - public String getName() { - return name; - } - - public void 
setName(String name) { - this.name = name; - } - - public String getIssnPrinted() { - return issnPrinted; - } - - public void setIssnPrinted(String issnPrinted) { - this.issnPrinted = issnPrinted; - } - - public String getIssnOnline() { - return issnOnline; - } - - public void setIssnOnline(String issnOnline) { - this.issnOnline = issnOnline; - } - - public String getIssnLinking() { - return issnLinking; - } - - public void setIssnLinking(String issnLinking) { - this.issnLinking = issnLinking; - } - - public String getEp() { - return ep; - } - - public void setEp(String ep) { - this.ep = ep; - } - - public String getIss() { - return iss; - } - - public void setIss(String iss) { - this.iss = iss; - } - - public String getSp() { - return sp; - } - - public void setSp(String sp) { - this.sp = sp; - } - - public String getVol() { - return vol; - } - - public void setVol(String vol) { - this.vol = vol; - } - - public String getEdition() { - return edition; - } - - public void setEdition(String edition) { - this.edition = edition; - } - - public String getConferenceplace() { - return conferenceplace; - } - - public void setConferenceplace(String conferenceplace) { - this.conferenceplace = conferenceplace; - } - - public String getConferencedate() { - return conferencedate; - } - - public void setConferencedate(String conferencedate) { - this.conferencedate = conferencedate; - } - - public DataInfo getDataInfo() { - return dataInfo; - } - - public void setDataInfo(DataInfo dataInfo) { - this.dataInfo = dataInfo; - } - - @Override - public boolean equals(Object o) { - if (this == o) - return true; - if (o == null || getClass() != o.getClass()) - return false; - Journal journal = (Journal) o; - return Objects.equals(name, journal.name) - && Objects.equals(issnPrinted, journal.issnPrinted) - && Objects.equals(issnOnline, journal.issnOnline) - && Objects.equals(issnLinking, journal.issnLinking) - && Objects.equals(ep, journal.ep) - && Objects.equals(iss, journal.iss) - && 
Objects.equals(sp, journal.sp) - && Objects.equals(vol, journal.vol) - && Objects.equals(edition, journal.edition) - && Objects.equals(conferenceplace, journal.conferenceplace) - && Objects.equals(conferencedate, journal.conferencedate) - && Objects.equals(dataInfo, journal.dataInfo); - } - - @Override - public int hashCode() { - return Objects - .hash( - name, - issnPrinted, - issnOnline, - issnLinking, - ep, - iss, - sp, - vol, - edition, - conferenceplace, - conferencedate, - dataInfo); - } -} diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/KeyValue.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/KeyValue.java deleted file mode 100644 index 4e2d60138..000000000 --- a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/KeyValue.java +++ /dev/null @@ -1,74 +0,0 @@ - -package eu.dnetlib.dhp.schema.oaf; - -import java.io.Serializable; - -import org.apache.commons.lang3.StringUtils; - -import com.fasterxml.jackson.annotation.JsonIgnore; - -public class KeyValue implements Serializable { - - private String key; - - private String value; - - private DataInfo dataInfo; - - public String getKey() { - return key; - } - - public void setKey(String key) { - this.key = key; - } - - public String getValue() { - return value; - } - - public void setValue(String value) { - this.value = value; - } - - public DataInfo getDataInfo() { - return dataInfo; - } - - public void setDataInfo(DataInfo dataInfo) { - this.dataInfo = dataInfo; - } - - public String toComparableString() { - return isBlank() - ? "" - : String - .format( - "%s::%s", - key != null ? key.toLowerCase() : "", value != null ? 
value.toLowerCase() : ""); - } - - @JsonIgnore - public boolean isBlank() { - return StringUtils.isBlank(key) && StringUtils.isBlank(value); - } - - @Override - public int hashCode() { - return toComparableString().hashCode(); - } - - @Override - public boolean equals(Object obj) { - if (this == obj) - return true; - if (obj == null) - return false; - if (getClass() != obj.getClass()) - return false; - - KeyValue other = (KeyValue) obj; - - return toComparableString().equals(other.toComparableString()); - } -} diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Measure.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Measure.java deleted file mode 100644 index c0c14d10d..000000000 --- a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Measure.java +++ /dev/null @@ -1,59 +0,0 @@ - -package eu.dnetlib.dhp.schema.oaf; - -import java.util.List; - -import com.google.common.base.Objects; - -/** - * Represent a measure, must be further described by a system available resource providing name and descriptions. - */ -public class Measure { - - /** - * Unique measure identifier. - */ - private String id; - - /** - * List of units associated with this measure. KeyValue provides a pair to store the laber (key) and the value, plus - * common provenance information. 
- */ - private List unit; - - public String getId() { - return id; - } - - public void setId(String id) { - this.id = id; - } - - public List getUnit() { - return unit; - } - - public void setUnit(List unit) { - this.unit = unit; - } - - public void mergeFrom(Measure m) { - // TODO - } - - @Override - public boolean equals(Object o) { - if (this == o) - return true; - if (o == null || getClass() != o.getClass()) - return false; - Measure measure = (Measure) o; - return Objects.equal(id, measure.id) && - Objects.equal(unit, measure.unit); - } - - @Override - public int hashCode() { - return Objects.hashCode(id, unit); - } -} diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/OAIProvenance.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/OAIProvenance.java deleted file mode 100644 index 88d74afbf..000000000 --- a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/OAIProvenance.java +++ /dev/null @@ -1,33 +0,0 @@ - -package eu.dnetlib.dhp.schema.oaf; - -import java.io.Serializable; -import java.util.Objects; - -public class OAIProvenance implements Serializable { - - private OriginDescription originDescription; - - public OriginDescription getOriginDescription() { - return originDescription; - } - - public void setOriginDescription(OriginDescription originDescription) { - this.originDescription = originDescription; - } - - @Override - public boolean equals(Object o) { - if (this == o) - return true; - if (o == null || getClass() != o.getClass()) - return false; - OAIProvenance that = (OAIProvenance) o; - return Objects.equals(originDescription, that.originDescription); - } - - @Override - public int hashCode() { - return Objects.hash(originDescription); - } -} diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Oaf.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Oaf.java deleted file mode 100644 index 494123fdf..000000000 --- a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Oaf.java +++ /dev/null @@ -1,102 +0,0 @@ 
- -package eu.dnetlib.dhp.schema.oaf; - -import java.io.Serializable; -import java.util.Collection; -import java.util.List; -import java.util.Objects; -import java.util.Optional; -import java.util.stream.Collectors; -import java.util.stream.Stream; - -public abstract class Oaf implements Serializable { - - /** - * The list of datasource id/name pairs providing this relationship. - */ - protected List collectedfrom; - - private DataInfo dataInfo; - - private Long lastupdatetimestamp; - - public List getCollectedfrom() { - return collectedfrom; - } - - public void setCollectedfrom(List collectedfrom) { - this.collectedfrom = collectedfrom; - } - - public DataInfo getDataInfo() { - return dataInfo; - } - - public void setDataInfo(DataInfo dataInfo) { - this.dataInfo = dataInfo; - } - - public Long getLastupdatetimestamp() { - return lastupdatetimestamp; - } - - public void setLastupdatetimestamp(Long lastupdatetimestamp) { - this.lastupdatetimestamp = lastupdatetimestamp; - } - - public void mergeFrom(Oaf o) { - if (Objects.isNull(o)) { - return; - } - setCollectedfrom( - Stream - .concat( - Optional - .ofNullable(getCollectedfrom()) - .map(Collection::stream) - .orElse(Stream.empty()), - Optional - .ofNullable(o.getCollectedfrom()) - .map(Collection::stream) - .orElse(Stream.empty())) - .distinct() // relies on KeyValue.equals - .collect(Collectors.toList())); - - setLastupdatetimestamp( - Math - .max( - Optional.ofNullable(getLastupdatetimestamp()).orElse(0L), - Optional.ofNullable(o.getLastupdatetimestamp()).orElse(0L))); - } - - public void mergeOAFDataInfo(Oaf o) { - if (o.getDataInfo() != null && compareTrust(this, o) < 0) - dataInfo = o.getDataInfo(); - } - - protected String extractTrust(Oaf e) { - if (e == null || e.getDataInfo() == null || e.getDataInfo().getTrust() == null) - return "0.0"; - return e.getDataInfo().getTrust(); - } - - protected int compareTrust(Oaf a, Oaf b) { - return extractTrust(a).compareTo(extractTrust(b)); - } - - @Override - public 
boolean equals(Object o) { - if (this == o) - return true; - if (o == null || getClass() != o.getClass()) - return false; - Oaf oaf = (Oaf) o; - return Objects.equals(getDataInfo(), oaf.getDataInfo()) - && Objects.equals(lastupdatetimestamp, oaf.lastupdatetimestamp); - } - - @Override - public int hashCode() { - return Objects.hash(dataInfo, lastupdatetimestamp); - } -} diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/OafEntity.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/OafEntity.java deleted file mode 100644 index 17c3e6bdd..000000000 --- a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/OafEntity.java +++ /dev/null @@ -1,126 +0,0 @@ - -package eu.dnetlib.dhp.schema.oaf; - -import java.io.Serializable; -import java.util.*; -import java.util.stream.Collectors; - -public abstract class OafEntity extends Oaf implements Serializable { - - private String id; - - private List originalId; - - private List pid; - - private String dateofcollection; - - private String dateoftransformation; - - private List extraInfo; - - private OAIProvenance oaiprovenance; - - public String getId() { - return id; - } - - public void setId(String id) { - this.id = id; - } - - public List getOriginalId() { - return originalId; - } - - public void setOriginalId(List originalId) { - this.originalId = originalId; - } - - public List getPid() { - return pid; - } - - public void setPid(List pid) { - this.pid = pid; - } - - public String getDateofcollection() { - return dateofcollection; - } - - public void setDateofcollection(String dateofcollection) { - this.dateofcollection = dateofcollection; - } - - public String getDateoftransformation() { - return dateoftransformation; - } - - public void setDateoftransformation(String dateoftransformation) { - this.dateoftransformation = dateoftransformation; - } - - public List getExtraInfo() { - return extraInfo; - } - - public void setExtraInfo(List extraInfo) { - this.extraInfo = extraInfo; - } - - public 
OAIProvenance getOaiprovenance() { - return oaiprovenance; - } - - public void setOaiprovenance(OAIProvenance oaiprovenance) { - this.oaiprovenance = oaiprovenance; - } - - public void mergeFrom(OafEntity e) { - super.mergeFrom(e); - - originalId = mergeLists(originalId, e.getOriginalId()); - - pid = mergeLists(pid, e.getPid()); - - if (e.getDateofcollection() != null && compareTrust(this, e) < 0) - dateofcollection = e.getDateofcollection(); - - if (e.getDateoftransformation() != null && compareTrust(this, e) < 0) - dateoftransformation = e.getDateoftransformation(); - - extraInfo = mergeLists(extraInfo, e.getExtraInfo()); - - if (e.getOaiprovenance() != null && compareTrust(this, e) < 0) - oaiprovenance = e.getOaiprovenance(); - } - - protected List mergeLists(final List... lists) { - - return Arrays - .stream(lists) - .filter(Objects::nonNull) - .flatMap(List::stream) - .filter(Objects::nonNull) - .distinct() - .collect(Collectors.toList()); - } - - @Override - public boolean equals(Object o) { - if (this == o) - return true; - if (o == null || getClass() != o.getClass()) - return false; - if (!super.equals(o)) - return false; - OafEntity oafEntity = (OafEntity) o; - return Objects.equals(id, oafEntity.id); - } - - @Override - public int hashCode() { - return Objects.hash(super.hashCode(), id); - } -} diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Organization.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Organization.java deleted file mode 100644 index a5f9bce30..000000000 --- a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Organization.java +++ /dev/null @@ -1,214 +0,0 @@ - -package eu.dnetlib.dhp.schema.oaf; - -import java.io.Serializable; -import java.util.List; - -public class Organization extends OafEntity implements Serializable { - - private Field legalshortname; - - private Field legalname; - - private List> alternativeNames; - - private Field websiteurl; - - private Field logourl; - - private Field eclegalbody; - - 
private Field eclegalperson; - - private Field ecnonprofit; - - private Field ecresearchorganization; - - private Field echighereducation; - - private Field ecinternationalorganizationeurinterests; - - private Field ecinternationalorganization; - - private Field ecenterprise; - - private Field ecsmevalidated; - - private Field ecnutscode; - - private Qualifier country; - - public Field getLegalshortname() { - return legalshortname; - } - - public void setLegalshortname(Field legalshortname) { - this.legalshortname = legalshortname; - } - - public Field getLegalname() { - return legalname; - } - - public void setLegalname(Field legalname) { - this.legalname = legalname; - } - - public List> getAlternativeNames() { - return alternativeNames; - } - - public void setAlternativeNames(List> alternativeNames) { - this.alternativeNames = alternativeNames; - } - - public Field getWebsiteurl() { - return websiteurl; - } - - public void setWebsiteurl(Field websiteurl) { - this.websiteurl = websiteurl; - } - - public Field getLogourl() { - return logourl; - } - - public void setLogourl(Field logourl) { - this.logourl = logourl; - } - - public Field getEclegalbody() { - return eclegalbody; - } - - public void setEclegalbody(Field eclegalbody) { - this.eclegalbody = eclegalbody; - } - - public Field getEclegalperson() { - return eclegalperson; - } - - public void setEclegalperson(Field eclegalperson) { - this.eclegalperson = eclegalperson; - } - - public Field getEcnonprofit() { - return ecnonprofit; - } - - public void setEcnonprofit(Field ecnonprofit) { - this.ecnonprofit = ecnonprofit; - } - - public Field getEcresearchorganization() { - return ecresearchorganization; - } - - public void setEcresearchorganization(Field ecresearchorganization) { - this.ecresearchorganization = ecresearchorganization; - } - - public Field getEchighereducation() { - return echighereducation; - } - - public void setEchighereducation(Field echighereducation) { - this.echighereducation = 
echighereducation; - } - - public Field getEcinternationalorganizationeurinterests() { - return ecinternationalorganizationeurinterests; - } - - public void setEcinternationalorganizationeurinterests( - Field ecinternationalorganizationeurinterests) { - this.ecinternationalorganizationeurinterests = ecinternationalorganizationeurinterests; - } - - public Field getEcinternationalorganization() { - return ecinternationalorganization; - } - - public void setEcinternationalorganization(Field ecinternationalorganization) { - this.ecinternationalorganization = ecinternationalorganization; - } - - public Field getEcenterprise() { - return ecenterprise; - } - - public void setEcenterprise(Field ecenterprise) { - this.ecenterprise = ecenterprise; - } - - public Field getEcsmevalidated() { - return ecsmevalidated; - } - - public void setEcsmevalidated(Field ecsmevalidated) { - this.ecsmevalidated = ecsmevalidated; - } - - public Field getEcnutscode() { - return ecnutscode; - } - - public void setEcnutscode(Field ecnutscode) { - this.ecnutscode = ecnutscode; - } - - public Qualifier getCountry() { - return country; - } - - public void setCountry(Qualifier country) { - this.country = country; - } - - @Override - public void mergeFrom(OafEntity e) { - super.mergeFrom(e); - - if (!Organization.class.isAssignableFrom(e.getClass())) { - return; - } - - final Organization o = (Organization) e; - legalshortname = o.getLegalshortname() != null && compareTrust(this, e) < 0 - ? o.getLegalshortname() - : legalshortname; - legalname = o.getLegalname() != null && compareTrust(this, e) < 0 ? o.getLegalname() : legalname; - alternativeNames = mergeLists(o.getAlternativeNames(), alternativeNames); - websiteurl = o.getWebsiteurl() != null && compareTrust(this, e) < 0 ? o.getWebsiteurl() : websiteurl; - logourl = o.getLogourl() != null && compareTrust(this, e) < 0 ? o.getLogourl() : logourl; - eclegalbody = o.getEclegalbody() != null && compareTrust(this, e) < 0 ? 
o.getEclegalbody() : eclegalbody; - eclegalperson = o.getEclegalperson() != null && compareTrust(this, e) < 0 - ? o.getEclegalperson() - : eclegalperson; - ecnonprofit = o.getEcnonprofit() != null && compareTrust(this, e) < 0 ? o.getEcnonprofit() : ecnonprofit; - ecresearchorganization = o.getEcresearchorganization() != null && compareTrust(this, e) < 0 - ? o.getEcresearchorganization() - : ecresearchorganization; - echighereducation = o.getEchighereducation() != null && compareTrust(this, e) < 0 - ? o.getEchighereducation() - : echighereducation; - ecinternationalorganizationeurinterests = o.getEcinternationalorganizationeurinterests() != null - && compareTrust(this, e) < 0 - ? o.getEcinternationalorganizationeurinterests() - : ecinternationalorganizationeurinterests; - ecinternationalorganization = o.getEcinternationalorganization() != null && compareTrust(this, e) < 0 - ? o.getEcinternationalorganization() - : ecinternationalorganization; - ecenterprise = o.getEcenterprise() != null && compareTrust(this, e) < 0 - ? o.getEcenterprise() - : ecenterprise; - ecsmevalidated = o.getEcsmevalidated() != null && compareTrust(this, e) < 0 - ? o.getEcsmevalidated() - : ecsmevalidated; - ecnutscode = o.getEcnutscode() != null && compareTrust(this, e) < 0 ? o.getEcnutscode() : ecnutscode; - country = o.getCountry() != null && compareTrust(this, e) < 0 ? 
o.getCountry() : country; - mergeOAFDataInfo(o); - } -} diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/OriginDescription.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/OriginDescription.java deleted file mode 100644 index a275fc1a9..000000000 --- a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/OriginDescription.java +++ /dev/null @@ -1,88 +0,0 @@ - -package eu.dnetlib.dhp.schema.oaf; - -import java.io.Serializable; -import java.util.Objects; - -public class OriginDescription implements Serializable { - - private String harvestDate; - - private Boolean altered = true; - - private String baseURL; - - private String identifier; - - private String datestamp; - - private String metadataNamespace; - - public String getHarvestDate() { - return harvestDate; - } - - public void setHarvestDate(String harvestDate) { - this.harvestDate = harvestDate; - } - - public Boolean getAltered() { - return altered; - } - - public void setAltered(Boolean altered) { - this.altered = altered; - } - - public String getBaseURL() { - return baseURL; - } - - public void setBaseURL(String baseURL) { - this.baseURL = baseURL; - } - - public String getIdentifier() { - return identifier; - } - - public void setIdentifier(String identifier) { - this.identifier = identifier; - } - - public String getDatestamp() { - return datestamp; - } - - public void setDatestamp(String datestamp) { - this.datestamp = datestamp; - } - - public String getMetadataNamespace() { - return metadataNamespace; - } - - public void setMetadataNamespace(String metadataNamespace) { - this.metadataNamespace = metadataNamespace; - } - - @Override - public boolean equals(Object o) { - if (this == o) - return true; - if (o == null || getClass() != o.getClass()) - return false; - OriginDescription that = (OriginDescription) o; - return Objects.equals(harvestDate, that.harvestDate) - && Objects.equals(altered, that.altered) - && Objects.equals(baseURL, that.baseURL) - && 
Objects.equals(identifier, that.identifier) - && Objects.equals(datestamp, that.datestamp) - && Objects.equals(metadataNamespace, that.metadataNamespace); - } - - @Override - public int hashCode() { - return Objects.hash(harvestDate, altered, baseURL, identifier, datestamp, metadataNamespace); - } -} diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/OtherResearchProduct.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/OtherResearchProduct.java deleted file mode 100644 index b04934c23..000000000 --- a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/OtherResearchProduct.java +++ /dev/null @@ -1,60 +0,0 @@ - -package eu.dnetlib.dhp.schema.oaf; - -import java.io.Serializable; -import java.util.List; - -import eu.dnetlib.dhp.schema.common.ModelConstants; - -public class OtherResearchProduct extends Result implements Serializable { - - private List> contactperson; - - private List> contactgroup; - - private List> tool; - - public OtherResearchProduct() { - setResulttype(ModelConstants.ORP_DEFAULT_RESULTTYPE); - } - - public List> getContactperson() { - return contactperson; - } - - public void setContactperson(List> contactperson) { - this.contactperson = contactperson; - } - - public List> getContactgroup() { - return contactgroup; - } - - public void setContactgroup(List> contactgroup) { - this.contactgroup = contactgroup; - } - - public List> getTool() { - return tool; - } - - public void setTool(List> tool) { - this.tool = tool; - } - - @Override - public void mergeFrom(OafEntity e) { - super.mergeFrom(e); - - if (!OtherResearchProduct.class.isAssignableFrom(e.getClass())) { - return; - } - - OtherResearchProduct o = (OtherResearchProduct) e; - - contactperson = mergeLists(contactperson, o.getContactperson()); - contactgroup = mergeLists(contactgroup, o.getContactgroup()); - tool = mergeLists(tool, o.getTool()); - mergeOAFDataInfo(e); - } -} diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Project.java 
b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Project.java deleted file mode 100644 index 4be4d5d30..000000000 --- a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Project.java +++ /dev/null @@ -1,358 +0,0 @@ - -package eu.dnetlib.dhp.schema.oaf; - -import java.io.Serializable; -import java.util.List; - -public class Project extends OafEntity implements Serializable { - - private Field websiteurl; - - private Field code; - - private Field acronym; - - private Field title; - - private Field startdate; - - private Field enddate; - - private Field callidentifier; - - private Field keywords; - - private Field duration; - - private Field ecsc39; - - private Field oamandatepublications; - - private Field ecarticle29_3; - - private List subjects; - - private List> fundingtree; - - private Qualifier contracttype; - - private Field optional1; - - private Field optional2; - - private Field jsonextrainfo; - - private Field contactfullname; - - private Field contactfax; - - private Field contactphone; - - private Field contactemail; - - private Field summary; - - private Field currency; - - private Float totalcost; - - private Float fundedamount; - - private String h2020topiccode; - - private String h2020topicdescription; - - private List h2020classification; - - public String getH2020topicdescription() { - return h2020topicdescription; - } - - public void setH2020topicdescription(String h2020topicdescription) { - this.h2020topicdescription = h2020topicdescription; - } - - public String getH2020topiccode() { - return h2020topiccode; - } - - public void setH2020topiccode(String h2020topiccode) { - this.h2020topiccode = h2020topiccode; - } - - public List getH2020classification() { - return h2020classification; - } - - public void setH2020classification(List h2020classification) { - this.h2020classification = h2020classification; - } - - public Field getWebsiteurl() { - return websiteurl; - } - - public void setWebsiteurl(Field websiteurl) { - this.websiteurl = 
websiteurl; - } - - public Field getCode() { - return code; - } - - public void setCode(Field code) { - this.code = code; - } - - public Field getAcronym() { - return acronym; - } - - public void setAcronym(Field acronym) { - this.acronym = acronym; - } - - public Field getTitle() { - return title; - } - - public void setTitle(Field title) { - this.title = title; - } - - public Field getStartdate() { - return startdate; - } - - public void setStartdate(Field startdate) { - this.startdate = startdate; - } - - public Field getEnddate() { - return enddate; - } - - public void setEnddate(Field enddate) { - this.enddate = enddate; - } - - public Field getCallidentifier() { - return callidentifier; - } - - public void setCallidentifier(Field callidentifier) { - this.callidentifier = callidentifier; - } - - public Field getKeywords() { - return keywords; - } - - public void setKeywords(Field keywords) { - this.keywords = keywords; - } - - public Field getDuration() { - return duration; - } - - public void setDuration(Field duration) { - this.duration = duration; - } - - public Field getEcsc39() { - return ecsc39; - } - - public void setEcsc39(Field ecsc39) { - this.ecsc39 = ecsc39; - } - - public Field getOamandatepublications() { - return oamandatepublications; - } - - public void setOamandatepublications(Field oamandatepublications) { - this.oamandatepublications = oamandatepublications; - } - - public Field getEcarticle29_3() { - return ecarticle29_3; - } - - public void setEcarticle29_3(Field ecarticle29_3) { - this.ecarticle29_3 = ecarticle29_3; - } - - public List getSubjects() { - return subjects; - } - - public void setSubjects(List subjects) { - this.subjects = subjects; - } - - public List> getFundingtree() { - return fundingtree; - } - - public void setFundingtree(List> fundingtree) { - this.fundingtree = fundingtree; - } - - public Qualifier getContracttype() { - return contracttype; - } - - public void setContracttype(Qualifier contracttype) { - 
this.contracttype = contracttype; - } - - public Field getOptional1() { - return optional1; - } - - public void setOptional1(Field optional1) { - this.optional1 = optional1; - } - - public Field getOptional2() { - return optional2; - } - - public void setOptional2(Field optional2) { - this.optional2 = optional2; - } - - public Field getJsonextrainfo() { - return jsonextrainfo; - } - - public void setJsonextrainfo(Field jsonextrainfo) { - this.jsonextrainfo = jsonextrainfo; - } - - public Field getContactfullname() { - return contactfullname; - } - - public void setContactfullname(Field contactfullname) { - this.contactfullname = contactfullname; - } - - public Field getContactfax() { - return contactfax; - } - - public void setContactfax(Field contactfax) { - this.contactfax = contactfax; - } - - public Field getContactphone() { - return contactphone; - } - - public void setContactphone(Field contactphone) { - this.contactphone = contactphone; - } - - public Field getContactemail() { - return contactemail; - } - - public void setContactemail(Field contactemail) { - this.contactemail = contactemail; - } - - public Field getSummary() { - return summary; - } - - public void setSummary(Field summary) { - this.summary = summary; - } - - public Field getCurrency() { - return currency; - } - - public void setCurrency(Field currency) { - this.currency = currency; - } - - public Float getTotalcost() { - return totalcost; - } - - public void setTotalcost(Float totalcost) { - this.totalcost = totalcost; - } - - public Float getFundedamount() { - return fundedamount; - } - - public void setFundedamount(Float fundedamount) { - this.fundedamount = fundedamount; - } - - @Override - public void mergeFrom(OafEntity e) { - super.mergeFrom(e); - - if (!Project.class.isAssignableFrom(e.getClass())) { - return; - } - - Project p = (Project) e; - - websiteurl = p.getWebsiteurl() != null && compareTrust(this, e) < 0 ? 
p.getWebsiteurl() : websiteurl; - code = p.getCode() != null && compareTrust(this, e) < 0 ? p.getCode() : code; - acronym = p.getAcronym() != null && compareTrust(this, e) < 0 ? p.getAcronym() : acronym; - title = p.getTitle() != null && compareTrust(this, e) < 0 ? p.getTitle() : title; - startdate = p.getStartdate() != null && compareTrust(this, e) < 0 ? p.getStartdate() : startdate; - enddate = p.getEnddate() != null && compareTrust(this, e) < 0 ? p.getEnddate() : enddate; - callidentifier = p.getCallidentifier() != null && compareTrust(this, e) < 0 - ? p.getCallidentifier() - : callidentifier; - keywords = p.getKeywords() != null && compareTrust(this, e) < 0 ? p.getKeywords() : keywords; - duration = p.getDuration() != null && compareTrust(this, e) < 0 ? p.getDuration() : duration; - ecsc39 = p.getEcsc39() != null && compareTrust(this, e) < 0 ? p.getEcsc39() : ecsc39; - oamandatepublications = p.getOamandatepublications() != null && compareTrust(this, e) < 0 - ? p.getOamandatepublications() - : oamandatepublications; - ecarticle29_3 = p.getEcarticle29_3() != null && compareTrust(this, e) < 0 - ? p.getEcarticle29_3() - : ecarticle29_3; - subjects = mergeLists(subjects, p.getSubjects()); - fundingtree = mergeLists(fundingtree, p.getFundingtree()); - contracttype = p.getContracttype() != null && compareTrust(this, e) < 0 - ? p.getContracttype() - : contracttype; - optional1 = p.getOptional1() != null && compareTrust(this, e) < 0 ? p.getOptional1() : optional1; - optional2 = p.getOptional2() != null && compareTrust(this, e) < 0 ? p.getOptional2() : optional2; - jsonextrainfo = p.getJsonextrainfo() != null && compareTrust(this, e) < 0 - ? p.getJsonextrainfo() - : jsonextrainfo; - contactfullname = p.getContactfullname() != null && compareTrust(this, e) < 0 - ? p.getContactfullname() - : contactfullname; - contactfax = p.getContactfax() != null && compareTrust(this, e) < 0 ? 
p.getContactfax() : contactfax; - contactphone = p.getContactphone() != null && compareTrust(this, e) < 0 - ? p.getContactphone() - : contactphone; - contactemail = p.getContactemail() != null && compareTrust(this, e) < 0 - ? p.getContactemail() - : contactemail; - summary = p.getSummary() != null && compareTrust(this, e) < 0 ? p.getSummary() : summary; - currency = p.getCurrency() != null && compareTrust(this, e) < 0 ? p.getCurrency() : currency; - totalcost = p.getTotalcost() != null && compareTrust(this, e) < 0 ? p.getTotalcost() : totalcost; - fundedamount = p.getFundedamount() != null && compareTrust(this, e) < 0 - ? p.getFundedamount() - : fundedamount; - - h2020classification = mergeLists(h2020classification, p.getH2020classification()); - - mergeOAFDataInfo(e); - } -} diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Publication.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Publication.java deleted file mode 100644 index 3058c262b..000000000 --- a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Publication.java +++ /dev/null @@ -1,39 +0,0 @@ - -package eu.dnetlib.dhp.schema.oaf; - -import java.io.Serializable; - -import eu.dnetlib.dhp.schema.common.ModelConstants; - -public class Publication extends Result implements Serializable { - - // publication specific - private Journal journal; - - public Publication() { - setResulttype(ModelConstants.PUBLICATION_DEFAULT_RESULTTYPE); - } - - public Journal getJournal() { - return journal; - } - - public void setJournal(Journal journal) { - this.journal = journal; - } - - @Override - public void mergeFrom(OafEntity e) { - super.mergeFrom(e); - - if (!Publication.class.isAssignableFrom(e.getClass())) { - return; - } - - Publication p = (Publication) e; - - if (p.getJournal() != null && compareTrust(this, e) < 0) - journal = p.getJournal(); - mergeOAFDataInfo(e); - } -} diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Qualifier.java 
b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Qualifier.java deleted file mode 100644 index 87ecb55f1..000000000 --- a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Qualifier.java +++ /dev/null @@ -1,87 +0,0 @@ - -package eu.dnetlib.dhp.schema.oaf; - -import java.io.Serializable; - -import org.apache.commons.lang3.StringUtils; - -import com.fasterxml.jackson.annotation.JsonIgnore; - -public class Qualifier implements Serializable { - - private String classid; - private String classname; - private String schemeid; - private String schemename; - - public String getClassid() { - return classid; - } - - public void setClassid(String classid) { - this.classid = classid; - } - - public String getClassname() { - return classname; - } - - public void setClassname(String classname) { - this.classname = classname; - } - - public String getSchemeid() { - return schemeid; - } - - public void setSchemeid(String schemeid) { - this.schemeid = schemeid; - } - - public String getSchemename() { - return schemename; - } - - public void setSchemename(String schemename) { - this.schemename = schemename; - } - - public String toComparableString() { - return isBlank() - ? "" - : String - .format( - "%s::%s::%s::%s", - classid != null ? classid : "", - classname != null ? classname : "", - schemeid != null ? schemeid : "", - schemename != null ? 
schemename : ""); - } - - @JsonIgnore - public boolean isBlank() { - return StringUtils.isBlank(classid) - && StringUtils.isBlank(classname) - && StringUtils.isBlank(schemeid) - && StringUtils.isBlank(schemename); - } - - @Override - public int hashCode() { - return toComparableString().hashCode(); - } - - @Override - public boolean equals(Object obj) { - if (this == obj) - return true; - if (obj == null) - return false; - if (getClass() != obj.getClass()) - return false; - - Qualifier other = (Qualifier) obj; - - return toComparableString().equals(other.toComparableString()); - } -} diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Relation.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Relation.java deleted file mode 100644 index 8825d7137..000000000 --- a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Relation.java +++ /dev/null @@ -1,165 +0,0 @@ - -package eu.dnetlib.dhp.schema.oaf; - -import eu.dnetlib.dhp.schema.common.ModelSupport; - -import static com.google.common.base.Preconditions.checkArgument; - -import java.text.ParseException; -import java.util.*; -import java.util.stream.Collectors; -import java.util.stream.Stream; - -/** - * Relation models any edge between two nodes in the OpenAIRE graph. It has a source id and a target id pointing to - * graph node identifiers and it is further characterised by the semantic of the link through the fields relType, - * subRelType and relClass. Provenance information is modeled according to the dataInfo element and collectedFrom, while - * individual relationship types can provide extra information via the properties field. - */ -public class Relation extends Oaf { - - /** - * Main relationship classifier, values include 'resultResult', 'resultProject', 'resultOrganization', etc. - */ - private String relType; - - /** - * Further classifies a relationship, values include 'affiliation', 'similarity', 'supplement', etc. 
- */ - private String subRelType; - - /** - * Indicates the direction of the relationship, values include 'isSupplementTo', 'isSupplementedBy', 'merges, - * 'isMergedIn'. - */ - private String relClass; - - /** - * The source entity id. - */ - private String source; - - /** - * The target entity id. - */ - private String target; - - /** - * Was this relationship authoritatively validated? - */ - private Boolean validated; - - /** - * When was this relationship authoritatively validated. - */ - private String validationDate; - - /** - * List of relation specific properties. Values include 'similarityLevel', indicating the similarity score between a - * pair of publications. - */ - private List properties = new ArrayList<>(); - - public String getRelType() { - return relType; - } - - public void setRelType(final String relType) { - this.relType = relType; - } - - public String getSubRelType() { - return subRelType; - } - - public void setSubRelType(final String subRelType) { - this.subRelType = subRelType; - } - - public String getRelClass() { - return relClass; - } - - public void setRelClass(final String relClass) { - this.relClass = relClass; - } - - public String getSource() { - return source; - } - - public void setSource(final String source) { - this.source = source; - } - - public String getTarget() { - return target; - } - - public void setTarget(final String target) { - this.target = target; - } - - public List getProperties() { - return properties; - } - - public void setProperties(List properties) { - this.properties = properties; - } - - public Boolean getValidated() { - return Objects.nonNull(validated) && validated; - } - - public void setValidated(Boolean validated) { - this.validated = validated; - } - - public String getValidationDate() { - return validationDate; - } - - public void setValidationDate(String validationDate) { - this.validationDate = validationDate; - } - - public void mergeFrom(final Relation r) { - - 
checkArgument(Objects.equals(getSource(), r.getSource()), "source ids must be equal"); - checkArgument(Objects.equals(getTarget(), r.getTarget()), "target ids must be equal"); - checkArgument(Objects.equals(getRelType(), r.getRelType()), "relType(s) must be equal"); - checkArgument( - Objects.equals(getSubRelType(), r.getSubRelType()), "subRelType(s) must be equal"); - checkArgument(Objects.equals(getRelClass(), r.getRelClass()), "relClass(es) must be equal"); - - setValidated(getValidated() || r.getValidated()); - try { - setValidationDate(ModelSupport.oldest(getValidationDate(), r.getValidationDate())); - } catch (ParseException e) { - throw new IllegalArgumentException(String.format("invalid validation date format in relation [s:%s, t:%s]: %s", getSource(), getTarget(), getValidationDate())); - } - - super.mergeFrom(r); - } - - @Override - public boolean equals(Object o) { - if (this == o) - return true; - if (o == null || getClass() != o.getClass()) - return false; - Relation relation = (Relation) o; - return relType.equals(relation.relType) - && subRelType.equals(relation.subRelType) - && relClass.equals(relation.relClass) - && source.equals(relation.source) - && target.equals(relation.target); - } - - @Override - public int hashCode() { - return Objects.hash(relType, subRelType, relClass, source, target, collectedfrom); - } - -} diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Result.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Result.java deleted file mode 100644 index 845c4c982..000000000 --- a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Result.java +++ /dev/null @@ -1,351 +0,0 @@ - -package eu.dnetlib.dhp.schema.oaf; - -import java.io.Serializable; -import java.util.ArrayList; -import java.util.Comparator; -import java.util.List; -import java.util.stream.Collectors; - -import eu.dnetlib.dhp.schema.common.LicenseComparator; - -public class Result extends OafEntity implements Serializable { - - private List measures; - - 
private List author; - - // resulttype allows subclassing results into publications | datasets | software - private Qualifier resulttype; - - // common fields - private Qualifier language; - - private List country; - - private List subject; - - private List title; - - private List relevantdate; - - private List> description; - - private Field dateofacceptance; - - private Field publisher; - - private Field embargoenddate; - - private List> source; - - private List> fulltext; // remove candidate - - private List> format; - - private List> contributor; - - private Qualifier resourcetype; - - private List> coverage; - - private Qualifier bestaccessright; - - private List context; - - private List externalReference; - - private List instance; - - public List getMeasures() { - return measures; - } - - public void setMeasures(List measures) { - this.measures = measures; - } - - public List getAuthor() { - return author; - } - - public void setAuthor(List author) { - this.author = author; - } - - public Qualifier getResulttype() { - return resulttype; - } - - public void setResulttype(Qualifier resulttype) { - this.resulttype = resulttype; - } - - public Qualifier getLanguage() { - return language; - } - - public void setLanguage(Qualifier language) { - this.language = language; - } - - public List getCountry() { - return country; - } - - public void setCountry(List country) { - this.country = country; - } - - public List getSubject() { - return subject; - } - - public void setSubject(List subject) { - this.subject = subject; - } - - public List getTitle() { - return title; - } - - public void setTitle(List title) { - this.title = title; - } - - public List getRelevantdate() { - return relevantdate; - } - - public void setRelevantdate(List relevantdate) { - this.relevantdate = relevantdate; - } - - public List> getDescription() { - return description; - } - - public void setDescription(List> description) { - this.description = description; - } - - public Field 
getDateofacceptance() { - return dateofacceptance; - } - - public void setDateofacceptance(Field dateofacceptance) { - this.dateofacceptance = dateofacceptance; - } - - public Field getPublisher() { - return publisher; - } - - public void setPublisher(Field publisher) { - this.publisher = publisher; - } - - public Field getEmbargoenddate() { - return embargoenddate; - } - - public void setEmbargoenddate(Field embargoenddate) { - this.embargoenddate = embargoenddate; - } - - public List> getSource() { - return source; - } - - public void setSource(List> source) { - this.source = source; - } - - public List> getFulltext() { - return fulltext; - } - - public void setFulltext(List> fulltext) { - this.fulltext = fulltext; - } - - public List> getFormat() { - return format; - } - - public void setFormat(List> format) { - this.format = format; - } - - public List> getContributor() { - return contributor; - } - - public void setContributor(List> contributor) { - this.contributor = contributor; - } - - public Qualifier getResourcetype() { - return resourcetype; - } - - public void setResourcetype(Qualifier resourcetype) { - this.resourcetype = resourcetype; - } - - public List> getCoverage() { - return coverage; - } - - public void setCoverage(List> coverage) { - this.coverage = coverage; - } - - public Qualifier getBestaccessright() { - return bestaccessright; - } - - public void setBestaccessright(Qualifier bestaccessright) { - this.bestaccessright = bestaccessright; - } - - public List getContext() { - return context; - } - - public void setContext(List context) { - this.context = context; - } - - public List getExternalReference() { - return externalReference; - } - - public void setExternalReference(List externalReference) { - this.externalReference = externalReference; - } - - public List getInstance() { - return instance; - } - - public void setInstance(List instance) { - this.instance = instance; - } - - @Override - public void mergeFrom(OafEntity e) { - 
super.mergeFrom(e); - - if (!Result.class.isAssignableFrom(e.getClass())) { - return; - } - - Result r = (Result) e; - - measures = mergeLists(measures, r.getMeasures()); - - instance = mergeLists(instance, r.getInstance()); - - if (r.getBestaccessright() != null - && new LicenseComparator().compare(r.getBestaccessright(), bestaccessright) < 0) - bestaccessright = r.getBestaccessright(); - - if (r.getResulttype() != null && compareTrust(this, r) < 0) - resulttype = r.getResulttype(); - - if (r.getLanguage() != null && compareTrust(this, r) < 0) - language = r.getLanguage(); - - country = mergeLists(country, r.getCountry()); - - subject = mergeLists(subject, r.getSubject()); - - // merge title lists: main title with higher trust and distinct between the others - StructuredProperty baseMainTitle = null; - if (title != null) { - baseMainTitle = getMainTitle(title); - if (baseMainTitle != null) { - final StructuredProperty p = baseMainTitle; - title = title.stream().filter(t -> t != p).collect(Collectors.toList()); - } - } - - StructuredProperty newMainTitle = null; - if (r.getTitle() != null) { - newMainTitle = getMainTitle(r.getTitle()); - if (newMainTitle != null) { - final StructuredProperty p = newMainTitle; - r.setTitle(r.getTitle().stream().filter(t -> t != p).collect(Collectors.toList())); - } - } - - if (newMainTitle != null && compareTrust(this, r) < 0) { - baseMainTitle = newMainTitle; - } - - title = mergeLists(title, r.getTitle()); - if (title != null && baseMainTitle != null) { - title.add(baseMainTitle); - } - - relevantdate = mergeLists(relevantdate, r.getRelevantdate()); - - description = longestLists(description, r.getDescription()); - - if (r.getPublisher() != null && compareTrust(this, r) < 0) - publisher = r.getPublisher(); - - if (r.getEmbargoenddate() != null && compareTrust(this, r) < 0) - embargoenddate = r.getEmbargoenddate(); - - source = mergeLists(source, r.getSource()); - - fulltext = mergeLists(fulltext, r.getFulltext()); - - format = 
mergeLists(format, r.getFormat()); - - contributor = mergeLists(contributor, r.getContributor()); - - if (r.getResourcetype() != null) - resourcetype = r.getResourcetype(); - - coverage = mergeLists(coverage, r.getCoverage()); - - context = mergeLists(context, r.getContext()); - - externalReference = mergeLists(externalReference, r.getExternalReference()); - } - - private List> longestLists(List> a, List> b) { - if (a == null || b == null) - return a == null ? b : a; - if (a.size() == b.size()) { - int msa = a - .stream() - .filter(i -> i != null && i.getValue() != null) - .map(i -> i.getValue().length()) - .max(Comparator.naturalOrder()) - .orElse(0); - int msb = b - .stream() - .filter(i -> i != null && i.getValue() != null) - .map(i -> i.getValue().length()) - .max(Comparator.naturalOrder()) - .orElse(0); - return msa > msb ? a : b; - } - return a.size() > b.size() ? a : b; - } - - private StructuredProperty getMainTitle(List titles) { - // need to check if the list of titles contains more than 1 main title? 
(in that case, we should chose which - // main title select in the list) - for (StructuredProperty title : titles) { - if (title.getQualifier() != null && title.getQualifier().getClassid() != null) - if (title.getQualifier().getClassid().equals("main title")) - return title; - } - return null; - } -} diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Software.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Software.java deleted file mode 100644 index d25b5c9ce..000000000 --- a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Software.java +++ /dev/null @@ -1,80 +0,0 @@ - -package eu.dnetlib.dhp.schema.oaf; - -import java.io.Serializable; -import java.util.List; - -import eu.dnetlib.dhp.schema.common.ModelConstants; - -public class Software extends Result implements Serializable { - - private List> documentationUrl; - - // candidate for removal - private List license; - - // candidate for removal - private Field codeRepositoryUrl; - - private Qualifier programmingLanguage; - - public Software() { - setResulttype(ModelConstants.SOFTWARE_DEFAULT_RESULTTYPE); - } - - public List> getDocumentationUrl() { - return documentationUrl; - } - - public void setDocumentationUrl(List> documentationUrl) { - this.documentationUrl = documentationUrl; - } - - public List getLicense() { - return license; - } - - public void setLicense(List license) { - this.license = license; - } - - public Field getCodeRepositoryUrl() { - return codeRepositoryUrl; - } - - public void setCodeRepositoryUrl(Field codeRepositoryUrl) { - this.codeRepositoryUrl = codeRepositoryUrl; - } - - public Qualifier getProgrammingLanguage() { - return programmingLanguage; - } - - public void setProgrammingLanguage(Qualifier programmingLanguage) { - this.programmingLanguage = programmingLanguage; - } - - @Override - public void mergeFrom(OafEntity e) { - super.mergeFrom(e); - - if (!Software.class.isAssignableFrom(e.getClass())) { - return; - } - - final Software s = (Software) e; - 
documentationUrl = mergeLists(documentationUrl, s.getDocumentationUrl()); - - license = mergeLists(license, s.getLicense()); - - codeRepositoryUrl = s.getCodeRepositoryUrl() != null && compareTrust(this, s) < 0 - ? s.getCodeRepositoryUrl() - : codeRepositoryUrl; - - programmingLanguage = s.getProgrammingLanguage() != null && compareTrust(this, s) < 0 - ? s.getProgrammingLanguage() - : programmingLanguage; - - mergeOAFDataInfo(e); - } -} diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/StructuredProperty.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/StructuredProperty.java deleted file mode 100644 index 1fa0de0be..000000000 --- a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/StructuredProperty.java +++ /dev/null @@ -1,60 +0,0 @@ - -package eu.dnetlib.dhp.schema.oaf; - -import java.io.Serializable; - -public class StructuredProperty implements Serializable { - - private String value; - - private Qualifier qualifier; - - private DataInfo dataInfo; - - public String getValue() { - return value; - } - - public void setValue(String value) { - this.value = value; - } - - public Qualifier getQualifier() { - return qualifier; - } - - public void setQualifier(Qualifier qualifier) { - this.qualifier = qualifier; - } - - public DataInfo getDataInfo() { - return dataInfo; - } - - public void setDataInfo(DataInfo dataInfo) { - this.dataInfo = dataInfo; - } - - public String toComparableString() { - return value != null ? 
value.toLowerCase() : ""; - } - - @Override - public int hashCode() { - return toComparableString().hashCode(); - } - - @Override - public boolean equals(Object obj) { - if (this == obj) - return true; - if (obj == null) - return false; - if (getClass() != obj.getClass()) - return false; - - StructuredProperty other = (StructuredProperty) obj; - - return toComparableString().equals(other.toComparableString()); - } -} diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/orcid/AuthorData.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/orcid/AuthorData.java deleted file mode 100644 index 6c94cdb13..000000000 --- a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/orcid/AuthorData.java +++ /dev/null @@ -1,72 +0,0 @@ - -package eu.dnetlib.dhp.schema.orcid; - -import java.io.Serializable; -import java.util.List; - -import com.google.common.collect.Lists; - -/** - * This class models the data that are retrieved from orcid publication - */ - -public class AuthorData implements Serializable { - - private String oid; - private String name; - private String surname; - private String creditName; - private String errorCode; - private List otherNames; - - public String getErrorCode() { - return errorCode; - } - - public void setErrorCode(String errorCode) { - this.errorCode = errorCode; - } - - public String getName() { - return name; - } - - public void setName(String name) { - this.name = name; - } - - public String getSurname() { - return surname; - } - - public void setSurname(String surname) { - this.surname = surname; - } - - public String getCreditName() { - return creditName; - } - - public void setCreditName(String creditName) { - this.creditName = creditName; - } - - public String getOid() { - return oid; - } - - public void setOid(String oid) { - this.oid = oid; - } - - public List getOtherNames() { - return otherNames; - } - - public void setOtherNames(List otherNames) { - if (this.otherNames == null) { - this.otherNames = Lists.newArrayList(); - } - 
this.otherNames = otherNames; - } -} diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/orcid/OrcidDOI.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/orcid/OrcidDOI.java deleted file mode 100644 index cf372c12a..000000000 --- a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/orcid/OrcidDOI.java +++ /dev/null @@ -1,25 +0,0 @@ - -package eu.dnetlib.dhp.schema.orcid; - -import java.util.List; - -public class OrcidDOI { - private String doi; - private List authors; - - public String getDoi() { - return doi; - } - - public void setDoi(String doi) { - this.doi = doi; - } - - public List getAuthors() { - return authors; - } - - public void setAuthors(List authors) { - this.authors = authors; - } -} diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/scholexplorer/DLIDataset.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/scholexplorer/DLIDataset.java deleted file mode 100644 index 421b4ecaa..000000000 --- a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/scholexplorer/DLIDataset.java +++ /dev/null @@ -1,89 +0,0 @@ - -package eu.dnetlib.dhp.schema.scholexplorer; - -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; - -import org.apache.commons.lang3.StringUtils; - -import eu.dnetlib.dhp.schema.oaf.Dataset; -import eu.dnetlib.dhp.schema.oaf.OafEntity; - -public class DLIDataset extends Dataset { - - private String originalObjIdentifier; - - private List dlicollectedfrom; - - private String completionStatus; - - public String getCompletionStatus() { - return completionStatus; - } - - public void setCompletionStatus(String completionStatus) { - this.completionStatus = completionStatus; - } - - public List getDlicollectedfrom() { - return dlicollectedfrom; - } - - public void setDlicollectedfrom(List dlicollectedfrom) { - this.dlicollectedfrom = dlicollectedfrom; - } - - public String getOriginalObjIdentifier() { - return originalObjIdentifier; - } - - public void 
setOriginalObjIdentifier(String originalObjIdentifier) { - this.originalObjIdentifier = originalObjIdentifier; - } - - @Override - public void mergeFrom(OafEntity e) { - super.mergeFrom(e); - DLIDataset p = (DLIDataset) e; - if (StringUtils.isBlank(completionStatus) && StringUtils.isNotBlank(p.completionStatus)) - completionStatus = p.completionStatus; - if ("complete".equalsIgnoreCase(p.completionStatus)) - completionStatus = "complete"; - dlicollectedfrom = mergeProvenance(dlicollectedfrom, p.getDlicollectedfrom()); - } - - private List mergeProvenance( - final List a, final List b) { - Map result = new HashMap<>(); - if (a != null) - a - .forEach( - p -> { - if (p != null && StringUtils.isNotBlank(p.getId()) && result.containsKey(p.getId())) { - if ("incomplete".equalsIgnoreCase(result.get(p.getId()).getCompletionStatus()) - && StringUtils.isNotBlank(p.getCompletionStatus())) { - result.put(p.getId(), p); - } - - } else if (p != null && p.getId() != null && !result.containsKey(p.getId())) - result.put(p.getId(), p); - }); - if (b != null) - b - .forEach( - p -> { - if (p != null && StringUtils.isNotBlank(p.getId()) && result.containsKey(p.getId())) { - if ("incomplete".equalsIgnoreCase(result.get(p.getId()).getCompletionStatus()) - && StringUtils.isNotBlank(p.getCompletionStatus())) { - result.put(p.getId(), p); - } - - } else if (p != null && p.getId() != null && !result.containsKey(p.getId())) - result.put(p.getId(), p); - }); - - return new ArrayList<>(result.values()); - } -} diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/scholexplorer/DLIPublication.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/scholexplorer/DLIPublication.java deleted file mode 100644 index c899a899c..000000000 --- a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/scholexplorer/DLIPublication.java +++ /dev/null @@ -1,87 +0,0 @@ - -package eu.dnetlib.dhp.schema.scholexplorer; - -import java.io.Serializable; -import java.util.*; - -import 
org.apache.commons.lang3.StringUtils; - -import eu.dnetlib.dhp.schema.oaf.OafEntity; -import eu.dnetlib.dhp.schema.oaf.Publication; - -public class DLIPublication extends Publication implements Serializable { - - private String originalObjIdentifier; - - private List dlicollectedfrom; - - private String completionStatus; - - public String getCompletionStatus() { - return completionStatus; - } - - public void setCompletionStatus(String completionStatus) { - this.completionStatus = completionStatus; - } - - public List getDlicollectedfrom() { - return dlicollectedfrom; - } - - public void setDlicollectedfrom(List dlicollectedfrom) { - this.dlicollectedfrom = dlicollectedfrom; - } - - public String getOriginalObjIdentifier() { - return originalObjIdentifier; - } - - public void setOriginalObjIdentifier(String originalObjIdentifier) { - this.originalObjIdentifier = originalObjIdentifier; - } - - @Override - public void mergeFrom(OafEntity e) { - super.mergeFrom(e); - DLIPublication p = (DLIPublication) e; - if (StringUtils.isBlank(completionStatus) && StringUtils.isNotBlank(p.completionStatus)) - completionStatus = p.completionStatus; - if ("complete".equalsIgnoreCase(p.completionStatus)) - completionStatus = "complete"; - dlicollectedfrom = mergeProvenance(dlicollectedfrom, p.getDlicollectedfrom()); - } - - private List mergeProvenance( - final List a, final List b) { - Map result = new HashMap<>(); - if (a != null) - a - .forEach( - p -> { - if (p != null && StringUtils.isNotBlank(p.getId()) && result.containsKey(p.getId())) { - if ("incomplete".equalsIgnoreCase(result.get(p.getId()).getCompletionStatus()) - && StringUtils.isNotBlank(p.getCompletionStatus())) { - result.put(p.getId(), p); - } - - } else if (p != null && p.getId() != null && !result.containsKey(p.getId())) - result.put(p.getId(), p); - }); - if (b != null) - b - .forEach( - p -> { - if (p != null && StringUtils.isNotBlank(p.getId()) && result.containsKey(p.getId())) { - if 
("incomplete".equalsIgnoreCase(result.get(p.getId()).getCompletionStatus()) - && StringUtils.isNotBlank(p.getCompletionStatus())) { - result.put(p.getId(), p); - } - - } else if (p != null && p.getId() != null && !result.containsKey(p.getId())) - result.put(p.getId(), p); - }); - - return new ArrayList<>(result.values()); - } -} diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/scholexplorer/DLIUnknown.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/scholexplorer/DLIUnknown.java deleted file mode 100644 index 5da599427..000000000 --- a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/scholexplorer/DLIUnknown.java +++ /dev/null @@ -1,132 +0,0 @@ - -package eu.dnetlib.dhp.schema.scholexplorer; - -import java.io.Serializable; -import java.util.*; -import java.util.stream.Collectors; - -import org.apache.commons.lang3.StringUtils; - -import eu.dnetlib.dhp.schema.oaf.Oaf; -import eu.dnetlib.dhp.schema.oaf.StructuredProperty; - -public class DLIUnknown extends Oaf implements Serializable { - - private String id; - - private List pid; - - private String dateofcollection; - - private String dateoftransformation; - - private List dlicollectedfrom; - - private String completionStatus = "incomplete"; - - public String getCompletionStatus() { - return completionStatus; - } - - public void setCompletionStatus(String completionStatus) { - this.completionStatus = completionStatus; - } - - public List getDlicollectedfrom() { - return dlicollectedfrom; - } - - public void setDlicollectedfrom(List dlicollectedfrom) { - this.dlicollectedfrom = dlicollectedfrom; - } - - public String getId() { - return id; - } - - public void setId(String id) { - this.id = id; - } - - public List getPid() { - return pid; - } - - public void setPid(List pid) { - this.pid = pid; - } - - public String getDateofcollection() { - return dateofcollection; - } - - public void setDateofcollection(String dateofcollection) { - this.dateofcollection = dateofcollection; - } - - public String 
getDateoftransformation() { - return dateoftransformation; - } - - public void setDateoftransformation(String dateoftransformation) { - this.dateoftransformation = dateoftransformation; - } - - public void mergeFrom(DLIUnknown p) { - if ("complete".equalsIgnoreCase(p.completionStatus)) - completionStatus = "complete"; - dlicollectedfrom = mergeProvenance(dlicollectedfrom, p.getDlicollectedfrom()); - if (StringUtils.isEmpty(id) && StringUtils.isNoneEmpty(p.getId())) - id = p.getId(); - if (StringUtils.isEmpty(dateofcollection) && StringUtils.isNoneEmpty(p.getDateofcollection())) - dateofcollection = p.getDateofcollection(); - - if (StringUtils.isEmpty(dateoftransformation) && StringUtils.isNoneEmpty(p.getDateoftransformation())) - dateofcollection = p.getDateoftransformation(); - pid = mergeLists(pid, p.getPid()); - } - - protected List mergeLists(final List... lists) { - - return Arrays - .stream(lists) - .filter(Objects::nonNull) - .flatMap(List::stream) - .filter(Objects::nonNull) - .distinct() - .collect(Collectors.toList()); - } - - private List mergeProvenance( - final List a, final List b) { - Map result = new HashMap<>(); - if (a != null) - a - .forEach( - p -> { - if (p != null && StringUtils.isNotBlank(p.getId()) && result.containsKey(p.getId())) { - if ("incomplete".equalsIgnoreCase(result.get(p.getId()).getCompletionStatus()) - && StringUtils.isNotBlank(p.getCompletionStatus())) { - result.put(p.getId(), p); - } - - } else if (p != null && p.getId() != null && !result.containsKey(p.getId())) - result.put(p.getId(), p); - }); - if (b != null) - b - .forEach( - p -> { - if (p != null && StringUtils.isNotBlank(p.getId()) && result.containsKey(p.getId())) { - if ("incomplete".equalsIgnoreCase(result.get(p.getId()).getCompletionStatus()) - && StringUtils.isNotBlank(p.getCompletionStatus())) { - result.put(p.getId(), p); - } - - } else if (p != null && p.getId() != null && !result.containsKey(p.getId())) - result.put(p.getId(), p); - }); - - return new 
ArrayList<>(result.values()); - } -} diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/scholexplorer/OafUtils.scala b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/scholexplorer/OafUtils.scala deleted file mode 100644 index 27eec77fa..000000000 --- a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/scholexplorer/OafUtils.scala +++ /dev/null @@ -1,90 +0,0 @@ -package eu.dnetlib.dhp.schema.scholexplorer - -import eu.dnetlib.dhp.schema.oaf.{DataInfo, Field, KeyValue, Qualifier, StructuredProperty} - -object OafUtils { - - - - def generateKeyValue(key: String, value: String): KeyValue = { - val kv: KeyValue = new KeyValue() - kv.setKey(key) - kv.setValue(value) - kv.setDataInfo(generateDataInfo("0.9")) - kv - } - - - def generateDataInfo(trust: String = "0.9", invisibile: Boolean = false): DataInfo = { - val di = new DataInfo - di.setDeletedbyinference(false) - di.setInferred(false) - di.setInvisible(false) - di.setTrust(trust) - di.setProvenanceaction(createQualifier("sysimport:actionset", "dnet:provenanceActions")) - di - } - - def createQualifier(cls: String, sch: String): Qualifier = { - createQualifier(cls, cls, sch, sch) - } - - - def createQualifier(classId: String, className: String, schemeId: String, schemeName: String): Qualifier = { - val q: Qualifier = new Qualifier - q.setClassid(classId) - q.setClassname(className) - q.setSchemeid(schemeId) - q.setSchemename(schemeName) - q - } - - - def asField[T](value: T): Field[T] = { - val tmp = new Field[T] - tmp.setValue(value) - tmp - - - } - - def createSP(value: String, classId: String,className:String, schemeId: String, schemeName:String): StructuredProperty = { - val sp = new StructuredProperty - sp.setQualifier(createQualifier(classId,className, schemeId, schemeName)) - sp.setValue(value) - sp - - } - - - - def createSP(value: String, classId: String,className:String, schemeId: String, schemeName:String, dataInfo: DataInfo): StructuredProperty = { - val sp = new StructuredProperty - 
sp.setQualifier(createQualifier(classId,className, schemeId, schemeName)) - sp.setValue(value) - sp.setDataInfo(dataInfo) - sp - - } - - def createSP(value: String, classId: String, schemeId: String): StructuredProperty = { - val sp = new StructuredProperty - sp.setQualifier(createQualifier(classId, schemeId)) - sp.setValue(value) - sp - - } - - - - def createSP(value: String, classId: String, schemeId: String, dataInfo: DataInfo): StructuredProperty = { - val sp = new StructuredProperty - sp.setQualifier(createQualifier(classId, schemeId)) - sp.setValue(value) - sp.setDataInfo(dataInfo) - sp - - } - - -} diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/scholexplorer/ProvenaceInfo.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/scholexplorer/ProvenaceInfo.java deleted file mode 100644 index b1188f064..000000000 --- a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/scholexplorer/ProvenaceInfo.java +++ /dev/null @@ -1,47 +0,0 @@ - -package eu.dnetlib.dhp.schema.scholexplorer; - -import java.io.Serializable; - -public class ProvenaceInfo implements Serializable { - - private String id; - - private String name; - - private String completionStatus; - - private String collectionMode = "collected"; - - public String getId() { - return id; - } - - public void setId(String id) { - this.id = id; - } - - public String getName() { - return name; - } - - public void setName(String name) { - this.name = name; - } - - public String getCompletionStatus() { - return completionStatus; - } - - public void setCompletionStatus(String completionStatus) { - this.completionStatus = completionStatus; - } - - public String getCollectionMode() { - return collectionMode; - } - - public void setCollectionMode(String collectionMode) { - this.collectionMode = collectionMode; - } -} diff --git a/dhp-schemas/src/test/java/eu/dnetlib/dhp/schema/action/AtomicActionTest.java b/dhp-schemas/src/test/java/eu/dnetlib/dhp/schema/action/AtomicActionTest.java deleted file mode 100644 
index 4d31591a0..000000000 --- a/dhp-schemas/src/test/java/eu/dnetlib/dhp/schema/action/AtomicActionTest.java +++ /dev/null @@ -1,40 +0,0 @@ - -package eu.dnetlib.dhp.schema.action; - -import static org.junit.jupiter.api.Assertions.*; - -import java.io.IOException; - -import org.apache.commons.lang3.StringUtils; -import org.junit.jupiter.api.Test; - -import com.fasterxml.jackson.databind.ObjectMapper; - -import eu.dnetlib.dhp.schema.oaf.Relation; - -/** @author claudio.atzori */ -public class AtomicActionTest { - - @Test - public void serializationTest() throws IOException { - - Relation rel = new Relation(); - rel.setSource("1"); - rel.setTarget("2"); - rel.setRelType("resultResult"); - rel.setSubRelType("dedup"); - rel.setRelClass("merges"); - - AtomicAction aa1 = new AtomicAction(Relation.class, rel); - - final ObjectMapper mapper = new ObjectMapper(); - String json = mapper.writeValueAsString(aa1); - - assertTrue(StringUtils.isNotBlank(json)); - - AtomicAction aa2 = mapper.readValue(json, AtomicAction.class); - - assertEquals(aa1.getClazz(), aa2.getClazz()); - assertEquals(aa1.getPayload(), aa2.getPayload()); - } -} diff --git a/dhp-schemas/src/test/java/eu/dnetlib/dhp/schema/common/ModelSupportTest.java b/dhp-schemas/src/test/java/eu/dnetlib/dhp/schema/common/ModelSupportTest.java deleted file mode 100644 index 73e8c47ff..000000000 --- a/dhp-schemas/src/test/java/eu/dnetlib/dhp/schema/common/ModelSupportTest.java +++ /dev/null @@ -1,37 +0,0 @@ - -package eu.dnetlib.dhp.schema.common; - -import static org.junit.jupiter.api.Assertions.assertFalse; -import static org.junit.jupiter.api.Assertions.assertTrue; - -import org.junit.jupiter.api.Nested; -import org.junit.jupiter.api.Test; - -import eu.dnetlib.dhp.schema.oaf.OafEntity; -import eu.dnetlib.dhp.schema.oaf.Relation; -import eu.dnetlib.dhp.schema.oaf.Result; - -public class ModelSupportTest { - - @Nested - class IsSubClass { - - @Test - public void shouldReturnFalseWhenSubClassDoesNotExtendSuperClass() { - // 
when - Boolean result = ModelSupport.isSubClass(Relation.class, OafEntity.class); - - // then - assertFalse(result); - } - - @Test - public void shouldReturnTrueWhenSubClassExtendsSuperClass() { - // when - Boolean result = ModelSupport.isSubClass(Result.class, OafEntity.class); - - // then - assertTrue(result); - } - } -} diff --git a/dhp-schemas/src/test/java/eu/dnetlib/dhp/schema/oaf/MeasureTest.java b/dhp-schemas/src/test/java/eu/dnetlib/dhp/schema/oaf/MeasureTest.java deleted file mode 100644 index 26b4407c9..000000000 --- a/dhp-schemas/src/test/java/eu/dnetlib/dhp/schema/oaf/MeasureTest.java +++ /dev/null @@ -1,57 +0,0 @@ - -package eu.dnetlib.dhp.schema.oaf; - -import java.io.IOException; -import java.util.List; - -import org.junit.jupiter.api.Assertions; -import org.junit.jupiter.api.Test; - -import com.fasterxml.jackson.annotation.JsonInclude; -import com.fasterxml.jackson.core.type.TypeReference; -import com.fasterxml.jackson.databind.ObjectMapper; -import com.google.common.collect.Lists; - -public class MeasureTest { - - public static final ObjectMapper OBJECT_MAPPER = new ObjectMapper() - .setSerializationInclusion(JsonInclude.Include.NON_NULL); - - @Test - public void testMeasureSerialization() throws IOException { - - Measure popularity = new Measure(); - popularity.setId("popularity"); - popularity - .setUnit( - Lists - .newArrayList( - unit("score", "0.5"))); - - Measure influence = new Measure(); - influence.setId("influence"); - influence - .setUnit( - Lists - .newArrayList( - unit("score", "0.3"))); - - List m = Lists.newArrayList(popularity, influence); - - String s = OBJECT_MAPPER.writeValueAsString(m); - System.out.println(s); - - List mm = OBJECT_MAPPER.readValue(s, new TypeReference>() { - }); - - Assertions.assertNotNull(mm); - } - - private KeyValue unit(String key, String value) { - KeyValue unit = new KeyValue(); - unit.setKey(key); - unit.setValue(value); - return unit; - } - -} diff --git 
a/dhp-schemas/src/test/java/eu/dnetlib/dhp/schema/oaf/MergeTest.java b/dhp-schemas/src/test/java/eu/dnetlib/dhp/schema/oaf/MergeTest.java deleted file mode 100644 index f5b9bf028..000000000 --- a/dhp-schemas/src/test/java/eu/dnetlib/dhp/schema/oaf/MergeTest.java +++ /dev/null @@ -1,138 +0,0 @@ - -package eu.dnetlib.dhp.schema.oaf; - -import static org.junit.jupiter.api.Assertions.*; - -import java.util.Arrays; -import java.util.List; - -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; - -public class MergeTest { - - OafEntity oaf; - - @BeforeEach - public void setUp() { - oaf = new Publication(); - } - - @Test - public void mergeListsTest() { - - // string list merge test - List a = Arrays.asList("a", "b", "c", "e"); - List b = Arrays.asList("a", "b", "c", "d"); - List c = null; - - System.out.println("merge result 1 = " + oaf.mergeLists(a, b)); - - System.out.println("merge result 2 = " + oaf.mergeLists(a, c)); - - System.out.println("merge result 3 = " + oaf.mergeLists(c, c)); - } - - @Test - public void mergePublicationCollectedFromTest() { - - Publication a = new Publication(); - Publication b = new Publication(); - - a.setCollectedfrom(Arrays.asList(setKV("a", "open"), setKV("b", "closed"))); - b.setCollectedfrom(Arrays.asList(setKV("A", "open"), setKV("b", "Open"))); - - a.mergeFrom(b); - - assertNotNull(a.getCollectedfrom()); - assertEquals(3, a.getCollectedfrom().size()); - } - - @Test - public void mergePublicationSubjectTest() { - - Publication a = new Publication(); - Publication b = new Publication(); - - a.setSubject(Arrays.asList(setSP("a", "open", "classe"), setSP("b", "open", "classe"))); - b.setSubject(Arrays.asList(setSP("A", "open", "classe"), setSP("c", "open", "classe"))); - - a.mergeFrom(b); - - assertNotNull(a.getSubject()); - assertEquals(3, a.getSubject().size()); - } - - @Test - public void mergeRelationTest() { - - Relation a = createRel(null, null); - Relation b = createRel(null, null); - a.mergeFrom(b); - 
assertEquals(a, b); - - a = createRel(true, null); - b = createRel(null, null); - a.mergeFrom(b); - assertEquals(true, a.getValidated()); - - a = createRel(true, null); - b = createRel(false, null); - a.mergeFrom(b); - assertEquals(true, a.getValidated()); - - a = createRel(true, null); - b = createRel(true, "2016-04-05T12:41:19.202Z"); - a.mergeFrom(b); - assertEquals("2016-04-05T12:41:19.202Z", a.getValidationDate()); - - a = createRel(true, "2016-05-07T12:41:19.202Z"); - b = createRel(true, "2016-04-05T12:41:19.202Z"); - a.mergeFrom(b); - assertEquals("2016-04-05T12:41:19.202Z", a.getValidationDate()); - } - - @Test - public void mergeRelationTestParseException() { - assertThrows(IllegalArgumentException.class, () -> { - Relation a = createRel(true, "2016-04-05"); - Relation b = createRel(true, "2016-04-05"); - a.mergeFrom(b); - }); - } - - private Relation createRel(Boolean validated, String validationDate) { - Relation rel = new Relation(); - rel.setSource("1"); - rel.setTarget("2"); - rel.setRelType("reltype"); - rel.setSubRelType("subreltype"); - rel.setRelClass("relclass"); - rel.setValidated(validated); - rel.setValidationDate(validationDate); - return rel; - } - - private KeyValue setKV(final String key, final String value) { - - KeyValue k = new KeyValue(); - - k.setKey(key); - k.setValue(value); - - return k; - } - - private StructuredProperty setSP( - final String value, final String schema, final String classname) { - StructuredProperty s = new StructuredProperty(); - s.setValue(value); - Qualifier q = new Qualifier(); - q.setClassname(classname); - q.setClassid(classname); - q.setSchemename(schema); - q.setSchemeid(schema); - s.setQualifier(q); - return s; - } -} diff --git a/dhp-schemas/src/test/java/eu/dnetlib/dhp/schema/scholexplorer/DLItest.java b/dhp-schemas/src/test/java/eu/dnetlib/dhp/schema/scholexplorer/DLItest.java deleted file mode 100644 index e4596fcdd..000000000 --- 
a/dhp-schemas/src/test/java/eu/dnetlib/dhp/schema/scholexplorer/DLItest.java +++ /dev/null @@ -1,83 +0,0 @@ - -package eu.dnetlib.dhp.schema.scholexplorer; - -import java.io.IOException; -import java.util.Arrays; -import java.util.Collections; - -import org.junit.jupiter.api.Test; - -import com.fasterxml.jackson.core.JsonProcessingException; -import com.fasterxml.jackson.databind.DeserializationFeature; -import com.fasterxml.jackson.databind.ObjectMapper; -import com.fasterxml.jackson.databind.SerializationFeature; - -import eu.dnetlib.dhp.schema.oaf.Qualifier; -import eu.dnetlib.dhp.schema.oaf.StructuredProperty; - -public class DLItest { - - @Test - public void testMergePublication() throws JsonProcessingException { - DLIPublication a1 = new DLIPublication(); - a1.setPid(Arrays.asList(createSP("123456", "pdb", "dnet:pid_types"))); - a1.setTitle(Collections.singletonList(createSP("Un Titolo", "title", "dnetTitle"))); - a1.setDlicollectedfrom(Arrays.asList(createCollectedFrom("znd", "Zenodo", "complete"))); - a1.setCompletionStatus("complete"); - - DLIPublication a = new DLIPublication(); - a - .setPid( - Arrays - .asList( - createSP("10.11", "doi", "dnet:pid_types"), - createSP("123456", "pdb", "dnet:pid_types"))); - a.setTitle(Collections.singletonList(createSP("A Title", "title", "dnetTitle"))); - a - .setDlicollectedfrom( - Arrays - .asList( - createCollectedFrom("dct", "datacite", "complete"), - createCollectedFrom("dct", "datacite", "incomplete"))); - a.setCompletionStatus("incomplete"); - - a.mergeFrom(a1); - - ObjectMapper mapper = new ObjectMapper(); - System.out.println(mapper.writeValueAsString(a)); - } - - @Test - public void testDeserialization() throws IOException { - - final String json = 
"{\"dataInfo\":{\"invisible\":false,\"inferred\":null,\"deletedbyinference\":false,\"trust\":\"0.9\",\"inferenceprovenance\":null,\"provenanceaction\":null},\"lastupdatetimestamp\":null,\"id\":\"60|bd9352547098929a394655ad1a44a479\",\"originalId\":[\"bd9352547098929a394655ad1a44a479\"],\"collectedfrom\":[{\"key\":\"dli_________::datacite\",\"value\":\"Datasets in Datacite\",\"dataInfo\":null,\"blank\":false}],\"pid\":[{\"value\":\"10.7925/DRS1.DUCHAS_5078760\",\"qualifier\":{\"classid\":\"doi\",\"classname\":\"doi\",\"schemeid\":\"dnet:pid_types\",\"schemename\":\"dnet:pid_types\",\"blank\":false},\"dataInfo\":null}],\"dateofcollection\":\"2020-01-09T08:29:31.885Z\",\"dateoftransformation\":null,\"extraInfo\":null,\"oaiprovenance\":null,\"author\":[{\"fullname\":\"Cathail, S. Ó\",\"name\":null,\"surname\":null,\"rank\":null,\"pid\":null,\"affiliation\":null},{\"fullname\":\"Donnell, Breda Mc\",\"name\":null,\"surname\":null,\"rank\":null,\"pid\":null,\"affiliation\":null},{\"fullname\":\"Ireland. Department of Arts, Culture, and the Gaeltacht\",\"name\":null,\"surname\":null,\"rank\":null,\"pid\":null,\"affiliation\":null},{\"fullname\":\"University College Dublin\",\"name\":null,\"surname\":null,\"rank\":null,\"pid\":null,\"affiliation\":null},{\"fullname\":\"National Folklore Foundation\",\"name\":null,\"surname\":null,\"rank\":null,\"pid\":null,\"affiliation\":null},{\"fullname\":\"Cathail, S. 
Ó\",\"name\":null,\"surname\":null,\"rank\":null,\"pid\":null,\"affiliation\":null},{\"fullname\":\"Donnell, Breda Mc\",\"name\":null,\"surname\":null,\"rank\":null,\"pid\":null,\"affiliation\":null}],\"resulttype\":null,\"language\":null,\"country\":null,\"subject\":[{\"value\":\"Recreation\",\"qualifier\":{\"classid\":\"dnet:subject\",\"classname\":\"dnet:subject\",\"schemeid\":\"unknown\",\"schemename\":\"unknown\",\"blank\":false},\"dataInfo\":null},{\"value\":\"Entertainments and recreational activities\",\"qualifier\":{\"classid\":\"dnet:subject\",\"classname\":\"dnet:subject\",\"schemeid\":\"unknown\",\"schemename\":\"unknown\",\"blank\":false},\"dataInfo\":null},{\"value\":\"Siamsaíocht agus caitheamh aimsire\",\"qualifier\":{\"classid\":\"dnet:subject\",\"classname\":\"dnet:subject\",\"schemeid\":\"unknown\",\"schemename\":\"unknown\",\"blank\":false},\"dataInfo\":null}],\"title\":[{\"value\":\"Games We Play\",\"qualifier\":null,\"dataInfo\":null}],\"relevantdate\":[{\"value\":\"1938-09-28\",\"qualifier\":{\"classid\":\"date\",\"classname\":\"date\",\"schemeid\":\"dnet::date\",\"schemename\":\"dnet::date\",\"blank\":false},\"dataInfo\":null}],\"description\":[{\"value\":\"Story collected by Breda Mc Donnell, a student at Tenure school (Tinure, Co. 
Louth) (no informant identified).\",\"dataInfo\":null}],\"dateofacceptance\":null,\"publisher\":{\"value\":\"University College Dublin\",\"dataInfo\":null},\"embargoenddate\":null,\"source\":null,\"fulltext\":null,\"format\":null,\"contributor\":null,\"resourcetype\":null,\"coverage\":null,\"refereed\":null,\"context\":null,\"processingchargeamount\":null,\"processingchargecurrency\":null,\"externalReference\":null,\"instance\":[],\"storagedate\":null,\"device\":null,\"size\":null,\"version\":null,\"lastmetadataupdate\":null,\"metadataversionnumber\":null,\"geolocation\":null,\"dlicollectedfrom\":[{\"id\":\"dli_________::datacite\",\"name\":\"Datasets in Datacite\",\"completionStatus\":\"complete\",\"collectionMode\":\"resolved\"}],\"completionStatus\":\"complete\"}"; - - ObjectMapper mapper = new ObjectMapper(); - mapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false); - DLIDataset dliDataset = mapper.readValue(json, DLIDataset.class); - mapper.enable(SerializationFeature.INDENT_OUTPUT); - System.out.println(mapper.writeValueAsString(dliDataset)); - } - - private ProvenaceInfo createCollectedFrom( - final String id, final String name, final String completionStatus) { - ProvenaceInfo p = new ProvenaceInfo(); - p.setId(id); - p.setName(name); - p.setCompletionStatus(completionStatus); - return p; - } - - private StructuredProperty createSP( - final String value, final String className, final String schemeName) { - StructuredProperty p = new StructuredProperty(); - p.setValue(value); - Qualifier schema = new Qualifier(); - schema.setClassname(className); - schema.setClassid(className); - schema.setSchemename(schemeName); - schema.setSchemeid(schemeName); - p.setQualifier(schema); - return p; - } -} diff --git a/dhp-workflows/dhp-actionmanager/pom.xml b/dhp-workflows/dhp-actionmanager/pom.xml index 0b4d25700..6dc33e76b 100644 --- a/dhp-workflows/dhp-actionmanager/pom.xml +++ b/dhp-workflows/dhp-actionmanager/pom.xml @@ -59,7 +59,6 @@ eu.dnetlib.dhp 
dhp-schemas - ${project.version} diff --git a/dhp-workflows/dhp-aggregation/pom.xml b/dhp-workflows/dhp-aggregation/pom.xml index cf0fa0efe..4b16d9011 100644 --- a/dhp-workflows/dhp-aggregation/pom.xml +++ b/dhp-workflows/dhp-aggregation/pom.xml @@ -35,7 +35,6 @@ eu.dnetlib.dhp dhp-schemas - ${project.version} diff --git a/dhp-workflows/dhp-blacklist/pom.xml b/dhp-workflows/dhp-blacklist/pom.xml index 9c25f7b29..185c53ab7 100644 --- a/dhp-workflows/dhp-blacklist/pom.xml +++ b/dhp-workflows/dhp-blacklist/pom.xml @@ -17,7 +17,6 @@ eu.dnetlib.dhp dhp-schemas - ${project.version} diff --git a/dhp-workflows/dhp-broker-events/pom.xml b/dhp-workflows/dhp-broker-events/pom.xml index 75cc0ea09..4ae818e3c 100644 --- a/dhp-workflows/dhp-broker-events/pom.xml +++ b/dhp-workflows/dhp-broker-events/pom.xml @@ -44,7 +44,6 @@ eu.dnetlib.dhp dhp-schemas - ${project.version} eu.dnetlib diff --git a/dhp-workflows/dhp-dedup-openaire/pom.xml b/dhp-workflows/dhp-dedup-openaire/pom.xml index 03ddbcf4c..f76c9656b 100644 --- a/dhp-workflows/dhp-dedup-openaire/pom.xml +++ b/dhp-workflows/dhp-dedup-openaire/pom.xml @@ -58,7 +58,6 @@ eu.dnetlib.dhp dhp-schemas - ${project.version} com.arakelian diff --git a/dhp-workflows/dhp-dedup-scholexplorer/pom.xml b/dhp-workflows/dhp-dedup-scholexplorer/pom.xml index aa4070b01..7e8c7bf60 100644 --- a/dhp-workflows/dhp-dedup-scholexplorer/pom.xml +++ b/dhp-workflows/dhp-dedup-scholexplorer/pom.xml @@ -59,7 +59,6 @@ eu.dnetlib.dhp dhp-schemas - ${project.version} diff --git a/dhp-workflows/dhp-doiboost/pom.xml b/dhp-workflows/dhp-doiboost/pom.xml index 624dd7b31..f9f932909 100644 --- a/dhp-workflows/dhp-doiboost/pom.xml +++ b/dhp-workflows/dhp-doiboost/pom.xml @@ -66,7 +66,6 @@ eu.dnetlib.dhp dhp-schemas - ${project.version} com.jayway.jsonpath diff --git a/dhp-workflows/dhp-enrichment/pom.xml b/dhp-workflows/dhp-enrichment/pom.xml index d0ab77cc5..4bac77c5d 100644 --- a/dhp-workflows/dhp-enrichment/pom.xml +++ b/dhp-workflows/dhp-enrichment/pom.xml @@ 
-27,7 +27,6 @@ eu.dnetlib.dhp dhp-schemas - ${project.version} org.apache.spark diff --git a/dhp-workflows/dhp-graph-mapper/pom.xml b/dhp-workflows/dhp-graph-mapper/pom.xml index 3e1d84c01..346a54cc3 100644 --- a/dhp-workflows/dhp-graph-mapper/pom.xml +++ b/dhp-workflows/dhp-graph-mapper/pom.xml @@ -81,7 +81,6 @@ eu.dnetlib.dhp dhp-schemas - ${project.version} com.jayway.jsonpath diff --git a/dhp-workflows/dhp-graph-provision-scholexplorer/pom.xml b/dhp-workflows/dhp-graph-provision-scholexplorer/pom.xml index b287e9c88..3420f6607 100644 --- a/dhp-workflows/dhp-graph-provision-scholexplorer/pom.xml +++ b/dhp-workflows/dhp-graph-provision-scholexplorer/pom.xml @@ -60,7 +60,6 @@ eu.dnetlib.dhp dhp-schemas - ${project.version} org.apache.httpcomponents diff --git a/dhp-workflows/dhp-graph-provision/pom.xml b/dhp-workflows/dhp-graph-provision/pom.xml index 0d44d8e5e..6446a542e 100644 --- a/dhp-workflows/dhp-graph-provision/pom.xml +++ b/dhp-workflows/dhp-graph-provision/pom.xml @@ -164,7 +164,6 @@ eu.dnetlib.dhp dhp-schemas - ${project.version} diff --git a/pom.xml b/pom.xml index 3e0626aed..789903586 100644 --- a/pom.xml +++ b/pom.xml @@ -19,7 +19,6 @@ dhp-build - dhp-schemas dhp-common dhp-workflows @@ -121,6 +120,11 @@ + + eu.dnetlib.dhp + dhp-schemas + ${dhp.schemas.version} + org.apache.hadoop hadoop-hdfs @@ -692,6 +696,7 @@ 3.6.0 2.22.2 2.0.1 + 2.2.4 cdh5.9.2 2.6.0-${dhp.cdh.version} 4.1.0-${dhp.cdh.version} From dd2e0a81f4a6f8ecefba096c324f73fac8f2ebaa Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Tue, 27 Apr 2021 12:08:43 +0200 Subject: [PATCH 03/16] added dnet45-bootstrap-snapshot and dnet45-bootstrap-release repositories --- pom.xml | 48 ++++++++++++++++++++++++++++++++---------------- 1 file changed, 32 insertions(+), 16 deletions(-) diff --git a/pom.xml b/pom.xml index 789903586..5791ba5e7 100644 --- a/pom.xml +++ b/pom.xml @@ -59,31 +59,47 @@ - cloudera - Cloudera Repository - https://repository.cloudera.com/artifactory/cloudera-repos + 
dnet45-snapshots + D-Net 45 snapshots + https://maven.d4science.org/nexus/content/repositories/dnet45-snapshots + default + + true + + + false + + + + dnet45-bootstrap-snapshot + D-Net 45 Bootstrap Snapshot + https://maven.d4science.org/nexus/content/repositories/dnet45-bootstrap-snapshot/ + + false + + + true + + default + + + dnet45-bootstrap-release + D-Net 45 Bootstrap Release + https://maven.d4science.org/nexus/content/repositories/dnet45-bootstrap-release/ true false + default - dnet45-releases-old - http://maven.research-infrastructures.eu/nexus/content/repositories/dnet45-releases + cloudera + Cloudera Repository + https://repository.cloudera.com/artifactory/cloudera-repos - false - - - false - - - - dnet45-snapshots-old - http://maven.research-infrastructures.eu/nexus/content/repositories/dnet45-snapshots - - false + true false From e6075bb917fb47b1f6cb7e3d7f5bb62cd4017987 Mon Sep 17 00:00:00 2001 From: Alessia Bardi Date: Tue, 27 Apr 2021 15:15:08 +0200 Subject: [PATCH 04/16] updated json schema for results - added instances and accessright definition --- .../dump/complete/schema/result_schema.json | 74 ++++++++++++++++++- 1 file changed, 72 insertions(+), 2 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/complete/schema/result_schema.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/complete/schema/result_schema.json index 867fd5a77..a5d36afd2 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/complete/schema/result_schema.json +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/complete/schema/result_schema.json @@ -1,6 +1,23 @@ { "$schema": "http://json-schema.org/draft-07/schema#", "definitions": { + "AccessRight":{ + "type":"object", + "properties":{ + "code": { + "type": "string", + "description": "COAR access mode code: 
http://vocabularies.coar-repositories.org/documentation/access_rights/" + }, + "label": { + "type": "string", + "description": "Label for the access mode" + }, + "scheme": { + "type": "string", + "description": "Scheme of reference for access right code. Always set to COAR access rights vocabulary: http://vocabularies.coar-repositories.org/documentation/access_rights/" + } + } + }, "ControlledField": { "type": "object", "properties": { @@ -266,6 +283,59 @@ ] } }, + "instance":{ + "description":"Each instance is one specific materialisation or version of the result. For example, you can have one result with three instance: one is the pre-print, one is the post-print, one is te published version", + "type":"array", + "items":{ + "type":"object", + "properties":{ + "accessright":{ + "allOf":[ + { + "$ref":"#/definitions/AccessRight" + }, + { + "description":"The accessright of this instance. One result may have multiple instances, each with a different accessright" + } + ] + }, + "articleprocessingcharge":{ + "description": "The money spent to make this book or article available in Open Access. Source for this information is the OpenAPC initiative.", + "type":"object", + "properties":{ + "amount":{ + "type":"string" + }, + "currency":{ + "type":"string" + } + } + }, + "license":{ + "type":"string" + }, + "publicationdate":{ + "type":"string" + }, + "refereed":{ + "description": "If this instance has been peer-reviewed or not. Allowed values are peerReviewed, nonPeerReviewed, UNKNOWN (as defined in https://api.openaire.eu/vocabularies/dnet:review_levels)", + "type":"string" + }, + "type":{ + "type":"string", + "description":"The specific sub-type of this instance (see https://api.openaire.eu/vocabularies/dnet:result_typologies following the links)" + }, + "url":{ + "description":"URLs to the instance. They may link to the actual full-text or to the landing page at the hosting source. 
", + "type":"array", + "items":{ + "type":"string" + } + } + } + + } + }, "programmingLanguage": { "type": "string", "description": "Only for results with type 'software': the programming language" @@ -302,7 +372,7 @@ "subject": { "allOf": [ {"$ref": "#/definitions/ControlledField"}, - {"description": "OpenAIRE subject classification scheme (https://api.openaire.eu/vocabularies/dnet:subject_classification_typologies) and value. When the scheme is 'keyword', it means that the subject is free-text (i.e. not a term from a controlled vocabulary)."}, + {"description": "OpenAIRE subject classification scheme (https://api.openaire.eu/vocabularies/dnet:subject_classification_typologies) and value. When the scheme is 'keyword', it means that the subject is free-text (i.e. not a term from a controlled vocabulary)."} ] } } @@ -327,4 +397,4 @@ "description": "Version of the result" } } -} +} \ No newline at end of file From a801999e7575a3a8cb94a31e09c860f012d4e0e5 Mon Sep 17 00:00:00 2001 From: Alessia Bardi Date: Thu, 29 Apr 2021 12:18:42 +0200 Subject: [PATCH 05/16] fixed query for organisations' pids --- .../eu/dnetlib/dhp/oa/graph/sql/queryOrganizations.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryOrganizations.sql b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryOrganizations.sql index 938744b11..ab060ceeb 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryOrganizations.sql +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryOrganizations.sql @@ -24,7 +24,7 @@ SELECT d.officialname AS collectedfromname, o.country || '@@@dnet:countries' AS country, 'sysimport:crosswalk:entityregistry@@@dnet:provenance_actions' AS provenanceaction, - array_remove(array_agg(DISTINCT i.pid || '###' || i.issuertype), NULL) AS pid + array_remove(array_agg(DISTINCT i.pid || 
'###' || i.issuertype || '@@@' || i.issuertype), NULL) AS pid FROM dsm_organizations o LEFT OUTER JOIN dsm_datasources d ON (d.id = o.collectedfrom) LEFT OUTER JOIN dsm_organizationpids p ON (p.organization = o.id) From 1adfc41d23f1f28928a5203c1514da45a4fd6078 Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Wed, 5 May 2021 10:23:32 +0200 Subject: [PATCH 06/16] merged manually changes on stable_id for doiboost into master --- dhp-common/pom.xml | 2 +- .../doiboost/crossref/Crossref2Oaf.scala | 156 +++++++++-------- .../doiboost/crossref/CrossrefDataset.scala | 2 +- .../crossref/SparkMapDumpIntoOAF.scala | 2 +- .../dnetlib/doiboost/mag/MagDataModel.scala | 25 +-- .../mag/SparkImportMagIntoDataset.scala | 8 +- .../doiboost/mag/SparkProcessMAG.scala | 21 ++- .../dnetlib/doiboost/orcid/ORCIDToOAF.scala | 100 ++++++++--- .../orcid/SparkConvertORCIDToOAF.scala | 79 +++------ .../doiboost/uw/SparkMapUnpayWallToOAF.scala | 2 +- .../dhp/doiboost/oozie_app/workflow.xml | 163 +++++++++++------- 11 files changed, 321 insertions(+), 239 deletions(-) diff --git a/dhp-common/pom.xml b/dhp-common/pom.xml index c4c8aeb61..4b5a8f2cc 100644 --- a/dhp-common/pom.xml +++ b/dhp-common/pom.xml @@ -6,7 +6,7 @@ eu.dnetlib.dhp dhp 1.2.4-SNAPSHOT - ../ + ../pom.xml dhp-common diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala index d1ceb9a07..db28eaf7a 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala @@ -1,5 +1,6 @@ package eu.dnetlib.doiboost.crossref +import eu.dnetlib.dhp.schema.common.ModelConstants import eu.dnetlib.dhp.schema.oaf._ import eu.dnetlib.dhp.utils.DHPUtils import eu.dnetlib.doiboost.DoiBoostMappingUtil._ @@ -13,12 +14,13 @@ import org.slf4j.{Logger, LoggerFactory} import 
scala.collection.JavaConverters._ import scala.collection.mutable import scala.util.matching.Regex +import eu.dnetlib.dhp.schema.scholexplorer.OafUtils case class CrossrefDT(doi: String, json:String, timestamp: Long) {} case class mappingAffiliation(name: String) {} -case class mappingAuthor(given: Option[String], family: String, ORCID: Option[String], affiliation: Option[mappingAffiliation]) {} +case class mappingAuthor(given: Option[String], family: String, sequence:Option[String], ORCID: Option[String], affiliation: Option[mappingAffiliation]) {} case class mappingFunder(name: String, DOI: Option[String], award: Option[List[String]]) {} @@ -154,7 +156,12 @@ case object Crossref2Oaf { //Mapping Author val authorList: List[mappingAuthor] = (json \ "author").extractOrElse[List[mappingAuthor]](List()) - result.setAuthor(authorList.map(a => generateAuhtor(a.given.orNull, a.family, a.ORCID.orNull)).asJava) + + + + val sorted_list = authorList.sortWith((a:mappingAuthor, b:mappingAuthor) => a.sequence.isDefined && a.sequence.get.equalsIgnoreCase("first")) + + result.setAuthor(sorted_list.zipWithIndex.map{case (a, index) => generateAuhtor(a.given.orNull, a.family, a.ORCID.orNull, index)}.asJava) // Mapping instance val instance = new Instance() @@ -170,14 +177,14 @@ case object Crossref2Oaf { if(has_review != JNothing) { instance.setRefereed( - createQualifier("0001", "peerReviewed", "dnet:review_levels", "dnet:review_levels")) + OafUtils.createQualifier("0001", "peerReviewed", ModelConstants.DNET_REVIEW_LEVELS, ModelConstants.DNET_REVIEW_LEVELS)) } instance.setAccessright(getRestrictedQualifier()) result.setInstance(List(instance).asJava) - instance.setInstancetype(createQualifier(cobjCategory.substring(0, 4), cobjCategory.substring(5), "dnet:publication_resource", "dnet:publication_resource")) - result.setResourcetype(createQualifier(cobjCategory.substring(0, 4),"dnet:dataCite_resource")) + instance.setInstancetype(OafUtils.createQualifier(cobjCategory.substring(0, 4), 
cobjCategory.substring(5), ModelConstants.DNET_PUBLICATION_RESOURCE, ModelConstants.DNET_PUBLICATION_RESOURCE)) + result.setResourcetype(OafUtils.createQualifier(cobjCategory.substring(0, 4),"dnet:dataCite_resource")) instance.setCollectedfrom(createCrossrefCollectedFrom()) if (StringUtils.isNotBlank(issuedDate)) { @@ -194,13 +201,14 @@ case object Crossref2Oaf { } - def generateAuhtor(given: String, family: String, orcid: String): Author = { + def generateAuhtor(given: String, family: String, orcid: String, index:Int): Author = { val a = new Author a.setName(given) a.setSurname(family) a.setFullname(s"$given $family") + a.setRank(index+1) if (StringUtils.isNotBlank(orcid)) - a.setPid(List(createSP(orcid, ORCID_PENDING, PID_TYPES, generateDataInfo())).asJava) + a.setPid(List(createSP(orcid, ModelConstants.ORCID_PENDING, ModelConstants.DNET_PID_TYPES, generateDataInfo())).asJava) a } @@ -221,7 +229,7 @@ case object Crossref2Oaf { val result = generateItemFromType(objectType, objectSubType) if (result == null) return List() - val cOBJCategory = mappingCrossrefSubType.getOrElse(objectType, mappingCrossrefSubType.getOrElse(objectSubType, "0038 Other literature type")); + val cOBJCategory = mappingCrossrefSubType.getOrElse(objectType, mappingCrossrefSubType.getOrElse(objectSubType, "0038 Other literature type")) mappingResult(result, json, cOBJCategory) @@ -299,77 +307,77 @@ case object Crossref2Oaf { if (funders != null) - funders.foreach(funder => { - if (funder.DOI.isDefined && funder.DOI.get.nonEmpty) { - funder.DOI.get match { - case "10.13039/100010663" | - "10.13039/100010661" | - "10.13039/501100007601" | - "10.13039/501100000780" | - "10.13039/100010665" => generateSimpleRelationFromAward(funder, "corda__h2020", extractECAward) - case "10.13039/100011199" | - "10.13039/100004431" | - "10.13039/501100004963" | - "10.13039/501100000780" => generateSimpleRelationFromAward(funder, "corda_______", extractECAward) - case "10.13039/501100000781" => 
generateSimpleRelationFromAward(funder, "corda_______", extractECAward) - generateSimpleRelationFromAward(funder, "corda__h2020", extractECAward) - case "10.13039/100000001" => generateSimpleRelationFromAward(funder, "nsf_________", a => a) - case "10.13039/501100001665" => generateSimpleRelationFromAward(funder, "anr_________", a => a) - case "10.13039/501100002341" => generateSimpleRelationFromAward(funder, "aka_________", a => a) - case "10.13039/501100001602" => generateSimpleRelationFromAward(funder, "aka_________", a => a.replace("SFI", "")) - case "10.13039/501100000923" => generateSimpleRelationFromAward(funder, "arc_________", a => a) - case "10.13039/501100000038"=> val targetId = getProjectId("nserc_______" , "1e5e62235d094afd01cd56e65112fc63") - queue += generateRelation(sourceId, targetId, "isProducedBy" ) - queue += generateRelation(targetId, sourceId, "produces" ) - case "10.13039/501100000155"=> val targetId = getProjectId("sshrc_______" , "1e5e62235d094afd01cd56e65112fc63") - queue += generateRelation(sourceId,targetId, "isProducedBy" ) - queue += generateRelation(targetId,sourceId, "produces" ) - case "10.13039/501100000024"=> val targetId = getProjectId("cihr________" , "1e5e62235d094afd01cd56e65112fc63") - queue += generateRelation(sourceId,targetId, "isProducedBy" ) - queue += generateRelation(targetId,sourceId, "produces" ) - case "10.13039/501100002848" => generateSimpleRelationFromAward(funder, "conicytf____", a => a) - case "10.13039/501100003448" => generateSimpleRelationFromAward(funder, "gsrt________", extractECAward) - case "10.13039/501100010198" => generateSimpleRelationFromAward(funder, "sgov________", a=>a) - case "10.13039/501100004564" => generateSimpleRelationFromAward(funder, "mestd_______", extractECAward) - case "10.13039/501100003407" => generateSimpleRelationFromAward(funder, "miur________", a=>a) - val targetId = getProjectId("miur________" , "1e5e62235d094afd01cd56e65112fc63") - queue += generateRelation(sourceId,targetId, 
"isProducedBy" ) - queue += generateRelation(targetId,sourceId, "produces" ) - case "10.13039/501100006588" | - "10.13039/501100004488" => generateSimpleRelationFromAward(funder, "irb_hr______", a=>a.replaceAll("Project No.", "").replaceAll("HRZZ-","") ) - case "10.13039/501100006769"=> generateSimpleRelationFromAward(funder, "rsf_________", a=>a) - case "10.13039/501100001711"=> generateSimpleRelationFromAward(funder, "snsf________", snsfRule) - case "10.13039/501100004410"=> generateSimpleRelationFromAward(funder, "tubitakf____", a =>a) - case "10.10.13039/100004440"=> generateSimpleRelationFromAward(funder, "wt__________", a =>a) - case "10.13039/100004440"=> val targetId = getProjectId("wt__________" , "1e5e62235d094afd01cd56e65112fc63") - queue += generateRelation(sourceId,targetId, "isProducedBy" ) - queue += generateRelation(targetId,sourceId, "produces" ) + funders.foreach(funder => { + if (funder.DOI.isDefined && funder.DOI.get.nonEmpty) { + funder.DOI.get match { + case "10.13039/100010663" | + "10.13039/100010661" | + "10.13039/501100007601" | + "10.13039/501100000780" | + "10.13039/100010665" => generateSimpleRelationFromAward(funder, "corda__h2020", extractECAward) + case "10.13039/100011199" | + "10.13039/100004431" | + "10.13039/501100004963" | + "10.13039/501100000780" => generateSimpleRelationFromAward(funder, "corda_______", extractECAward) + case "10.13039/501100000781" => generateSimpleRelationFromAward(funder, "corda_______", extractECAward) + generateSimpleRelationFromAward(funder, "corda__h2020", extractECAward) + case "10.13039/100000001" => generateSimpleRelationFromAward(funder, "nsf_________", a => a) + case "10.13039/501100001665" => generateSimpleRelationFromAward(funder, "anr_________", a => a) + case "10.13039/501100002341" => generateSimpleRelationFromAward(funder, "aka_________", a => a) + case "10.13039/501100001602" => generateSimpleRelationFromAward(funder, "aka_________", a => a.replace("SFI", "")) + case "10.13039/501100000923" 
=> generateSimpleRelationFromAward(funder, "arc_________", a => a) + case "10.13039/501100000038"=> val targetId = getProjectId("nserc_______" , "1e5e62235d094afd01cd56e65112fc63") + queue += generateRelation(sourceId, targetId, "isProducedBy" ) + queue += generateRelation(targetId, sourceId, "produces" ) + case "10.13039/501100000155"=> val targetId = getProjectId("sshrc_______" , "1e5e62235d094afd01cd56e65112fc63") + queue += generateRelation(sourceId,targetId, "isProducedBy" ) + queue += generateRelation(targetId,sourceId, "produces" ) + case "10.13039/501100000024"=> val targetId = getProjectId("cihr________" , "1e5e62235d094afd01cd56e65112fc63") + queue += generateRelation(sourceId,targetId, "isProducedBy" ) + queue += generateRelation(targetId,sourceId, "produces" ) + case "10.13039/501100002848" => generateSimpleRelationFromAward(funder, "conicytf____", a => a) + case "10.13039/501100003448" => generateSimpleRelationFromAward(funder, "gsrt________", extractECAward) + case "10.13039/501100010198" => generateSimpleRelationFromAward(funder, "sgov________", a=>a) + case "10.13039/501100004564" => generateSimpleRelationFromAward(funder, "mestd_______", extractECAward) + case "10.13039/501100003407" => generateSimpleRelationFromAward(funder, "miur________", a=>a) + val targetId = getProjectId("miur________" , "1e5e62235d094afd01cd56e65112fc63") + queue += generateRelation(sourceId,targetId, "isProducedBy" ) + queue += generateRelation(targetId,sourceId, "produces" ) + case "10.13039/501100006588" | + "10.13039/501100004488" => generateSimpleRelationFromAward(funder, "irb_hr______", a=>a.replaceAll("Project No.", "").replaceAll("HRZZ-","") ) + case "10.13039/501100006769"=> generateSimpleRelationFromAward(funder, "rsf_________", a=>a) + case "10.13039/501100001711"=> generateSimpleRelationFromAward(funder, "snsf________", snsfRule) + case "10.13039/501100004410"=> generateSimpleRelationFromAward(funder, "tubitakf____", a =>a) + case "10.10.13039/100004440"=> 
generateSimpleRelationFromAward(funder, "wt__________", a =>a) + case "10.13039/100004440"=> val targetId = getProjectId("wt__________" , "1e5e62235d094afd01cd56e65112fc63") + queue += generateRelation(sourceId,targetId, "isProducedBy" ) + queue += generateRelation(targetId,sourceId, "produces" ) - case _ => logger.debug("no match for "+funder.DOI.get ) + case _ => logger.debug("no match for "+funder.DOI.get ) + } + + + } else { + funder.name match { + case "European Union’s Horizon 2020 research and innovation program" => generateSimpleRelationFromAward(funder, "corda__h2020", extractECAward) + case "European Union's" => + generateSimpleRelationFromAward(funder, "corda__h2020", extractECAward) + generateSimpleRelationFromAward(funder, "corda_______", extractECAward) + case "The French National Research Agency (ANR)" | + "The French National Research Agency" => generateSimpleRelationFromAward(funder, "anr_________", a => a) + case "CONICYT, Programa de Formación de Capital Humano Avanzado" => generateSimpleRelationFromAward(funder, "conicytf____", extractECAward) + case "Wellcome Trust Masters Fellowship" => val targetId = getProjectId("wt__________", "1e5e62235d094afd01cd56e65112fc63") + queue += generateRelation(sourceId, targetId, "isProducedBy" ) + queue += generateRelation(targetId, sourceId, "produces" ) + case _ => logger.debug("no match for "+funder.name ) + + } } - - } else { - funder.name match { - case "European Union’s Horizon 2020 research and innovation program" => generateSimpleRelationFromAward(funder, "corda__h2020", extractECAward) - case "European Union's" => - generateSimpleRelationFromAward(funder, "corda__h2020", extractECAward) - generateSimpleRelationFromAward(funder, "corda_______", extractECAward) - case "The French National Research Agency (ANR)" | - "The French National Research Agency" => generateSimpleRelationFromAward(funder, "anr_________", a => a) - case "CONICYT, Programa de Formación de Capital Humano Avanzado" => 
generateSimpleRelationFromAward(funder, "conicytf____", extractECAward) - case "Wellcome Trust Masters Fellowship" => val targetId = getProjectId("wt__________", "1e5e62235d094afd01cd56e65112fc63") - queue += generateRelation(sourceId, targetId, "isProducedBy" ) - queue += generateRelation(targetId, sourceId, "produces" ) - case _ => logger.debug("no match for "+funder.name ) - - } } - - } - ) + ) queue.toList } @@ -465,4 +473,4 @@ case object Crossref2Oaf { null } -} +} \ No newline at end of file diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/CrossrefDataset.scala b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/CrossrefDataset.scala index 4a39a2987..235305fb8 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/CrossrefDataset.scala +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/CrossrefDataset.scala @@ -96,4 +96,4 @@ object CrossrefDataset { } -} +} \ No newline at end of file diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/SparkMapDumpIntoOAF.scala b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/SparkMapDumpIntoOAF.scala index 0272cb1a6..0036459bf 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/SparkMapDumpIntoOAF.scala +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/SparkMapDumpIntoOAF.scala @@ -52,4 +52,4 @@ object SparkMapDumpIntoOAF { } -} +} \ No newline at end of file diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/mag/MagDataModel.scala b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/mag/MagDataModel.scala index 7bb4686cf..09b741b47 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/mag/MagDataModel.scala +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/mag/MagDataModel.scala @@ -1,6 +1,7 @@ package eu.dnetlib.doiboost.mag +import 
eu.dnetlib.dhp.schema.common.ModelConstants import eu.dnetlib.dhp.schema.oaf.{Instance, Journal, Publication, StructuredProperty} import eu.dnetlib.doiboost.DoiBoostMappingUtil import org.json4s @@ -31,11 +32,11 @@ case class MagAffiliation(AffiliationId: Long, Rank: Int, NormalizedName: String case class MagPaperAuthorAffiliation(PaperId: Long, AuthorId: Long, AffiliationId: Option[Long], AuthorSequenceNumber: Int, OriginalAuthor: String, OriginalAffiliation: String) {} -case class MagAuthorAffiliation(author: MagAuthor, affiliation:String) +case class MagAuthorAffiliation(author: MagAuthor, affiliation:String, sequenceNumber:Int) case class MagPaperWithAuthorList(PaperId: Long, authors: List[MagAuthorAffiliation]) {} -case class MagPaperAuthorDenormalized(PaperId: Long, author: MagAuthor, affiliation:String) {} +case class MagPaperAuthorDenormalized(PaperId: Long, author: MagAuthor, affiliation:String, sequenceNumber:Int) {} case class MagPaperUrl(PaperId: Long, SourceType: Option[Int], SourceUrl: Option[String], LanguageCode: Option[String]) {} @@ -202,12 +203,12 @@ case object ConversionUtil { val authorsOAF = authors.authors.map { f: MagAuthorAffiliation => val a: eu.dnetlib.dhp.schema.oaf.Author = new eu.dnetlib.dhp.schema.oaf.Author - - a.setFullname(f.author.DisplayName.get) - + a.setRank(f.sequenceNumber) + if (f.author.DisplayName.isDefined) + a.setFullname(f.author.DisplayName.get) if(f.affiliation!= null) a.setAffiliation(List(asField(f.affiliation)).asJava) - a.setPid(List(createSP(s"https://academic.microsoft.com/#/detail/${f.author.AuthorId}", "URL", PID_TYPES)).asJava) + a.setPid(List(createSP(s"https://academic.microsoft.com/#/detail/${f.author.AuthorId}", "URL", ModelConstants.DNET_PID_TYPES)).asJava) a } pub.setAuthor(authorsOAF.asJava) @@ -274,7 +275,7 @@ case object ConversionUtil { a.setAffiliation(List(asField(f.affiliation)).asJava) - a.setPid(List(createSP(s"https://academic.microsoft.com/#/detail/${f.author.AuthorId}", "URL", 
PID_TYPES)).asJava) + a.setPid(List(createSP(s"https://academic.microsoft.com/#/detail/${f.author.AuthorId}", "URL", ModelConstants.DNET_PID_TYPES)).asJava) a @@ -305,12 +306,12 @@ case object ConversionUtil { for {(k: String, v: List[Int]) <- iid} { v.foreach(item => res(item) = k) } - (0 until idl).foreach(i => { - if (res(i) == null) - res(i) = "" - }) + (0 until idl).foreach(i => { + if (res(i) == null) + res(i) = "" + }) return res.mkString(" ") } "" } -} +} \ No newline at end of file diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/mag/SparkImportMagIntoDataset.scala b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/mag/SparkImportMagIntoDataset.scala index 88fee72b7..de4e07655 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/mag/SparkImportMagIntoDataset.scala +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/mag/SparkImportMagIntoDataset.scala @@ -6,10 +6,10 @@ import org.apache.spark.SparkConf import org.apache.spark.sql.{SaveMode, SparkSession} import org.apache.spark.sql.types._ import org.slf4j.{Logger, LoggerFactory} -import org.apache.spark.sql.functions._ object SparkImportMagIntoDataset { val datatypedict = Map( + "bool" -> BooleanType, "int" -> IntegerType, "uint" -> IntegerType, "long" -> LongType, @@ -25,11 +25,10 @@ object SparkImportMagIntoDataset { "AuthorExtendedAttributes" -> Tuple2("mag/AuthorExtendedAttributes.txt", Seq("AuthorId:long", "AttributeType:int", "AttributeValue:string")), "Authors" -> Tuple2("mag/Authors.txt", Seq("AuthorId:long", "Rank:uint", "NormalizedName:string", "DisplayName:string", "LastKnownAffiliationId:long?", "PaperCount:long", "PaperFamilyCount:long", "CitationCount:long", "CreatedDate:DateTime")), "ConferenceInstances" -> Tuple2("mag/ConferenceInstances.txt", Seq("ConferenceInstanceId:long", "NormalizedName:string", "DisplayName:string", "ConferenceSeriesId:long", "Location:string", "OfficialUrl:string", "StartDate:DateTime?", 
"EndDate:DateTime?", "AbstractRegistrationDate:DateTime?", "SubmissionDeadlineDate:DateTime?", "NotificationDueDate:DateTime?", "FinalVersionDueDate:DateTime?", "PaperCount:long", "PaperFamilyCount:long" ,"CitationCount:long", "Latitude:float?", "Longitude:float?", "CreatedDate:DateTime")), - "ConferenceSeries" -> Tuple2("mag/ConferenceSeries.txt", Seq("ConferenceSeriesId:long", "Rank:uint", "NormalizedName:string", "DisplayName:string", "PaperCount:long", "CitationCount:long", "CreatedDate:DateTime")), + "ConferenceSeries" -> Tuple2("mag/ConferenceSeries.txt", Seq("ConferenceSeriesId:long", "Rank:uint", "NormalizedName:string", "DisplayName:string", "PaperCount:long", "PaperFamilyCount:long", "CitationCount:long", "CreatedDate:DateTime")), "EntityRelatedEntities" -> Tuple2("advanced/EntityRelatedEntities.txt", Seq("EntityId:long", "EntityType:string", "RelatedEntityId:long", "RelatedEntityType:string", "RelatedType:int", "Score:float")), "FieldOfStudyChildren" -> Tuple2("advanced/FieldOfStudyChildren.txt", Seq("FieldOfStudyId:long", "ChildFieldOfStudyId:long")), "FieldOfStudyExtendedAttributes" -> Tuple2("advanced/FieldOfStudyExtendedAttributes.txt", Seq("FieldOfStudyId:long", "AttributeType:int", "AttributeValue:string")), - // ['FieldOfStudyId:long', 'Rank:uint', 'NormalizedName:string', 'DisplayName:string', 'MainType:string', 'Level:int', 'PaperCount:long', 'PaperFamilyCount:long', 'CitationCount:long', 'CreatedDate:DateTime'] "FieldsOfStudy" -> Tuple2("advanced/FieldsOfStudy.txt", Seq("FieldOfStudyId:long", "Rank:uint", "NormalizedName:string", "DisplayName:string", "MainType:string", "Level:int", "PaperCount:long", "PaperFamilyCount:long", "CitationCount:long", "CreatedDate:DateTime")), "Journals" -> Tuple2("mag/Journals.txt", Seq("JournalId:long", "Rank:uint", "NormalizedName:string", "DisplayName:string", "Issn:string", "Publisher:string", "Webpage:string", "PaperCount:long", "PaperFamilyCount:long" ,"CitationCount:long", "CreatedDate:DateTime")), 
"PaperAbstractsInvertedIndex" -> Tuple2("nlp/PaperAbstractsInvertedIndex.txt.*", Seq("PaperId:long", "IndexedAbstract:string")), @@ -37,6 +36,7 @@ object SparkImportMagIntoDataset { "PaperCitationContexts" -> Tuple2("nlp/PaperCitationContexts.txt", Seq("PaperId:long", "PaperReferenceId:long", "CitationContext:string")), "PaperExtendedAttributes" -> Tuple2("mag/PaperExtendedAttributes.txt", Seq("PaperId:long", "AttributeType:int", "AttributeValue:string")), "PaperFieldsOfStudy" -> Tuple2("advanced/PaperFieldsOfStudy.txt", Seq("PaperId:long", "FieldOfStudyId:long", "Score:float")), + "PaperMeSH" -> Tuple2("advanced/PaperMeSH.txt", Seq("PaperId:long", "DescriptorUI:string", "DescriptorName:string", "QualifierUI:string", "QualifierName:string", "IsMajorTopic:bool")), "PaperRecommendations" -> Tuple2("advanced/PaperRecommendations.txt", Seq("PaperId:long", "RecommendedPaperId:long", "Score:float")), "PaperReferences" -> Tuple2("mag/PaperReferences.txt", Seq("PaperId:long", "PaperReferenceId:long")), "PaperResources" -> Tuple2("mag/PaperResources.txt", Seq("PaperId:long", "ResourceType:int", "ResourceUrl:string", "SourceUrl:string", "RelationshipType:int")), @@ -91,4 +91,4 @@ object SparkImportMagIntoDataset { } -} +} \ No newline at end of file diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/mag/SparkProcessMAG.scala b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/mag/SparkProcessMAG.scala index 780e65c1e..0dcef4176 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/mag/SparkProcessMAG.scala +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/mag/SparkProcessMAG.scala @@ -58,16 +58,16 @@ object SparkProcessMAG { val paperAuthorAffiliation = spark.read.load(s"$sourcePath/PaperAuthorAffiliations").as[MagPaperAuthorAffiliation] paperAuthorAffiliation.joinWith(authors, paperAuthorAffiliation("AuthorId").equalTo(authors("AuthorId"))) - .map { case (a: MagPaperAuthorAffiliation, b: MagAuthor) => 
(a.AffiliationId, MagPaperAuthorDenormalized(a.PaperId, b, null)) } + .map { case (a: MagPaperAuthorAffiliation, b: MagAuthor) => (a.AffiliationId, MagPaperAuthorDenormalized(a.PaperId, b, null, a.AuthorSequenceNumber)) } .joinWith(affiliation, affiliation("AffiliationId").equalTo(col("_1")), "left") .map(s => { val mpa = s._1._2 val af = s._2 if (af != null) { - MagPaperAuthorDenormalized(mpa.PaperId, mpa.author, af.DisplayName) + MagPaperAuthorDenormalized(mpa.PaperId, mpa.author, af.DisplayName, mpa.sequenceNumber) } else mpa - }).groupBy("PaperId").agg(collect_list(struct($"author", $"affiliation")).as("authors")) + }).groupBy("PaperId").agg(collect_list(struct($"author", $"affiliation", $"sequenceNumber")).as("authors")) .write.mode(SaveMode.Overwrite).save(s"$workingPath/merge_step_1_paper_authors") logger.info("Phase 4) create First Version of publication Entity with Paper Journal and Authors") @@ -86,7 +86,7 @@ object SparkProcessMAG { var magPubs: Dataset[(String, Publication)] = spark.read.load(s"$workingPath/merge_step_2").as[Publication] - .map(p => (ConversionUtil.extractMagIdentifier(p.getOriginalId.asScala), p)).as[(String, Publication)] + .map(p => (ConversionUtil.extractMagIdentifier(p.getOriginalId.asScala), p)).as[(String, Publication)] val conference = spark.read.load(s"$sourcePath/ConferenceInstances") @@ -115,10 +115,9 @@ object SparkProcessMAG { .save(s"$workingPath/merge_step_3") -// logger.info("Phase 6) Enrich Publication with description") -// val pa = spark.read.load(s"${parser.get("sourcePath")}/PaperAbstractsInvertedIndex").as[MagPaperAbstract] -// pa.map(ConversionUtil.transformPaperAbstract).write.mode(SaveMode.Overwrite).save(s"${parser.get("targetPath")}/PaperAbstract") - + // logger.info("Phase 6) Enrich Publication with description") + // val pa = spark.read.load(s"${parser.get("sourcePath")}/PaperAbstractsInvertedIndex").as[MagPaperAbstract] + // 
pa.map(ConversionUtil.transformPaperAbstract).write.mode(SaveMode.Overwrite).save(s"${parser.get("targetPath")}/PaperAbstract") val paperAbstract = spark.read.load((s"$workingPath/PaperAbstract")).as[MagPaperAbstract] @@ -127,7 +126,7 @@ object SparkProcessMAG { magPubs.joinWith(paperAbstract, col("_1").equalTo(paperAbstract("PaperId")), "left") .map(item => ConversionUtil.updatePubsWithDescription(item) - ).write.mode(SaveMode.Overwrite).save(s"$workingPath/merge_step_4") + ).write.mode(SaveMode.Overwrite).save(s"$workingPath/merge_step_4") logger.info("Phase 7) Enrich Publication with FieldOfStudy") @@ -153,9 +152,9 @@ object SparkProcessMAG { val s:RDD[Publication] = spark.read.load(s"$workingPath/mag_publication").as[Publication] .map(p=>Tuple2(p.getId, p)).rdd.reduceByKey((a:Publication, b:Publication) => ConversionUtil.mergePublication(a,b)) - .map(_._2) + .map(_._2) spark.createDataset(s).as[Publication].write.mode(SaveMode.Overwrite).save(s"$targetPath/magPublication") } -} +} \ No newline at end of file diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/ORCIDToOAF.scala b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/ORCIDToOAF.scala index e9773dbbb..e4b808085 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/ORCIDToOAF.scala +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/ORCIDToOAF.scala @@ -1,17 +1,25 @@ package eu.dnetlib.doiboost.orcid import com.fasterxml.jackson.databind.ObjectMapper +import eu.dnetlib.dhp.schema.common.ModelConstants import eu.dnetlib.dhp.schema.oaf.{Author, DataInfo, Publication} -import eu.dnetlib.dhp.schema.orcid.OrcidDOI +import eu.dnetlib.dhp.schema.orcid.{AuthorData, OrcidDOI} import eu.dnetlib.doiboost.DoiBoostMappingUtil -import eu.dnetlib.doiboost.DoiBoostMappingUtil.{ORCID, PID_TYPES, createSP, generateDataInfo, generateIdentifier} +import eu.dnetlib.doiboost.DoiBoostMappingUtil.{createSP, generateDataInfo} import 
org.apache.commons.lang.StringUtils import org.slf4j.{Logger, LoggerFactory} import scala.collection.JavaConverters._ +import org.json4s +import org.json4s.DefaultFormats +import org.json4s.JsonAST._ +import org.json4s.jackson.JsonMethods._ -case class ORCIDItem(oid:String,name:String,surname:String,creditName:String,errorCode:String){} +case class ORCIDItem(doi:String, authors:List[OrcidAuthor]){} +case class OrcidAuthor(oid:String, name:Option[String], surname:Option[String], creditName:Option[String], otherNames:Option[List[String]], errorCode:Option[String]){} +case class OrcidWork(oid:String, doi:String) + @@ -44,17 +52,65 @@ object ORCIDToOAF { } - def convertTOOAF(input:OrcidDOI) :Publication = { - val doi = input.getDoi + def strValid(s:Option[String]) : Boolean = { + s.isDefined && s.get.nonEmpty + } + + def authorValid(author:OrcidAuthor): Boolean ={ + if (strValid(author.name) && strValid(author.surname)) { + return true + } + if (strValid(author.surname)) { + return true + } + if (strValid(author.creditName)) { + return true + + } + false + } + + + def extractDOIWorks(input:String): List[OrcidWork] = { + implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats + lazy val json: json4s.JValue = parse(input) + + val oid = (json \ "workDetail" \"oid").extractOrElse[String](null) + if (oid == null) + return List() + val doi:List[(String, String)] = for { + JObject(extIds) <- json \ "workDetail" \"extIds" + JField("type", JString(typeValue)) <- extIds + JField("value", JString(value)) <- extIds + if "doi".equalsIgnoreCase(typeValue) + } yield (typeValue, value) + if (doi.nonEmpty) { + return doi.map(l =>OrcidWork(oid, l._2)) + } + List() + } + + def convertORCIDAuthor(input:String): OrcidAuthor = { + implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats + lazy val json: json4s.JValue = parse(input) + + (json \"authorData" ).extractOrElse[OrcidAuthor](null) + } + + + def convertTOOAF(input:ORCIDItem) :Publication = { + 
val doi = input.doi val pub:Publication = new Publication - pub.setPid(List(createSP(doi.toLowerCase, "doi", PID_TYPES)).asJava) + pub.setPid(List(createSP(doi.toLowerCase, "doi", ModelConstants.DNET_PID_TYPES)).asJava) pub.setDataInfo(generateDataInfo()) - pub.setId(generateIdentifier(pub, doi.toLowerCase)) + + pub.setId(DoiBoostMappingUtil.generateIdentifier(pub, doi.toLowerCase)) + try{ - val l:List[Author]= input.getAuthors.asScala.map(a=> { - generateAuthor(a.getName, a.getSurname, a.getCreditName, a.getOid) - })(collection.breakOut) + val l:List[Author]= input.authors.map(a=> { + generateAuthor(a) + })(collection.breakOut) pub.setAuthor(l.asJava) pub.setCollectedfrom(List(DoiBoostMappingUtil.createORIDCollectedFrom()).asJava) @@ -74,19 +130,23 @@ object ORCIDToOAF { di } - def generateAuthor(given: String, family: String, fullName:String, orcid: String): Author = { + def generateAuthor(o : OrcidAuthor): Author = { val a = new Author - a.setName(given) - a.setSurname(family) - if (fullName!= null && fullName.nonEmpty) - a.setFullname(fullName) - else - a.setFullname(s"$given $family") - if (StringUtils.isNotBlank(orcid)) - a.setPid(List(createSP(orcid, ORCID, PID_TYPES, generateOricPIDDatainfo())).asJava) + if (strValid(o.name)) { + a.setName(o.name.get.capitalize) + } + if (strValid(o.surname)) { + a.setSurname(o.surname.get.capitalize) + } + if(strValid(o.name) && strValid(o.surname)) + a.setFullname(s"${o.name.get.capitalize} ${o.surname.get.capitalize}") + else if (strValid(o.creditName)) + a.setFullname(o.creditName.get) + if (StringUtils.isNotBlank(o.oid)) + a.setPid(List(createSP(o.oid, ModelConstants.ORCID, ModelConstants.DNET_PID_TYPES, generateOricPIDDatainfo())).asJava) a } -} +} \ No newline at end of file diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkConvertORCIDToOAF.scala b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkConvertORCIDToOAF.scala index f1c7c58b4..025d68b90 100644 --- 
a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkConvertORCIDToOAF.scala +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkConvertORCIDToOAF.scala @@ -1,72 +1,49 @@ package eu.dnetlib.doiboost.orcid -import com.fasterxml.jackson.databind.{DeserializationFeature, ObjectMapper} import eu.dnetlib.dhp.application.ArgumentApplicationParser -import eu.dnetlib.dhp.oa.merge.AuthorMerger import eu.dnetlib.dhp.schema.oaf.Publication -import eu.dnetlib.dhp.schema.orcid.OrcidDOI -import eu.dnetlib.doiboost.mag.ConversionUtil import org.apache.commons.io.IOUtils import org.apache.spark.SparkConf import org.apache.spark.rdd.RDD -import org.apache.spark.sql.expressions.Aggregator -import org.apache.spark.sql.{Dataset, Encoder, Encoders, SaveMode, SparkSession} +import org.apache.spark.sql.functions._ +import org.apache.spark.sql._ import org.slf4j.{Logger, LoggerFactory} object SparkConvertORCIDToOAF { val logger: Logger = LoggerFactory.getLogger(SparkConvertORCIDToOAF.getClass) - def getPublicationAggregator(): Aggregator[(String, Publication), Publication, Publication] = new Aggregator[(String, Publication), Publication, Publication]{ + def run(spark:SparkSession,sourcePath:String,workingPath:String, targetPath:String):Unit = { + import spark.implicits._ + implicit val mapEncoderPubs: Encoder[Publication] = Encoders.kryo[Publication] - override def zero: Publication = new Publication() + val inputRDD:RDD[OrcidAuthor] = spark.sparkContext.textFile(s"$sourcePath/authors").map(s => ORCIDToOAF.convertORCIDAuthor(s)).filter(s => s!= null).filter(s => ORCIDToOAF.authorValid(s)) - override def reduce(b: Publication, a: (String, Publication)): Publication = { - b.mergeFrom(a._2) - b.setAuthor(AuthorMerger.mergeAuthor(a._2.getAuthor, b.getAuthor)) - if (b.getId == null) - b.setId(a._2.getId) - b - } + spark.createDataset(inputRDD).as[OrcidAuthor].write.mode(SaveMode.Overwrite).save(s"$workingPath/author") + val res = 
spark.sparkContext.textFile(s"$sourcePath/works").flatMap(s => ORCIDToOAF.extractDOIWorks(s)).filter(s => s!= null) - override def merge(wx: Publication, wy: Publication): Publication = { - wx.mergeFrom(wy) - wx.setAuthor(AuthorMerger.mergeAuthor(wy.getAuthor, wx.getAuthor)) - if(wx.getId == null && wy.getId.nonEmpty) - wx.setId(wy.getId) - wx - } - override def finish(reduction: Publication): Publication = reduction + spark.createDataset(res).as[OrcidWork].write.mode(SaveMode.Overwrite).save(s"$workingPath/works") - override def bufferEncoder: Encoder[Publication] = - Encoders.kryo(classOf[Publication]) + val authors :Dataset[OrcidAuthor] = spark.read.load(s"$workingPath/author").as[OrcidAuthor] - override def outputEncoder: Encoder[Publication] = - Encoders.kryo(classOf[Publication]) + val works :Dataset[OrcidWork] = spark.read.load(s"$workingPath/works").as[OrcidWork] + + works.joinWith(authors, authors("oid").equalTo(works("oid"))) + .map(i =>{ + val doi = i._1.doi + val author = i._2 + (doi, author) + }).groupBy(col("_1").alias("doi")) + .agg(collect_list(col("_2")).alias("authors")) + .write.mode(SaveMode.Overwrite).save(s"$workingPath/orcidworksWithAuthor") + + val dataset: Dataset[ORCIDItem] =spark.read.load(s"$workingPath/orcidworksWithAuthor").as[ORCIDItem] + + logger.info("Converting ORCID to OAF") + dataset.map(o => ORCIDToOAF.convertTOOAF(o)).write.mode(SaveMode.Overwrite).save(targetPath) } -def run(spark:SparkSession,sourcePath:String, targetPath:String):Unit = { - implicit val mapEncoderPubs: Encoder[Publication] = Encoders.kryo[Publication] - implicit val mapOrcid: Encoder[OrcidDOI] = Encoders.kryo[OrcidDOI] - implicit val tupleForJoinEncoder: Encoder[(String, Publication)] = Encoders.tuple(Encoders.STRING, mapEncoderPubs) - - val mapper = new ObjectMapper() - mapper.getDeserializationConfig.withFeatures(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES) - - val dataset:Dataset[OrcidDOI] = 
spark.createDataset(spark.sparkContext.textFile(sourcePath).map(s => mapper.readValue(s,classOf[OrcidDOI]))) - - logger.info("Converting ORCID to OAF") - dataset.map(o => ORCIDToOAF.convertTOOAF(o)).filter(p=>p!=null) - .map(d => (d.getId, d)) - .groupByKey(_._1)(Encoders.STRING) - .agg(getPublicationAggregator().toColumn) - .map(p => p._2) - .write.mode(SaveMode.Overwrite).save(targetPath) -} - def main(args: Array[String]): Unit = { - - val conf: SparkConf = new SparkConf() val parser = new ArgumentApplicationParser(IOUtils.toString(SparkConvertORCIDToOAF.getClass.getResourceAsStream("/eu/dnetlib/dhp/doiboost/convert_map_to_oaf_params.json"))) parser.parseArgument(args) @@ -78,11 +55,11 @@ def run(spark:SparkSession,sourcePath:String, targetPath:String):Unit = { .master(parser.get("master")).getOrCreate() - val sourcePath = parser.get("sourcePath") + val workingPath = parser.get("workingPath") val targetPath = parser.get("targetPath") - run(spark, sourcePath, targetPath) + run(spark, sourcePath, workingPath, targetPath) } -} +} \ No newline at end of file diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/uw/SparkMapUnpayWallToOAF.scala b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/uw/SparkMapUnpayWallToOAF.scala index a72e4b0d6..9ac6a0838 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/uw/SparkMapUnpayWallToOAF.scala +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/uw/SparkMapUnpayWallToOAF.scala @@ -40,4 +40,4 @@ object SparkMapUnpayWallToOAF { d.write.mode(SaveMode.Overwrite).save(targetPath) } -} +} \ No newline at end of file diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/oozie_app/workflow.xml b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/oozie_app/workflow.xml index 3f5805b62..77aa595f5 100644 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/oozie_app/workflow.xml +++ 
b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/oozie_app/workflow.xml @@ -54,6 +54,11 @@ + + MAGDumpPath + the MAG dump working path + + inputPathMAG the MAG working path @@ -69,6 +74,11 @@ inputPathOrcid + the ORCID input path + + + + workingPathOrcid the ORCID working path @@ -121,24 +131,27 @@ - - yarn-cluster - cluster - GenerateCrossrefDataset - eu.dnetlib.doiboost.crossref.CrossrefDataset - dhp-doiboost-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.sql.shuffle.partitions=3840 - ${sparkExtraOPT} - - --workingPath${inputPathCrossref} - --masteryarn-cluster - - - + + yarn-cluster + cluster + GenerateCrossrefDataset + eu.dnetlib.doiboost.crossref.CrossrefDataset + dhp-doiboost-${projectVersion}.jar + + --executor-memory=${sparkExecutorMemory} + --executor-cores=${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf spark.sql.shuffle.partitions=3840 + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + + --workingPath${inputPathCrossref} + --masteryarn-cluster + + + @@ -147,6 +160,43 @@ + + + + + + + + + + + + + + + + + + + yarn-cluster + cluster + Convert Mag to Dataset + eu.dnetlib.doiboost.mag.SparkImportMagIntoDataset + dhp-doiboost-${projectVersion}.jar + + --executor-memory=${sparkExecutorMemory} + --executor-cores=${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf spark.sql.shuffle.partitions=3840 + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + + 
--sourcePath${MAGDumpPath} + --targetPath${inputPathMAG}/dataset + --masteryarn-cluster + @@ -164,46 +214,15 @@ --executor-cores=${sparkExecutorCores} --driver-memory=${sparkDriverMemory} --conf spark.sql.shuffle.partitions=3840 - ${sparkExtraOPT} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --sourcePath${inputPathCrossref}/crossref_ds --targetPath${workingPath} --masteryarn-cluster - - - - - - - - - - - - - - - - - - - - yarn-cluster - cluster - Convert Mag to Dataset - eu.dnetlib.doiboost.mag.SparkImportMagIntoDataset - dhp-doiboost-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - ${sparkExtraOPT} - - --sourcePath${inputPathMAG}/input - --targetPath${inputPathMAG}/dataset - --masteryarn-cluster - @@ -216,11 +235,14 @@ eu.dnetlib.doiboost.mag.SparkProcessMAG dhp-doiboost-${projectVersion}.jar - --executor-memory=${sparkExecutorMemory} + --executor-memory=${sparkExecutorIntersectionMemory} --executor-cores=${sparkExecutorCores} --driver-memory=${sparkDriverMemory} --conf spark.sql.shuffle.partitions=3840 - ${sparkExtraOPT} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --sourcePath${inputPathMAG}/dataset --workingPath${inputPathMAG}/process @@ -245,10 +267,14 @@ --executor-cores=${sparkExecutorCores} --driver-memory=${sparkDriverMemory} --conf spark.sql.shuffle.partitions=3840 - ${sparkExtraOPT} + --conf spark.sql.shuffle.partitions=3840 + --conf spark.extraListeners=${spark2ExtraListeners} + --conf 
spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --sourcePath${inputPathUnpayWall}/uw_extracted - --targetPath${workingPath} + --targetPath${workingPath}/uwPublication --masteryarn-cluster @@ -268,10 +294,14 @@ --executor-cores=${sparkExecutorCores} --driver-memory=${sparkDriverMemory} --conf spark.sql.shuffle.partitions=3840 - ${sparkExtraOPT} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --sourcePath${inputPathOrcid} - --targetPath${workingPath} + --workingPath${workingPathOrcid} + --targetPath${workingPath}/orcidPublication --masteryarn-cluster @@ -291,11 +321,15 @@ --executor-cores=${sparkExecutorCores} --driver-memory=${sparkDriverMemory} --conf spark.sql.shuffle.partitions=3840 - ${sparkExtraOPT} + --conf spark.sql.shuffle.partitions=3840 + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --hostedByMapPath${hostedByMapPath} - --affiliationPath${inputPathMAG}/process/Affiliations - --paperAffiliationPath${inputPathMAG}/process/PaperAuthorAffiliations + --affiliationPath${inputPathMAG}/dataset/Affiliations + --paperAffiliationPath${inputPathMAG}/dataset/PaperAuthorAffiliations --workingPath${workingPath} --masteryarn-cluster @@ -316,7 +350,10 @@ --executor-cores=${sparkExecutorCores} --driver-memory=${sparkDriverMemory} --conf spark.sql.shuffle.partitions=3840 - ${sparkExtraOPT} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf 
spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --dbPublicationPath${workingPath}/doiBoostPublicationFiltered --dbDatasetPath${workingPath}/crossrefDataset From 50fc128ff7c5750f0348e0898ff815f084d609c0 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Wed, 5 May 2021 11:23:46 +0200 Subject: [PATCH 07/16] alternative way to set timeouts for the ISLookup client --- .../dhp/utils/ISLookupClientFactory.java | 33 ++++++++++--------- 1 file changed, 18 insertions(+), 15 deletions(-) diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/utils/ISLookupClientFactory.java b/dhp-common/src/main/java/eu/dnetlib/dhp/utils/ISLookupClientFactory.java index 9552eb2b3..9af390f9c 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/utils/ISLookupClientFactory.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/utils/ISLookupClientFactory.java @@ -1,11 +1,11 @@ package eu.dnetlib.dhp.utils; -import java.util.Map; - -import javax.xml.ws.BindingProvider; - +import org.apache.cxf.endpoint.Client; +import org.apache.cxf.frontend.ClientProxy; import org.apache.cxf.jaxws.JaxWsProxyFactoryBean; +import org.apache.cxf.transport.http.HTTPConduit; +import org.apache.cxf.transports.http.configuration.HTTPClientPolicy; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -31,20 +31,23 @@ public class ISLookupClientFactory { final T service = (T) jaxWsProxyFactory.create(); - if (service instanceof BindingProvider) { + Client client = ClientProxy.getClient(service); + if (client != null) { + HTTPConduit conduit = (HTTPConduit) client.getConduit(); + HTTPClientPolicy policy = new HTTPClientPolicy(); + log .info( - "setting timeouts for {} to requestTimeout: {}, connectTimeout: {}", - BindingProvider.class.getName(), requestTimeout, connectTimeout); + String + .format( + "setting connectTimeout to %s, requestTimeout to %s for service 
%s", + connectTimeout, + requestTimeout, + clazz.getCanonicalName())); - Map requestContext = ((BindingProvider) service).getRequestContext(); - - requestContext.put("com.sun.xml.internal.ws.request.timeout", requestTimeout); - requestContext.put("com.sun.xml.internal.ws.connect.timeout", connectTimeout); - requestContext.put("com.sun.xml.ws.request.timeout", requestTimeout); - requestContext.put("com.sun.xml.ws.connect.timeout", connectTimeout); - requestContext.put("javax.xml.ws.client.receiveTimeout", requestTimeout); - requestContext.put("javax.xml.ws.client.connectionTimeout", connectTimeout); + policy.setConnectionTimeout(connectTimeout); + policy.setReceiveTimeout(requestTimeout); + conduit.setClient(policy); } return service; From 8c96a82a03c4a9b01944dbc34175c6a07c481e69 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Wed, 5 May 2021 15:30:06 +0200 Subject: [PATCH 08/16] fixed mapping applied to ODF records. Added unit test to verify the mapping for OpenTrials --- .../raw/AbstractMdRecordToOafMapper.java | 26 ++-- .../dhp/oa/graph/raw/OdfToOafMapper.java | 15 ++- .../dnetlib/dhp/oa/graph/raw/MappersTest.java | 120 ++++++++++++++++-- .../dhp/oa/graph/raw/odf_opentrial.xml | 75 +++++++++++ 4 files changed, 206 insertions(+), 30 deletions(-) create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/odf_opentrial.xml diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java index cccf15398..aa9e4a11f 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java @@ -24,13 +24,7 @@ import static eu.dnetlib.dhp.schema.oaf.OafMapperUtils.oaiIProvenance; import static 
eu.dnetlib.dhp.schema.oaf.OafMapperUtils.qualifier; import static eu.dnetlib.dhp.schema.oaf.OafMapperUtils.structuredProperty; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Date; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.Optional; +import java.util.*; import org.apache.commons.lang3.StringUtils; import org.dom4j.Document; @@ -38,6 +32,9 @@ import org.dom4j.DocumentFactory; import org.dom4j.DocumentHelper; import org.dom4j.Node; +import com.google.common.collect.Lists; +import com.google.common.collect.Sets; + import eu.dnetlib.dhp.oa.graph.raw.common.VocabularyGroup; import eu.dnetlib.dhp.schema.common.LicenseComparator; import eu.dnetlib.dhp.schema.common.ModelConstants; @@ -330,7 +327,7 @@ public abstract class AbstractMdRecordToOafMapper { r.setDataInfo(info); r.setLastupdatetimestamp(lastUpdateTimestamp); r.setId(createOpenaireId(50, doc.valueOf("//dri:objIdentifier"), false)); - r.setOriginalId(Arrays.asList(findOriginalId(doc))); + r.setOriginalId(findOriginalId(doc)); r.setCollectedfrom(Arrays.asList(collectedFrom)); r.setPid(prepareResultPids(doc, info)); r.setDateofcollection(doc.valueOf("//dr:dateOfCollection|//dri:dateOfCollection")); @@ -493,16 +490,23 @@ public abstract class AbstractMdRecordToOafMapper { return null; } - private String findOriginalId(final Document doc) { + private List findOriginalId(final Document doc) { final Node n = doc.selectSingleNode("//*[local-name()='provenance']/*[local-name()='originDescription']"); if (n != null) { final String id = n.valueOf("./*[local-name()='identifier']"); if (StringUtils.isNotBlank(id)) { - return id; + return Lists.newArrayList(id); } } - return doc.valueOf("//*[local-name()='header']/*[local-name()='identifier']"); + List idList = doc + .selectNodes( + "normalize-space(//*[local-name()='header']/*[local-name()='identifier' or local-name()='recordIdentifier']/text())"); + Set originalIds = Sets.newHashSet(idList); + if 
(originalIds.isEmpty()) { + throw new IllegalStateException("missing originalID on " + doc.asXML()); + } + return Lists.newArrayList(originalIds); } protected Qualifier prepareQualifier(final Node node, final String xpath, final String schemeId) { diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java index c2c2cb645..6e0161be6 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java @@ -119,7 +119,8 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper { instance.setCollectedfrom(collectedfrom); instance.setHostedby(hostedby); instance.setDateofacceptance(field(doc.valueOf("//oaf:dateAccepted"), info)); - instance.setDistributionlocation(doc.valueOf("//oaf:distributionlocation")); + final String distributionlocation = doc.valueOf("//oaf:distributionlocation"); + instance.setDistributionlocation(StringUtils.isNotBlank(distributionlocation) ? 
distributionlocation : null); instance .setAccessright(prepareQualifier(doc, "//oaf:accessrights", DNET_ACCESS_MODES)); instance.setLicense(field(doc.valueOf("//oaf:license"), info)); @@ -200,12 +201,12 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper { @Override protected List> prepareFormats(final Document doc, final DataInfo info) { - return prepareListFields(doc, "//*[local-name()=':format']", info); + return prepareListFields(doc, "//*[local-name()='format']", info); } @Override protected Field preparePublisher(final Document doc, final DataInfo info) { - return prepareField(doc, "//*[local-name()=':publisher']", info); + return prepareField(doc, "//*[local-name()='publisher']", info); } @Override @@ -220,7 +221,7 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper { @Override protected Qualifier prepareLanguages(final Document doc) { - return prepareQualifier(doc, "//*[local-name()=':language']", DNET_LANGUAGES); + return prepareQualifier(doc, "//*[local-name()='language']", DNET_LANGUAGES); } @Override @@ -287,9 +288,9 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper { for (final Object o : doc.selectNodes("//*[local-name()='geoLocation']")) { final GeoLocation loc = new GeoLocation(); - loc.setBox(((Node) o).valueOf("./*[local-name()=':geoLocationBox']")); - loc.setPlace(((Node) o).valueOf("./*[local-name()=':geoLocationPlace']")); - loc.setPoint(((Node) o).valueOf("./*[local-name()=':geoLocationPoint']")); + loc.setBox(((Node) o).valueOf("./*[local-name()='geoLocationBox']")); + loc.setPlace(((Node) o).valueOf("./*[local-name()='geoLocationPlace']")); + loc.setPoint(((Node) o).valueOf("./*[local-name()='geoLocationPoint']")); res.add(loc); } return res; diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java index ab956a378..3d90794a9 100644 --- 
a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java @@ -1,10 +1,7 @@ package eu.dnetlib.dhp.oa.graph.raw; -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertFalse; -import static org.junit.jupiter.api.Assertions.assertNotNull; -import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.junit.jupiter.api.Assertions.*; import static org.mockito.Mockito.lenient; import java.io.IOException; @@ -25,14 +22,7 @@ import com.fasterxml.jackson.databind.ObjectMapper; import eu.dnetlib.dhp.oa.graph.clean.CleaningFunctionTest; import eu.dnetlib.dhp.oa.graph.raw.common.VocabularyGroup; import eu.dnetlib.dhp.schema.common.ModelConstants; -import eu.dnetlib.dhp.schema.oaf.Author; -import eu.dnetlib.dhp.schema.oaf.Dataset; -import eu.dnetlib.dhp.schema.oaf.Field; -import eu.dnetlib.dhp.schema.oaf.Oaf; -import eu.dnetlib.dhp.schema.oaf.Publication; -import eu.dnetlib.dhp.schema.oaf.Relation; -import eu.dnetlib.dhp.schema.oaf.Software; -import eu.dnetlib.dhp.schema.oaf.StructuredProperty; +import eu.dnetlib.dhp.schema.oaf.*; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; @ExtendWith(MockitoExtension.class) @@ -256,6 +246,112 @@ public class MappersTest { assertEquals(r2.getValidationDate(), "2020-01-01"); } + @Test + void testOpentrial() throws IOException { + final String xml = IOUtils.toString(getClass().getResourceAsStream("odf_opentrial.xml")); + + final List list = new OdfToOafMapper(vocs, false).processMdRecord(xml); + + assertEquals(1, list.size()); + assertTrue(list.get(0) instanceof Dataset); + final Dataset d = (Dataset) list.get(0); + + assertNotNull(d.getDateofcollection()); + assertEquals("2019-03-27T15:15:22.22Z", d.getDateofcollection()); + + assertNotNull(d.getDateoftransformation()); + assertEquals("2019-04-17T16:04:20.586Z", 
d.getDateoftransformation()); + + assertNotNull(d.getDataInfo()); + assertFalse(d.getDataInfo().getInvisible()); + assertFalse(d.getDataInfo().getDeletedbyinference()); + assertEquals("0.9", d.getDataInfo().getTrust()); + + assertEquals("", d.getDataInfo().getInferenceprovenance()); + + assertEquals("sysimport:crosswalk:datasetarchive", d.getDataInfo().getProvenanceaction().getClassid()); + assertEquals("sysimport:crosswalk:datasetarchive", d.getDataInfo().getProvenanceaction().getClassname()); + assertEquals(ModelConstants.DNET_PROVENANCE_ACTIONS, d.getDataInfo().getProvenanceaction().getSchemeid()); + assertEquals(ModelConstants.DNET_PROVENANCE_ACTIONS, d.getDataInfo().getProvenanceaction().getSchemename()); + + assertValidId(d.getId()); + assertTrue(d.getOriginalId().size() == 1); + assertEquals("feabb67c-1fd1-423b-aec6-606d04ce53c6", d.getOriginalId().get(0)); + assertValidId(d.getCollectedfrom().get(0).getKey()); + + assertNotNull(d.getTitle()); + assertEquals(1, d.getTitle().size()); + assertEquals( + "Validation of the Goodstrength System for Assessment of Abdominal Wall Strength in Patients With Incisional Hernia", + d.getTitle().get(0).getValue()); + + assertNotNull(d.getDescription()); + assertEquals(1, d.getDescription().size()); + assertTrue(StringUtils.isNotBlank(d.getDescription().get(0).getValue())); + + assertTrue(d.getAuthor().size() == 1); + assertEquals("Jensen, Kristian K", d.getAuthor().get(0).getFullname()); + assertEquals("Kristian K.", d.getAuthor().get(0).getName()); + assertEquals("Jensen", d.getAuthor().get(0).getSurname()); + + assertNotNull(d.getAuthor().get(0).getPid()); + assertTrue(d.getAuthor().get(0).getPid().isEmpty()); + + assertNotNull(d.getPid()); + assertEquals(1, d.getPid().size()); + assertEquals("NCT02321059", d.getPid().get(0).getValue()); + assertEquals("nct", d.getPid().get(0).getQualifier().getClassid()); + assertEquals("ClinicalTrials.gov Identifier", d.getPid().get(0).getQualifier().getClassname()); + 
assertEquals(ModelConstants.DNET_PID_TYPES, d.getPid().get(0).getQualifier().getSchemeid()); + assertEquals(ModelConstants.DNET_PID_TYPES, d.getPid().get(0).getQualifier().getSchemename()); + + assertNotNull(d.getPublisher()); + assertEquals("nct", d.getPublisher().getValue()); + + assertTrue(d.getSubject().isEmpty()); + assertTrue(d.getContext().isEmpty()); + + assertNotNull(d.getInstance()); + assertTrue(d.getInstance().size() == 1); + + Instance i = d.getInstance().get(0); + + assertNotNull(i.getAccessright()); + assertEquals(ModelConstants.DNET_ACCESS_MODES, i.getAccessright().getSchemeid()); + assertEquals(ModelConstants.DNET_ACCESS_MODES, i.getAccessright().getSchemename()); + assertEquals("OPEN", i.getAccessright().getClassid()); + assertEquals("Open Access", i.getAccessright().getClassname()); + + assertNotNull(i.getCollectedfrom()); + assertEquals("10|openaire____::b292fc2d7de505f78e3cae1b06ea8548", i.getCollectedfrom().getKey()); + assertEquals("OpenTrials", i.getCollectedfrom().getValue()); + + assertNotNull(i.getHostedby()); + assertEquals("10|openaire____::b292fc2d7de505f78e3cae1b06ea8548", i.getHostedby().getKey()); + assertEquals("OpenTrials", i.getHostedby().getValue()); + + assertNotNull(i.getInstancetype()); + assertEquals("0037", i.getInstancetype().getClassid()); + assertEquals("Clinical Trial", i.getInstancetype().getClassname()); + assertEquals(ModelConstants.DNET_PUBLICATION_RESOURCE, i.getInstancetype().getSchemeid()); + assertEquals(ModelConstants.DNET_PUBLICATION_RESOURCE, i.getInstancetype().getSchemename()); + + assertNull(i.getLicense()); + assertNotNull(i.getDateofacceptance()); + assertEquals("2014-11-11", i.getDateofacceptance().getValue()); + + assertNull(i.getDistributionlocation()); + assertNull(i.getProcessingchargeamount()); + assertNull(i.getProcessingchargecurrency()); + + assertNotNull(i.getUrl()); + assertEquals(2, i.getUrl().size()); + 
assertTrue(i.getUrl().contains("http://apps.who.int/trialsearch/Trial3.aspx?trialid=NCT02321059")); + assertTrue(i.getUrl().contains("https://clinicaltrials.gov/ct2/show/NCT02321059")); + + assertEquals("UNKNOWN", i.getRefereed().getClassid()); + } + @Test void testSoftware() throws IOException { final String xml = IOUtils.toString(getClass().getResourceAsStream("odf_software.xml")); diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/odf_opentrial.xml b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/odf_opentrial.xml new file mode 100644 index 000000000..97e966385 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/odf_opentrial.xml @@ -0,0 +1,75 @@ + + + + opentrials__::0000bf8e63d3d7e6b88421eabafae3f6 + feabb67c-1fd1-423b-aec6-606d04ce53c6 + 2019-03-27T15:15:22.22Z + opentrials__ + 2019-04-17T16:04:20.586Z + + + + https://clinicaltrials.gov/ct2/show/NCT02321059 + + http://apps.who.int/trialsearch/Trial3.aspx?trialid=NCT02321059 + NCT02321059 + + + + Jensen, Kristian K + + + + Validation of the Goodstrength System for Assessment of Abdominal Wall Strength in Patients With Incisional Hernia + + nct + + Denmark + + 0037 + + Patients with an incisional hernia in the midline and controls with an intact abdominal wall are examined twice with one week apart, in order to establish the test-retest reliability and internal and external validity of the Goodstrength trunk dynamometer. 
+ + + OPEN + 0037 + 2014-11-11 + + + + + false + false + 0.9 + + + + + + + + + file:///var/lib/dnet/data/opentrials/opentrials.csv + + + + + + + false + false + 0.9 + + + + + \ No newline at end of file From 7dc824fc237ceaebe3d310a5057228ab114203dd Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Fri, 7 May 2021 12:53:50 +0200 Subject: [PATCH 09/16] imported changes in stable_id into master --- .../orcid/SparkConvertORCIDToOAF.scala | 27 +- .../doiboost/orcid/SparkPreprocessORCID.scala | 56 ++++ .../doiboost/convert_map_to_oaf_params.json | 6 +- .../doiboost/crossref/oozie_app/workflow.xml | 101 ------- .../intersection/oozie_app/config-default.xml | 38 --- .../intersection/oozie_app/workflow.xml | 96 ------- .../dhp/doiboost/mag/oozie_app/workflow.xml | 92 ------ .../oozie_app/config-default.xml | 0 .../preprocess/oozie_app/workflow.xml | 194 +++++++++++++ .../dhp/doiboost/preprocess_orcid_params.json | 6 + .../oozie_app/config-default.xml | 4 +- .../doiboost/process/oozie_app/workflow.xml | 262 ++++++++++++++++++ .../unpaywall/oozie_app/config-default.xml | 38 --- .../doiboost/unpaywall/oozie_app/workflow.xml | 55 ---- 14 files changed, 526 insertions(+), 449 deletions(-) create mode 100644 dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkPreprocessORCID.scala delete mode 100644 dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref/oozie_app/workflow.xml delete mode 100644 dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/intersection/oozie_app/config-default.xml delete mode 100644 dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/intersection/oozie_app/workflow.xml delete mode 100644 dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/mag/oozie_app/workflow.xml rename dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/{crossref => preprocess}/oozie_app/config-default.xml (100%) create mode 100644 
dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/preprocess/oozie_app/workflow.xml create mode 100644 dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/preprocess_orcid_params.json rename dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/{mag => process}/oozie_app/config-default.xml (91%) create mode 100644 dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/process/oozie_app/workflow.xml delete mode 100644 dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/unpaywall/oozie_app/config-default.xml delete mode 100644 dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/unpaywall/oozie_app/workflow.xml diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkConvertORCIDToOAF.scala b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkConvertORCIDToOAF.scala index 025d68b90..6109322ae 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkConvertORCIDToOAF.scala +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkConvertORCIDToOAF.scala @@ -12,31 +12,10 @@ import org.slf4j.{Logger, LoggerFactory} object SparkConvertORCIDToOAF { val logger: Logger = LoggerFactory.getLogger(SparkConvertORCIDToOAF.getClass) - def run(spark:SparkSession,sourcePath:String,workingPath:String, targetPath:String):Unit = { + def run(spark:SparkSession,workingPath:String, targetPath:String):Unit = { import spark.implicits._ implicit val mapEncoderPubs: Encoder[Publication] = Encoders.kryo[Publication] - val inputRDD:RDD[OrcidAuthor] = spark.sparkContext.textFile(s"$sourcePath/authors").map(s => ORCIDToOAF.convertORCIDAuthor(s)).filter(s => s!= null).filter(s => ORCIDToOAF.authorValid(s)) - - spark.createDataset(inputRDD).as[OrcidAuthor].write.mode(SaveMode.Overwrite).save(s"$workingPath/author") - - val res = spark.sparkContext.textFile(s"$sourcePath/works").flatMap(s => 
ORCIDToOAF.extractDOIWorks(s)).filter(s => s!= null) - - spark.createDataset(res).as[OrcidWork].write.mode(SaveMode.Overwrite).save(s"$workingPath/works") - - val authors :Dataset[OrcidAuthor] = spark.read.load(s"$workingPath/author").as[OrcidAuthor] - - val works :Dataset[OrcidWork] = spark.read.load(s"$workingPath/works").as[OrcidWork] - - works.joinWith(authors, authors("oid").equalTo(works("oid"))) - .map(i =>{ - val doi = i._1.doi - val author = i._2 - (doi, author) - }).groupBy(col("_1").alias("doi")) - .agg(collect_list(col("_2")).alias("authors")) - .write.mode(SaveMode.Overwrite).save(s"$workingPath/orcidworksWithAuthor") - val dataset: Dataset[ORCIDItem] =spark.read.load(s"$workingPath/orcidworksWithAuthor").as[ORCIDItem] logger.info("Converting ORCID to OAF") @@ -55,10 +34,10 @@ object SparkConvertORCIDToOAF { .master(parser.get("master")).getOrCreate() - val sourcePath = parser.get("sourcePath") + val workingPath = parser.get("workingPath") val targetPath = parser.get("targetPath") - run(spark, sourcePath, workingPath, targetPath) + run(spark, workingPath, targetPath) } diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkPreprocessORCID.scala b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkPreprocessORCID.scala new file mode 100644 index 000000000..71b2231d7 --- /dev/null +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkPreprocessORCID.scala @@ -0,0 +1,56 @@ +package eu.dnetlib.doiboost.orcid + +import eu.dnetlib.dhp.application.ArgumentApplicationParser +import eu.dnetlib.dhp.schema.oaf.Publication +import org.apache.commons.io.IOUtils +import org.apache.spark.SparkConf +import org.apache.spark.rdd.RDD +import org.apache.spark.sql.{Dataset, Encoder, Encoders, SaveMode, SparkSession} +import org.apache.spark.sql.functions.{col, collect_list} +import org.slf4j.{Logger, LoggerFactory} + +class SparkPreprocessORCID { + val logger: Logger = 
LoggerFactory.getLogger(getClass) + + def run(spark:SparkSession,sourcePath:String,workingPath:String):Unit = { + import spark.implicits._ + + val inputRDD:RDD[OrcidAuthor] = spark.sparkContext.textFile(s"$sourcePath/authors").map(s => ORCIDToOAF.convertORCIDAuthor(s)).filter(s => s!= null).filter(s => ORCIDToOAF.authorValid(s)) + + spark.createDataset(inputRDD).as[OrcidAuthor].write.mode(SaveMode.Overwrite).save(s"$workingPath/author") + + val res = spark.sparkContext.textFile(s"$sourcePath/works").flatMap(s => ORCIDToOAF.extractDOIWorks(s)).filter(s => s!= null) + + spark.createDataset(res).as[OrcidWork].write.mode(SaveMode.Overwrite).save(s"$workingPath/works") + + val authors :Dataset[OrcidAuthor] = spark.read.load(s"$workingPath/author").as[OrcidAuthor] + + val works :Dataset[OrcidWork] = spark.read.load(s"$workingPath/works").as[OrcidWork] + + works.joinWith(authors, authors("oid").equalTo(works("oid"))) + .map(i =>{ + val doi = i._1.doi + val author = i._2 + (doi, author) + }).groupBy(col("_1").alias("doi")) + .agg(collect_list(col("_2")).alias("authors")) + .write.mode(SaveMode.Overwrite).save(s"$workingPath/orcidworksWithAuthor") + } + + def main(args: Array[String]): Unit = { + val conf: SparkConf = new SparkConf() + val parser = new ArgumentApplicationParser(IOUtils.toString(SparkConvertORCIDToOAF.getClass.getResourceAsStream("/eu/dnetlib/dhp/doiboost/preprocess_orcid_params.json"))) + parser.parseArgument(args) + val spark: SparkSession = + SparkSession + .builder() + .config(conf) + .appName(getClass.getSimpleName) + .master(parser.get("master")).getOrCreate() + + val sourcePath = parser.get("sourcePath") + val workingPath = parser.get("workingPath") + run(spark, sourcePath, workingPath) + } + +} diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/convert_map_to_oaf_params.json b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/convert_map_to_oaf_params.json index 312bd0751..c97231fdd 100644 --- 
a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/convert_map_to_oaf_params.json +++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/convert_map_to_oaf_params.json @@ -1,6 +1,6 @@ [ - {"paramName":"s", "paramLongName":"sourcePath", "paramDescription": "the path of the sequencial file to read", "paramRequired": true}, - {"paramName":"t", "paramLongName":"targetPath", "paramDescription": "the working dir path", "paramRequired": true}, - {"paramName":"m", "paramLongName":"master", "paramDescription": "the master name", "paramRequired": true} + {"paramName":"w", "paramLongName":"workingPath", "paramDescription": "the working dir path", "paramRequired": true}, + {"paramName":"t", "paramLongName":"targetPath", "paramDescription": "the target dir path", "paramRequired": true}, + {"paramName":"m", "paramLongName":"master", "paramDescription": "the master name", "paramRequired": true} ] \ No newline at end of file diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref/oozie_app/workflow.xml b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref/oozie_app/workflow.xml deleted file mode 100644 index 63c2e9ef2..000000000 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref/oozie_app/workflow.xml +++ /dev/null @@ -1,101 +0,0 @@ - - - - workingPath - the working dir base path - - - sparkDriverMemory - memory for driver process - - - sparkExecutorMemory - memory for individual executor - - - sparkExecutorCores - number of cores used by single executor - - - timestamp - Timestamp for incremental Harvesting - - - - - - - - - Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] - - - - - ${jobTracker} - ${nameNode} - eu.dnetlib.doiboost.crossref.CrossrefImporter - -t${workingPath}/input/crossref/index_update - -n${nameNode} - -ts${timestamp} - - - - - - - - yarn-cluster - cluster - ExtractCrossrefToOAF - 
eu.dnetlib.doiboost.crossref.CrossrefDataset - dhp-doiboost-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.sql.shuffle.partitions=3840 - ${sparkExtraOPT} - - --workingPath/data/doiboost/input/crossref - --masteryarn-cluster - - - - - - - - - - - - - - - - - - yarn-cluster - cluster - ConvertCrossrefToOAF - eu.dnetlib.doiboost.crossref.SparkMapDumpIntoOAF - dhp-doiboost-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.sql.shuffle.partitions=3840 - ${sparkExtraOPT} - - --sourcePath${workingPath}/input/crossref/crossref_ds - --targetPath${workingPath}/process/ - --masteryarn-cluster - - - - - - - \ No newline at end of file diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/intersection/oozie_app/config-default.xml b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/intersection/oozie_app/config-default.xml deleted file mode 100644 index cf617a84c..000000000 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/intersection/oozie_app/config-default.xml +++ /dev/null @@ -1,38 +0,0 @@ - - - jobTracker - yarnRM - - - nameNode - hdfs://nameservice1 - - - oozie.use.system.libpath - true - - - oozie.action.sharelib.for.spark - spark2 - - - hive_metastore_uris - thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083 - - - spark2YarnHistoryServerAddress - http://iis-cdh5-test-gw.ocean.icm.edu.pl:18089 - - - spark2EventLogDir - /user/spark/spark2ApplicationHistory - - - spark2ExtraListeners - "com.cloudera.spark.lineage.NavigatorAppListener" - - - spark2SqlQueryExecutionListeners - "com.cloudera.spark.lineage.NavigatorQueryListener" - - \ No newline at end of file diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/intersection/oozie_app/workflow.xml 
b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/intersection/oozie_app/workflow.xml deleted file mode 100644 index dcde62c9d..000000000 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/intersection/oozie_app/workflow.xml +++ /dev/null @@ -1,96 +0,0 @@ - - - - hostedByMapPath - the Hosted By Map Path - - - affiliationPath - the Affliation Path - - - paperAffiliationPath - the paperAffiliation Path - - - workingDirPath - the Working Path - - - sparkDriverMemory - memory for driver process - - - sparkExecutorMemory - memory for individual executor - - - sparkExecutorCores - number of cores used by single executor - - - - - - - - - - Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] - - - - - - - yarn-cluster - cluster - Create DOIBoost Infospace - eu.dnetlib.doiboost.SparkGenerateDoiBoost - dhp-doiboost-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.sql.shuffle.partitions=3840 - ${sparkExtraOPT} - - --hostedByMapPath${hostedByMapPath} - --affiliationPath${affiliationPath} - --paperAffiliationPath${paperAffiliationPath} - --workingDirPath${workingDirPath} - --masteryarn-cluster - - - - - - - - - yarn-cluster - cluster - Generate DOIBoost ActionSet - eu.dnetlib.doiboost.SparkGenerateDOIBoostActionSet - dhp-doiboost-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.sql.shuffle.partitions=3840 - ${sparkExtraOPT} - - --dbPublicationPath${workingDirPath}/doiBoostPublicationFiltered - --dbDatasetPath${workingDirPath}/crossrefDataset - --crossRefRelation${workingDirPath}/crossrefRelation - --dbaffiliationRelationPath${workingDirPath}/doiBoostPublicationAffiliation - -do${workingDirPath}/doiBoostOrganization - --targetPath${workingDirPath}/actionDataSet - --masteryarn-cluster - - - - - 
- - \ No newline at end of file diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/mag/oozie_app/workflow.xml b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/mag/oozie_app/workflow.xml deleted file mode 100644 index 9d19dddc7..000000000 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/mag/oozie_app/workflow.xml +++ /dev/null @@ -1,92 +0,0 @@ - - - - sourcePath - the working dir base path - - - targetPath - the working dir base path - - - workingPath - the working dir base path - - - sparkDriverMemory - memory for driver process - - - sparkExecutorMemory - memory for individual executor - - - sparkExecutorCores - number of cores used by single executor - - - - - - - - Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] - - - - - - - - - - - - - - yarn-cluster - cluster - Convert Mag to Dataset - eu.dnetlib.doiboost.mag.SparkImportMagIntoDataset - dhp-doiboost-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - ${sparkExtraOPT} - - --sourcePath${sourcePath} - --targetPath${workingPath} - --masteryarn-cluster - - - - - - - - - - yarn-cluster - cluster - Convert Mag to OAF Dataset - eu.dnetlib.doiboost.mag.SparkPreProcessMAG - dhp-doiboost-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.sql.shuffle.partitions=3840 - ${sparkExtraOPT} - - --sourcePath${workingPath} - --workingPath${workingPath}/process - --targetPath${targetPath} - --masteryarn-cluster - - - - - - - \ No newline at end of file diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref/oozie_app/config-default.xml b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/preprocess/oozie_app/config-default.xml similarity index 100% rename from 
dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref/oozie_app/config-default.xml rename to dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/preprocess/oozie_app/config-default.xml diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/preprocess/oozie_app/workflow.xml b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/preprocess/oozie_app/workflow.xml new file mode 100644 index 000000000..09feecf3a --- /dev/null +++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/preprocess/oozie_app/workflow.xml @@ -0,0 +1,194 @@ + + + + sparkDriverMemory + memory for driver process + + + sparkExecutorMemory + memory for individual executor + + + sparkExecutorIntersectionMemory + memory for individual executor + + + + sparkExecutorCores + number of cores used by single executor + + + + inputPathCrossref + the Crossref input path + + + crossrefTimestamp + Timestamp for the Crossref incremental Harvesting + + + esServer + elasticsearch server url for the Crossref Harvesting + + + esIndex + elasticsearch index name for the Crossref Harvesting + + + + MAGDumpPath + the MAG dump working path + + + + inputPathMAG + the MAG working path + + + + inputPathOrcid + the ORCID input path + + + + workingPathOrcid + the ORCID working path + + + + + + ${jobTracker} + ${nameNode} + + + oozie.action.sharelib.for.spark + ${oozieActionShareLibForSpark2} + + + + + + + + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] + + + + + eu.dnetlib.doiboost.crossref.CrossrefImporter + --targetPath${inputPathCrossref}/index_update + --namenode${nameNode} + --esServer${esServer} + --esIndex${esIndex} + --timestamp${crossrefTimestamp} + + + + + + + + + + + yarn-cluster + cluster + GenerateCrossrefDataset + eu.dnetlib.doiboost.crossref.CrossrefDataset + dhp-doiboost-${projectVersion}.jar + + --executor-memory=${sparkExecutorMemory} + --executor-cores=${sparkExecutorCores} + 
--driver-memory=${sparkDriverMemory} + --conf spark.sql.shuffle.partitions=3840 + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + + --workingPath${inputPathCrossref} + --masteryarn-cluster + + + + + + + + + + + + + + + + + + + + + + + + + + + + + yarn-cluster + cluster + Convert Mag to Dataset + eu.dnetlib.doiboost.mag.SparkImportMagIntoDataset + dhp-doiboost-${projectVersion}.jar + + --executor-memory=${sparkExecutorMemory} + --executor-cores=${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf spark.sql.shuffle.partitions=3840 + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + + --sourcePath${MAGDumpPath} + --targetPath${inputPathMAG}/dataset + --masteryarn-cluster + + + + + + + + + + + yarn-cluster + cluster + Convert ORCID to Dataset + eu.dnetlib.doiboost.orcid.SparkPreprocessORCID + dhp-doiboost-${projectVersion}.jar + + --executor-memory=${sparkExecutorMemory} + --executor-cores=${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf spark.sql.shuffle.partitions=3840 + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + + --sourcePath${inputPathOrcid} + --workingPath${workingPathOrcid} + --masteryarn-cluster + + + + + + + + \ No newline at end of file diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/preprocess_orcid_params.json 
b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/preprocess_orcid_params.json new file mode 100644 index 000000000..08444d732 --- /dev/null +++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/preprocess_orcid_params.json @@ -0,0 +1,6 @@ +[ + {"paramName":"s", "paramLongName":"sourcePath", "paramDescription": "the path of the sequencial file to read", "paramRequired": true}, + {"paramName":"w", "paramLongName":"workingPath", "paramDescription": "the working dir path", "paramRequired": true}, + {"paramName":"m", "paramLongName":"master", "paramDescription": "the master name", "paramRequired": true} + +] \ No newline at end of file diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/mag/oozie_app/config-default.xml b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/process/oozie_app/config-default.xml similarity index 91% rename from dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/mag/oozie_app/config-default.xml rename to dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/process/oozie_app/config-default.xml index 59e5c059f..508202e30 100644 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/mag/oozie_app/config-default.xml +++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/process/oozie_app/config-default.xml @@ -16,8 +16,8 @@ spark2 - oozie.wf.rerun.failnodes - false + oozie.launcher.mapreduce.user.classpath.first + true hive_metastore_uris diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/process/oozie_app/workflow.xml b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/process/oozie_app/workflow.xml new file mode 100644 index 000000000..e5e29323e --- /dev/null +++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/process/oozie_app/workflow.xml @@ -0,0 +1,262 @@ + + + + sparkDriverMemory + memory for 
driver process + + + sparkExecutorMemory + memory for individual executor + + + sparkExecutorIntersectionMemory + memory for individual executor + + + + sparkExecutorCores + number of cores used by single executor + + + + workingPath + the working Path + + + + hostedByMapPath + the hostedByMap Path + + + outputPath + the Path of the sequence file action set + + + + + + inputPathCrossref + the Crossref input path + + + + + inputPathMAG + the MAG working path + + + + + + inputPathUnpayWall + the UnpayWall working path + + + + + inputPathOrcid + the ORCID input path + + + + workingPathOrcid + the ORCID working path + + + + + + ${jobTracker} + ${nameNode} + + + oozie.action.sharelib.for.spark + ${oozieActionShareLibForSpark2} + + + + + + + + + ${wf:conf('resumeFrom') eq 'PreprocessMag'} + ${wf:conf('resumeFrom') eq 'PreprocessUW'} + ${wf:conf('resumeFrom') eq 'PreprocessORCID'} + ${wf:conf('resumeFrom') eq 'CreateDOIBoost'} + ${wf:conf('resumeFrom') eq 'GenerateActionSet'} + + + + + + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] + + + + + yarn-cluster + cluster + ConvertCrossrefToOAF + eu.dnetlib.doiboost.crossref.SparkMapDumpIntoOAF + dhp-doiboost-${projectVersion}.jar + + --executor-memory=${sparkExecutorMemory} + --executor-cores=${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf spark.sql.shuffle.partitions=3840 + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + + --sourcePath${inputPathCrossref}/crossref_ds + --targetPath${workingPath} + --masteryarn-cluster + + + + + + + + yarn-cluster + cluster + Convert Mag to OAF Dataset + eu.dnetlib.doiboost.mag.SparkProcessMAG + dhp-doiboost-${projectVersion}.jar + + --executor-memory=${sparkExecutorIntersectionMemory} + --executor-cores=${sparkExecutorCores} + 
--driver-memory=${sparkDriverMemory} + --conf spark.sql.shuffle.partitions=3840 + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + + --sourcePath${inputPathMAG}/dataset + --workingPath${inputPathMAG}/process_p + --targetPath${workingPath} + --masteryarn-cluster + + + + + + + + + + yarn-cluster + cluster + Convert UnpayWall to Dataset + eu.dnetlib.doiboost.uw.SparkMapUnpayWallToOAF + dhp-doiboost-${projectVersion}.jar + + --executor-memory=${sparkExecutorMemory} + --executor-cores=${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf spark.sql.shuffle.partitions=3840 + --conf spark.sql.shuffle.partitions=3840 + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + + --sourcePath${inputPathUnpayWall}/uw_extracted + --targetPath${workingPath}/uwPublication + --masteryarn-cluster + + + + + + + + + yarn-cluster + cluster + Convert ORCID to Dataset + eu.dnetlib.doiboost.orcid.SparkConvertORCIDToOAF + dhp-doiboost-${projectVersion}.jar + + --executor-memory=${sparkExecutorMemory} + --executor-cores=${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf spark.sql.shuffle.partitions=3840 + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + + --workingPath${workingPathOrcid} + --targetPath${workingPath}/orcidPublication + --masteryarn-cluster + + + + + + + + + yarn-cluster + cluster + Create DOIBoost 
Infospace + eu.dnetlib.doiboost.SparkGenerateDoiBoost + dhp-doiboost-${projectVersion}.jar + + --executor-memory=${sparkExecutorIntersectionMemory} + --executor-cores=${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf spark.sql.shuffle.partitions=3840 + --conf spark.sql.shuffle.partitions=3840 + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + + --hostedByMapPath${hostedByMapPath} + --affiliationPath${inputPathMAG}/dataset/Affiliations + --paperAffiliationPath${inputPathMAG}/dataset/PaperAuthorAffiliations + --workingPath${workingPath} + --masteryarn-cluster + + + + + + + + + yarn-cluster + cluster + Generate DOIBoost ActionSet + eu.dnetlib.doiboost.SparkGenerateDOIBoostActionSet + dhp-doiboost-${projectVersion}.jar + + --executor-memory=${sparkExecutorMemory} + --executor-cores=${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf spark.sql.shuffle.partitions=3840 + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + + --dbPublicationPath${workingPath}/doiBoostPublicationFiltered + --dbDatasetPath${workingPath}/crossrefDataset + --crossRefRelation${workingPath}/crossrefRelation + --dbaffiliationRelationPath${workingPath}/doiBoostPublicationAffiliation + --dbOrganizationPath${workingPath}/doiBoostOrganization + --targetPath${workingPath}/actionDataSet + --sFilePath${outputPath} + --masteryarn-cluster + + + + + + + \ No newline at end of file diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/unpaywall/oozie_app/config-default.xml 
b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/unpaywall/oozie_app/config-default.xml deleted file mode 100644 index cf617a84c..000000000 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/unpaywall/oozie_app/config-default.xml +++ /dev/null @@ -1,38 +0,0 @@ - - - jobTracker - yarnRM - - - nameNode - hdfs://nameservice1 - - - oozie.use.system.libpath - true - - - oozie.action.sharelib.for.spark - spark2 - - - hive_metastore_uris - thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083 - - - spark2YarnHistoryServerAddress - http://iis-cdh5-test-gw.ocean.icm.edu.pl:18089 - - - spark2EventLogDir - /user/spark/spark2ApplicationHistory - - - spark2ExtraListeners - "com.cloudera.spark.lineage.NavigatorAppListener" - - - spark2SqlQueryExecutionListeners - "com.cloudera.spark.lineage.NavigatorQueryListener" - - \ No newline at end of file diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/unpaywall/oozie_app/workflow.xml b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/unpaywall/oozie_app/workflow.xml deleted file mode 100644 index d2a69752e..000000000 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/unpaywall/oozie_app/workflow.xml +++ /dev/null @@ -1,55 +0,0 @@ - - - - sourcePath - the working dir base path - - - targetPath - the working dir base path - - - sparkDriverMemory - memory for driver process - - - sparkExecutorMemory - memory for individual executor - - - sparkExecutorCores - number of cores used by single executor - - - - - - - - Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] - - - - - yarn-cluster - cluster - Convert UnpayWall to Dataset - eu.dnetlib.doiboost.uw.SparkMapUnpayWallToOAF - dhp-doiboost-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.sql.shuffle.partitions=3840 - ${sparkExtraOPT} - - 
--sourcePath${sourcePath}/uw_extracted - --targetPath${targetPath} - --masteryarn-cluster - - - - - - - \ No newline at end of file From 25254885b9b2b3a0e9c5c7bd88f2f72f15ffc06c Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Fri, 7 May 2021 17:31:32 +0200 Subject: [PATCH 10/16] [ActionManagement] reduced number of xqueries used to access ActionSet info --- .../dnetlib/dhp/actionmanager/ISClient.java | 102 +++++++----------- 1 file changed, 39 insertions(+), 63 deletions(-) diff --git a/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/ISClient.java b/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/ISClient.java index 0f0d21e11..5a80c0b53 100644 --- a/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/ISClient.java +++ b/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/ISClient.java @@ -3,20 +3,23 @@ package eu.dnetlib.dhp.actionmanager; import java.io.Serializable; import java.io.StringReader; -import java.util.ArrayList; -import java.util.List; -import java.util.NoSuchElementException; +import java.util.*; import java.util.stream.Collectors; +import org.apache.commons.lang3.tuple.Triple; import org.dom4j.Document; +import org.dom4j.DocumentException; import org.dom4j.Element; import org.dom4j.io.SAXReader; +import org.jetbrains.annotations.NotNull; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import com.google.common.base.Joiner; import com.google.common.base.Splitter; import com.google.common.collect.Iterables; import com.google.common.collect.Lists; +import com.google.common.collect.Sets; import eu.dnetlib.actionmanager.rmi.ActionManagerException; import eu.dnetlib.actionmanager.set.ActionManagerSet; @@ -25,6 +28,7 @@ import eu.dnetlib.dhp.actionmanager.partition.PartitionActionSetsByPayloadTypeJo import eu.dnetlib.dhp.utils.ISLookupClientFactory; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; import 
eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; +import scala.Tuple2; public class ISClient implements Serializable { @@ -40,80 +44,52 @@ public class ISClient implements Serializable { public List getLatestRawsetPaths(String setIds) { - List ids = Lists - .newArrayList( + final Set ids = Sets + .newHashSet( Splitter .on(INPUT_ACTION_SET_ID_SEPARATOR) .omitEmptyStrings() .trimResults() .split(setIds)); - - return ids - .stream() - .map(id -> getSet(isLookup, id)) - .map(as -> as.getPathToLatest()) - .collect(Collectors.toCollection(ArrayList::new)); - } - - private ActionManagerSet getSet(ISLookUpService isLookup, final String setId) { - - final String q = "for $x in collection('/db/DRIVER/ActionManagerSetDSResources/ActionManagerSetDSResourceType') " - + "where $x//SET/@id = '" - + setId - + "' return $x"; - try { final String basePath = getBasePathHDFS(isLookup); - final String setProfile = isLookup.getResourceProfileByQuery(q); - return getActionManagerSet(basePath, setProfile); - } catch (ISLookUpException | ActionManagerException e) { - throw new RuntimeException("Error accessing Sets, using query: " + q); + + // + final String xquery = "for $x in collection('/db/DRIVER/ActionManagerSetDSResources/ActionManagerSetDSResourceType') " + + + "return "; + return Optional + .ofNullable(isLookup.quickSearchProfile(xquery)) + .map( + sets -> sets + .stream() + .map(set -> parseSetInfo(set)) + .filter(t -> ids.contains(t.getLeft())) + .map(t -> buildDirectory(basePath, t)) + .collect(Collectors.toList())) + .orElseThrow(() -> new IllegalStateException("empty set list")); + } catch (ActionManagerException | ISLookUpException e) { + throw new IllegalStateException("unable to query ActionSets info from the IS"); } } - private ActionManagerSet getActionManagerSet(final String basePath, final String profile) - throws ActionManagerException { - final SAXReader reader = new SAXReader(); - final ActionManagerSet set = new ActionManagerSet(); - + private Triple 
parseSetInfo(String set) { try { - final Document doc = reader.read(new StringReader(profile)); - - set.setId(doc.valueOf("//SET/@id").trim()); - set.setName(doc.valueOf("//SET").trim()); - set.setImpact(ImpactTypes.valueOf(doc.valueOf("//IMPACT").trim())); - set - .setLatest( - doc.valueOf("//RAW_SETS/LATEST/@id"), - doc.valueOf("//RAW_SETS/LATEST/@creationDate"), - doc.valueOf("//RAW_SETS/LATEST/@lastUpdate")); - set.setDirectory(doc.valueOf("//SET/@directory")); - final List expiredNodes = doc.selectNodes("//RAW_SETS/EXPIRED"); - if (expiredNodes != null) { - for (int i = 0; i < expiredNodes.size(); i++) { - Element ex = (Element) expiredNodes.get(i); - set - .addExpired( - ex.attributeValue("id"), - ex.attributeValue("creationDate"), - ex.attributeValue("lastUpdate")); - } - } - - final StringBuilder sb = new StringBuilder(); - sb.append(basePath); - sb.append("/"); - sb.append(doc.valueOf("//SET/@directory")); - sb.append("/"); - sb.append(doc.valueOf("//RAW_SETS/LATEST/@id")); - set.setPathToLatest(sb.toString()); - - return set; - } catch (Exception e) { - throw new ActionManagerException("Error creating set from profile: " + profile, e); + Document doc = new SAXReader().read(new StringReader(set)); + return Triple + .of( + doc.valueOf("//SET/@id"), + doc.valueOf("//SET/@directory"), + doc.valueOf("//SET/@latest")); + } catch (DocumentException e) { + throw new IllegalStateException(e); } } + private String buildDirectory(String basePath, Triple t) { + return Joiner.on("/").join(basePath, t.getMiddle(), t.getRight()); + } + private String getBasePathHDFS(ISLookUpService isLookup) throws ActionManagerException { return queryServiceProperty(isLookup, "basePath"); } From 3925eb6a79b9714e58d38a07a655699e8ffff5aa Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Mon, 10 May 2021 13:58:23 +0200 Subject: [PATCH 11/16] MDStoreManager model classes moved in dhp-schemas --- .../mdstore/manager/common/model/MDStore.java | 119 --------------- 
.../common/model/MDStoreCurrentVersion.java | 51 ------- .../manager/common/model/MDStoreVersion.java | 99 ------------ .../manager/common/model/MDStoreWithInfo.java | 143 ------------------ 4 files changed, 412 deletions(-) delete mode 100644 dhp-common/src/main/java/eu/dnetlib/data/mdstore/manager/common/model/MDStore.java delete mode 100644 dhp-common/src/main/java/eu/dnetlib/data/mdstore/manager/common/model/MDStoreCurrentVersion.java delete mode 100644 dhp-common/src/main/java/eu/dnetlib/data/mdstore/manager/common/model/MDStoreVersion.java delete mode 100644 dhp-common/src/main/java/eu/dnetlib/data/mdstore/manager/common/model/MDStoreWithInfo.java diff --git a/dhp-common/src/main/java/eu/dnetlib/data/mdstore/manager/common/model/MDStore.java b/dhp-common/src/main/java/eu/dnetlib/data/mdstore/manager/common/model/MDStore.java deleted file mode 100644 index 68fc024af..000000000 --- a/dhp-common/src/main/java/eu/dnetlib/data/mdstore/manager/common/model/MDStore.java +++ /dev/null @@ -1,119 +0,0 @@ - -package eu.dnetlib.data.mdstore.manager.common.model; - -import java.io.Serializable; -import java.util.UUID; - -import javax.persistence.Column; -import javax.persistence.Entity; -import javax.persistence.Id; -import javax.persistence.Table; - -@Entity -@Table(name = "mdstores") -public class MDStore implements Serializable { - - /** */ - private static final long serialVersionUID = 3160530489149700055L; - - @Id - @Column(name = "id") - private String id; - - @Column(name = "format") - private String format; - - @Column(name = "layout") - private String layout; - - @Column(name = "interpretation") - private String interpretation; - - @Column(name = "datasource_name") - private String datasourceName; - - @Column(name = "datasource_id") - private String datasourceId; - - @Column(name = "api_id") - private String apiId; - - public String getId() { - return id; - } - - public void setId(final String id) { - this.id = id; - } - - public String getFormat() { - return 
format; - } - - public void setFormat(final String format) { - this.format = format; - } - - public String getLayout() { - return layout; - } - - public void setLayout(final String layout) { - this.layout = layout; - } - - public String getInterpretation() { - return interpretation; - } - - public void setInterpretation(final String interpretation) { - this.interpretation = interpretation; - } - - public String getDatasourceName() { - return datasourceName; - } - - public void setDatasourceName(final String datasourceName) { - this.datasourceName = datasourceName; - } - - public String getDatasourceId() { - return datasourceId; - } - - public void setDatasourceId(final String datasourceId) { - this.datasourceId = datasourceId; - } - - public String getApiId() { - return apiId; - } - - public void setApiId(final String apiId) { - this.apiId = apiId; - } - - public static MDStore newInstance( - final String format, final String layout, final String interpretation) { - return newInstance(format, layout, interpretation, null, null, null); - } - - public static MDStore newInstance( - final String format, - final String layout, - final String interpretation, - final String dsName, - final String dsId, - final String apiId) { - final MDStore md = new MDStore(); - md.setId("md-" + UUID.randomUUID()); - md.setFormat(format); - md.setLayout(layout); - md.setInterpretation(interpretation); - md.setDatasourceName(dsName); - md.setDatasourceId(dsId); - md.setApiId(apiId); - return md; - } -} diff --git a/dhp-common/src/main/java/eu/dnetlib/data/mdstore/manager/common/model/MDStoreCurrentVersion.java b/dhp-common/src/main/java/eu/dnetlib/data/mdstore/manager/common/model/MDStoreCurrentVersion.java deleted file mode 100644 index f74ab39be..000000000 --- a/dhp-common/src/main/java/eu/dnetlib/data/mdstore/manager/common/model/MDStoreCurrentVersion.java +++ /dev/null @@ -1,51 +0,0 @@ - -package eu.dnetlib.data.mdstore.manager.common.model; - -import java.io.Serializable; - -import 
javax.persistence.Column; -import javax.persistence.Entity; -import javax.persistence.Id; -import javax.persistence.Table; - -@Entity -@Table(name = "mdstore_current_versions") -public class MDStoreCurrentVersion implements Serializable { - - /** */ - private static final long serialVersionUID = -4757725888593745773L; - - @Id - @Column(name = "mdstore") - private String mdstore; - - @Column(name = "current_version") - private String currentVersion; - - public String getMdstore() { - return mdstore; - } - - public void setMdstore(final String mdstore) { - this.mdstore = mdstore; - } - - public String getCurrentVersion() { - return currentVersion; - } - - public void setCurrentVersion(final String currentVersion) { - this.currentVersion = currentVersion; - } - - public static MDStoreCurrentVersion newInstance(final String mdId, final String versionId) { - final MDStoreCurrentVersion cv = new MDStoreCurrentVersion(); - cv.setMdstore(mdId); - cv.setCurrentVersion(versionId); - return cv; - } - - public static MDStoreCurrentVersion newInstance(final MDStoreVersion v) { - return newInstance(v.getMdstore(), v.getId()); - } -} diff --git a/dhp-common/src/main/java/eu/dnetlib/data/mdstore/manager/common/model/MDStoreVersion.java b/dhp-common/src/main/java/eu/dnetlib/data/mdstore/manager/common/model/MDStoreVersion.java deleted file mode 100644 index 7ef24f191..000000000 --- a/dhp-common/src/main/java/eu/dnetlib/data/mdstore/manager/common/model/MDStoreVersion.java +++ /dev/null @@ -1,99 +0,0 @@ - -package eu.dnetlib.data.mdstore.manager.common.model; - -import java.io.Serializable; -import java.util.Date; - -import javax.persistence.Column; -import javax.persistence.Entity; -import javax.persistence.Id; -import javax.persistence.Table; -import javax.persistence.Temporal; -import javax.persistence.TemporalType; - -@Entity -@Table(name = "mdstore_versions") -public class MDStoreVersion implements Serializable { - - /** */ - private static final long serialVersionUID = 
-4763494442274298339L; - - @Id - @Column(name = "id") - private String id; - - @Column(name = "mdstore") - private String mdstore; - - @Column(name = "writing") - private boolean writing; - - @Column(name = "readcount") - private int readCount = 0; - - @Column(name = "lastupdate") - @Temporal(TemporalType.TIMESTAMP) - private Date lastUpdate; - - @Column(name = "size") - private long size = 0; - - public static MDStoreVersion newInstance(final String mdId, final boolean writing) { - final MDStoreVersion t = new MDStoreVersion(); - t.setId(mdId + "-" + new Date().getTime()); - t.setMdstore(mdId); - t.setLastUpdate(null); - t.setWriting(writing); - t.setReadCount(0); - t.setSize(0); - return t; - } - - public String getId() { - return id; - } - - public void setId(final String id) { - this.id = id; - } - - public String getMdstore() { - return mdstore; - } - - public void setMdstore(final String mdstore) { - this.mdstore = mdstore; - } - - public boolean isWriting() { - return writing; - } - - public void setWriting(final boolean writing) { - this.writing = writing; - } - - public int getReadCount() { - return readCount; - } - - public void setReadCount(final int readCount) { - this.readCount = readCount; - } - - public Date getLastUpdate() { - return lastUpdate; - } - - public void setLastUpdate(final Date lastUpdate) { - this.lastUpdate = lastUpdate; - } - - public long getSize() { - return size; - } - - public void setSize(final long size) { - this.size = size; - } -} diff --git a/dhp-common/src/main/java/eu/dnetlib/data/mdstore/manager/common/model/MDStoreWithInfo.java b/dhp-common/src/main/java/eu/dnetlib/data/mdstore/manager/common/model/MDStoreWithInfo.java deleted file mode 100644 index 438359241..000000000 --- a/dhp-common/src/main/java/eu/dnetlib/data/mdstore/manager/common/model/MDStoreWithInfo.java +++ /dev/null @@ -1,143 +0,0 @@ - -package eu.dnetlib.data.mdstore.manager.common.model; - -import java.io.Serializable; -import java.util.Date; - -import 
javax.persistence.Column; -import javax.persistence.Entity; -import javax.persistence.Id; -import javax.persistence.Table; -import javax.persistence.Temporal; -import javax.persistence.TemporalType; - -@Entity -@Table(name = "mdstores_with_info") -public class MDStoreWithInfo implements Serializable { - - /** */ - private static final long serialVersionUID = -8445784770687571492L; - - @Id - @Column(name = "id") - private String id; - - @Column(name = "format") - private String format; - - @Column(name = "layout") - private String layout; - - @Column(name = "interpretation") - private String interpretation; - - @Column(name = "datasource_name") - private String datasourceName; - - @Column(name = "datasource_id") - private String datasourceId; - - @Column(name = "api_id") - private String apiId; - - @Column(name = "current_version") - private String currentVersion; - - @Column(name = "lastupdate") - @Temporal(TemporalType.TIMESTAMP) - private Date lastUpdate; - - @Column(name = "size") - private long size = 0; - - @Column(name = "n_versions") - private long numberOfVersions = 0; - - public String getId() { - return id; - } - - public void setId(final String id) { - this.id = id; - } - - public String getFormat() { - return format; - } - - public void setFormat(final String format) { - this.format = format; - } - - public String getLayout() { - return layout; - } - - public void setLayout(final String layout) { - this.layout = layout; - } - - public String getInterpretation() { - return interpretation; - } - - public void setInterpretation(final String interpretation) { - this.interpretation = interpretation; - } - - public String getDatasourceName() { - return datasourceName; - } - - public void setDatasourceName(final String datasourceName) { - this.datasourceName = datasourceName; - } - - public String getDatasourceId() { - return datasourceId; - } - - public void setDatasourceId(final String datasourceId) { - this.datasourceId = datasourceId; - } - - public String 
getApiId() { - return apiId; - } - - public void setApiId(final String apiId) { - this.apiId = apiId; - } - - public String getCurrentVersion() { - return currentVersion; - } - - public void setCurrentVersion(final String currentVersion) { - this.currentVersion = currentVersion; - } - - public Date getLastUpdate() { - return lastUpdate; - } - - public void setLastUpdate(final Date lastUpdate) { - this.lastUpdate = lastUpdate; - } - - public long getSize() { - return size; - } - - public void setSize(final long size) { - this.size = size; - } - - public long getNumberOfVersions() { - return numberOfVersions; - } - - public void setNumberOfVersions(final long numberOfVersions) { - this.numberOfVersions = numberOfVersions; - } -} From 54217d73ff32a26041a02a0abc8fdaf1b51fbbcb Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Tue, 11 May 2021 09:59:02 +0200 Subject: [PATCH 12/16] removed old parameters from oozie workflow --- .../eu/dnetlib/dhp/doiboost/process/oozie_app/workflow.xml | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/process/oozie_app/workflow.xml b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/process/oozie_app/workflow.xml index e5e29323e..d87339387 100644 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/process/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/process/oozie_app/workflow.xml @@ -53,11 +53,6 @@ - - inputPathOrcid - the ORCID input path - - workingPathOrcid the ORCID working path @@ -82,7 +77,7 @@ ${wf:conf('resumeFrom') eq 'PreprocessMag'} ${wf:conf('resumeFrom') eq 'PreprocessUW'} - ${wf:conf('resumeFrom') eq 'PreprocessORCID'} + ${wf:conf('resumeFrom') eq 'ProcessORCID'} ${wf:conf('resumeFrom') eq 'CreateDOIBoost'} ${wf:conf('resumeFrom') eq 'GenerateActionSet'} From da9d6f3887fb150efa5fe2c7e0c4fd4e19f870d4 Mon Sep 17 00:00:00 2001 From: 
Claudio Atzori Date: Tue, 11 May 2021 10:45:30 +0200 Subject: [PATCH 13/16] mapping datasource.journal only when an issn is available, null otherwise --- .../dnetlib/dhp/schema/oaf/OafMapperUtils.java | 18 ++++++++++++------ .../raw/MigrateDbEntitiesApplication.java | 11 ----------- 2 files changed, 12 insertions(+), 17 deletions(-) diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/OafMapperUtils.java b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/OafMapperUtils.java index 4a66f91dc..19be8b9e4 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/OafMapperUtils.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/OafMapperUtils.java @@ -13,6 +13,8 @@ import java.util.stream.Collectors; import org.apache.commons.lang3.StringUtils; +import com.google.common.base.Joiner; + import eu.dnetlib.dhp.schema.common.ModelSupport; import eu.dnetlib.dhp.utils.DHPUtils; @@ -183,7 +185,8 @@ public class OafMapperUtils { final String issnOnline, final String issnLinking, final DataInfo dataInfo) { - return journal( + + return hasIssn(issnPrinted, issnOnline, issnLinking) ? 
journal( name, issnPrinted, issnOnline, @@ -195,7 +198,7 @@ public class OafMapperUtils { null, null, null, - dataInfo); + dataInfo) : null; } public static Journal journal( @@ -212,10 +215,7 @@ public class OafMapperUtils { final String conferencedate, final DataInfo dataInfo) { - if (StringUtils.isNotBlank(name) - || StringUtils.isNotBlank(issnPrinted) - || StringUtils.isNotBlank(issnOnline) - || StringUtils.isNotBlank(issnLinking)) { + if (StringUtils.isNotBlank(name) || hasIssn(issnPrinted, issnOnline, issnLinking)) { final Journal j = new Journal(); j.setName(name); j.setIssnPrinted(issnPrinted); @@ -235,6 +235,12 @@ public class OafMapperUtils { } } + private static boolean hasIssn(String issnPrinted, String issnOnline, String issnLinking) { + return StringUtils.isNotBlank(issnPrinted) + || StringUtils.isNotBlank(issnOnline) + || StringUtils.isNotBlank(issnLinking); + } + public static DataInfo dataInfo( final Boolean deletedbyinference, final String inferenceprovenance, diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplication.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplication.java index 3adbd244c..f14e966d1 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplication.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplication.java @@ -586,17 +586,6 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i return res; } - private Journal prepareJournal(final ResultSet rs, final DataInfo info) throws SQLException { - if (Objects.isNull(rs)) { - return null; - } else { - - return journal( - rs.getString("officialname"), rs.getString("issnPrinted"), rs.getString("issnOnline"), - rs.getString("issnLinking"), info); - } - } - @Override public void close() throws IOException { super.close(); From 
d9a0bbda7bc7ce1b1eec05659c56ac5631662807 Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Thu, 13 May 2021 12:25:14 +0200 Subject: [PATCH 14/16] implemented new phase in doiboost to make the dataset Distinct by ID --- .../doiboost/SparkGenerateDoiBoost.scala | 60 ++++++++++++++++++- .../crossref/SparkMapDumpIntoOAF.scala | 2 +- .../doiboost/uw/SparkMapUnpayWallToOAF.scala | 2 +- .../convert_crossref_to_oaf_params.json | 6 ++ 4 files changed, 66 insertions(+), 4 deletions(-) create mode 100644 dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/convert_crossref_to_oaf_params.json diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/SparkGenerateDoiBoost.scala b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/SparkGenerateDoiBoost.scala index 11f9828db..462434bbb 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/SparkGenerateDoiBoost.scala +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/SparkGenerateDoiBoost.scala @@ -7,6 +7,7 @@ import eu.dnetlib.dhp.schema.oaf.{Organization, Publication, Relation, Dataset = import eu.dnetlib.doiboost.mag.ConversionUtil import org.apache.commons.io.IOUtils import org.apache.spark.SparkConf +import org.apache.spark.sql.expressions.Aggregator import org.apache.spark.sql.functions.col import org.apache.spark.sql.{Dataset, Encoder, Encoders, SaveMode, SparkSession} import org.slf4j.{Logger, LoggerFactory} @@ -15,6 +16,9 @@ import scala.collection.JavaConverters._ object SparkGenerateDoiBoost { + + + def main(args: Array[String]): Unit = { val logger: Logger = LoggerFactory.getLogger(getClass) @@ -33,6 +37,54 @@ object SparkGenerateDoiBoost { val hostedByMapPath = parser.get("hostedByMapPath") val workingDirPath = parser.get("workingPath") + val crossrefAggregator = new Aggregator[(String, Publication), Publication, Publication] with Serializable { + override def zero: Publication = new Publication + + override def reduce(b: Publication, 
a: (String, Publication)): Publication = { + + if (b == null) { + if (a != null && a._2 != null) { + a._2.setId(a._1) + return a._2 + } + } + else { + if (a != null && a._2 != null) { + b.mergeFrom(a._2) + b.setId(a._1) + val authors =AuthorMerger.mergeAuthor(b.getAuthor, a._2.getAuthor) + b.setAuthor(authors) + return b + } + } + new Publication + } + + override def merge(b1: Publication, b2: Publication): Publication = { + if (b1 == null) { + if (b2 != null) + return b2 + } + else { + if (b2 != null ) { + b1.mergeFrom(b2) + val authors =AuthorMerger.mergeAuthor(b1.getAuthor, b2.getAuthor) + b1.setAuthor(authors) + if (b2.getId!= null && b2.getId.nonEmpty) + b1.setId(b2.getId) + return b1 + } + } + new Publication + } + + override def finish(reduction: Publication): Publication = reduction + + override def bufferEncoder: Encoder[Publication] = Encoders.kryo[Publication] + + override def outputEncoder: Encoder[Publication] = Encoders.kryo[Publication] + } + implicit val mapEncoderPub: Encoder[Publication] = Encoders.kryo[Publication] implicit val mapEncoderOrg: Encoder[Organization] = Encoders.kryo[Organization] @@ -77,6 +129,10 @@ object SparkGenerateDoiBoost { doiBoostPublication.joinWith(hostedByDataset, doiBoostPublication("_1").equalTo(hostedByDataset("_1")), "left") .map(DoiBoostMappingUtil.fixPublication) + .map(p => (p.getId, p)) + .groupByKey(_._1) + .agg(crossrefAggregator.toColumn) + .map(p => p._2) .write.mode(SaveMode.Overwrite).save(s"$workingDirPath/doiBoostPublicationFiltered") val affiliationPath = parser.get("affiliationPath") @@ -139,6 +195,6 @@ object SparkGenerateDoiBoost { else null }).filter(o=> o!=null).write.mode(SaveMode.Overwrite).save(s"$workingDirPath/doiBoostOrganization") - } + } -} +} \ No newline at end of file diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/SparkMapDumpIntoOAF.scala b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/SparkMapDumpIntoOAF.scala index 
0036459bf..01bf9dd62 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/SparkMapDumpIntoOAF.scala +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/SparkMapDumpIntoOAF.scala @@ -20,7 +20,7 @@ object SparkMapDumpIntoOAF { val logger: Logger = LoggerFactory.getLogger(SparkMapDumpIntoOAF.getClass) val conf: SparkConf = new SparkConf() - val parser = new ArgumentApplicationParser(IOUtils.toString(SparkMapDumpIntoOAF.getClass.getResourceAsStream("/eu/dnetlib/dhp/doiboost/convert_map_to_oaf_params.json"))) + val parser = new ArgumentApplicationParser(IOUtils.toString(SparkMapDumpIntoOAF.getClass.getResourceAsStream("/eu/dnetlib/dhp/doiboost/convert_crossref_to_oaf_params.json"))) parser.parseArgument(args) val spark: SparkSession = SparkSession diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/uw/SparkMapUnpayWallToOAF.scala b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/uw/SparkMapUnpayWallToOAF.scala index 9ac6a0838..83205e345 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/uw/SparkMapUnpayWallToOAF.scala +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/uw/SparkMapUnpayWallToOAF.scala @@ -18,7 +18,7 @@ object SparkMapUnpayWallToOAF { val logger: Logger = LoggerFactory.getLogger(SparkMapDumpIntoOAF.getClass) val conf: SparkConf = new SparkConf() - val parser = new ArgumentApplicationParser(IOUtils.toString(SparkMapDumpIntoOAF.getClass.getResourceAsStream("/eu/dnetlib/dhp/doiboost/convert_map_to_oaf_params.json"))) + val parser = new ArgumentApplicationParser(IOUtils.toString(SparkMapDumpIntoOAF.getClass.getResourceAsStream("/eu/dnetlib/dhp/doiboost/convert_crossref_to_oaf_params.json"))) parser.parseArgument(args) val spark: SparkSession = SparkSession diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/convert_crossref_to_oaf_params.json 
b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/convert_crossref_to_oaf_params.json new file mode 100644 index 000000000..297452465 --- /dev/null +++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/convert_crossref_to_oaf_params.json @@ -0,0 +1,6 @@ +[ + {"paramName":"s", "paramLongName":"sourcePath", "paramDescription": "the source dir path", "paramRequired": true}, + {"paramName":"t", "paramLongName":"targetPath", "paramDescription": "the target dir path", "paramRequired": true}, + {"paramName":"m", "paramLongName":"master", "paramDescription": "the master name", "paramRequired": true} + +] \ No newline at end of file From eeb8bcf07569c6ebd6fb6c7fb13f39acd256dbde Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Tue, 18 May 2021 11:10:07 +0200 Subject: [PATCH 15/16] using constants from ModelConstants --- .../java/eu/dnetlib/dhp/export/DLIToOAF.scala | 57 ++++++++++--------- 1 file changed, 29 insertions(+), 28 deletions(-) diff --git a/dhp-workflows/dhp-graph-provision-scholexplorer/src/main/java/eu/dnetlib/dhp/export/DLIToOAF.scala b/dhp-workflows/dhp-graph-provision-scholexplorer/src/main/java/eu/dnetlib/dhp/export/DLIToOAF.scala index 8043236e0..996b4a821 100644 --- a/dhp-workflows/dhp-graph-provision-scholexplorer/src/main/java/eu/dnetlib/dhp/export/DLIToOAF.scala +++ b/dhp-workflows/dhp-graph-provision-scholexplorer/src/main/java/eu/dnetlib/dhp/export/DLIToOAF.scala @@ -6,6 +6,7 @@ import java.time.LocalDateTime import java.time.format.DateTimeFormatter import eu.dnetlib.dhp.common.PacePerson import eu.dnetlib.dhp.schema.action.AtomicAction +import eu.dnetlib.dhp.schema.common.ModelConstants import eu.dnetlib.dhp.schema.oaf.{Author, Dataset, ExternalReference, Field, Instance, KeyValue, Oaf, Publication, Qualifier, Relation, Result, StructuredProperty} import eu.dnetlib.dhp.schema.scholexplorer.{DLIDataset, DLIPublication} import eu.dnetlib.dhp.utils.DHPUtils @@ -43,18 +44,18 @@ object DLIToOAF { val 
relationTypeMapping: Map[String, (String, String)] = Map( - "IsReferencedBy" -> ("isRelatedTo", "relationship"), - "References" -> ("isRelatedTo", "relationship"), - "IsRelatedTo" -> ("isRelatedTo", "relationship"), - "IsSupplementedBy" -> ("isSupplementedBy", "supplement"), - "Documents"-> ("isRelatedTo", "relationship"), - "Cites" -> ("cites", "citation"), - "Unknown" -> ("isRelatedTo", "relationship"), - "IsSourceOf" -> ("isRelatedTo", "relationship"), - "IsCitedBy" -> ("IsCitedBy", "citation"), - "Reviews" -> ("reviews", "review"), - "Describes" -> ("isRelatedTo", "relationship"), - "HasAssociationWith" -> ("isRelatedTo", "relationship") + "IsReferencedBy" -> (ModelConstants.IS_RELATED_TO, ModelConstants.RELATIONSHIP), + "References" -> (ModelConstants.IS_RELATED_TO, ModelConstants.RELATIONSHIP), + "IsRelatedTo" -> (ModelConstants.IS_RELATED_TO, ModelConstants.RELATIONSHIP), + "IsSupplementedBy" -> (ModelConstants.IS_SUPPLEMENTED_BY, ModelConstants.SUPPLEMENT), + "Documents"-> (ModelConstants.IS_RELATED_TO, ModelConstants.RELATIONSHIP), + "Cites" -> (ModelConstants.CITES, ModelConstants.CITATION), + "Unknown" -> (ModelConstants.IS_RELATED_TO, ModelConstants.RELATIONSHIP), + "IsSourceOf" -> (ModelConstants.IS_RELATED_TO, ModelConstants.RELATIONSHIP), + "IsCitedBy" -> (ModelConstants.IS_CITED_BY, ModelConstants.CITATION), + "Reviews" -> (ModelConstants.REVIEWS, ModelConstants.REVIEW), + "Describes" -> (ModelConstants.IS_RELATED_TO, ModelConstants.RELATIONSHIP), + "HasAssociationWith" -> (ModelConstants.IS_RELATED_TO, ModelConstants.RELATIONSHIP) ) val expectecdPidType = List("uniprot", "ena", "chembl", "ncbi-n", "ncbi-p", "genbank", "pdb", "url") @@ -83,11 +84,11 @@ object DLIToOAF { val rel_inverse: Map[String, String] = Map( - "isRelatedTo" -> "isRelatedTo", - "isSupplementedBy" -> "isSupplementTo", - "cites" -> "IsCitedBy", - "IsCitedBy" -> "cites", - "reviews" -> "IsReviewedBy" + ModelConstants.IS_RELATED_TO -> ModelConstants.IS_RELATED_TO, + 
ModelConstants.IS_SUPPLEMENTED_BY -> ModelConstants.IS_SUPPLEMENT_TO, + ModelConstants.CITES -> ModelConstants.IS_CITED_BY, + ModelConstants.IS_CITED_BY -> ModelConstants.CITES, + ModelConstants.REVIEWS -> ModelConstants.IS_REVIEWED_BY ) @@ -158,7 +159,7 @@ object DLIToOAF { result.setUrl(e.url) result.setRefidentifier(e.pid) result.setDataInfo(generateDataInfo()) - result.setQualifier(createQualifier(e.classId, "dnet:externalReference_typologies")) + result.setQualifier(createQualifier(e.classId, ModelConstants.DNET_EXTERNAL_REFERENCE_TYPE)) result }) publication.setExternalReference(eRefs.asJava) @@ -237,7 +238,7 @@ object DLIToOAF { if (inputPublication.getAuthor == null || inputPublication.getAuthor.isEmpty) return null result.setAuthor(inputPublication.getAuthor.asScala.map(convertAuthor).asJava) - result.setResulttype(createQualifier(inputPublication.getResulttype.getClassid, inputPublication.getResulttype.getClassname, "dnet:result_typologies", "dnet:result_typologies")) + result.setResulttype(createQualifier(inputPublication.getResulttype.getClassid, inputPublication.getResulttype.getClassname, ModelConstants.DNET_RESULT_TYPOLOGIES, ModelConstants.DNET_RESULT_TYPOLOGIES)) if (inputPublication.getSubject != null) result.setSubject(inputPublication.getSubject.asScala.map(convertSubject).asJava) @@ -258,7 +259,7 @@ object DLIToOAF { result.setDateofacceptance(asField(inputPublication.getRelevantdate.get(0).getValue)) result.setPublisher(inputPublication.getPublisher) result.setSource(inputPublication.getSource) - result.setBestaccessright(createQualifier("UNKNOWN", "not available", "dnet:access_modes", "dnet:access_modes")) + result.setBestaccessright(createQualifier("UNKNOWN", "not available", ModelConstants.DNET_ACCESS_MODES, ModelConstants.DNET_ACCESS_MODES)) val dois = result.getPid.asScala.filter(p => "doi".equalsIgnoreCase(p.getQualifier.getClassname)).map(p => p.getValue) if (dois.isEmpty) @@ -316,7 +317,7 @@ object DLIToOAF { if (d.getAuthor == null || 
d.getAuthor.isEmpty) return null result.setAuthor(d.getAuthor.asScala.map(convertAuthor).asJava) - result.setResulttype(createQualifier(d.getResulttype.getClassid, d.getResulttype.getClassname, "dnet:result_typologies", "dnet:result_typologies")) + result.setResulttype(createQualifier(d.getResulttype.getClassid, d.getResulttype.getClassname, ModelConstants.DNET_RESULT_TYPOLOGIES, ModelConstants.DNET_RESULT_TYPOLOGIES)) if (d.getSubject != null) result.setSubject(d.getSubject.asScala.map(convertSubject).asJava) @@ -337,7 +338,7 @@ object DLIToOAF { result.setDateofacceptance(asField(d.getRelevantdate.get(0).getValue)) result.setPublisher(d.getPublisher) result.setSource(d.getSource) - result.setBestaccessright(createQualifier("UNKNOWN", "not available", "dnet:access_modes", "dnet:access_modes")) + result.setBestaccessright(createQualifier("UNKNOWN", "not available", ModelConstants.DNET_ACCESS_MODES, ModelConstants.DNET_ACCESS_MODES)) val instance_urls = if (fpids.head.length < 5) s"https://www.rcsb.org/structure/${fpids.head}" else s"https://dx.doi.org/${fpids.head}" @@ -364,13 +365,13 @@ object DLIToOAF { val i = new Instance i.setUrl(List(url).asJava) if (dataset) - i.setInstancetype(createQualifier("0021", "Dataset", "dnet:publication_resource", "dnet:publication_resource")) + i.setInstancetype(createQualifier("0021", "Dataset", ModelConstants.DNET_PUBLICATION_RESOURCE, ModelConstants.DNET_PUBLICATION_RESOURCE)) else - i.setInstancetype(createQualifier("0000", "Unknown", "dnet:publication_resource", "dnet:publication_resource")) + i.setInstancetype(createQualifier("0000", "Unknown", ModelConstants.DNET_PUBLICATION_RESOURCE, ModelConstants.DNET_PUBLICATION_RESOURCE)) if (originalInstance != null && originalInstance.getHostedby != null) i.setHostedby(originalInstance.getHostedby) - i.setAccessright(createQualifier("UNKNOWN", "not available", "dnet:access_modes", "dnet:access_modes")) + i.setAccessright(createQualifier("UNKNOWN", "not available", 
ModelConstants.DNET_ACCESS_MODES, ModelConstants.DNET_ACCESS_MODES)) i.setDateofacceptance(doa) i @@ -380,19 +381,19 @@ object DLIToOAF { def patchRelevantDate(d: StructuredProperty): StructuredProperty = { - d.setQualifier(createQualifier("UNKNOWN", "dnet:dataCite_date")) + d.setQualifier(createQualifier("UNKNOWN", ModelConstants.DNET_DATA_CITE_DATE)) d } def patchTitle(t: StructuredProperty): StructuredProperty = { - t.setQualifier(createQualifier("main title", "dnet:dataCite_title")) + t.setQualifier(createQualifier("main title","dnet:dataCite_title")) t } def convertSubject(s: StructuredProperty): StructuredProperty = { - s.setQualifier(createQualifier("keyword", "dnet:subject_classification_typologies")) + s.setQualifier(createQualifier("keyword", ModelConstants.DNET_SUBJECT_TYPOLOGIES)) s From 239d0f0a9af89cbbf695a838629410c6c22e203d Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Tue, 18 May 2021 16:12:11 +0200 Subject: [PATCH 16/16] ROR actionset import workflow backported from branch stable_ids --- .../ror/GenerateRorActionSetJob.java | 215 ++++++++++++++++++ .../dhp/actionmanager/ror/model/Address.java | 122 ++++++++++ .../dhp/actionmanager/ror/model/Country.java | 34 +++ .../ror/model/ExternalIdType.java | 42 ++++ .../ror/model/ExternalIdTypeDeserializer.java | 38 ++++ .../ror/model/GeonamesAdmin.java | 56 +++++ .../actionmanager/ror/model/GeonamesCity.java | 100 ++++++++ .../dhp/actionmanager/ror/model/Label.java | 34 +++ .../dhp/actionmanager/ror/model/License.java | 34 +++ .../actionmanager/ror/model/NameAndCode.java | 34 +++ .../actionmanager/ror/model/Relationship.java | 45 ++++ .../ror/model/RorOrganization.java | 192 ++++++++++++++++ .../ror/action_set_parameters.json | 14 ++ .../ror/oozie_app/config-default.xml | 58 +++++ .../actionmanager/ror/oozie_app/workflow.xml | 55 +++++ .../ror/GenerateRorActionSetJobTest.java | 46 ++++ .../dhp/actionmanager/ror/ror_org.json | 123 ++++++++++ 17 files changed, 1242 insertions(+) create mode 100644 
dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/GenerateRorActionSetJob.java create mode 100644 dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/Address.java create mode 100644 dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/Country.java create mode 100644 dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/ExternalIdType.java create mode 100644 dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/ExternalIdTypeDeserializer.java create mode 100644 dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/GeonamesAdmin.java create mode 100644 dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/GeonamesCity.java create mode 100644 dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/Label.java create mode 100644 dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/License.java create mode 100644 dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/NameAndCode.java create mode 100644 dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/Relationship.java create mode 100644 dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/RorOrganization.java create mode 100644 dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/ror/action_set_parameters.json create mode 100644 dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/ror/oozie_app/config-default.xml create mode 100644 dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/ror/oozie_app/workflow.xml create mode 100644 dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/ror/GenerateRorActionSetJobTest.java create mode 100644 
dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/ror/ror_org.json

diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/GenerateRorActionSetJob.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/GenerateRorActionSetJob.java
new file mode 100644
index 000000000..6e25f1654
--- /dev/null
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/GenerateRorActionSetJob.java
@@ -0,0 +1,243 @@
+
+package eu.dnetlib.dhp.actionmanager.ror;
+
+import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
+import static eu.dnetlib.dhp.schema.common.ModelConstants.ENTITYREGISTRY_PROVENANCE_ACTION;
+import static eu.dnetlib.dhp.schema.oaf.OafMapperUtils.dataInfo;
+import static eu.dnetlib.dhp.schema.oaf.OafMapperUtils.field;
+import static eu.dnetlib.dhp.schema.oaf.OafMapperUtils.listKeyValues;
+import static eu.dnetlib.dhp.schema.oaf.OafMapperUtils.qualifier;
+import static eu.dnetlib.dhp.schema.oaf.OafMapperUtils.structuredProperty;
+
+import java.io.InputStream;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Date;
+import java.util.LinkedHashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Optional;
+import java.util.Set;
+import java.util.stream.Collectors;
+
+import org.apache.commons.io.IOUtils;
+import org.apache.commons.lang3.StringUtils;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapred.SequenceFileOutputFormat;
+import org.apache.spark.SparkConf;
+import org.apache.spark.api.java.function.MapFunction;
+import org.apache.spark.sql.Dataset;
+import org.apache.spark.sql.Encoders;
+import org.apache.spark.sql.SparkSession;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.fasterxml.jackson.databind.ObjectMapper;
+
+import eu.dnetlib.dhp.actionmanager.project.SparkAtomicActionJob;
+import eu.dnetlib.dhp.actionmanager.ror.model.ExternalIdType;
+import eu.dnetlib.dhp.actionmanager.ror.model.RorOrganization;
+import eu.dnetlib.dhp.application.ArgumentApplicationParser;
+import eu.dnetlib.dhp.common.HdfsSupport;
+import eu.dnetlib.dhp.schema.action.AtomicAction;
+import eu.dnetlib.dhp.schema.common.ModelConstants;
+import eu.dnetlib.dhp.schema.oaf.DataInfo;
+import eu.dnetlib.dhp.schema.oaf.Field;
+import eu.dnetlib.dhp.schema.oaf.KeyValue;
+import eu.dnetlib.dhp.schema.oaf.Organization;
+import eu.dnetlib.dhp.schema.oaf.Qualifier;
+import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
+import eu.dnetlib.dhp.utils.DHPUtils;
+import scala.Tuple2;
+
+/**
+ * Spark job that reads a JSON array of ROR organizations and stores them as a Hadoop sequence file of
+ * {@link AtomicAction}s, each one wrapping the corresponding OpenAIRE {@link Organization}.
+ */
+public class GenerateRorActionSetJob {
+
+	private static final Logger log = LoggerFactory.getLogger(GenerateRorActionSetJob.class);
+
+	private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
+
+	private static final String ROR_NS_PREFIX = "ror_________";
+
+	private static final List<KeyValue> ROR_COLLECTED_FROM = listKeyValues(
+		"10|openaire____::993a7ae7a863813cf95028b50708e222", "ROR");
+
+	private static final DataInfo ROR_DATA_INFO = dataInfo(
+		false, "", false, false, ENTITYREGISTRY_PROVENANCE_ACTION, "0.92");
+
+	private static final Qualifier ROR_PID_TYPE = qualifier(
+		"ROR", "ROR", ModelConstants.DNET_PID_TYPES, ModelConstants.DNET_PID_TYPES);
+
+	/**
+	 * Parses the job arguments, removes the output path and generates the action set.
+	 *
+	 * @param args the arguments described by {@code action_set_parameters.json}
+	 * @throws Exception if the arguments cannot be parsed or the job fails
+	 */
+	public static void main(final String[] args) throws Exception {
+
+		final String jsonConfiguration = IOUtils
+			.toString(
+				GenerateRorActionSetJob.class
+					.getResourceAsStream("/eu/dnetlib/dhp/actionmanager/ror/action_set_parameters.json"));
+
+		final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
+
+		parser.parseArgument(args);
+
+		final Boolean isSparkSessionManaged = Optional
+			.ofNullable(parser.get("isSparkSessionManaged"))
+			.map(Boolean::valueOf)
+			.orElse(Boolean.TRUE);
+
+		log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
+
+		final String inputPath = parser.get("inputPath");
+		log.info("inputPath: {}", inputPath);
+
+		final String outputPath = parser.get("outputPath");
+		log.info("outputPath: {}", outputPath);
+
+		final SparkConf conf = new SparkConf();
+
+		runWithSparkSession(conf, isSparkSessionManaged, spark -> {
+			removeOutputDir(spark, outputPath);
+			processRorOrganizations(spark, inputPath, outputPath);
+		});
+	}
+
+	/** Removes {@code path} using the Hadoop configuration of the given Spark session. */
+	private static void removeOutputDir(final SparkSession spark, final String path) {
+		HdfsSupport.remove(path, spark.sparkContext().hadoopConfiguration());
+	}
+
+	/**
+	 * Converts every ROR organization read from {@code inputPath} into an {@link Organization}, wraps it into an
+	 * {@link AtomicAction} and saves the (class name, JSON) pairs as a sequence file under {@code outputPath}.
+	 */
+	private static void processRorOrganizations(final SparkSession spark,
+		final String inputPath,
+		final String outputPath) throws Exception {
+
+		readInputPath(spark, inputPath)
+			.map(
+				(MapFunction<RorOrganization, Organization>) GenerateRorActionSetJob::convertRorOrg,
+				Encoders.bean(Organization.class))
+			.toJavaRDD()
+			.map(o -> new AtomicAction<>(Organization.class, o))
+			.mapToPair(
+				aa -> new Tuple2<>(new Text(aa.getClazz().getCanonicalName()),
+					new Text(OBJECT_MAPPER.writeValueAsString(aa))))
+			.saveAsHadoopFile(outputPath, Text.class, Text.class, SequenceFileOutputFormat.class);
+	}
+
+	/**
+	 * Converts a ROR record into an OpenAIRE {@link Organization}.
+	 *
+	 * @param r the ROR organization
+	 * @return the organization, whose id is {@code 20|ror_________::} followed by the md5 of the ROR id
+	 */
+	protected static Organization convertRorOrg(final RorOrganization r) {
+
+		final Date now = new Date();
+
+		final Organization o = new Organization();
+
+		o.setId(String.format("20|%s::%s", ROR_NS_PREFIX, DHPUtils.md5(r.getId())));
+		o.setOriginalId(Arrays.asList(String.format("%s::%s", ROR_NS_PREFIX, r.getId())));
+		o.setCollectedfrom(ROR_COLLECTED_FROM);
+		o.setPid(pids(r));
+		o.setDateofcollection(now.toString());
+		o.setDateoftransformation(now.toString());
+		o.setExtraInfo(new ArrayList<>()); // Values not present in the file
+		o.setOaiprovenance(null); // Values not present in the file
+		o.setLegalshortname(field(r.getAcronyms().stream().findFirst().orElse(r.getName()), ROR_DATA_INFO));
+		o.setLegalname(field(r.getName(), ROR_DATA_INFO));
+		o.setAlternativeNames(alternativeNames(r));
+		o.setWebsiteurl(field(r.getLinks().stream().findFirst().orElse(null), ROR_DATA_INFO));
+		o.setLogourl(null);
+		o.setEclegalbody(null);
+		o.setEclegalperson(null);
+		o.setEcnonprofit(null);
+		o.setEcresearchorganization(null);
+		o.setEchighereducation(null);
+		o.setEcinternationalorganizationeurinterests(null);
+		o.setEcinternationalorganization(null);
+		o.setEcenterprise(null);
+		o.setEcsmevalidated(null);
+		o.setEcnutscode(null);
+		if (r.getCountry() != null) {
+			o
+				.setCountry(
+					qualifier(
+						r.getCountry().getCountryCode(), r
+							.getCountry()
+							.getCountryName(),
+						ModelConstants.DNET_COUNTRY_TYPE, ModelConstants.DNET_COUNTRY_TYPE));
+		} else {
+			o.setCountry(null);
+		}
+		o.setDataInfo(ROR_DATA_INFO);
+		o.setLastupdatetimestamp(now.getTime());
+
+		return o;
+	}
+
+	/** Returns the ROR id of {@code r} plus all its external ids, each typed after its {@code external_ids} key. */
+	private static List<StructuredProperty> pids(final RorOrganization r) {
+		final List<StructuredProperty> pids = new ArrayList<>();
+		pids.add(structuredProperty(r.getId(), ROR_PID_TYPE, ROR_DATA_INFO));
+
+		for (final Map.Entry<String, ExternalIdType> e : r.getExternalIds().entrySet()) {
+			final String type = e.getKey();
+			final List<String> all = e.getValue().getAll();
+			if (all != null) {
+				final Qualifier qualifier = qualifier(
+					type, type, ModelConstants.DNET_PID_TYPES, ModelConstants.DNET_PID_TYPES);
+				for (final String pid : all) {
+					pids
+						.add(structuredProperty(pid, qualifier, ROR_DATA_INFO));
+				}
+			}
+		}
+
+		return pids;
+	}
+
+	/** Returns the distinct, non-blank aliases, acronyms and labels of {@code r}, in that order. */
+	private static List<Field<String>> alternativeNames(final RorOrganization r) {
+		final Set<String> names = new LinkedHashSet<>();
+		names.addAll(r.getAliases());
+		names.addAll(r.getAcronyms());
+		r.getLabels().forEach(l -> names.add(l.getLabel()));
+
+		return names
+			.stream()
+			.filter(StringUtils::isNotBlank)
+			.map(s -> field(s, ROR_DATA_INFO))
+			.collect(Collectors.toList());
+	}
+
+	/**
+	 * Parses the JSON array stored at {@code path} and exposes it as a dataset of {@link RorOrganization}.
+	 */
+	private static Dataset<RorOrganization> readInputPath(
+		final SparkSession spark,
+		final String path) throws Exception {
+
+		// FileSystem.get(...) hands out a JVM-wide cached instance that other components may be sharing:
+		// closing it here can break their later I/O with "Filesystem closed", so only the stream is closed.
+		final FileSystem fileSystem = FileSystem.get(new Configuration());
+		try (final InputStream is = fileSystem.open(new Path(path))) {
+			final RorOrganization[] arr = OBJECT_MAPPER.readValue(is, RorOrganization[].class);
+			return spark.createDataset(Arrays.asList(arr), Encoders.bean(RorOrganization.class));
+		}
+	}
+
+}
diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/Address.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/Address.java
new file mode 100644
index 000000000..b566a5501
--- /dev/null
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/Address.java
@@ -0,0 +1,123 @@
+
+package eu.dnetlib.dhp.actionmanager.ror.model;
+
+import java.io.Serializable;
+
+import com.fasterxml.jackson.annotation.JsonProperty;
+
+/** Serializable bean mapping the JSON representation of an address in the ROR model. */
+public class Address implements Serializable {
+
+	@JsonProperty("lat")
+	private Float lat;
+
+	@JsonProperty("state_code")
+	private String stateCode;
+
+	@JsonProperty("country_geonames_id")
+	private Integer countryGeonamesId;
+
+	@JsonProperty("lng")
+	private Float lng;
+
+	@JsonProperty("state")
+	private String state;
+
+	@JsonProperty("city")
+	private String city;
+
+	@JsonProperty("geonames_city")
+	private GeonamesCity geonamesCity;
+
+	@JsonProperty("postcode")
+	private String postcode;
+
+	@JsonProperty("primary")
+	private Boolean primary;
+
+	@JsonProperty("line")
+	private String line;
+
+	private static final long serialVersionUID = 2444635485253443195L;
+
+	public Float getLat() {
+		return lat;
+	}
+
+	public void setLat(final Float lat) {
+		this.lat = lat;
+	}
+
+	public String getStateCode() {
+		return stateCode;
+	}
+
+	public void setStateCode(final String stateCode) {
+		this.stateCode = stateCode;
+	}
+
+	public Integer getCountryGeonamesId() {
+		return countryGeonamesId;
+	}
+
+	public void setCountryGeonamesId(final Integer countryGeonamesId) {
+		this.countryGeonamesId = countryGeonamesId;
+	}
+
+	public Float getLng() {
+		return lng;
+	}
+
+	public void setLng(final Float lng) {
+		this.lng = lng;
+	}
+
+	public String getState() {
+		return state;
+	}
+
+	public void setState(final String state) {
+		this.state = state;
+	}
+
+	public String getCity() {
+		return city;
+	}
+
+	public void setCity(final String city) {
+		this.city = city;
+	}
+
+	public GeonamesCity getGeonamesCity() {
+		return geonamesCity;
+	}
+
+	public void setGeonamesCity(final GeonamesCity geonamesCity) {
+		this.geonamesCity = geonamesCity;
+	}
+
+	public String getPostcode() {
+		return postcode;
+	}
+
+	public void setPostcode(final String postcode) {
+		this.postcode = postcode;
+	}
+
+	public Boolean getPrimary() {
+		return primary;
+	}
+
+	public void setPrimary(final Boolean primary) {
+		this.primary = primary;
+	}
+
+	public String getLine() {
+		return line;
+	}
+
+	public void setLine(final String line) {
+		this.line = line;
+	}
+
+}
diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/Country.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/Country.java
new file mode 100644
index 000000000..3dab60a9f
--- /dev/null
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/Country.java
@@ -0,0 +1,35 @@
+
+package eu.dnetlib.dhp.actionmanager.ror.model;
+
+import java.io.Serializable;
+
+import com.fasterxml.jackson.annotation.JsonProperty;
+
+/** Serializable bean mapping the JSON {@code country_code} / {@code country_name} pair of the ROR model. */
+public class Country implements Serializable {
+
+	@JsonProperty("country_code")
+	private String countryCode;
+
+	@JsonProperty("country_name")
+	private String countryName;
+
+	private static final long serialVersionUID = 4357848706229493627L;
+
+	public String getCountryCode() {
+		return countryCode;
+	}
+
+	public void setCountryCode(final String countryCode) {
+		this.countryCode = countryCode;
+	}
+
+	public String getCountryName() {
+		return countryName;
+	}
+
+	public void setCountryName(final String countryName) {
+		this.countryName = countryName;
+	}
+
+}
diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/ExternalIdType.java
b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/ExternalIdType.java
new file mode 100644
index 000000000..406bfd82c
--- /dev/null
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/ExternalIdType.java
@@ -0,0 +1,43 @@
+
+package eu.dnetlib.dhp.actionmanager.ror.model;
+
+import java.io.Serializable;
+import java.util.List;
+
+import com.fasterxml.jackson.databind.annotation.JsonDeserialize;
+
+/** Identifiers of one external type: the full list ({@code all}) and the {@code preferred} one. */
+@JsonDeserialize(using = ExternalIdTypeDeserializer.class)
+public class ExternalIdType implements Serializable {
+
+	private List<String> all;
+
+	private String preferred;
+
+	private static final long serialVersionUID = 2616688352998387611L;
+
+	public ExternalIdType() {
+	}
+
+	public ExternalIdType(final List<String> all, final String preferred) {
+		this.all = all;
+		this.preferred = preferred;
+	}
+
+	public List<String> getAll() {
+		return all;
+	}
+
+	public void setAll(final List<String> all) {
+		this.all = all;
+	}
+
+	public String getPreferred() {
+		return preferred;
+	}
+
+	public void setPreferred(final String preferred) {
+		this.preferred = preferred;
+	}
+
+}
diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/ExternalIdTypeDeserializer.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/ExternalIdTypeDeserializer.java
new file mode 100644
index 000000000..3fd0c9250
--- /dev/null
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/ExternalIdTypeDeserializer.java
@@ -0,0 +1,49 @@
+
+package eu.dnetlib.dhp.actionmanager.ror.model;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+import com.fasterxml.jackson.core.JsonParser;
+import com.fasterxml.jackson.core.JsonProcessingException;
+import com.fasterxml.jackson.core.ObjectCodec;
+import com.fasterxml.jackson.databind.DeserializationContext;
+import com.fasterxml.jackson.databind.JsonDeserializer;
+import com.fasterxml.jackson.databind.JsonNode;
+
+/**
+ * Deserializes an {@link ExternalIdType}, accepting its {@code all} member both as a JSON array and as a single
+ * value. Missing or {@code null} values are skipped instead of failing or being read as the string "null".
+ */
+public class ExternalIdTypeDeserializer extends JsonDeserializer<ExternalIdType> {
+
+	@Override
+	public ExternalIdType deserialize(final JsonParser p, final DeserializationContext ctxt)
+		throws IOException, JsonProcessingException {
+		final ObjectCodec oc = p.getCodec();
+		final JsonNode node = oc.readTree(p);
+
+		final JsonNode allNode = node.get("all");
+
+		final String preferred = textOrNull(node.get("preferred"));
+
+		final List<String> all = new ArrayList<>();
+
+		if (allNode != null && allNode.isArray()) {
+			allNode.elements().forEachRemaining(x -> all.add(textOrNull(x)));
+		} else {
+			all.add(textOrNull(allNode));
+		}
+		// drop the entries that were missing or JSON null
+		all.removeIf(s -> s == null);
+
+		return new ExternalIdType(all, preferred);
+	}
+
+	/** Returns the text of {@code n}, or {@code null} when the node is missing or a JSON null. */
+	private static String textOrNull(final JsonNode n) {
+		return n == null || n.isNull() ? null : n.asText();
+	}
+
+}
diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/GeonamesAdmin.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/GeonamesAdmin.java
new file mode 100644
index 000000000..9616db447
--- /dev/null
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/GeonamesAdmin.java
@@ -0,0 +1,57 @@
+
+package eu.dnetlib.dhp.actionmanager.ror.model;
+
+import java.io.Serializable;
+
+import com.fasterxml.jackson.annotation.JsonProperty;
+
+/** Serializable bean mapping the JSON fields {@code ascii_name}, {@code id}, {@code name} and {@code code}. */
+public class GeonamesAdmin implements Serializable {
+
+	@JsonProperty("ascii_name")
+	private String asciiName;
+
+	@JsonProperty("id")
+	private Integer id;
+
+	@JsonProperty("name")
+	private String name;
+
+	@JsonProperty("code")
+	private String code;
+
+	private static final long serialVersionUID = 7294958526269195673L;
+
+	public String getAsciiName() {
+		return asciiName;
+	}
+
+	public void setAsciiName(final String asciiName) {
+		this.asciiName = asciiName;
+	}
+
+	public Integer getId() {
+		return id;
+	}
+
+	public void setId(final Integer id) {
+		this.id = id;
+	}
+
+	public String getName() {
+		return name;
+	}
+
+	public void setName(final String name) {
+		this.name = name;
+	}
+
+	public String getCode() {
+		return code;
+	}
+
+	public void setCode(final String code) {
+		this.code = code;
+	}
+
+}
diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/GeonamesCity.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/GeonamesCity.java
new file mode 100644
index 000000000..2b0487168
--- /dev/null
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/GeonamesCity.java
@@ -0,0 +1,101 @@
+
+package eu.dnetlib.dhp.actionmanager.ror.model;
+
+import java.io.Serializable;
+
+import com.fasterxml.jackson.annotation.JsonProperty;
+
+/** Serializable bean mapping the JSON {@code geonames_city} object referenced by {@link Address}. */
+public class GeonamesCity implements Serializable {
+
+	@JsonProperty("geonames_admin1")
+	private GeonamesAdmin geonamesAdmin1;
+
+	@JsonProperty("geonames_admin2")
+	private GeonamesAdmin geonamesAdmin2;
+
+	@JsonProperty("city")
+	private String city;
+
+	@JsonProperty("id")
+	private Integer id;
+
+	@JsonProperty("nuts_level1")
+	private NameAndCode nutsLevel1;
+
+	@JsonProperty("nuts_level2")
+	private NameAndCode nutsLevel2;
+
+	@JsonProperty("nuts_level3")
+	private NameAndCode nutsLevel3;
+
+	@JsonProperty("license")
+	private License license;
+
+	private static final long serialVersionUID = -8389480201526252955L;
+
+	public NameAndCode getNutsLevel2() {
+		return nutsLevel2;
+	}
+
+	public void setNutsLevel2(final NameAndCode nutsLevel2) {
+		this.nutsLevel2 = nutsLevel2;
+	}
+
+	public GeonamesAdmin getGeonamesAdmin2() {
+		return geonamesAdmin2;
+	}
+
+	public void setGeonamesAdmin2(final GeonamesAdmin geonamesAdmin2) {
+		this.geonamesAdmin2 = geonamesAdmin2;
+	}
+
+	public GeonamesAdmin getGeonamesAdmin1() {
+		return geonamesAdmin1;
+	}
+
+	public void setGeonamesAdmin1(final GeonamesAdmin geonamesAdmin1) {
+		this.geonamesAdmin1 = geonamesAdmin1;
+	}
+
+	public String getCity() {
+		return city;
+	}
+
+	public void setCity(final String city) {
+		this.city = city;
+	}
+
+	public Integer getId() {
+		return id;
+	}
+
+	public void setId(final Integer id) {
+		this.id = id;
+	}
+
+	public NameAndCode getNutsLevel1() {
+		return nutsLevel1;
+	}
+
+	public void setNutsLevel1(final NameAndCode nutsLevel1) {
+		this.nutsLevel1 = nutsLevel1;
+	}
+
+	public NameAndCode getNutsLevel3() {
+		return nutsLevel3;
+	}
+
+	public void setNutsLevel3(final NameAndCode nutsLevel3) {
+		this.nutsLevel3 = nutsLevel3;
+	}
+
+	public License getLicense() {
+		return license;
+	}
+
+	public void setLicense(final License license) {
+		this.license = license;
+	}
+
+}
diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/Label.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/Label.java
new file mode 100644
index 000000000..61eb0339d
--- /dev/null
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/Label.java
@@ -0,0 +1,35 @@
+
+package eu.dnetlib.dhp.actionmanager.ror.model;
+
+import java.io.Serializable;
+
+import com.fasterxml.jackson.annotation.JsonProperty;
+
+/** Serializable bean mapping a JSON label: the {@code label} text and its {@code iso639} code. */
+public class Label implements Serializable {
+
+	@JsonProperty("iso639")
+	private String iso639;
+
+	@JsonProperty("label")
+	private String label;
+
+	private static final long serialVersionUID = -6576156103297850809L;
+
+	public String getIso639() {
+		return iso639;
+	}
+
+	public void setIso639(final String iso639) {
+		this.iso639 = iso639;
+	}
+
+	public String getLabel() {
+		return label;
+	}
+
+	public void setLabel(final String label) {
+		this.label = label;
+	}
+
+}
diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/License.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/License.java
new file mode 100644
index 000000000..bdc8f4c42
--- /dev/null
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/License.java
@@ -0,0 +1,35 @@
+
+package eu.dnetlib.dhp.actionmanager.ror.model;
+
+import java.io.Serializable;
+
+import com.fasterxml.jackson.annotation.JsonProperty;
+
+/** Serializable bean mapping the JSON {@code license} object referenced by {@link GeonamesCity}. */
+public class License implements Serializable {
+
+	@JsonProperty("attribution")
+	private String attribution;
+
+	@JsonProperty("license")
+	private String license;
+
+	private static final long serialVersionUID = -194308261058176439L;
+
+	public String getAttribution() {
+		return attribution;
+	}
+
+	public void setAttribution(final String attribution) {
+		this.attribution = attribution;
+	}
+
+	public String getLicense() {
+		return license;
+	}
+
+	public void setLicense(final String license) {
+		this.license = license;
+	}
+
+}
diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/NameAndCode.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/NameAndCode.java
new file mode 100644
index 000000000..61d7eb8e6
--- /dev/null
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/NameAndCode.java
@@ -0,0 +1,35 @@
+
+package eu.dnetlib.dhp.actionmanager.ror.model;
+
+import java.io.Serializable;
+
+import com.fasterxml.jackson.annotation.JsonProperty;
+
+/** Serializable bean mapping a JSON {@code name} / {@code code} pair (used by {@link GeonamesCity}). */
+public class NameAndCode implements Serializable {
+
+	@JsonProperty("name")
+	private String name;
+
+	@JsonProperty("code")
+	private String code;
+
+	private static final long serialVersionUID = 5459836979206140843L;
+
+	public String getName() {
+		return name;
+	}
+
+	public void setName(final String name) {
+		this.name = name;
+	}
+
+	public String getCode() {
+		return code;
+	}
+
+	public void setCode(final String code) {
+		this.code = code;
+	}
+
+}
diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/Relationship.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/Relationship.java
new file mode 100644
index 000000000..8b73db98f
--- /dev/null
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/Relationship.java
@@ -0,0 +1,45 @@
+
+package eu.dnetlib.dhp.actionmanager.ror.model; + +import java.io.Serializable; + +import com.fasterxml.jackson.annotation.JsonProperty; + +public class Relationship implements Serializable { + + @JsonProperty("type") + private String type; + + @JsonProperty("id") + private String id; + + @JsonProperty("label") + private String label; + + private final static long serialVersionUID = 7847399503395576960L; + + public String getType() { + return type; + } + + public void setType(final String type) { + this.type = type; + } + + public String getId() { + return id; + } + + public void setId(final String id) { + this.id = id; + } + + public String getLabel() { + return label; + } + + public void setLabel(final String label) { + this.label = label; + } + +} diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/RorOrganization.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/RorOrganization.java new file mode 100644 index 000000000..94de34fee --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/RorOrganization.java @@ -0,0 +1,192 @@ + +package eu.dnetlib.dhp.actionmanager.ror.model; + +import java.io.Serializable; +import java.util.ArrayList; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; + +import com.fasterxml.jackson.annotation.JsonProperty; + +public class RorOrganization implements Serializable { + + @JsonProperty("ip_addresses") + private List ipAddresses = new ArrayList<>(); + + @JsonProperty("aliases") + private List aliases = new ArrayList<>(); + + @JsonProperty("acronyms") + private List acronyms = new ArrayList<>(); + + @JsonProperty("links") + private List links = new ArrayList<>(); + + @JsonProperty("country") + private Country country; + + @JsonProperty("name") + private String name; + + @JsonProperty("wikipedia_url") + private String wikipediaUrl; + + @JsonProperty("addresses") + private List
addresses = new ArrayList<>(); + + @JsonProperty("types") + private List types = new ArrayList<>(); + + @JsonProperty("established") + private Integer established; + + @JsonProperty("relationships") + private List relationships = new ArrayList<>(); + + @JsonProperty("email_address") + private String emailAddress; + + @JsonProperty("external_ids") + private Map externalIds = new LinkedHashMap<>(); + + @JsonProperty("id") + private String id; + + @JsonProperty("labels") + private List