From 1f7fe39c3f2f2418621c1ac393b1bdb8677a2536 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Thu, 1 Jul 2021 16:57:57 +0200 Subject: [PATCH 1/4] Update 'README.md' --- README.md | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 7431cda..52fe9a4 100644 --- a/README.md +++ b/README.md @@ -2,10 +2,11 @@ Description of the project -------------------------- This project defines **object schemas** of the OpenAIRE main entities and the relationships that intercur among them. Namely it defines the model for - -- **research product (result)** which subclasses in publication, dataset, other research product, software -- **data source** object describing the data provider (institutional repository, aggregators, cris systems) -- **organization** research bodies managing a data source or participating to a research project -- **project** research project + +- the graph internal representation, defined under the package `eu.dnetlib.dhp.schema.oaf` +- the public graph dump representations, defined under the package `eu.dnetlib.dhp.schema.dump.oaf` +- the scholexplorer content representation, defined under the package `eu.dnetlib.dhp.schema.sx` +- the contents acquired from the metadata aggregation subsystem, defined under the package `eu.dnetlib.dhp.schema.mdstore` +- the ORCID common schemas, defined under the package `eu.dnetlib.dhp.schema.orcid` Te serialization of such objects (data store files) are used to pass data between workflow nodes in the processing pipeline. 
From fc913472a1408efffaa3350716e5250b5893d585 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Mon, 5 Jul 2021 11:11:45 +0200 Subject: [PATCH 2/4] code formatting --- .../eu/dnetlib/dhp/schema/sx/scholix/ScholixResource.java | 6 ++---- .../eu/dnetlib/dhp/schema/sx/summary/ScholixSummary.java | 3 ++- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/src/main/java/eu/dnetlib/dhp/schema/sx/scholix/ScholixResource.java b/src/main/java/eu/dnetlib/dhp/schema/sx/scholix/ScholixResource.java index 8665cb8..e173e2a 100644 --- a/src/main/java/eu/dnetlib/dhp/schema/sx/scholix/ScholixResource.java +++ b/src/main/java/eu/dnetlib/dhp/schema/sx/scholix/ScholixResource.java @@ -1,15 +1,13 @@ package eu.dnetlib.dhp.schema.sx.scholix; - - -import eu.dnetlib.dhp.schema.sx.summary.ScholixSummary; - import java.io.Serializable; import java.util.Collections; import java.util.List; import java.util.stream.Collectors; +import eu.dnetlib.dhp.schema.sx.summary.ScholixSummary; + public class ScholixResource implements Serializable { private List identifier; diff --git a/src/main/java/eu/dnetlib/dhp/schema/sx/summary/ScholixSummary.java b/src/main/java/eu/dnetlib/dhp/schema/sx/summary/ScholixSummary.java index fc146b2..6eb9300 100644 --- a/src/main/java/eu/dnetlib/dhp/schema/sx/summary/ScholixSummary.java +++ b/src/main/java/eu/dnetlib/dhp/schema/sx/summary/ScholixSummary.java @@ -1,10 +1,11 @@ package eu.dnetlib.dhp.schema.sx.summary; -import com.fasterxml.jackson.annotation.JsonProperty; import java.io.Serializable; import java.util.List; +import com.fasterxml.jackson.annotation.JsonProperty; + public class ScholixSummary implements Serializable { private String id; private List localIdentifier; From a7472744ecdf80f4f07d7ba6c338282a9a3911d2 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Mon, 5 Jul 2021 11:12:44 +0200 Subject: [PATCH 3/4] added resultResult relation types, inspired by Datacite --- pom.xml | 2 +- .../dhp/schema/common/ModelConstants.java | 82 
+++++++++++++------ 2 files changed, 56 insertions(+), 28 deletions(-) diff --git a/pom.xml b/pom.xml index ed53320..be36f83 100644 --- a/pom.xml +++ b/pom.xml @@ -5,7 +5,7 @@ eu.dnetlib.dhp dhp-schemas jar - 2.6.14-SNAPSHOT + 2.7.14-SNAPSHOT diff --git a/src/main/java/eu/dnetlib/dhp/schema/common/ModelConstants.java b/src/main/java/eu/dnetlib/dhp/schema/common/ModelConstants.java index ddb80ee..b5535fa 100644 --- a/src/main/java/eu/dnetlib/dhp/schema/common/ModelConstants.java +++ b/src/main/java/eu/dnetlib/dhp/schema/common/ModelConstants.java @@ -55,54 +55,82 @@ public class ModelConstants { public static final String SOFTWARE_RESULTTYPE_CLASSID = "software"; public static final String ORP_RESULTTYPE_CLASSID = "other"; - public static final String RESULT_RESULT = "resultResult"; + public static final String RESULT_RESULT = "resultResult"; // relType /** * @deprecated Use {@link ModelConstants#RELATIONSHIP} instead. */ @Deprecated - public static final String PUBLICATION_DATASET = "publicationDataset"; - public static final String IS_RELATED_TO = "isRelatedTo"; - public static final String SUPPLEMENT = "supplement"; - public static final String IS_SUPPLEMENT_TO = "isSupplementTo"; - public static final String IS_SUPPLEMENTED_BY = "isSupplementedBy"; - public static final String PART = "part"; - public static final String IS_PART_OF = "isPartOf"; - public static final String HAS_PART = "hasPart"; - public static final String RELATIONSHIP = "relationship"; - public static final String CITATION = "citation"; - public static final String CITES = "cites"; - public static final String IS_CITED_BY = "isCitedBy"; - public static final String REVIEW = "review"; // subreltype - public static final String REVIEWS = "reviews"; - public static final String IS_REVIEWED_BY = "isReviewedBy"; + public static final String PUBLICATION_DATASET = "publicationDataset"; // subreltype - public static final String RESULT_PROJECT = "resultProject"; - public static final String OUTCOME = 
"outcome"; + public static final String SUPPLEMENT = "supplement"; // subreltype + public static final String IS_SUPPLEMENT_TO = "IsSupplementTo"; + public static final String IS_SUPPLEMENTED_BY = "IsSupplementedBy"; + + public static final String PART = "part"; // subreltype + public static final String IS_PART_OF = "IsPartOf"; + public static final String HAS_PART = "HasPart"; + + public static final String RELATIONSHIP = "relationship"; // subreltype + + public static final String IS_RELATED_TO = "isRelatedTo"; + public static final String IS_IDENTICAL_TO = "IsIdenticalTo"; + + public static final String REFERENCES = "References"; + public static final String IS_REFERENCED_BY = "IsReferencedBy"; + public static final String CONTINUES = "Continues"; + public static final String IS_CONTINUED_BY = "IsContinuedBy"; + public static final String DOCUMENTS = "Documents"; + public static final String IS_DOCUMENTED_BY = "IsDocumentedBy"; + public static final String IS_SOURCE_OF = "IsSourceOf"; + public static final String IS_DERIVED_FROM = "IsDerivedFrom"; + public static final String COMPILES = "Compiles"; + public static final String IS_COMPILED_BY = "IsCompiledBy"; + + public static final String CITATION = "citation"; // subreltype + public static final String CITES = "Cites"; + public static final String IS_CITED_BY = "IsCitedBy"; + + public static final String REVIEW = "review"; // subreltype + public static final String REVIEWS = "Reviews"; + public static final String IS_REVIEWED_BY = "IsReviewedBy"; + + public static final String VERSION = "version"; // subreltype + public static final String IS_VERSION_OF = "IsVersionOf"; + public static final String HAS_VERSION = "HasVersion"; + public static final String IS_PREVIOUS_VERSION_OF = "IsPreviousVersionOf"; + public static final String IS_NEW_VERSION_OF = "IsNewVersionOf"; + public static final String IS_VARIANT_FORM_OF = "IsVariantFormOf"; + public static final String IS_ORIGINAL_FORM_OF = "IsOriginalFormOf"; + 
public static final String IS_OBSOLETED_BY = "IsObsoletedBy"; + public static final String OBSOLETES = "Obsoletes"; + + public static final String RESULT_PROJECT = "resultProject"; // relType + public static final String OUTCOME = "outcome"; // subreltype public static final String IS_PRODUCED_BY = "isProducedBy"; public static final String PRODUCES = "produces"; - public static final String DATASOURCE_ORGANIZATION = "datasourceOrganization"; - public static final String PROVISION = "provision"; + public static final String DATASOURCE_ORGANIZATION = "datasourceOrganization"; // relType + public static final String PROVISION = "provision"; // subreltype public static final String IS_PROVIDED_BY = "isProvidedBy"; public static final String PROVIDES = "provides"; - public static final String PROJECT_ORGANIZATION = "projectOrganization"; - public static final String PARTICIPATION = "participation"; + public static final String PROJECT_ORGANIZATION = "projectOrganization"; // relType + public static final String PARTICIPATION = "participation"; // subreltype public static final String HAS_PARTICIPANT = "hasParticipant"; public static final String IS_PARTICIPANT = "isParticipant"; - public static final String RESULT_ORGANIZATION = "resultOrganization"; - public static final String AFFILIATION = "affiliation"; + public static final String RESULT_ORGANIZATION = "resultOrganization"; // relType + public static final String AFFILIATION = "affiliation"; // subreltype public static final String IS_AUTHOR_INSTITUTION_OF = "isAuthorInstitutionOf"; public static final String HAS_AUTHOR_INSTITUTION = "hasAuthorInstitution"; - public static final String ORG_ORG_RELTYPE = "organizationOrganization"; + public static final String ORG_ORG_RELTYPE = "organizationOrganization"; // relType - public static final String DEDUP = "dedup"; + public static final String DEDUP = "dedup"; // subreltype public static final String MERGES = "merges"; public static final String IS_MERGED_IN = 
"isMergedIn"; - public static final String SIMILARITY = "similarity"; + public static final String SIMILARITY = "similarity"; // subreltype public static final String IS_SIMILAR_TO = "isSimilarTo"; public static final String IS_DIFFERENT_FROM = "isDifferentFrom"; From b0203ff5ccd29e834047effd050abf758c8f1720 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Mon, 5 Jul 2021 14:35:26 +0200 Subject: [PATCH 4/4] added more cleaning/identifier utilities --- .../schema/oaf/utils/CleaningFunctions.java | 29 ++++++++++++------- .../schema/oaf/utils/IdentifierFactory.java | 20 +++++++------ 2 files changed, 30 insertions(+), 19 deletions(-) diff --git a/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/CleaningFunctions.java b/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/CleaningFunctions.java index 56ee75a..183214c 100644 --- a/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/CleaningFunctions.java +++ b/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/CleaningFunctions.java @@ -46,18 +46,27 @@ public class CleaningFunctions { * @return the PID containing the normalised value. 
*/ public static StructuredProperty normalizePidValue(StructuredProperty pid) { - String value = Optional - .ofNullable(pid.getValue()) - .map(String::trim) - .orElseThrow(() -> new IllegalArgumentException("PID value cannot be empty")); - switch (pid.getQualifier().getClassid()) { + pid.setValue( + normalizePidValue( + pid.getQualifier().getClassid(), + pid.getValue())); - // TODO add cleaning for more PID types as needed - case "doi": - pid.setValue(value.toLowerCase().replaceFirst(DOI_PREFIX_REGEX, DOI_PREFIX)); - break; - } return pid; } + public static String normalizePidValue(String pidType, String pidValue) { + String value = Optional + .ofNullable(pidValue) + .map(String::trim) + .orElseThrow(() -> new IllegalArgumentException("PID value cannot be empty")); + + switch (pidType) { + + // TODO add cleaning for more PID types as needed + case "doi": + return value.toLowerCase().replaceFirst(DOI_PREFIX_REGEX, DOI_PREFIX); + } + return value; + } + } diff --git a/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/IdentifierFactory.java b/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/IdentifierFactory.java index 43cdbc1..d0baec5 100644 --- a/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/IdentifierFactory.java +++ b/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/IdentifierFactory.java @@ -164,9 +164,7 @@ public class IdentifierFactory implements Serializable { .stream() // filter away PIDs provided by a DS that is not considered an authority for the // given PID Type - .filter(p -> { - return shouldFilterPid(collectedFrom, p, mapHandles); - }) + .filter(p -> shouldFilterPid(collectedFrom, p, mapHandles)) .map(CleaningFunctions::normalizePidValue) .filter(CleaningFunctions::pidFilter)) .orElse(Stream.empty()); @@ -193,13 +191,17 @@ public class IdentifierFactory implements Serializable { } private static String idFromPid(T entity, StructuredProperty s, boolean md5) { + return idFromPid(ModelSupport.getIdPrefix(entity.getClass()), s.getQualifier().getClassid(), 
s.getValue(), md5); + } + + public static String idFromPid(String numericPrefix, String pidType, String pidValue, boolean md5) { return new StringBuilder() - .append(ModelSupport.getIdPrefix(entity.getClass())) - .append(ID_PREFIX_SEPARATOR) - .append(createPrefix(s.getQualifier().getClassid())) - .append(ID_SEPARATOR) - .append(md5 ? md5(s.getValue()) : s.getValue()) - .toString(); + .append(numericPrefix) + .append(ID_PREFIX_SEPARATOR) + .append(createPrefix(pidType)) + .append(ID_SEPARATOR) + .append(md5 ? md5(pidValue) : pidValue) + .toString(); } // create the prefix (length = 12)