Merge branch 'master' of code-repo.d4science.org:D-Net/dhp-schemas

This commit is contained in:
Sandro La Bruzzo 2021-07-05 14:36:55 +02:00
commit c0d47b2248
5 changed files with 92 additions and 52 deletions

View File

@ -2,10 +2,11 @@ Description of the project
-------------------------- --------------------------
This project defines **object schemas** of the OpenAIRE main entities and the relationships that exist among them. This project defines **object schemas** of the OpenAIRE main entities and the relationships that exist among them.
Namely it defines the model for Namely it defines the model for
- **research product (result)** which subclasses in publication, dataset, other research product, software - the graph internal representation, defined under the package `eu.dnetlib.dhp.schema.oaf`
- **data source** object describing the data provider (institutional repository, aggregators, cris systems) - the public graph dump representations, defined under the package `eu.dnetlib.dhp.schema.dump.oaf`
- **organization** research bodies managing a data source or participating to a research project - the scholexplorer content representation, defined under the package `eu.dnetlib.dhp.schema.sx`
- **project** research project - the contents acquired from the metadata aggregation subsystem, defined under the package `eu.dnetlib.dhp.schema.mdstore`
- the ORCID common schemas, defined under the package `eu.dnetlib.dhp.schema.orcid`
The serialization of such objects (data store files) is used to pass data between workflow nodes in the processing pipeline. The serialization of such objects (data store files) is used to pass data between workflow nodes in the processing pipeline.

View File

@ -5,7 +5,7 @@
<groupId>eu.dnetlib.dhp</groupId> <groupId>eu.dnetlib.dhp</groupId>
<artifactId>dhp-schemas</artifactId> <artifactId>dhp-schemas</artifactId>
<packaging>jar</packaging> <packaging>jar</packaging>
<version>2.6.14-SNAPSHOT</version> <version>2.7.14-SNAPSHOT</version>
<licenses> <licenses>
<license> <license>

View File

@ -55,54 +55,82 @@ public class ModelConstants {
public static final String SOFTWARE_RESULTTYPE_CLASSID = "software"; public static final String SOFTWARE_RESULTTYPE_CLASSID = "software";
public static final String ORP_RESULTTYPE_CLASSID = "other"; public static final String ORP_RESULTTYPE_CLASSID = "other";
public static final String RESULT_RESULT = "resultResult"; public static final String RESULT_RESULT = "resultResult"; // relType
/** /**
* @deprecated Use {@link ModelConstants#RELATIONSHIP} instead. * @deprecated Use {@link ModelConstants#RELATIONSHIP} instead.
*/ */
@Deprecated @Deprecated
public static final String PUBLICATION_DATASET = "publicationDataset"; public static final String PUBLICATION_DATASET = "publicationDataset"; // subreltype
public static final String IS_RELATED_TO = "isRelatedTo";
public static final String SUPPLEMENT = "supplement";
public static final String IS_SUPPLEMENT_TO = "isSupplementTo";
public static final String IS_SUPPLEMENTED_BY = "isSupplementedBy";
public static final String PART = "part";
public static final String IS_PART_OF = "isPartOf";
public static final String HAS_PART = "hasPart";
public static final String RELATIONSHIP = "relationship";
public static final String CITATION = "citation";
public static final String CITES = "cites";
public static final String IS_CITED_BY = "isCitedBy";
public static final String REVIEW = "review"; // subreltype
public static final String REVIEWS = "reviews";
public static final String IS_REVIEWED_BY = "isReviewedBy";
public static final String RESULT_PROJECT = "resultProject"; public static final String SUPPLEMENT = "supplement"; // subreltype
public static final String OUTCOME = "outcome"; public static final String IS_SUPPLEMENT_TO = "IsSupplementTo";
public static final String IS_SUPPLEMENTED_BY = "IsSupplementedBy";
public static final String PART = "part"; // subreltype
public static final String IS_PART_OF = "IsPartOf";
public static final String HAS_PART = "HasPart";
public static final String RELATIONSHIP = "relationship"; // subreltype
public static final String IS_RELATED_TO = "isRelatedTo";
public static final String IS_IDENTICAL_TO = "IsIdenticalTo";
public static final String REFERENCES = "References";
public static final String IS_REFERENCED_BY = "IsReferencedBy";
public static final String CONTINUES = "Continues";
public static final String IS_CONTINUED_BY = "IsContinuedBy";
public static final String DOCUMENTS = "Documents";
public static final String IS_DOCUMENTED_BY = "IsDocumentedBy";
public static final String IS_SOURCE_OF = "IsSourceOf";
public static final String IS_DERIVED_FROM = "IsDerivedFrom";
public static final String COMPILES = "Compiles";
public static final String IS_COMPILED_BY = "IsCompiledBy";
public static final String CITATION = "citation"; // subreltype
public static final String CITES = "Cites";
public static final String IS_CITED_BY = "IsCitedBy";
public static final String REVIEW = "review"; // subreltype
public static final String REVIEWS = "Reviews";
public static final String IS_REVIEWED_BY = "IsReviewedBy";
public static final String VERSION = "version"; // subreltype
public static final String IS_VERSION_OF = "IsVersionOf";
public static final String HAS_VERSION = "HasVersion";
public static final String IS_PREVIOUS_VERSION_OF = "IsPreviousVersionOf";
public static final String IS_NEW_VERSION_OF = "IsNewVersionOf";
public static final String IS_VARIANT_FORM_OF = "IsVariantFormOf";
public static final String IS_ORIGINAL_FORM_OF = "IsOriginalFormOf";
public static final String IS_OBSOLETED_BY = "IsObsoletedBy";
public static final String OBSOLETES = "Obsoletes";
public static final String RESULT_PROJECT = "resultProject"; // relType
public static final String OUTCOME = "outcome"; // subreltype
public static final String IS_PRODUCED_BY = "isProducedBy"; public static final String IS_PRODUCED_BY = "isProducedBy";
public static final String PRODUCES = "produces"; public static final String PRODUCES = "produces";
public static final String DATASOURCE_ORGANIZATION = "datasourceOrganization"; public static final String DATASOURCE_ORGANIZATION = "datasourceOrganization"; // relType
public static final String PROVISION = "provision"; public static final String PROVISION = "provision"; // subreltype
public static final String IS_PROVIDED_BY = "isProvidedBy"; public static final String IS_PROVIDED_BY = "isProvidedBy";
public static final String PROVIDES = "provides"; public static final String PROVIDES = "provides";
public static final String PROJECT_ORGANIZATION = "projectOrganization"; public static final String PROJECT_ORGANIZATION = "projectOrganization"; // relType
public static final String PARTICIPATION = "participation"; public static final String PARTICIPATION = "participation"; // subreltype
public static final String HAS_PARTICIPANT = "hasParticipant"; public static final String HAS_PARTICIPANT = "hasParticipant";
public static final String IS_PARTICIPANT = "isParticipant"; public static final String IS_PARTICIPANT = "isParticipant";
public static final String RESULT_ORGANIZATION = "resultOrganization"; public static final String RESULT_ORGANIZATION = "resultOrganization"; // relType
public static final String AFFILIATION = "affiliation"; public static final String AFFILIATION = "affiliation"; // subreltype
public static final String IS_AUTHOR_INSTITUTION_OF = "isAuthorInstitutionOf"; public static final String IS_AUTHOR_INSTITUTION_OF = "isAuthorInstitutionOf";
public static final String HAS_AUTHOR_INSTITUTION = "hasAuthorInstitution"; public static final String HAS_AUTHOR_INSTITUTION = "hasAuthorInstitution";
public static final String ORG_ORG_RELTYPE = "organizationOrganization"; public static final String ORG_ORG_RELTYPE = "organizationOrganization"; // relType
public static final String DEDUP = "dedup"; public static final String DEDUP = "dedup"; // subreltype
public static final String MERGES = "merges"; public static final String MERGES = "merges";
public static final String IS_MERGED_IN = "isMergedIn"; public static final String IS_MERGED_IN = "isMergedIn";
public static final String SIMILARITY = "similarity"; public static final String SIMILARITY = "similarity"; // subreltype
public static final String IS_SIMILAR_TO = "isSimilarTo"; public static final String IS_SIMILAR_TO = "isSimilarTo";
public static final String IS_DIFFERENT_FROM = "isDifferentFrom"; public static final String IS_DIFFERENT_FROM = "isDifferentFrom";

View File

@ -46,18 +46,27 @@ public class CleaningFunctions {
* @return the PID containing the normalised value. * @return the PID containing the normalised value.
*/ */
public static StructuredProperty normalizePidValue(StructuredProperty pid) { public static StructuredProperty normalizePidValue(StructuredProperty pid) {
String value = Optional pid.setValue(
.ofNullable(pid.getValue()) normalizePidValue(
.map(String::trim) pid.getQualifier().getClassid(),
.orElseThrow(() -> new IllegalArgumentException("PID value cannot be empty")); pid.getValue()));
switch (pid.getQualifier().getClassid()) {
// TODO add cleaning for more PID types as needed
case "doi":
pid.setValue(value.toLowerCase().replaceFirst(DOI_PREFIX_REGEX, DOI_PREFIX));
break;
}
return pid; return pid;
} }
/**
 * Normalises a PID value according to its PID type.
 *
 * <p>The value is always trimmed. For the {@code doi} type it is additionally lower-cased and the first match of
 * {@code DOI_PREFIX_REGEX} is replaced with {@code DOI_PREFIX}. For every other type the trimmed value is
 * returned as is.
 *
 * @param pidType the PID type (e.g. {@code "doi"}); must not be {@code null}, because it is used as the selector
 *        of a String {@code switch}
 * @param pidValue the raw PID value
 * @return the normalised PID value
 * @throws IllegalArgumentException if {@code pidValue} is {@code null}
 */
public static String normalizePidValue(String pidType, String pidValue) {
    String value = Optional
        .ofNullable(pidValue)
        .map(String::trim)
        .orElseThrow(() -> new IllegalArgumentException("PID value cannot be empty"));

    switch (pidType) {
        // TODO add cleaning for more PID types as needed
        case "doi":
            // Locale.ROOT keeps the lower-casing independent of the JVM default locale: with a Turkish or
            // Azeri default locale, a plain toLowerCase() maps 'I' to the dotless 'ı' and corrupts the DOI.
            return value.toLowerCase(java.util.Locale.ROOT).replaceFirst(DOI_PREFIX_REGEX, DOI_PREFIX);
        default:
            return value;
    }
}
} }

View File

@ -164,9 +164,7 @@ public class IdentifierFactory implements Serializable {
.stream() .stream()
// filter away PIDs provided by a DS that is not considered an authority for the // filter away PIDs provided by a DS that is not considered an authority for the
// given PID Type // given PID Type
.filter(p -> { .filter(p -> shouldFilterPid(collectedFrom, p, mapHandles))
return shouldFilterPid(collectedFrom, p, mapHandles);
})
.map(CleaningFunctions::normalizePidValue) .map(CleaningFunctions::normalizePidValue)
.filter(CleaningFunctions::pidFilter)) .filter(CleaningFunctions::pidFilter))
.orElse(Stream.empty()); .orElse(Stream.empty());
@ -193,13 +191,17 @@ public class IdentifierFactory implements Serializable {
} }
private static <T extends OafEntity> String idFromPid(T entity, StructuredProperty s, boolean md5) { private static <T extends OafEntity> String idFromPid(T entity, StructuredProperty s, boolean md5) {
return idFromPid(ModelSupport.getIdPrefix(entity.getClass()), s.getQualifier().getClassid(), s.getValue(), md5);
}
public static String idFromPid(String numericPrefix, String pidType, String pidValue, boolean md5) {
return new StringBuilder() return new StringBuilder()
.append(ModelSupport.getIdPrefix(entity.getClass())) .append(numericPrefix)
.append(ID_PREFIX_SEPARATOR) .append(ID_PREFIX_SEPARATOR)
.append(createPrefix(s.getQualifier().getClassid())) .append(createPrefix(pidType))
.append(ID_SEPARATOR) .append(ID_SEPARATOR)
.append(md5 ? md5(s.getValue()) : s.getValue()) .append(md5 ? md5(pidValue) : pidValue)
.toString(); .toString();
} }
// create the prefix (length = 12) // create the prefix (length = 12)