Merge branch 'master' of code-repo.d4science.org:D-Net/dhp-schemas

This commit is contained in:
Sandro La Bruzzo 2021-07-05 14:36:55 +02:00
commit c0d47b2248
5 changed files with 92 additions and 52 deletions

View File

@ -2,10 +2,11 @@ Description of the project
--------------------------
This project defines **object schemas** of the OpenAIRE main entities and the relationships that intercur among them.
Namely it defines the model for
- **research product (result)** which subclasses in publication, dataset, other research product, software
- **data source** object describing the data provider (institutional repository, aggregators, cris systems)
- **organization** research bodies managing a data source or participating to a research project
- **project** research project
- the graph internal representation, defined under the package `eu.dnetlib.dhp.schema.oaf`
- the public graph dump representations, defined under the package `eu.dnetlib.dhp.schema.dump.oaf`
- the scholexplorer content representation, defined under the package `eu.dnetlib.dhp.schema.sx`
- the contents acquired from the netadata aggregation subsystem, defined under the package `eu.dnetlib.dhp.schema.mdstore`
- the ORCID common schemas, defined under the package `eu.dnetlib.dhp.schema.orcid`
Te serialization of such objects (data store files) are used to pass data between workflow nodes in the processing pipeline.

View File

@ -5,7 +5,7 @@
<groupId>eu.dnetlib.dhp</groupId>
<artifactId>dhp-schemas</artifactId>
<packaging>jar</packaging>
<version>2.6.14-SNAPSHOT</version>
<version>2.7.14-SNAPSHOT</version>
<licenses>
<license>

View File

@ -55,54 +55,82 @@ public class ModelConstants {
public static final String SOFTWARE_RESULTTYPE_CLASSID = "software";
public static final String ORP_RESULTTYPE_CLASSID = "other";
public static final String RESULT_RESULT = "resultResult";
public static final String RESULT_RESULT = "resultResult"; // relType
/**
* @deprecated Use {@link ModelConstants#RELATIONSHIP} instead.
*/
@Deprecated
public static final String PUBLICATION_DATASET = "publicationDataset";
public static final String IS_RELATED_TO = "isRelatedTo";
public static final String SUPPLEMENT = "supplement";
public static final String IS_SUPPLEMENT_TO = "isSupplementTo";
public static final String IS_SUPPLEMENTED_BY = "isSupplementedBy";
public static final String PART = "part";
public static final String IS_PART_OF = "isPartOf";
public static final String HAS_PART = "hasPart";
public static final String RELATIONSHIP = "relationship";
public static final String CITATION = "citation";
public static final String CITES = "cites";
public static final String IS_CITED_BY = "isCitedBy";
public static final String REVIEW = "review"; // subreltype
public static final String REVIEWS = "reviews";
public static final String IS_REVIEWED_BY = "isReviewedBy";
public static final String PUBLICATION_DATASET = "publicationDataset"; // subreltype
public static final String RESULT_PROJECT = "resultProject";
public static final String OUTCOME = "outcome";
public static final String SUPPLEMENT = "supplement"; // subreltype
public static final String IS_SUPPLEMENT_TO = "IsSupplementTo";
public static final String IS_SUPPLEMENTED_BY = "IsSupplementedBy";
public static final String PART = "part"; // subreltype
public static final String IS_PART_OF = "IsPartOf";
public static final String HAS_PART = "HasPart";
public static final String RELATIONSHIP = "relationship"; // subreltype
public static final String IS_RELATED_TO = "isRelatedTo";
public static final String IS_IDENTICAL_TO = "IsIdenticalTo";
public static final String REFERENCES = "References";
public static final String IS_REFERENCED_BY = "IsReferencedBy";
public static final String CONTINUES = "Continues";
public static final String IS_CONTINUED_BY = "IsContinuedBy";
public static final String DOCUMENTS = "Documents";
public static final String IS_DOCUMENTED_BY = "IsDocumentedBy";
public static final String IS_SOURCE_OF = "IsSourceOf";
public static final String IS_DERIVED_FROM = "IsDerivedFrom";
public static final String COMPILES = "Compiles";
public static final String IS_COMPILED_BY = "IsCompiledBy";
public static final String CITATION = "citation"; // subreltype
public static final String CITES = "Cites";
public static final String IS_CITED_BY = "IsCitedBy";
public static final String REVIEW = "review"; // subreltype
public static final String REVIEWS = "Reviews";
public static final String IS_REVIEWED_BY = "IsReviewedBy";
public static final String VERSION = "version"; // subreltype
public static final String IS_VERSION_OF = "IsVersionOf";
public static final String HAS_VERSION = "HasVersion";
public static final String IS_PREVIOUS_VERSION_OF = "IsPreviousVersionOf";
public static final String IS_NEW_VERSION_OF = "IsNewVersionOf";
public static final String IS_VARIANT_FORM_OF = "IsVariantFormOf";
public static final String IS_ORIGINAL_FORM_OF = "IsOriginalFormOf";
public static final String IS_OBSOLETED_BY = "IsObsoletedBy";
public static final String OBSOLETES = "Obsoletes";
public static final String RESULT_PROJECT = "resultProject"; // relType
public static final String OUTCOME = "outcome"; // subreltype
public static final String IS_PRODUCED_BY = "isProducedBy";
public static final String PRODUCES = "produces";
public static final String DATASOURCE_ORGANIZATION = "datasourceOrganization";
public static final String PROVISION = "provision";
public static final String DATASOURCE_ORGANIZATION = "datasourceOrganization"; // relType
public static final String PROVISION = "provision"; // subreltype
public static final String IS_PROVIDED_BY = "isProvidedBy";
public static final String PROVIDES = "provides";
public static final String PROJECT_ORGANIZATION = "projectOrganization";
public static final String PARTICIPATION = "participation";
public static final String PROJECT_ORGANIZATION = "projectOrganization"; // relType
public static final String PARTICIPATION = "participation"; // subreltype
public static final String HAS_PARTICIPANT = "hasParticipant";
public static final String IS_PARTICIPANT = "isParticipant";
public static final String RESULT_ORGANIZATION = "resultOrganization";
public static final String AFFILIATION = "affiliation";
public static final String RESULT_ORGANIZATION = "resultOrganization"; // relType
public static final String AFFILIATION = "affiliation"; // subreltype
public static final String IS_AUTHOR_INSTITUTION_OF = "isAuthorInstitutionOf";
public static final String HAS_AUTHOR_INSTITUTION = "hasAuthorInstitution";
public static final String ORG_ORG_RELTYPE = "organizationOrganization";
public static final String ORG_ORG_RELTYPE = "organizationOrganization"; // relType
public static final String DEDUP = "dedup";
public static final String DEDUP = "dedup"; // subreltype
public static final String MERGES = "merges";
public static final String IS_MERGED_IN = "isMergedIn";
public static final String SIMILARITY = "similarity";
public static final String SIMILARITY = "similarity"; // subreltype
public static final String IS_SIMILAR_TO = "isSimilarTo";
public static final String IS_DIFFERENT_FROM = "isDifferentFrom";

View File

@ -46,18 +46,27 @@ public class CleaningFunctions {
* @return the PID containing the normalised value.
*/
public static StructuredProperty normalizePidValue(StructuredProperty pid) {
String value = Optional
.ofNullable(pid.getValue())
.map(String::trim)
.orElseThrow(() -> new IllegalArgumentException("PID value cannot be empty"));
switch (pid.getQualifier().getClassid()) {
pid.setValue(
normalizePidValue(
pid.getQualifier().getClassid(),
pid.getValue()));
// TODO add cleaning for more PID types as needed
case "doi":
pid.setValue(value.toLowerCase().replaceFirst(DOI_PREFIX_REGEX, DOI_PREFIX));
break;
}
return pid;
}
public static String normalizePidValue(String pidType, String pidValue) {
String value = Optional
.ofNullable(pidValue)
.map(String::trim)
.orElseThrow(() -> new IllegalArgumentException("PID value cannot be empty"));
switch (pidType) {
// TODO add cleaning for more PID types as needed
case "doi":
return value.toLowerCase().replaceFirst(DOI_PREFIX_REGEX, DOI_PREFIX);
}
return value;
}
}

View File

@ -164,9 +164,7 @@ public class IdentifierFactory implements Serializable {
.stream()
// filter away PIDs provided by a DS that is not considered an authority for the
// given PID Type
.filter(p -> {
return shouldFilterPid(collectedFrom, p, mapHandles);
})
.filter(p -> shouldFilterPid(collectedFrom, p, mapHandles))
.map(CleaningFunctions::normalizePidValue)
.filter(CleaningFunctions::pidFilter))
.orElse(Stream.empty());
@ -193,13 +191,17 @@ public class IdentifierFactory implements Serializable {
}
private static <T extends OafEntity> String idFromPid(T entity, StructuredProperty s, boolean md5) {
return idFromPid(ModelSupport.getIdPrefix(entity.getClass()), s.getQualifier().getClassid(), s.getValue(), md5);
}
public static String idFromPid(String numericPrefix, String pidType, String pidValue, boolean md5) {
return new StringBuilder()
.append(ModelSupport.getIdPrefix(entity.getClass()))
.append(ID_PREFIX_SEPARATOR)
.append(createPrefix(s.getQualifier().getClassid()))
.append(ID_SEPARATOR)
.append(md5 ? md5(s.getValue()) : s.getValue())
.toString();
.append(numericPrefix)
.append(ID_PREFIX_SEPARATOR)
.append(createPrefix(pidType))
.append(ID_SEPARATOR)
.append(md5 ? md5(pidValue) : pidValue)
.toString();
}
// create the prefix (length = 12)