forked from D-Net/dnet-hadoop
filter ORCID e MAG identifiers
This commit is contained in:
parent
9f2d0f1b08
commit
dc4621b3cb
|
@ -10,7 +10,16 @@ import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.listFields;
|
||||||
import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.oaiIProvenance;
|
import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.oaiIProvenance;
|
||||||
import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.qualifier;
|
import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.qualifier;
|
||||||
import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.structuredProperty;
|
import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.structuredProperty;
|
||||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.*;
|
import static eu.dnetlib.dhp.schema.common.ModelConstants.DATASET_DEFAULT_RESULTTYPE;
|
||||||
|
import static eu.dnetlib.dhp.schema.common.ModelConstants.DNET_PID_TYPES;
|
||||||
|
import static eu.dnetlib.dhp.schema.common.ModelConstants.IS_PRODUCED_BY;
|
||||||
|
import static eu.dnetlib.dhp.schema.common.ModelConstants.ORP_DEFAULT_RESULTTYPE;
|
||||||
|
import static eu.dnetlib.dhp.schema.common.ModelConstants.OUTCOME;
|
||||||
|
import static eu.dnetlib.dhp.schema.common.ModelConstants.PRODUCES;
|
||||||
|
import static eu.dnetlib.dhp.schema.common.ModelConstants.PUBLICATION_DEFAULT_RESULTTYPE;
|
||||||
|
import static eu.dnetlib.dhp.schema.common.ModelConstants.REPOSITORY_PROVENANCE_ACTIONS;
|
||||||
|
import static eu.dnetlib.dhp.schema.common.ModelConstants.RESULT_PROJECT;
|
||||||
|
import static eu.dnetlib.dhp.schema.common.ModelConstants.SOFTWARE_DEFAULT_RESULTTYPE;
|
||||||
|
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
|
@ -50,6 +59,8 @@ public abstract class AbstractMdRecordToOafMapper {
|
||||||
|
|
||||||
protected static final String DATACITE_SCHEMA_KERNEL_4 = "http://datacite.org/schema/kernel-4";
|
protected static final String DATACITE_SCHEMA_KERNEL_4 = "http://datacite.org/schema/kernel-4";
|
||||||
protected static final String DATACITE_SCHEMA_KERNEL_3 = "http://datacite.org/schema/kernel-3";
|
protected static final String DATACITE_SCHEMA_KERNEL_3 = "http://datacite.org/schema/kernel-3";
|
||||||
|
protected static final Qualifier ORCID_PID_TYPE = qualifier("ORCID", "Open Researcher and Contributor ID", DNET_PID_TYPES, DNET_PID_TYPES);
|
||||||
|
protected static final Qualifier MAG_PID_TYPE = qualifier("MAGIdentifier", "Microsoft Academic Graph Identifier", DNET_PID_TYPES, DNET_PID_TYPES);
|
||||||
|
|
||||||
protected static final Map<String, String> nsContext = new HashMap<>();
|
protected static final Map<String, String> nsContext = new HashMap<>();
|
||||||
|
|
||||||
|
@ -63,8 +74,7 @@ public abstract class AbstractMdRecordToOafMapper {
|
||||||
nsContext.put("datacite", DATACITE_SCHEMA_KERNEL_3);
|
nsContext.put("datacite", DATACITE_SCHEMA_KERNEL_3);
|
||||||
}
|
}
|
||||||
|
|
||||||
protected static final Qualifier MAIN_TITLE_QUALIFIER = qualifier(
|
protected static final Qualifier MAIN_TITLE_QUALIFIER = qualifier("main title", "main title", "dnet:dataCite_title", "dnet:dataCite_title");
|
||||||
"main title", "main title", "dnet:dataCite_title", "dnet:dataCite_title");
|
|
||||||
|
|
||||||
protected AbstractMdRecordToOafMapper(final Map<String, String> code2name) {
|
protected AbstractMdRecordToOafMapper(final Map<String, String> code2name) {
|
||||||
this.code2name = code2name;
|
this.code2name = code2name;
|
||||||
|
@ -75,24 +85,18 @@ public abstract class AbstractMdRecordToOafMapper {
|
||||||
DocumentFactory.getInstance().setXPathNamespaceURIs(nsContext);
|
DocumentFactory.getInstance().setXPathNamespaceURIs(nsContext);
|
||||||
|
|
||||||
final Document doc = DocumentHelper
|
final Document doc = DocumentHelper
|
||||||
.parseText(
|
.parseText(xml.replaceAll(DATACITE_SCHEMA_KERNEL_4, DATACITE_SCHEMA_KERNEL_3));
|
||||||
xml.replaceAll(DATACITE_SCHEMA_KERNEL_4, DATACITE_SCHEMA_KERNEL_3));
|
|
||||||
|
|
||||||
final String type = doc.valueOf("//dr:CobjCategory/@type");
|
final String type = doc.valueOf("//dr:CobjCategory/@type");
|
||||||
final KeyValue collectedFrom = getProvenanceDatasource(
|
final KeyValue collectedFrom = getProvenanceDatasource(doc, "//oaf:collectedFrom/@id", "//oaf:collectedFrom/@name");
|
||||||
doc, "//oaf:collectedFrom/@id", "//oaf:collectedFrom/@name");
|
|
||||||
|
|
||||||
if (collectedFrom == null) {
|
if (collectedFrom == null) { return null; }
|
||||||
return null;
|
|
||||||
}
|
|
||||||
|
|
||||||
final KeyValue hostedBy = StringUtils.isBlank(doc.valueOf("//oaf:hostedBy/@id"))
|
final KeyValue hostedBy = StringUtils.isBlank(doc.valueOf("//oaf:hostedBy/@id"))
|
||||||
? collectedFrom
|
? collectedFrom
|
||||||
: getProvenanceDatasource(doc, "//oaf:hostedBy/@id", "//oaf:hostedBy/@name");
|
: getProvenanceDatasource(doc, "//oaf:hostedBy/@id", "//oaf:hostedBy/@name");
|
||||||
|
|
||||||
if (hostedBy == null) {
|
if (hostedBy == null) { return null; }
|
||||||
return null;
|
|
||||||
}
|
|
||||||
|
|
||||||
final DataInfo info = prepareDataInfo(doc);
|
final DataInfo info = prepareDataInfo(doc);
|
||||||
final long lastUpdateTimestamp = new Date().getTime();
|
final long lastUpdateTimestamp = new Date().getTime();
|
||||||
|
@ -103,17 +107,13 @@ public abstract class AbstractMdRecordToOafMapper {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private KeyValue getProvenanceDatasource(Document doc, String xpathId, String xpathName) {
|
private KeyValue getProvenanceDatasource(final Document doc, final String xpathId, final String xpathName) {
|
||||||
final String dsId = doc.valueOf(xpathId);
|
final String dsId = doc.valueOf(xpathId);
|
||||||
final String dsName = doc.valueOf(xpathName);
|
final String dsName = doc.valueOf(xpathName);
|
||||||
|
|
||||||
if (StringUtils.isBlank(dsId) | StringUtils.isBlank(dsName)) {
|
if (StringUtils.isBlank(dsId) | StringUtils.isBlank(dsName)) { return null; }
|
||||||
return null;
|
|
||||||
}
|
|
||||||
|
|
||||||
return keyValue(
|
return keyValue(createOpenaireId(10, dsId, true), dsName);
|
||||||
createOpenaireId(10, dsId, true),
|
|
||||||
dsName);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
protected List<Oaf> createOafs(
|
protected List<Oaf> createOafs(
|
||||||
|
@ -127,47 +127,47 @@ public abstract class AbstractMdRecordToOafMapper {
|
||||||
final List<Oaf> oafs = new ArrayList<>();
|
final List<Oaf> oafs = new ArrayList<>();
|
||||||
|
|
||||||
switch (type.toLowerCase()) {
|
switch (type.toLowerCase()) {
|
||||||
case "publication":
|
case "publication":
|
||||||
final Publication p = new Publication();
|
final Publication p = new Publication();
|
||||||
populateResultFields(p, doc, collectedFrom, hostedBy, info, lastUpdateTimestamp);
|
populateResultFields(p, doc, collectedFrom, hostedBy, info, lastUpdateTimestamp);
|
||||||
p.setResulttype(PUBLICATION_DEFAULT_RESULTTYPE);
|
p.setResulttype(PUBLICATION_DEFAULT_RESULTTYPE);
|
||||||
p.setJournal(prepareJournal(doc, info));
|
p.setJournal(prepareJournal(doc, info));
|
||||||
oafs.add(p);
|
oafs.add(p);
|
||||||
break;
|
break;
|
||||||
case "dataset":
|
case "dataset":
|
||||||
final Dataset d = new Dataset();
|
final Dataset d = new Dataset();
|
||||||
populateResultFields(d, doc, collectedFrom, hostedBy, info, lastUpdateTimestamp);
|
populateResultFields(d, doc, collectedFrom, hostedBy, info, lastUpdateTimestamp);
|
||||||
d.setResulttype(DATASET_DEFAULT_RESULTTYPE);
|
d.setResulttype(DATASET_DEFAULT_RESULTTYPE);
|
||||||
d.setStoragedate(prepareDatasetStorageDate(doc, info));
|
d.setStoragedate(prepareDatasetStorageDate(doc, info));
|
||||||
d.setDevice(prepareDatasetDevice(doc, info));
|
d.setDevice(prepareDatasetDevice(doc, info));
|
||||||
d.setSize(prepareDatasetSize(doc, info));
|
d.setSize(prepareDatasetSize(doc, info));
|
||||||
d.setVersion(prepareDatasetVersion(doc, info));
|
d.setVersion(prepareDatasetVersion(doc, info));
|
||||||
d.setLastmetadataupdate(prepareDatasetLastMetadataUpdate(doc, info));
|
d.setLastmetadataupdate(prepareDatasetLastMetadataUpdate(doc, info));
|
||||||
d.setMetadataversionnumber(prepareDatasetMetadataVersionNumber(doc, info));
|
d.setMetadataversionnumber(prepareDatasetMetadataVersionNumber(doc, info));
|
||||||
d.setGeolocation(prepareDatasetGeoLocations(doc, info));
|
d.setGeolocation(prepareDatasetGeoLocations(doc, info));
|
||||||
oafs.add(d);
|
oafs.add(d);
|
||||||
break;
|
break;
|
||||||
case "software":
|
case "software":
|
||||||
final Software s = new Software();
|
final Software s = new Software();
|
||||||
populateResultFields(s, doc, collectedFrom, hostedBy, info, lastUpdateTimestamp);
|
populateResultFields(s, doc, collectedFrom, hostedBy, info, lastUpdateTimestamp);
|
||||||
s.setResulttype(SOFTWARE_DEFAULT_RESULTTYPE);
|
s.setResulttype(SOFTWARE_DEFAULT_RESULTTYPE);
|
||||||
s.setDocumentationUrl(prepareSoftwareDocumentationUrls(doc, info));
|
s.setDocumentationUrl(prepareSoftwareDocumentationUrls(doc, info));
|
||||||
s.setLicense(prepareSoftwareLicenses(doc, info));
|
s.setLicense(prepareSoftwareLicenses(doc, info));
|
||||||
s.setCodeRepositoryUrl(prepareSoftwareCodeRepositoryUrl(doc, info));
|
s.setCodeRepositoryUrl(prepareSoftwareCodeRepositoryUrl(doc, info));
|
||||||
s.setProgrammingLanguage(prepareSoftwareProgrammingLanguage(doc, info));
|
s.setProgrammingLanguage(prepareSoftwareProgrammingLanguage(doc, info));
|
||||||
oafs.add(s);
|
oafs.add(s);
|
||||||
break;
|
break;
|
||||||
case "":
|
case "":
|
||||||
case "otherresearchproducts":
|
case "otherresearchproducts":
|
||||||
default:
|
default:
|
||||||
final OtherResearchProduct o = new OtherResearchProduct();
|
final OtherResearchProduct o = new OtherResearchProduct();
|
||||||
populateResultFields(o, doc, collectedFrom, hostedBy, info, lastUpdateTimestamp);
|
populateResultFields(o, doc, collectedFrom, hostedBy, info, lastUpdateTimestamp);
|
||||||
o.setResulttype(ORP_DEFAULT_RESULTTYPE);
|
o.setResulttype(ORP_DEFAULT_RESULTTYPE);
|
||||||
o.setContactperson(prepareOtherResearchProductContactPersons(doc, info));
|
o.setContactperson(prepareOtherResearchProductContactPersons(doc, info));
|
||||||
o.setContactgroup(prepareOtherResearchProductContactGroups(doc, info));
|
o.setContactgroup(prepareOtherResearchProductContactGroups(doc, info));
|
||||||
o.setTool(prepareOtherResearchProductTools(doc, info));
|
o.setTool(prepareOtherResearchProductTools(doc, info));
|
||||||
oafs.add(o);
|
oafs.add(o);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!oafs.isEmpty()) {
|
if (!oafs.isEmpty()) {
|
||||||
|
@ -196,23 +196,23 @@ public abstract class AbstractMdRecordToOafMapper {
|
||||||
final String projectId = createOpenaireId(40, originalId, true);
|
final String projectId = createOpenaireId(40, originalId, true);
|
||||||
|
|
||||||
res
|
res
|
||||||
.add(
|
.add(getRelation(docId, projectId, RESULT_PROJECT, OUTCOME, IS_PRODUCED_BY, collectedFrom, info, lastUpdateTimestamp));
|
||||||
getRelation(
|
|
||||||
docId, projectId, RESULT_PROJECT, OUTCOME, IS_PRODUCED_BY, collectedFrom, info,
|
|
||||||
lastUpdateTimestamp));
|
|
||||||
res
|
res
|
||||||
.add(
|
.add(getRelation(projectId, docId, RESULT_PROJECT, OUTCOME, PRODUCES, collectedFrom, info, lastUpdateTimestamp));
|
||||||
getRelation(
|
|
||||||
projectId, docId, RESULT_PROJECT, OUTCOME, PRODUCES, collectedFrom, info,
|
|
||||||
lastUpdateTimestamp));
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
protected Relation getRelation(String source, String target, String relType, String subRelType, String relClass,
|
protected Relation getRelation(final String source,
|
||||||
KeyValue collectedFrom, DataInfo info, long lastUpdateTimestamp) {
|
final String target,
|
||||||
|
final String relType,
|
||||||
|
final String subRelType,
|
||||||
|
final String relClass,
|
||||||
|
final KeyValue collectedFrom,
|
||||||
|
final DataInfo info,
|
||||||
|
final long lastUpdateTimestamp) {
|
||||||
final Relation rel = new Relation();
|
final Relation rel = new Relation();
|
||||||
rel.setRelType(relType);
|
rel.setRelType(relType);
|
||||||
rel.setSubRelType(subRelType);
|
rel.setSubRelType(subRelType);
|
||||||
|
@ -244,9 +244,7 @@ public abstract class AbstractMdRecordToOafMapper {
|
||||||
r.setOriginalId(Arrays.asList(doc.valueOf("//dri:objIdentifier")));
|
r.setOriginalId(Arrays.asList(doc.valueOf("//dri:objIdentifier")));
|
||||||
r.setCollectedfrom(Arrays.asList(collectedFrom));
|
r.setCollectedfrom(Arrays.asList(collectedFrom));
|
||||||
r
|
r
|
||||||
.setPid(
|
.setPid(prepareListStructProps(doc, "//oaf:identifier", "@identifierType", "dnet:pid_types", "dnet:pid_types", info));
|
||||||
prepareListStructProps(
|
|
||||||
doc, "//oaf:identifier", "@identifierType", "dnet:pid_types", "dnet:pid_types", info));
|
|
||||||
r.setDateofcollection(doc.valueOf("//dr:dateOfCollection"));
|
r.setDateofcollection(doc.valueOf("//dr:dateOfCollection"));
|
||||||
r.setDateoftransformation(doc.valueOf("//dr:dateOfTransformation"));
|
r.setDateoftransformation(doc.valueOf("//dr:dateOfTransformation"));
|
||||||
r.setExtraInfo(new ArrayList<>()); // NOT PRESENT IN MDSTORES
|
r.setExtraInfo(new ArrayList<>()); // NOT PRESENT IN MDSTORES
|
||||||
|
@ -289,7 +287,10 @@ public abstract class AbstractMdRecordToOafMapper {
|
||||||
protected abstract Qualifier prepareResourceType(Document doc, DataInfo info);
|
protected abstract Qualifier prepareResourceType(Document doc, DataInfo info);
|
||||||
|
|
||||||
protected abstract List<Instance> prepareInstances(
|
protected abstract List<Instance> prepareInstances(
|
||||||
Document doc, DataInfo info, KeyValue collectedfrom, KeyValue hostedby);
|
Document doc,
|
||||||
|
DataInfo info,
|
||||||
|
KeyValue collectedfrom,
|
||||||
|
KeyValue hostedby);
|
||||||
|
|
||||||
protected abstract List<Field<String>> prepareSources(Document doc, DataInfo info);
|
protected abstract List<Field<String>> prepareSources(Document doc, DataInfo info);
|
||||||
|
|
||||||
|
@ -314,13 +315,16 @@ public abstract class AbstractMdRecordToOafMapper {
|
||||||
protected abstract List<Author> prepareAuthors(Document doc, DataInfo info);
|
protected abstract List<Author> prepareAuthors(Document doc, DataInfo info);
|
||||||
|
|
||||||
protected abstract List<Field<String>> prepareOtherResearchProductTools(
|
protected abstract List<Field<String>> prepareOtherResearchProductTools(
|
||||||
Document doc, DataInfo info);
|
Document doc,
|
||||||
|
DataInfo info);
|
||||||
|
|
||||||
protected abstract List<Field<String>> prepareOtherResearchProductContactGroups(
|
protected abstract List<Field<String>> prepareOtherResearchProductContactGroups(
|
||||||
Document doc, DataInfo info);
|
Document doc,
|
||||||
|
DataInfo info);
|
||||||
|
|
||||||
protected abstract List<Field<String>> prepareOtherResearchProductContactPersons(
|
protected abstract List<Field<String>> prepareOtherResearchProductContactPersons(
|
||||||
Document doc, DataInfo info);
|
Document doc,
|
||||||
|
DataInfo info);
|
||||||
|
|
||||||
protected abstract Qualifier prepareSoftwareProgrammingLanguage(Document doc, DataInfo info);
|
protected abstract Qualifier prepareSoftwareProgrammingLanguage(Document doc, DataInfo info);
|
||||||
|
|
||||||
|
@ -329,7 +333,8 @@ public abstract class AbstractMdRecordToOafMapper {
|
||||||
protected abstract List<StructuredProperty> prepareSoftwareLicenses(Document doc, DataInfo info);
|
protected abstract List<StructuredProperty> prepareSoftwareLicenses(Document doc, DataInfo info);
|
||||||
|
|
||||||
protected abstract List<Field<String>> prepareSoftwareDocumentationUrls(
|
protected abstract List<Field<String>> prepareSoftwareDocumentationUrls(
|
||||||
Document doc, DataInfo info);
|
Document doc,
|
||||||
|
DataInfo info);
|
||||||
|
|
||||||
protected abstract List<GeoLocation> prepareDatasetGeoLocations(Document doc, DataInfo info);
|
protected abstract List<GeoLocation> prepareDatasetGeoLocations(Document doc, DataInfo info);
|
||||||
|
|
||||||
|
@ -357,27 +362,16 @@ public abstract class AbstractMdRecordToOafMapper {
|
||||||
final String sp = n.valueOf("@sp");
|
final String sp = n.valueOf("@sp");
|
||||||
final String vol = n.valueOf("@vol");
|
final String vol = n.valueOf("@vol");
|
||||||
final String edition = n.valueOf("@edition");
|
final String edition = n.valueOf("@edition");
|
||||||
if (StringUtils.isNotBlank(name)) {
|
if (StringUtils.isNotBlank(name)) { return journal(name, issnPrinted, issnOnline, issnLinking, ep, iss, sp, vol, edition, null, null, info); }
|
||||||
return journal(
|
|
||||||
name,
|
|
||||||
issnPrinted,
|
|
||||||
issnOnline,
|
|
||||||
issnLinking,
|
|
||||||
ep,
|
|
||||||
iss,
|
|
||||||
sp,
|
|
||||||
vol,
|
|
||||||
edition,
|
|
||||||
null,
|
|
||||||
null,
|
|
||||||
info);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
protected Qualifier prepareQualifier(
|
protected Qualifier prepareQualifier(
|
||||||
final Node node, final String xpath, final String schemeId, final String schemeName) {
|
final Node node,
|
||||||
|
final String xpath,
|
||||||
|
final String schemeId,
|
||||||
|
final String schemeName) {
|
||||||
final String classId = node.valueOf(xpath);
|
final String classId = node.valueOf(xpath);
|
||||||
final String className = code2name.get(classId);
|
final String className = code2name.get(classId);
|
||||||
return qualifier(classId, className, schemeId, schemeName);
|
return qualifier(classId, className, schemeId, schemeName);
|
||||||
|
@ -401,7 +395,10 @@ public abstract class AbstractMdRecordToOafMapper {
|
||||||
}
|
}
|
||||||
|
|
||||||
protected List<StructuredProperty> prepareListStructProps(
|
protected List<StructuredProperty> prepareListStructProps(
|
||||||
final Node node, final String xpath, final Qualifier qualifier, final DataInfo info) {
|
final Node node,
|
||||||
|
final String xpath,
|
||||||
|
final Qualifier qualifier,
|
||||||
|
final DataInfo info) {
|
||||||
final List<StructuredProperty> res = new ArrayList<>();
|
final List<StructuredProperty> res = new ArrayList<>();
|
||||||
for (final Object o : node.selectNodes(xpath)) {
|
for (final Object o : node.selectNodes(xpath)) {
|
||||||
final Node n = (Node) o;
|
final Node n = (Node) o;
|
||||||
|
@ -411,19 +408,14 @@ public abstract class AbstractMdRecordToOafMapper {
|
||||||
}
|
}
|
||||||
|
|
||||||
protected List<StructuredProperty> prepareListStructProps(
|
protected List<StructuredProperty> prepareListStructProps(
|
||||||
final Node node, final String xpath, final DataInfo info) {
|
final Node node,
|
||||||
|
final String xpath,
|
||||||
|
final DataInfo info) {
|
||||||
final List<StructuredProperty> res = new ArrayList<>();
|
final List<StructuredProperty> res = new ArrayList<>();
|
||||||
for (final Object o : node.selectNodes(xpath)) {
|
for (final Object o : node.selectNodes(xpath)) {
|
||||||
final Node n = (Node) o;
|
final Node n = (Node) o;
|
||||||
res
|
res
|
||||||
.add(
|
.add(structuredProperty(n.getText(), n.valueOf("@classid"), n.valueOf("@classname"), n.valueOf("@schemeid"), n.valueOf("@schemename"), info));
|
||||||
structuredProperty(
|
|
||||||
n.getText(),
|
|
||||||
n.valueOf("@classid"),
|
|
||||||
n.valueOf("@classname"),
|
|
||||||
n.valueOf("@schemeid"),
|
|
||||||
n.valueOf("@schemename"),
|
|
||||||
info));
|
|
||||||
}
|
}
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
@ -431,9 +423,7 @@ public abstract class AbstractMdRecordToOafMapper {
|
||||||
protected OAIProvenance prepareOAIprovenance(final Document doc) {
|
protected OAIProvenance prepareOAIprovenance(final Document doc) {
|
||||||
final Node n = doc.selectSingleNode("//*[local-name()='provenance']/*[local-name()='originDescription']");
|
final Node n = doc.selectSingleNode("//*[local-name()='provenance']/*[local-name()='originDescription']");
|
||||||
|
|
||||||
if (n == null) {
|
if (n == null) { return null; }
|
||||||
return null;
|
|
||||||
}
|
|
||||||
|
|
||||||
final String identifier = n.valueOf("./*[local-name()='identifier']");
|
final String identifier = n.valueOf("./*[local-name()='identifier']");
|
||||||
final String baseURL = n.valueOf("./*[local-name()='baseURL']");
|
final String baseURL = n.valueOf("./*[local-name()='baseURL']");
|
||||||
|
@ -448,10 +438,7 @@ public abstract class AbstractMdRecordToOafMapper {
|
||||||
protected DataInfo prepareDataInfo(final Document doc) {
|
protected DataInfo prepareDataInfo(final Document doc) {
|
||||||
final Node n = doc.selectSingleNode("//oaf:datainfo");
|
final Node n = doc.selectSingleNode("//oaf:datainfo");
|
||||||
|
|
||||||
if (n == null) {
|
if (n == null) { return dataInfo(false, null, false, false, REPOSITORY_PROVENANCE_ACTIONS, "0.9"); }
|
||||||
return dataInfo(
|
|
||||||
false, null, false, false, REPOSITORY_PROVENANCE_ACTIONS, "0.9");
|
|
||||||
}
|
|
||||||
|
|
||||||
final String paClassId = n.valueOf("./oaf:provenanceaction/@classid");
|
final String paClassId = n.valueOf("./oaf:provenanceaction/@classid");
|
||||||
final String paClassName = n.valueOf("./oaf:provenanceaction/@classname");
|
final String paClassName = n.valueOf("./oaf:provenanceaction/@classname");
|
||||||
|
@ -463,13 +450,7 @@ public abstract class AbstractMdRecordToOafMapper {
|
||||||
final Boolean inferred = Boolean.parseBoolean(n.valueOf("./oaf:inferred"));
|
final Boolean inferred = Boolean.parseBoolean(n.valueOf("./oaf:inferred"));
|
||||||
final String trust = n.valueOf("./oaf:trust");
|
final String trust = n.valueOf("./oaf:trust");
|
||||||
|
|
||||||
return dataInfo(
|
return dataInfo(deletedbyinference, inferenceprovenance, inferred, false, qualifier(paClassId, paClassName, paSchemeId, paSchemeName), trust);
|
||||||
deletedbyinference,
|
|
||||||
inferenceprovenance,
|
|
||||||
inferred,
|
|
||||||
false,
|
|
||||||
qualifier(paClassId, paClassName, paSchemeId, paSchemeName),
|
|
||||||
trust);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
protected Field<String> prepareField(final Node node, final String xpath, final DataInfo info) {
|
protected Field<String> prepareField(final Node node, final String xpath, final DataInfo info) {
|
||||||
|
@ -477,7 +458,9 @@ public abstract class AbstractMdRecordToOafMapper {
|
||||||
}
|
}
|
||||||
|
|
||||||
protected List<Field<String>> prepareListFields(
|
protected List<Field<String>> prepareListFields(
|
||||||
final Node node, final String xpath, final DataInfo info) {
|
final Node node,
|
||||||
|
final String xpath,
|
||||||
|
final DataInfo info) {
|
||||||
return listFields(info, prepareListString(node, xpath));
|
return listFields(info, prepareListString(node, xpath));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -1,10 +1,19 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.oa.graph.raw;
|
package eu.dnetlib.dhp.oa.graph.raw;
|
||||||
|
|
||||||
import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.*;
|
import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.createOpenaireId;
|
||||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.*;
|
import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.field;
|
||||||
|
import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.structuredProperty;
|
||||||
|
import static eu.dnetlib.dhp.schema.common.ModelConstants.DNET_ACCESS_MODES;
|
||||||
|
import static eu.dnetlib.dhp.schema.common.ModelConstants.DNET_LANGUAGES;
|
||||||
|
import static eu.dnetlib.dhp.schema.common.ModelConstants.DNET_PUBLICATION_RESOURCE;
|
||||||
|
import static eu.dnetlib.dhp.schema.common.ModelConstants.IS_RELATED_TO;
|
||||||
|
import static eu.dnetlib.dhp.schema.common.ModelConstants.PUBLICATION_DATASET;
|
||||||
|
import static eu.dnetlib.dhp.schema.common.ModelConstants.RESULT_RESULT;
|
||||||
|
|
||||||
import java.util.*;
|
import java.util.ArrayList;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
import java.util.stream.Collectors;
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
import org.apache.commons.lang3.StringUtils;
|
import org.apache.commons.lang3.StringUtils;
|
||||||
|
@ -15,8 +24,15 @@ import org.dom4j.Node;
|
||||||
import com.google.common.collect.Lists;
|
import com.google.common.collect.Lists;
|
||||||
|
|
||||||
import eu.dnetlib.dhp.oa.graph.raw.common.PacePerson;
|
import eu.dnetlib.dhp.oa.graph.raw.common.PacePerson;
|
||||||
import eu.dnetlib.dhp.schema.common.ModelConstants;
|
import eu.dnetlib.dhp.schema.oaf.Author;
|
||||||
import eu.dnetlib.dhp.schema.oaf.*;
|
import eu.dnetlib.dhp.schema.oaf.DataInfo;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.Field;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.GeoLocation;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.Instance;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.KeyValue;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.Oaf;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.Qualifier;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
|
||||||
|
|
||||||
public class OafToOafMapper extends AbstractMdRecordToOafMapper {
|
public class OafToOafMapper extends AbstractMdRecordToOafMapper {
|
||||||
|
|
||||||
|
@ -39,14 +55,22 @@ public class OafToOafMapper extends AbstractMdRecordToOafMapper {
|
||||||
author.setSurname(p.getNormalisedSurname());
|
author.setSurname(p.getNormalisedSurname());
|
||||||
}
|
}
|
||||||
|
|
||||||
final String pid = e.attributeValue("nameIdentifier");
|
final String pid = e.valueOf("./@nameIdentifier");
|
||||||
final String pidType = e.attributeValue("nameIdentifierScheme");
|
final String type = e.valueOf("./@nameIdentifierScheme")
|
||||||
|
.trim()
|
||||||
|
.toUpperCase()
|
||||||
|
.replaceAll(" ", "")
|
||||||
|
.replaceAll("_", "");
|
||||||
|
|
||||||
author.setPid(new ArrayList<>());
|
author.setPid(new ArrayList<>());
|
||||||
if (StringUtils.isNotBlank(pid) && StringUtils.isNotBlank(pidType)) {
|
|
||||||
author
|
if (StringUtils.isNotBlank(pid)) {
|
||||||
.getPid()
|
if (type.startsWith("ORCID")) {
|
||||||
.add(structuredProperty(pid, qualifier(pidType, pidType, DNET_PID_TYPES, DNET_PID_TYPES), info));
|
final String cleanedId = pid.replaceAll("http://orcid.org/", "").replaceAll("https://orcid.org/", "");
|
||||||
|
author.getPid().add(structuredProperty(cleanedId, ORCID_PID_TYPE, info));
|
||||||
|
} else if (type.startsWith("MAGID")) {
|
||||||
|
author.getPid().add(structuredProperty(pid, MAG_PID_TYPE, info));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
res.add(author);
|
res.add(author);
|
||||||
|
@ -103,38 +127,29 @@ public class OafToOafMapper extends AbstractMdRecordToOafMapper {
|
||||||
|
|
||||||
final Instance instance = new Instance();
|
final Instance instance = new Instance();
|
||||||
instance
|
instance
|
||||||
.setInstancetype(
|
.setInstancetype(prepareQualifier(doc, "//dr:CobjCategory", DNET_PUBLICATION_RESOURCE, DNET_PUBLICATION_RESOURCE));
|
||||||
prepareQualifier(
|
|
||||||
doc,
|
|
||||||
"//dr:CobjCategory",
|
|
||||||
DNET_PUBLICATION_RESOURCE,
|
|
||||||
DNET_PUBLICATION_RESOURCE));
|
|
||||||
instance.setCollectedfrom(collectedfrom);
|
instance.setCollectedfrom(collectedfrom);
|
||||||
instance.setHostedby(hostedby);
|
instance.setHostedby(hostedby);
|
||||||
instance.setDateofacceptance(field(doc.valueOf("//oaf:dateAccepted"), info));
|
instance.setDateofacceptance(field(doc.valueOf("//oaf:dateAccepted"), info));
|
||||||
instance.setDistributionlocation(doc.valueOf("//oaf:distributionlocation"));
|
instance.setDistributionlocation(doc.valueOf("//oaf:distributionlocation"));
|
||||||
instance
|
instance
|
||||||
.setAccessright(
|
.setAccessright(prepareQualifier(doc, "//oaf:accessrights", DNET_ACCESS_MODES, DNET_ACCESS_MODES));
|
||||||
prepareQualifier(doc, "//oaf:accessrights", DNET_ACCESS_MODES, DNET_ACCESS_MODES));
|
|
||||||
instance.setLicense(field(doc.valueOf("//oaf:license"), info));
|
instance.setLicense(field(doc.valueOf("//oaf:license"), info));
|
||||||
instance.setRefereed(field(doc.valueOf("//oaf:refereed"), info));
|
instance.setRefereed(field(doc.valueOf("//oaf:refereed"), info));
|
||||||
instance
|
instance
|
||||||
.setProcessingchargeamount(
|
.setProcessingchargeamount(field(doc.valueOf("//oaf:processingchargeamount"), info));
|
||||||
field(doc.valueOf("//oaf:processingchargeamount"), info));
|
|
||||||
instance
|
instance
|
||||||
.setProcessingchargecurrency(
|
.setProcessingchargecurrency(field(doc.valueOf("//oaf:processingchargeamount/@currency"), info));
|
||||||
field(doc.valueOf("//oaf:processingchargeamount/@currency"), info));
|
|
||||||
|
|
||||||
List<Node> nodes = Lists.newArrayList(doc.selectNodes("//dc:identifier"));
|
final List<Node> nodes = Lists.newArrayList(doc.selectNodes("//dc:identifier"));
|
||||||
instance
|
instance
|
||||||
.setUrl(
|
.setUrl(nodes
|
||||||
nodes
|
.stream()
|
||||||
.stream()
|
.filter(n -> StringUtils.isNotBlank(n.getText()))
|
||||||
.filter(n -> StringUtils.isNotBlank(n.getText()))
|
.map(n -> n.getText().trim())
|
||||||
.map(n -> n.getText().trim())
|
.filter(u -> u.startsWith("http"))
|
||||||
.filter(u -> u.startsWith("http"))
|
.distinct()
|
||||||
.distinct()
|
.collect(Collectors.toCollection(ArrayList::new)));
|
||||||
.collect(Collectors.toCollection(ArrayList::new)));
|
|
||||||
|
|
||||||
return Lists.newArrayList(instance);
|
return Lists.newArrayList(instance);
|
||||||
}
|
}
|
||||||
|
@ -158,19 +173,22 @@ public class OafToOafMapper extends AbstractMdRecordToOafMapper {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected Field<String> prepareSoftwareCodeRepositoryUrl(
|
protected Field<String> prepareSoftwareCodeRepositoryUrl(
|
||||||
final Document doc, final DataInfo info) {
|
final Document doc,
|
||||||
|
final DataInfo info) {
|
||||||
return null; // NOT PRESENT IN OAF
|
return null; // NOT PRESENT IN OAF
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected List<StructuredProperty> prepareSoftwareLicenses(
|
protected List<StructuredProperty> prepareSoftwareLicenses(
|
||||||
final Document doc, final DataInfo info) {
|
final Document doc,
|
||||||
|
final DataInfo info) {
|
||||||
return new ArrayList<>(); // NOT PRESENT IN OAF
|
return new ArrayList<>(); // NOT PRESENT IN OAF
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected List<Field<String>> prepareSoftwareDocumentationUrls(
|
protected List<Field<String>> prepareSoftwareDocumentationUrls(
|
||||||
final Document doc, final DataInfo info) {
|
final Document doc,
|
||||||
|
final DataInfo info) {
|
||||||
return new ArrayList<>(); // NOT PRESENT IN OAF
|
return new ArrayList<>(); // NOT PRESENT IN OAF
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -182,13 +200,15 @@ public class OafToOafMapper extends AbstractMdRecordToOafMapper {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected Field<String> prepareDatasetMetadataVersionNumber(
|
protected Field<String> prepareDatasetMetadataVersionNumber(
|
||||||
final Document doc, final DataInfo info) {
|
final Document doc,
|
||||||
|
final DataInfo info) {
|
||||||
return null; // NOT PRESENT IN OAF
|
return null; // NOT PRESENT IN OAF
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected Field<String> prepareDatasetLastMetadataUpdate(
|
protected Field<String> prepareDatasetLastMetadataUpdate(
|
||||||
final Document doc, final DataInfo info) {
|
final Document doc,
|
||||||
|
final DataInfo info) {
|
||||||
return null; // NOT PRESENT IN OAF
|
return null; // NOT PRESENT IN OAF
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -216,19 +236,22 @@ public class OafToOafMapper extends AbstractMdRecordToOafMapper {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected List<Field<String>> prepareOtherResearchProductTools(
|
protected List<Field<String>> prepareOtherResearchProductTools(
|
||||||
final Document doc, final DataInfo info) {
|
final Document doc,
|
||||||
|
final DataInfo info) {
|
||||||
return new ArrayList<>(); // NOT PRESENT IN OAF
|
return new ArrayList<>(); // NOT PRESENT IN OAF
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected List<Field<String>> prepareOtherResearchProductContactGroups(
|
protected List<Field<String>> prepareOtherResearchProductContactGroups(
|
||||||
final Document doc, final DataInfo info) {
|
final Document doc,
|
||||||
|
final DataInfo info) {
|
||||||
return new ArrayList<>(); // NOT PRESENT IN OAF
|
return new ArrayList<>(); // NOT PRESENT IN OAF
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected List<Field<String>> prepareOtherResearchProductContactPersons(
|
protected List<Field<String>> prepareOtherResearchProductContactPersons(
|
||||||
final Document doc, final DataInfo info) {
|
final Document doc,
|
||||||
|
final DataInfo info) {
|
||||||
return new ArrayList<>(); // NOT PRESENT IN OAF
|
return new ArrayList<>(); // NOT PRESENT IN OAF
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -251,15 +274,9 @@ public class OafToOafMapper extends AbstractMdRecordToOafMapper {
|
||||||
final String otherId = createOpenaireId(50, originalId, false);
|
final String otherId = createOpenaireId(50, originalId, false);
|
||||||
|
|
||||||
res
|
res
|
||||||
.add(
|
.add(getRelation(docId, otherId, RESULT_RESULT, PUBLICATION_DATASET, IS_RELATED_TO, collectedFrom, info, lastUpdateTimestamp));
|
||||||
getRelation(
|
|
||||||
docId, otherId, RESULT_RESULT, PUBLICATION_DATASET, IS_RELATED_TO, collectedFrom, info,
|
|
||||||
lastUpdateTimestamp));
|
|
||||||
res
|
res
|
||||||
.add(
|
.add(getRelation(otherId, docId, RESULT_RESULT, PUBLICATION_DATASET, IS_RELATED_TO, collectedFrom, info, lastUpdateTimestamp));
|
||||||
getRelation(
|
|
||||||
otherId, docId, RESULT_RESULT, PUBLICATION_DATASET, IS_RELATED_TO, collectedFrom, info,
|
|
||||||
lastUpdateTimestamp));
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return res;
|
return res;
|
||||||
|
|
|
@ -3,13 +3,11 @@ package eu.dnetlib.dhp.oa.graph.raw;
|
||||||
|
|
||||||
import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.createOpenaireId;
|
import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.createOpenaireId;
|
||||||
import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.field;
|
import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.field;
|
||||||
import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.qualifier;
|
|
||||||
import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.structuredProperty;
|
import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.structuredProperty;
|
||||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.DNET_ACCESS_MODES;
|
import static eu.dnetlib.dhp.schema.common.ModelConstants.DNET_ACCESS_MODES;
|
||||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.DNET_DATA_CITE_DATE;
|
import static eu.dnetlib.dhp.schema.common.ModelConstants.DNET_DATA_CITE_DATE;
|
||||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.DNET_DATA_CITE_RESOURCE;
|
import static eu.dnetlib.dhp.schema.common.ModelConstants.DNET_DATA_CITE_RESOURCE;
|
||||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.DNET_LANGUAGES;
|
import static eu.dnetlib.dhp.schema.common.ModelConstants.DNET_LANGUAGES;
|
||||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.DNET_PID_TYPES;
|
|
||||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.DNET_PUBLICATION_RESOURCE;
|
import static eu.dnetlib.dhp.schema.common.ModelConstants.DNET_PUBLICATION_RESOURCE;
|
||||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.HAS_PARTS;
|
import static eu.dnetlib.dhp.schema.common.ModelConstants.HAS_PARTS;
|
||||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.IS_PART_OF;
|
import static eu.dnetlib.dhp.schema.common.ModelConstants.IS_PART_OF;
|
||||||
|
@ -45,9 +43,6 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper {
|
||||||
|
|
||||||
public static final String HTTP_DX_DOI_PREIFX = "http://dx.doi.org/";
|
public static final String HTTP_DX_DOI_PREIFX = "http://dx.doi.org/";
|
||||||
|
|
||||||
public static final Qualifier ORCID_PID_TYPE = qualifier("ORCID", "Open Researcher and Contributor ID", DNET_PID_TYPES, DNET_PID_TYPES);
|
|
||||||
public static final Qualifier MAG_PID_TYPE = qualifier("MAGIdentifier", "Microsoft Academic Graph Identifier", DNET_PID_TYPES, DNET_PID_TYPES);
|
|
||||||
|
|
||||||
public OdfToOafMapper(final Map<String, String> code2name) {
|
public OdfToOafMapper(final Map<String, String> code2name) {
|
||||||
super(code2name);
|
super(code2name);
|
||||||
}
|
}
|
||||||
|
|
|
@ -75,7 +75,7 @@ public class MappersTest {
|
||||||
.get();
|
.get();
|
||||||
assertEquals("0000-0001-6651-1178", pid.getValue());
|
assertEquals("0000-0001-6651-1178", pid.getValue());
|
||||||
assertEquals("ORCID", pid.getQualifier().getClassid());
|
assertEquals("ORCID", pid.getQualifier().getClassid());
|
||||||
assertEquals("ORCID", pid.getQualifier().getClassname());
|
assertEquals("Open Researcher and Contributor ID", pid.getQualifier().getClassname());
|
||||||
assertEquals(ModelConstants.DNET_PID_TYPES, pid.getQualifier().getSchemeid());
|
assertEquals(ModelConstants.DNET_PID_TYPES, pid.getQualifier().getSchemeid());
|
||||||
assertEquals(ModelConstants.DNET_PID_TYPES, pid.getQualifier().getSchemename());
|
assertEquals(ModelConstants.DNET_PID_TYPES, pid.getQualifier().getSchemename());
|
||||||
assertEquals("Votsi,Nefta", author.get().getFullname());
|
assertEquals("Votsi,Nefta", author.get().getFullname());
|
||||||
|
|
Loading…
Reference in New Issue