integrated changes from master

This commit is contained in:
Claudio Atzori 2020-04-28 08:55:28 +02:00
commit ac25f2d8d1
3 changed files with 925 additions and 913 deletions

View File

@ -1,4 +1,3 @@
package eu.dnetlib.dhp.oa.graph.raw; package eu.dnetlib.dhp.oa.graph.raw;
import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.createOpenaireId; import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.createOpenaireId;
@ -44,422 +43,426 @@ import org.dom4j.Node;
public abstract class AbstractMdRecordToOafMapper { public abstract class AbstractMdRecordToOafMapper {
protected final Map<String, String> code2name; protected final Map<String, String> code2name;
protected static final Qualifier MAIN_TITLE_QUALIFIER = qualifier( protected static final Qualifier MAIN_TITLE_QUALIFIER =
"main title", "main title", qualifier("main title", "main title", "dnet:dataCite_title", "dnet:dataCite_title");
"dnet:dataCite_title", "dnet:dataCite_title");
protected AbstractMdRecordToOafMapper(final Map<String, String> code2name) {
protected AbstractMdRecordToOafMapper(final Map<String, String> code2name) { this.code2name = code2name;
this.code2name = code2name; }
}
public List<Oaf> processMdRecord(final String xml) {
public List<Oaf> processMdRecord(final String xml) { try {
try { final Map<String, String> nsContext = new HashMap<>();
final Map<String, String> nsContext = new HashMap<>(); nsContext.put("dr", "http://www.driver-repository.eu/namespace/dr");
nsContext.put("dr", "http://www.driver-repository.eu/namespace/dr"); nsContext.put("dri", "http://www.driver-repository.eu/namespace/dri");
nsContext.put("dri", "http://www.driver-repository.eu/namespace/dri"); nsContext.put("oaf", "http://namespace.openaire.eu/oaf");
nsContext.put("oaf", "http://namespace.openaire.eu/oaf"); nsContext.put("oai", "http://www.openarchives.org/OAI/2.0/");
nsContext.put("oai", "http://www.openarchives.org/OAI/2.0/"); nsContext.put("prov", "http://www.openarchives.org/OAI/2.0/provenance");
nsContext.put("prov", "http://www.openarchives.org/OAI/2.0/provenance"); nsContext.put("dc", "http://purl.org/dc/elements/1.1/");
nsContext.put("dc", "http://purl.org/dc/elements/1.1/"); nsContext.put("datacite", "http://datacite.org/schema/kernel-3");
nsContext.put("datacite", "http://datacite.org/schema/kernel-3"); DocumentFactory.getInstance().setXPathNamespaceURIs(nsContext);
DocumentFactory.getInstance().setXPathNamespaceURIs(nsContext);
final Document doc =
final Document doc = DocumentHelper DocumentHelper.parseText(
.parseText( xml.replaceAll(
xml "http://datacite.org/schema/kernel-4", "http://datacite.org/schema/kernel-3"));
.replaceAll(
"http://datacite.org/schema/kernel-4", "http://datacite.org/schema/kernel-3")); final String type = doc.valueOf("//dr:CobjCategory/@type");
final KeyValue collectedFrom =
final String type = doc.valueOf("//dr:CobjCategory/@type"); keyValue(
final KeyValue collectedFrom = keyValue( createOpenaireId(10, doc.valueOf("//oaf:collectedFrom/@id"), true),
createOpenaireId(10, doc.valueOf("//oaf:collectedFrom/@id"), true), doc.valueOf("//oaf:collectedFrom/@name"));
doc.valueOf("//oaf:collectedFrom/@name")); final KeyValue hostedBy =
final KeyValue hostedBy = StringUtils.isBlank(doc.valueOf("//oaf:hostedBy/@id")) StringUtils.isBlank(doc.valueOf("//oaf:hostedBy/@id"))
? collectedFrom ? collectedFrom
: keyValue( : keyValue(
createOpenaireId(10, doc.valueOf("//oaf:hostedBy/@id"), true), createOpenaireId(10, doc.valueOf("//oaf:hostedBy/@id"), true),
doc.valueOf("//oaf:hostedBy/@name")); doc.valueOf("//oaf:hostedBy/@name"));
final DataInfo info = prepareDataInfo(doc); final DataInfo info = prepareDataInfo(doc);
final long lastUpdateTimestamp = new Date().getTime(); final long lastUpdateTimestamp = new Date().getTime();
return createOafs(doc, type, collectedFrom, hostedBy, info, lastUpdateTimestamp); return createOafs(doc, type, collectedFrom, hostedBy, info, lastUpdateTimestamp);
} catch (final Exception e) { } catch (final Exception e) {
throw new RuntimeException(e); throw new RuntimeException(e);
} }
} }
protected List<Oaf> createOafs( protected List<Oaf> createOafs(
final Document doc, final Document doc,
final String type, final String type,
final KeyValue collectedFrom, final KeyValue collectedFrom,
final KeyValue hostedBy, final KeyValue hostedBy,
final DataInfo info, final DataInfo info,
final long lastUpdateTimestamp) { final long lastUpdateTimestamp) {
final List<Oaf> oafs = new ArrayList<>(); final List<Oaf> oafs = new ArrayList<>();
switch (type.toLowerCase()) { switch (type.toLowerCase()) {
case "": case "":
case "publication": case "publication":
final Publication p = new Publication(); final Publication p = new Publication();
populateResultFields(p, doc, collectedFrom, hostedBy, info, lastUpdateTimestamp); populateResultFields(p, doc, collectedFrom, hostedBy, info, lastUpdateTimestamp);
p.setResulttype(MigrationConstants.PUBLICATION_RESULTTYPE_QUALIFIER); p.setResulttype(MigrationConstants.PUBLICATION_RESULTTYPE_QUALIFIER);
p.setJournal(prepareJournal(doc, info)); p.setJournal(prepareJournal(doc, info));
oafs.add(p); oafs.add(p);
break; break;
case "dataset": case "dataset":
final Dataset d = new Dataset(); final Dataset d = new Dataset();
populateResultFields(d, doc, collectedFrom, hostedBy, info, lastUpdateTimestamp); populateResultFields(d, doc, collectedFrom, hostedBy, info, lastUpdateTimestamp);
d.setResulttype(MigrationConstants.DATASET_RESULTTYPE_QUALIFIER); d.setResulttype(MigrationConstants.DATASET_RESULTTYPE_QUALIFIER);
d.setStoragedate(prepareDatasetStorageDate(doc, info)); d.setStoragedate(prepareDatasetStorageDate(doc, info));
d.setDevice(prepareDatasetDevice(doc, info)); d.setDevice(prepareDatasetDevice(doc, info));
d.setSize(prepareDatasetSize(doc, info)); d.setSize(prepareDatasetSize(doc, info));
d.setVersion(prepareDatasetVersion(doc, info)); d.setVersion(prepareDatasetVersion(doc, info));
d.setLastmetadataupdate(prepareDatasetLastMetadataUpdate(doc, info)); d.setLastmetadataupdate(prepareDatasetLastMetadataUpdate(doc, info));
d.setMetadataversionnumber(prepareDatasetMetadataVersionNumber(doc, info)); d.setMetadataversionnumber(prepareDatasetMetadataVersionNumber(doc, info));
d.setGeolocation(prepareDatasetGeoLocations(doc, info)); d.setGeolocation(prepareDatasetGeoLocations(doc, info));
oafs.add(d); oafs.add(d);
break; break;
case "software": case "software":
final Software s = new Software(); final Software s = new Software();
populateResultFields(s, doc, collectedFrom, hostedBy, info, lastUpdateTimestamp); populateResultFields(s, doc, collectedFrom, hostedBy, info, lastUpdateTimestamp);
s.setResulttype(MigrationConstants.SOFTWARE_RESULTTYPE_QUALIFIER); s.setResulttype(MigrationConstants.SOFTWARE_RESULTTYPE_QUALIFIER);
s.setDocumentationUrl(prepareSoftwareDocumentationUrls(doc, info)); s.setDocumentationUrl(prepareSoftwareDocumentationUrls(doc, info));
s.setLicense(prepareSoftwareLicenses(doc, info)); s.setLicense(prepareSoftwareLicenses(doc, info));
s.setCodeRepositoryUrl(prepareSoftwareCodeRepositoryUrl(doc, info)); s.setCodeRepositoryUrl(prepareSoftwareCodeRepositoryUrl(doc, info));
s.setProgrammingLanguage(prepareSoftwareProgrammingLanguage(doc, info)); s.setProgrammingLanguage(prepareSoftwareProgrammingLanguage(doc, info));
oafs.add(s); oafs.add(s);
break; break;
case "otherresearchproducts": case "otherresearchproducts":
default: default:
final OtherResearchProduct o = new OtherResearchProduct(); final OtherResearchProduct o = new OtherResearchProduct();
populateResultFields(o, doc, collectedFrom, hostedBy, info, lastUpdateTimestamp); populateResultFields(o, doc, collectedFrom, hostedBy, info, lastUpdateTimestamp);
o.setResulttype(MigrationConstants.OTHER_RESULTTYPE_QUALIFIER); o.setResulttype(MigrationConstants.OTHER_RESULTTYPE_QUALIFIER);
o.setContactperson(prepareOtherResearchProductContactPersons(doc, info)); o.setContactperson(prepareOtherResearchProductContactPersons(doc, info));
o.setContactgroup(prepareOtherResearchProductContactGroups(doc, info)); o.setContactgroup(prepareOtherResearchProductContactGroups(doc, info));
o.setTool(prepareOtherResearchProductTools(doc, info)); o.setTool(prepareOtherResearchProductTools(doc, info));
oafs.add(o); oafs.add(o);
break; break;
} }
if (!oafs.isEmpty()) { if (!oafs.isEmpty()) {
oafs.addAll(addProjectRels(doc, collectedFrom, info, lastUpdateTimestamp)); oafs.addAll(addProjectRels(doc, collectedFrom, info, lastUpdateTimestamp));
oafs.addAll(addOtherResultRels(doc, collectedFrom, info, lastUpdateTimestamp)); oafs.addAll(addOtherResultRels(doc, collectedFrom, info, lastUpdateTimestamp));
} }
return oafs; return oafs;
} }
private List<Oaf> addProjectRels( private List<Oaf> addProjectRels(
final Document doc, final Document doc,
final KeyValue collectedFrom, final KeyValue collectedFrom,
final DataInfo info, final DataInfo info,
final long lastUpdateTimestamp) { final long lastUpdateTimestamp) {
final List<Oaf> res = new ArrayList<>(); final List<Oaf> res = new ArrayList<>();
final String docId = createOpenaireId(50, doc.valueOf("//dri:objIdentifier"), false); final String docId = createOpenaireId(50, doc.valueOf("//dri:objIdentifier"), false);
for (final Object o : doc.selectNodes("//oaf:projectid")) { for (final Object o : doc.selectNodes("//oaf:projectid")) {
final String projectId = createOpenaireId(40, ((Node) o).getText(), true);
final String originalId = ((Node) o).getText();
final Relation r1 = new Relation();
r1.setRelType("resultProject"); if (StringUtils.isNotBlank(originalId)) {
r1.setSubRelType("outcome"); final String projectId = createOpenaireId(40, originalId, true);
r1.setRelClass("isProducedBy");
r1.setSource(docId); final Relation r1 = new Relation();
r1.setTarget(projectId); r1.setRelType("resultProject");
r1.setCollectedfrom(Arrays.asList(collectedFrom)); r1.setSubRelType("outcome");
r1.setDataInfo(info); r1.setRelClass("isProducedBy");
r1.setLastupdatetimestamp(lastUpdateTimestamp); r1.setSource(docId);
res.add(r1); r1.setTarget(projectId);
r1.setCollectedfrom(Arrays.asList(collectedFrom));
final Relation r2 = new Relation(); r1.setDataInfo(info);
r2.setRelType("resultProject"); r1.setLastupdatetimestamp(lastUpdateTimestamp);
r2.setSubRelType("outcome"); res.add(r1);
r2.setRelClass("produces");
r2.setSource(projectId); final Relation r2 = new Relation();
r2.setTarget(docId); r2.setRelType("resultProject");
r2.setCollectedfrom(Arrays.asList(collectedFrom)); r2.setSubRelType("outcome");
r2.setDataInfo(info); r2.setRelClass("produces");
r2.setLastupdatetimestamp(lastUpdateTimestamp); r2.setSource(projectId);
res.add(r2); r2.setTarget(docId);
} r2.setCollectedfrom(Arrays.asList(collectedFrom));
r2.setDataInfo(info);
return res; r2.setLastupdatetimestamp(lastUpdateTimestamp);
} res.add(r2);
}
protected abstract List<Oaf> addOtherResultRels( }
final Document doc,
final KeyValue collectedFrom, return res;
final DataInfo info, }
final long lastUpdateTimestamp);
protected abstract List<Oaf> addOtherResultRels(
private void populateResultFields( final Document doc,
final Result r, final KeyValue collectedFrom,
final Document doc, final DataInfo info,
final KeyValue collectedFrom, final long lastUpdateTimestamp);
final KeyValue hostedBy,
final DataInfo info, private void populateResultFields(
final long lastUpdateTimestamp) { final Result r,
r.setDataInfo(info); final Document doc,
r.setLastupdatetimestamp(lastUpdateTimestamp); final KeyValue collectedFrom,
r.setId(createOpenaireId(50, doc.valueOf("//dri:objIdentifier"), false)); final KeyValue hostedBy,
r.setOriginalId(Arrays.asList(doc.valueOf("//dri:objIdentifier"))); final DataInfo info,
r.setCollectedfrom(Arrays.asList(collectedFrom)); final long lastUpdateTimestamp) {
r r.setDataInfo(info);
.setPid( r.setLastupdatetimestamp(lastUpdateTimestamp);
prepareListStructProps( r.setId(createOpenaireId(50, doc.valueOf("//dri:objIdentifier"), false));
doc, "//oaf:identifier", "@identifierType", "dnet:pid_types", "dnet:pid_types", info)); r.setOriginalId(Arrays.asList(doc.valueOf("//dri:objIdentifier")));
r.setDateofcollection(doc.valueOf("//dr:dateOfCollection")); r.setCollectedfrom(Arrays.asList(collectedFrom));
r.setDateoftransformation(doc.valueOf("//dr:dateOfTransformation")); r.setPid(
r.setExtraInfo(new ArrayList<>()); // NOT PRESENT IN MDSTORES prepareListStructProps(
r.setOaiprovenance(prepareOAIprovenance(doc)); doc, "//oaf:identifier", "@identifierType", "dnet:pid_types", "dnet:pid_types", info));
r.setAuthor(prepareAuthors(doc, info)); r.setDateofcollection(doc.valueOf("//dr:dateOfCollection"));
r.setLanguage(prepareLanguages(doc)); r.setDateoftransformation(doc.valueOf("//dr:dateOfTransformation"));
r.setCountry(new ArrayList<>()); // NOT PRESENT IN MDSTORES r.setExtraInfo(new ArrayList<>()); // NOT PRESENT IN MDSTORES
r.setSubject(prepareSubjects(doc, info)); r.setOaiprovenance(prepareOAIprovenance(doc));
r.setTitle(prepareTitles(doc, info)); r.setAuthor(prepareAuthors(doc, info));
r.setRelevantdate(prepareRelevantDates(doc, info)); r.setLanguage(prepareLanguages(doc));
r.setDescription(prepareDescriptions(doc, info)); r.setCountry(new ArrayList<>()); // NOT PRESENT IN MDSTORES
r.setDateofacceptance(prepareField(doc, "//oaf:dateAccepted", info)); r.setSubject(prepareSubjects(doc, info));
r.setPublisher(preparePublisher(doc, info)); r.setTitle(prepareTitles(doc, info));
r.setEmbargoenddate(prepareField(doc, "//oaf:embargoenddate", info)); r.setRelevantdate(prepareRelevantDates(doc, info));
r.setSource(prepareSources(doc, info)); r.setDescription(prepareDescriptions(doc, info));
r.setFulltext(new ArrayList<>()); // NOT PRESENT IN MDSTORES r.setDateofacceptance(prepareField(doc, "//oaf:dateAccepted", info));
r.setFormat(prepareFormats(doc, info)); r.setPublisher(preparePublisher(doc, info));
r.setContributor(prepareContributors(doc, info)); r.setEmbargoenddate(prepareField(doc, "//oaf:embargoenddate", info));
r.setResourcetype(prepareResourceType(doc, info)); r.setSource(prepareSources(doc, info));
r.setCoverage(prepareCoverages(doc, info)); r.setFulltext(new ArrayList<>()); // NOT PRESENT IN MDSTORES
r.setContext(prepareContexts(doc, info)); r.setFormat(prepareFormats(doc, info));
r.setExternalReference(new ArrayList<>()); // NOT PRESENT IN MDSTORES r.setContributor(prepareContributors(doc, info));
r.setInstance(prepareInstances(doc, info, collectedFrom, hostedBy)); r.setResourcetype(prepareResourceType(doc, info));
} r.setCoverage(prepareCoverages(doc, info));
r.setContext(prepareContexts(doc, info));
private List<Context> prepareContexts(final Document doc, final DataInfo info) { r.setExternalReference(new ArrayList<>()); // NOT PRESENT IN MDSTORES
final List<Context> list = new ArrayList<>(); r.setInstance(prepareInstances(doc, info, collectedFrom, hostedBy));
for (final Object o : doc.selectNodes("//oaf:concept")) { }
final String cid = ((Node) o).valueOf("@id");
if (StringUtils.isNotBlank(cid)) { private List<Context> prepareContexts(final Document doc, final DataInfo info) {
final Context c = new Context(); final List<Context> list = new ArrayList<>();
c.setId(cid); for (final Object o : doc.selectNodes("//oaf:concept")) {
c.setDataInfo(Arrays.asList(info)); final String cid = ((Node) o).valueOf("@id");
list.add(c); if (StringUtils.isNotBlank(cid)) {
} final Context c = new Context();
} c.setId(cid);
return list; c.setDataInfo(Arrays.asList(info));
} list.add(c);
}
protected abstract Qualifier prepareResourceType(Document doc, DataInfo info); }
return list;
protected abstract List<Instance> prepareInstances( }
Document doc, DataInfo info, KeyValue collectedfrom, KeyValue hostedby);
protected abstract Qualifier prepareResourceType(Document doc, DataInfo info);
protected abstract List<Field<String>> prepareSources(Document doc, DataInfo info);
protected abstract List<Instance> prepareInstances(
protected abstract List<StructuredProperty> prepareRelevantDates(Document doc, DataInfo info); Document doc, DataInfo info, KeyValue collectedfrom, KeyValue hostedby);
protected abstract List<Field<String>> prepareCoverages(Document doc, DataInfo info); protected abstract List<Field<String>> prepareSources(Document doc, DataInfo info);
protected abstract List<Field<String>> prepareContributors(Document doc, DataInfo info); protected abstract List<StructuredProperty> prepareRelevantDates(Document doc, DataInfo info);
protected abstract List<Field<String>> prepareFormats(Document doc, DataInfo info); protected abstract List<Field<String>> prepareCoverages(Document doc, DataInfo info);
protected abstract Field<String> preparePublisher(Document doc, DataInfo info); protected abstract List<Field<String>> prepareContributors(Document doc, DataInfo info);
protected abstract List<Field<String>> prepareDescriptions(Document doc, DataInfo info); protected abstract List<Field<String>> prepareFormats(Document doc, DataInfo info);
protected abstract List<StructuredProperty> prepareTitles(Document doc, DataInfo info); protected abstract Field<String> preparePublisher(Document doc, DataInfo info);
protected abstract List<StructuredProperty> prepareSubjects(Document doc, DataInfo info); protected abstract List<Field<String>> prepareDescriptions(Document doc, DataInfo info);
protected abstract Qualifier prepareLanguages(Document doc); protected abstract List<StructuredProperty> prepareTitles(Document doc, DataInfo info);
protected abstract List<Author> prepareAuthors(Document doc, DataInfo info); protected abstract List<StructuredProperty> prepareSubjects(Document doc, DataInfo info);
protected abstract List<Field<String>> prepareOtherResearchProductTools( protected abstract Qualifier prepareLanguages(Document doc);
Document doc, DataInfo info);
protected abstract List<Author> prepareAuthors(Document doc, DataInfo info);
protected abstract List<Field<String>> prepareOtherResearchProductContactGroups(
Document doc, DataInfo info); protected abstract List<Field<String>> prepareOtherResearchProductTools(
Document doc, DataInfo info);
protected abstract List<Field<String>> prepareOtherResearchProductContactPersons(
Document doc, DataInfo info); protected abstract List<Field<String>> prepareOtherResearchProductContactGroups(
Document doc, DataInfo info);
protected abstract Qualifier prepareSoftwareProgrammingLanguage(Document doc, DataInfo info);
protected abstract List<Field<String>> prepareOtherResearchProductContactPersons(
protected abstract Field<String> prepareSoftwareCodeRepositoryUrl(Document doc, DataInfo info); Document doc, DataInfo info);
protected abstract List<StructuredProperty> prepareSoftwareLicenses(Document doc, DataInfo info); protected abstract Qualifier prepareSoftwareProgrammingLanguage(Document doc, DataInfo info);
protected abstract List<Field<String>> prepareSoftwareDocumentationUrls( protected abstract Field<String> prepareSoftwareCodeRepositoryUrl(Document doc, DataInfo info);
Document doc, DataInfo info);
protected abstract List<StructuredProperty> prepareSoftwareLicenses(Document doc, DataInfo info);
protected abstract List<GeoLocation> prepareDatasetGeoLocations(Document doc, DataInfo info);
protected abstract List<Field<String>> prepareSoftwareDocumentationUrls(
protected abstract Field<String> prepareDatasetMetadataVersionNumber(Document doc, DataInfo info); Document doc, DataInfo info);
protected abstract Field<String> prepareDatasetLastMetadataUpdate(Document doc, DataInfo info); protected abstract List<GeoLocation> prepareDatasetGeoLocations(Document doc, DataInfo info);
protected abstract Field<String> prepareDatasetVersion(Document doc, DataInfo info); protected abstract Field<String> prepareDatasetMetadataVersionNumber(Document doc, DataInfo info);
protected abstract Field<String> prepareDatasetSize(Document doc, DataInfo info); protected abstract Field<String> prepareDatasetLastMetadataUpdate(Document doc, DataInfo info);
protected abstract Field<String> prepareDatasetDevice(Document doc, DataInfo info); protected abstract Field<String> prepareDatasetVersion(Document doc, DataInfo info);
protected abstract Field<String> prepareDatasetStorageDate(Document doc, DataInfo info); protected abstract Field<String> prepareDatasetSize(Document doc, DataInfo info);
private Journal prepareJournal(final Document doc, final DataInfo info) { protected abstract Field<String> prepareDatasetDevice(Document doc, DataInfo info);
final Node n = doc.selectSingleNode("//oaf:journal");
if (n != null) { protected abstract Field<String> prepareDatasetStorageDate(Document doc, DataInfo info);
final String name = n.getText();
final String issnPrinted = n.valueOf("@issn"); private Journal prepareJournal(final Document doc, final DataInfo info) {
final String issnOnline = n.valueOf("@eissn"); final Node n = doc.selectSingleNode("//oaf:journal");
final String issnLinking = n.valueOf("@lissn"); if (n != null) {
final String ep = n.valueOf("@ep"); final String name = n.getText();
final String iss = n.valueOf("@iss"); final String issnPrinted = n.valueOf("@issn");
final String sp = n.valueOf("@sp"); final String issnOnline = n.valueOf("@eissn");
final String vol = n.valueOf("@vol"); final String issnLinking = n.valueOf("@lissn");
final String edition = n.valueOf("@edition"); final String ep = n.valueOf("@ep");
if (StringUtils.isNotBlank(name)) { final String iss = n.valueOf("@iss");
return journal( final String sp = n.valueOf("@sp");
name, final String vol = n.valueOf("@vol");
issnPrinted, final String edition = n.valueOf("@edition");
issnOnline, if (StringUtils.isNotBlank(name)) {
issnLinking, return journal(
ep, name,
iss, issnPrinted,
sp, issnOnline,
vol, issnLinking,
edition, ep,
null, iss,
null, sp,
info); vol,
} edition,
} null,
return null; null,
} info);
}
protected Qualifier prepareQualifier( }
final Node node, final String xpath, final String schemeId, final String schemeName) { return null;
final String classId = node.valueOf(xpath); }
final String className = code2name.get(classId);
return qualifier(classId, className, schemeId, schemeName); protected Qualifier prepareQualifier(
} final Node node, final String xpath, final String schemeId, final String schemeName) {
final String classId = node.valueOf(xpath);
protected List<StructuredProperty> prepareListStructProps( final String className = code2name.get(classId);
final Node node, return qualifier(classId, className, schemeId, schemeName);
final String xpath, }
final String xpathClassId,
final String schemeId, protected List<StructuredProperty> prepareListStructProps(
final String schemeName, final Node node,
final DataInfo info) { final String xpath,
final List<StructuredProperty> res = new ArrayList<>(); final String xpathClassId,
for (final Object o : node.selectNodes(xpath)) { final String schemeId,
final Node n = (Node) o; final String schemeName,
final String classId = n.valueOf(xpathClassId); final DataInfo info) {
final String className = code2name.get(classId); final List<StructuredProperty> res = new ArrayList<>();
res.add(structuredProperty(n.getText(), classId, className, schemeId, schemeName, info)); for (final Object o : node.selectNodes(xpath)) {
} final Node n = (Node) o;
return res; final String classId = n.valueOf(xpathClassId);
} final String className = code2name.get(classId);
res.add(structuredProperty(n.getText(), classId, className, schemeId, schemeName, info));
protected List<StructuredProperty> prepareListStructProps( }
final Node node, final String xpath, final Qualifier qualifier, final DataInfo info) { return res;
final List<StructuredProperty> res = new ArrayList<>(); }
for (final Object o : node.selectNodes(xpath)) {
final Node n = (Node) o; protected List<StructuredProperty> prepareListStructProps(
res.add(structuredProperty(n.getText(), qualifier, info)); final Node node, final String xpath, final Qualifier qualifier, final DataInfo info) {
} final List<StructuredProperty> res = new ArrayList<>();
return res; for (final Object o : node.selectNodes(xpath)) {
} final Node n = (Node) o;
res.add(structuredProperty(n.getText(), qualifier, info));
protected List<StructuredProperty> prepareListStructProps( }
final Node node, final String xpath, final DataInfo info) { return res;
final List<StructuredProperty> res = new ArrayList<>(); }
for (final Object o : node.selectNodes(xpath)) {
final Node n = (Node) o; protected List<StructuredProperty> prepareListStructProps(
res final Node node, final String xpath, final DataInfo info) {
.add( final List<StructuredProperty> res = new ArrayList<>();
structuredProperty( for (final Object o : node.selectNodes(xpath)) {
n.getText(), final Node n = (Node) o;
n.valueOf("@classid"), res.add(
n.valueOf("@classname"), structuredProperty(
n.valueOf("@schemeid"), n.getText(),
n.valueOf("@schemename"), n.valueOf("@classid"),
info)); n.valueOf("@classname"),
} n.valueOf("@schemeid"),
return res; n.valueOf("@schemename"),
} info));
}
protected OAIProvenance prepareOAIprovenance(final Document doc) { return res;
final Node n = doc.selectSingleNode("//*[local-name()='provenance']/*[local-name()='originDescription']"); }
if (n == null) { protected OAIProvenance prepareOAIprovenance(final Document doc) {
return null; final Node n =
} doc.selectSingleNode("//*[local-name()='provenance']/*[local-name()='originDescription']");
final String identifier = n.valueOf("./*[local-name()='identifier']"); if (n == null) {
final String baseURL = n.valueOf("./*[local-name()='baseURL']"); return null;
; }
final String metadataNamespace = n.valueOf("./*[local-name()='metadataNamespace']");
; final String identifier = n.valueOf("./*[local-name()='identifier']");
final boolean altered = n.valueOf("@altered").equalsIgnoreCase("true"); final String baseURL = n.valueOf("./*[local-name()='baseURL']");
final String datestamp = n.valueOf("./*[local-name()='datestamp']"); ;
; final String metadataNamespace = n.valueOf("./*[local-name()='metadataNamespace']");
final String harvestDate = n.valueOf("@harvestDate"); ;
; final boolean altered = n.valueOf("@altered").equalsIgnoreCase("true");
final String datestamp = n.valueOf("./*[local-name()='datestamp']");
return oaiIProvenance(identifier, baseURL, metadataNamespace, altered, datestamp, harvestDate); ;
} final String harvestDate = n.valueOf("@harvestDate");
;
protected DataInfo prepareDataInfo(final Document doc) {
final Node n = doc.selectSingleNode("//oaf:datainfo"); return oaiIProvenance(identifier, baseURL, metadataNamespace, altered, datestamp, harvestDate);
}
if (n == null) {
return dataInfo( protected DataInfo prepareDataInfo(final Document doc) {
false, null, false, false, MigrationConstants.REPOSITORY_PROVENANCE_ACTIONS, "0.9"); final Node n = doc.selectSingleNode("//oaf:datainfo");
}
if (n == null) {
final String paClassId = n.valueOf("./oaf:provenanceaction/@classid"); return dataInfo(
final String paClassName = n.valueOf("./oaf:provenanceaction/@classname"); false, null, false, false, MigrationConstants.REPOSITORY_PROVENANCE_ACTIONS, "0.9");
final String paSchemeId = n.valueOf("./oaf:provenanceaction/@schemeid"); }
final String paSchemeName = n.valueOf("./oaf:provenanceaction/@schemename");
final String paClassId = n.valueOf("./oaf:provenanceaction/@classid");
final boolean deletedbyinference = Boolean.parseBoolean(n.valueOf("./oaf:deletedbyinference")); final String paClassName = n.valueOf("./oaf:provenanceaction/@classname");
final String inferenceprovenance = n.valueOf("./oaf:inferenceprovenance"); final String paSchemeId = n.valueOf("./oaf:provenanceaction/@schemeid");
final Boolean inferred = Boolean.parseBoolean(n.valueOf("./oaf:inferred")); final String paSchemeName = n.valueOf("./oaf:provenanceaction/@schemename");
final String trust = n.valueOf("./oaf:trust");
final boolean deletedbyinference = Boolean.parseBoolean(n.valueOf("./oaf:deletedbyinference"));
return dataInfo( final String inferenceprovenance = n.valueOf("./oaf:inferenceprovenance");
deletedbyinference, final Boolean inferred = Boolean.parseBoolean(n.valueOf("./oaf:inferred"));
inferenceprovenance, final String trust = n.valueOf("./oaf:trust");
inferred,
false, return dataInfo(
qualifier(paClassId, paClassName, paSchemeId, paSchemeName), deletedbyinference,
trust); inferenceprovenance,
} inferred,
false,
protected Field<String> prepareField(final Node node, final String xpath, final DataInfo info) { qualifier(paClassId, paClassName, paSchemeId, paSchemeName),
return field(node.valueOf(xpath), info); trust);
} }
protected List<Field<String>> prepareListFields( protected Field<String> prepareField(final Node node, final String xpath, final DataInfo info) {
final Node node, final String xpath, final DataInfo info) { return field(node.valueOf(xpath), info);
return listFields(info, prepareListString(node, xpath)); }
}
protected List<Field<String>> prepareListFields(
protected List<String> prepareListString(final Node node, final String xpath) { final Node node, final String xpath, final DataInfo info) {
final List<String> res = new ArrayList<>(); return listFields(info, prepareListString(node, xpath));
for (final Object o : node.selectNodes(xpath)) { }
final String s = ((Node) o).getText().trim();
if (StringUtils.isNotBlank(s)) { protected List<String> prepareListString(final Node node, final String xpath) {
res.add(s); final List<String> res = new ArrayList<>();
} for (final Object o : node.selectNodes(xpath)) {
} final String s = ((Node) o).getText().trim();
return res; if (StringUtils.isNotBlank(s)) {
} res.add(s);
}
}
return res;
}
} }

View File

@ -1,256 +1,267 @@
package eu.dnetlib.dhp.oa.graph.raw; package eu.dnetlib.dhp.oa.graph.raw;
import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.createOpenaireId; import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.createOpenaireId;
import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.field; import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.field;
import eu.dnetlib.dhp.oa.graph.raw.common.PacePerson; import eu.dnetlib.dhp.oa.graph.raw.common.PacePerson;
import eu.dnetlib.dhp.schema.oaf.*; import eu.dnetlib.dhp.schema.oaf.Author;
import eu.dnetlib.dhp.schema.oaf.DataInfo;
import eu.dnetlib.dhp.schema.oaf.Field;
import eu.dnetlib.dhp.schema.oaf.GeoLocation;
import eu.dnetlib.dhp.schema.oaf.Instance;
import eu.dnetlib.dhp.schema.oaf.KeyValue;
import eu.dnetlib.dhp.schema.oaf.Oaf;
import eu.dnetlib.dhp.schema.oaf.Qualifier;
import eu.dnetlib.dhp.schema.oaf.Relation;
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Arrays; import java.util.Arrays;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
import org.apache.commons.lang3.StringUtils;
import org.dom4j.Document; import org.dom4j.Document;
import org.dom4j.Node; import org.dom4j.Node;
public class OafToOafMapper extends AbstractMdRecordToOafMapper { public class OafToOafMapper extends AbstractMdRecordToOafMapper {
public OafToOafMapper(final Map<String, String> code2name) { public OafToOafMapper(final Map<String, String> code2name) {
super(code2name); super(code2name);
} }
@Override @Override
protected List<Author> prepareAuthors(final Document doc, final DataInfo info) { protected List<Author> prepareAuthors(final Document doc, final DataInfo info) {
final List<Author> res = new ArrayList<>(); final List<Author> res = new ArrayList<>();
int pos = 1; int pos = 1;
for (final Object o : doc.selectNodes("//dc:creator")) { for (final Object o : doc.selectNodes("//dc:creator")) {
final Node n = (Node) o; final Node n = (Node) o;
final Author author = new Author(); final Author author = new Author();
author.setFullname(n.getText()); author.setFullname(n.getText());
author.setRank(pos++); author.setRank(pos++);
final PacePerson p = new PacePerson(n.getText(), false); final PacePerson p = new PacePerson(n.getText(), false);
if (p.isAccurate()) { if (p.isAccurate()) {
author.setName(p.getNormalisedFirstName()); author.setName(p.getNormalisedFirstName());
author.setSurname(p.getNormalisedSurname()); author.setSurname(p.getNormalisedSurname());
} }
res.add(author); res.add(author);
} }
return res; return res;
} }
@Override @Override
protected Qualifier prepareLanguages(final Document doc) { protected Qualifier prepareLanguages(final Document doc) {
return prepareQualifier(doc, "//dc:language", "dnet:languages", "dnet:languages"); return prepareQualifier(doc, "//dc:language", "dnet:languages", "dnet:languages");
} }
@Override @Override
protected List<StructuredProperty> prepareSubjects(final Document doc, final DataInfo info) { protected List<StructuredProperty> prepareSubjects(final Document doc, final DataInfo info) {
return prepareListStructProps(doc, "//dc:subject", info); return prepareListStructProps(doc, "//dc:subject", info);
} }
@Override @Override
protected List<StructuredProperty> prepareTitles(final Document doc, final DataInfo info) { protected List<StructuredProperty> prepareTitles(final Document doc, final DataInfo info) {
return prepareListStructProps(doc, "//dc:title", MAIN_TITLE_QUALIFIER, info); return prepareListStructProps(doc, "//dc:title", MAIN_TITLE_QUALIFIER, info);
} }
@Override @Override
protected List<Field<String>> prepareDescriptions(final Document doc, final DataInfo info) { protected List<Field<String>> prepareDescriptions(final Document doc, final DataInfo info) {
return prepareListFields(doc, "//dc:description", info); return prepareListFields(doc, "//dc:description", info);
} }
@Override @Override
protected Field<String> preparePublisher(final Document doc, final DataInfo info) { protected Field<String> preparePublisher(final Document doc, final DataInfo info) {
return prepareField(doc, "//dc:publisher", info); return prepareField(doc, "//dc:publisher", info);
} }
@Override @Override
protected List<Field<String>> prepareFormats(final Document doc, final DataInfo info) { protected List<Field<String>> prepareFormats(final Document doc, final DataInfo info) {
return prepareListFields(doc, "//dc:format", info); return prepareListFields(doc, "//dc:format", info);
} }
@Override @Override
protected List<Field<String>> prepareContributors(final Document doc, final DataInfo info) { protected List<Field<String>> prepareContributors(final Document doc, final DataInfo info) {
return prepareListFields(doc, "//dc:contributor", info); return prepareListFields(doc, "//dc:contributor", info);
} }
@Override @Override
protected List<Field<String>> prepareCoverages(final Document doc, final DataInfo info) { protected List<Field<String>> prepareCoverages(final Document doc, final DataInfo info) {
return prepareListFields(doc, "//dc:coverage", info); return prepareListFields(doc, "//dc:coverage", info);
} }
@Override @Override
protected List<Instance> prepareInstances( protected List<Instance> prepareInstances(
final Document doc, final Document doc,
final DataInfo info, final DataInfo info,
final KeyValue collectedfrom, final KeyValue collectedfrom,
final KeyValue hostedby) { final KeyValue hostedby) {
final List<Instance> res = new ArrayList<>(); final List<Instance> res = new ArrayList<>();
for (final Object o : doc.selectNodes("//dc:identifier")) { for (final Object o : doc.selectNodes("//dc:identifier")) {
final String url = ((Node) o).getText().trim(); final String url = ((Node) o).getText().trim();
if (url.startsWith("http")) { if (url.startsWith("http")) {
final Instance instance = new Instance(); final Instance instance = new Instance();
instance.setUrl(Arrays.asList(url)); instance.setUrl(Arrays.asList(url));
instance instance.setInstancetype(
.setInstancetype( prepareQualifier(
prepareQualifier( doc,
doc, "//dr:CobjCategory",
"//dr:CobjCategory", "dnet:publication_resource",
"dnet:publication_resource", "dnet:publication_resource"));
"dnet:publication_resource")); instance.setCollectedfrom(collectedfrom);
instance.setCollectedfrom(collectedfrom); instance.setHostedby(hostedby);
instance.setHostedby(hostedby); instance.setDateofacceptance(field(doc.valueOf("//oaf:dateAccepted"), info));
instance.setDateofacceptance(field(doc.valueOf("//oaf:dateAccepted"), info)); instance.setDistributionlocation(doc.valueOf("//oaf:distributionlocation"));
instance.setDistributionlocation(doc.valueOf("//oaf:distributionlocation")); instance.setAccessright(
instance prepareQualifier(doc, "//oaf:accessrights", "dnet:access_modes", "dnet:access_modes"));
.setAccessright( instance.setLicense(field(doc.valueOf("//oaf:license"), info));
prepareQualifier(doc, "//oaf:accessrights", "dnet:access_modes", "dnet:access_modes")); instance.setRefereed(field(doc.valueOf("//oaf:refereed"), info));
instance.setLicense(field(doc.valueOf("//oaf:license"), info)); instance.setProcessingchargeamount(
instance.setRefereed(field(doc.valueOf("//oaf:refereed"), info)); field(doc.valueOf("//oaf:processingchargeamount"), info));
instance instance.setProcessingchargecurrency(
.setProcessingchargeamount( field(doc.valueOf("//oaf:processingchargeamount/@currency"), info));
field(doc.valueOf("//oaf:processingchargeamount"), info)); res.add(instance);
instance }
.setProcessingchargecurrency( }
field(doc.valueOf("//oaf:processingchargeamount/@currency"), info)); return res;
res.add(instance); }
}
}
return res;
}
@Override @Override
protected List<Field<String>> prepareSources(final Document doc, final DataInfo info) { protected List<Field<String>> prepareSources(final Document doc, final DataInfo info) {
return prepareListFields(doc, "//dc:source", info); return prepareListFields(doc, "//dc:source", info);
} }
@Override @Override
protected List<StructuredProperty> prepareRelevantDates(final Document doc, final DataInfo info) { protected List<StructuredProperty> prepareRelevantDates(final Document doc, final DataInfo info) {
return new ArrayList<>(); // NOT PRESENT IN OAF return new ArrayList<>(); // NOT PRESENT IN OAF
} }
// SOFTWARES // SOFTWARES
@Override @Override
protected Qualifier prepareSoftwareProgrammingLanguage(final Document doc, final DataInfo info) { protected Qualifier prepareSoftwareProgrammingLanguage(final Document doc, final DataInfo info) {
return null; // NOT PRESENT IN OAF return null; // NOT PRESENT IN OAF
} }
@Override @Override
protected Field<String> prepareSoftwareCodeRepositoryUrl( protected Field<String> prepareSoftwareCodeRepositoryUrl(
final Document doc, final DataInfo info) { final Document doc, final DataInfo info) {
return null; // NOT PRESENT IN OAF return null; // NOT PRESENT IN OAF
} }
@Override @Override
protected List<StructuredProperty> prepareSoftwareLicenses( protected List<StructuredProperty> prepareSoftwareLicenses(
final Document doc, final DataInfo info) { final Document doc, final DataInfo info) {
return new ArrayList<>(); // NOT PRESENT IN OAF return new ArrayList<>(); // NOT PRESENT IN OAF
} }
@Override @Override
protected List<Field<String>> prepareSoftwareDocumentationUrls( protected List<Field<String>> prepareSoftwareDocumentationUrls(
final Document doc, final DataInfo info) { final Document doc, final DataInfo info) {
return new ArrayList<>(); // NOT PRESENT IN OAF return new ArrayList<>(); // NOT PRESENT IN OAF
} }
// DATASETS // DATASETS
@Override @Override
protected List<GeoLocation> prepareDatasetGeoLocations(final Document doc, final DataInfo info) { protected List<GeoLocation> prepareDatasetGeoLocations(final Document doc, final DataInfo info) {
return new ArrayList<>(); // NOT PRESENT IN OAF return new ArrayList<>(); // NOT PRESENT IN OAF
} }
@Override @Override
protected Field<String> prepareDatasetMetadataVersionNumber( protected Field<String> prepareDatasetMetadataVersionNumber(
final Document doc, final DataInfo info) { final Document doc, final DataInfo info) {
return null; // NOT PRESENT IN OAF return null; // NOT PRESENT IN OAF
} }
@Override @Override
protected Field<String> prepareDatasetLastMetadataUpdate( protected Field<String> prepareDatasetLastMetadataUpdate(
final Document doc, final DataInfo info) { final Document doc, final DataInfo info) {
return null; // NOT PRESENT IN OAF return null; // NOT PRESENT IN OAF
} }
@Override @Override
protected Field<String> prepareDatasetVersion(final Document doc, final DataInfo info) { protected Field<String> prepareDatasetVersion(final Document doc, final DataInfo info) {
return null; // NOT PRESENT IN OAF return null; // NOT PRESENT IN OAF
} }
@Override @Override
protected Field<String> prepareDatasetSize(final Document doc, final DataInfo info) { protected Field<String> prepareDatasetSize(final Document doc, final DataInfo info) {
return null; // NOT PRESENT IN OAF return null; // NOT PRESENT IN OAF
} }
@Override @Override
protected Field<String> prepareDatasetDevice(final Document doc, final DataInfo info) { protected Field<String> prepareDatasetDevice(final Document doc, final DataInfo info) {
return null; // NOT PRESENT IN OAF return null; // NOT PRESENT IN OAF
} }
@Override @Override
protected Field<String> prepareDatasetStorageDate(final Document doc, final DataInfo info) { protected Field<String> prepareDatasetStorageDate(final Document doc, final DataInfo info) {
return null; // NOT PRESENT IN OAF return null; // NOT PRESENT IN OAF
} }
// OTHER PRODUCTS // OTHER PRODUCTS
@Override @Override
protected List<Field<String>> prepareOtherResearchProductTools( protected List<Field<String>> prepareOtherResearchProductTools(
final Document doc, final DataInfo info) { final Document doc, final DataInfo info) {
return new ArrayList<>(); // NOT PRESENT IN OAF return new ArrayList<>(); // NOT PRESENT IN OAF
} }
@Override @Override
protected List<Field<String>> prepareOtherResearchProductContactGroups( protected List<Field<String>> prepareOtherResearchProductContactGroups(
final Document doc, final DataInfo info) { final Document doc, final DataInfo info) {
return new ArrayList<>(); // NOT PRESENT IN OAF return new ArrayList<>(); // NOT PRESENT IN OAF
} }
@Override @Override
protected List<Field<String>> prepareOtherResearchProductContactPersons( protected List<Field<String>> prepareOtherResearchProductContactPersons(
final Document doc, final DataInfo info) { final Document doc, final DataInfo info) {
return new ArrayList<>(); // NOT PRESENT IN OAF return new ArrayList<>(); // NOT PRESENT IN OAF
} }
@Override @Override
protected List<Oaf> addOtherResultRels( protected List<Oaf> addOtherResultRels(
final Document doc, final Document doc,
final KeyValue collectedFrom, final KeyValue collectedFrom,
final DataInfo info, final DataInfo info,
final long lastUpdateTimestamp) { final long lastUpdateTimestamp) {
final String docId = createOpenaireId(50, doc.valueOf("//dri:objIdentifier"), false); final String docId = createOpenaireId(50, doc.valueOf("//dri:objIdentifier"), false);
final List<Oaf> res = new ArrayList<>(); final List<Oaf> res = new ArrayList<>();
for (final Object o : doc.selectNodes("//*[local-name()='relatedDataset']")) { for (final Object o : doc.selectNodes("//*[local-name()='relatedDataset']")) {
final String otherId = createOpenaireId(50, ((Node) o).getText(), false);
final Relation r1 = new Relation(); final String originalId = ((Node) o).getText();
r1.setRelType("resultResult");
r1.setSubRelType("publicationDataset");
r1.setRelClass("isRelatedTo");
r1.setSource(docId);
r1.setTarget(otherId);
r1.setCollectedfrom(Arrays.asList(collectedFrom));
r1.setDataInfo(info);
r1.setLastupdatetimestamp(lastUpdateTimestamp);
res.add(r1);
final Relation r2 = new Relation(); if (StringUtils.isNotBlank(originalId)) {
r2.setRelType("resultResult");
r2.setSubRelType("publicationDataset");
r2.setRelClass("isRelatedTo");
r2.setSource(otherId);
r2.setTarget(docId);
r2.setCollectedfrom(Arrays.asList(collectedFrom));
r2.setDataInfo(info);
r2.setLastupdatetimestamp(lastUpdateTimestamp);
res.add(r2);
}
return res;
}
@Override final String otherId = createOpenaireId(50, originalId, false);
protected Qualifier prepareResourceType(final Document doc, final DataInfo info) {
return null; // NOT PRESENT IN OAF final Relation r1 = new Relation();
} r1.setRelType("resultResult");
r1.setSubRelType("publicationDataset");
r1.setRelClass("isRelatedTo");
r1.setSource(docId);
r1.setTarget(otherId);
r1.setCollectedfrom(Arrays.asList(collectedFrom));
r1.setDataInfo(info);
r1.setLastupdatetimestamp(lastUpdateTimestamp);
res.add(r1);
final Relation r2 = new Relation();
r2.setRelType("resultResult");
r2.setSubRelType("publicationDataset");
r2.setRelClass("isRelatedTo");
r2.setSource(otherId);
r2.setTarget(docId);
r2.setCollectedfrom(Arrays.asList(collectedFrom));
r2.setDataInfo(info);
r2.setLastupdatetimestamp(lastUpdateTimestamp);
res.add(r2);
}
}
return res;
}
@Override
protected Qualifier prepareResourceType(final Document doc, final DataInfo info) {
return null; // NOT PRESENT IN OAF
}
} }

View File

@ -1,4 +1,3 @@
package eu.dnetlib.dhp.oa.graph.raw; package eu.dnetlib.dhp.oa.graph.raw;
import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.createOpenaireId; import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.createOpenaireId;
@ -25,329 +24,328 @@ import org.dom4j.Node;
public class OdfToOafMapper extends AbstractMdRecordToOafMapper { public class OdfToOafMapper extends AbstractMdRecordToOafMapper {
public OdfToOafMapper(final Map<String, String> code2name) { public OdfToOafMapper(final Map<String, String> code2name) {
super(code2name); super(code2name);
} }
@Override @Override
protected List<StructuredProperty> prepareTitles(final Document doc, final DataInfo info) { protected List<StructuredProperty> prepareTitles(final Document doc, final DataInfo info) {
return prepareListStructProps(doc, "//datacite:title", MAIN_TITLE_QUALIFIER, info); return prepareListStructProps(doc, "//datacite:title", MAIN_TITLE_QUALIFIER, info);
} }
@Override @Override
protected List<Author> prepareAuthors(final Document doc, final DataInfo info) { protected List<Author> prepareAuthors(final Document doc, final DataInfo info) {
final List<Author> res = new ArrayList<>(); final List<Author> res = new ArrayList<>();
int pos = 1; int pos = 1;
for (final Object o : doc.selectNodes("//datacite:creator")) { for (final Object o : doc.selectNodes("//datacite:creator")) {
final Node n = (Node) o; final Node n = (Node) o;
final Author author = new Author(); final Author author = new Author();
author.setFullname(n.valueOf("./datacite:creatorName")); author.setFullname(n.valueOf("./datacite:creatorName"));
author.setName(n.valueOf("./datacite:givenName")); author.setName(n.valueOf("./datacite:givenName"));
author.setSurname(n.valueOf("./datacite:familyName")); author.setSurname(n.valueOf("./datacite:familyName"));
author.setAffiliation(prepareListFields(doc, "./datacite:affiliation", info)); author.setAffiliation(prepareListFields(doc, "./datacite:affiliation", info));
author.setPid(preparePids(doc, info)); author.setPid(preparePids(doc, info));
author.setRank(pos++); author.setRank(pos++);
res.add(author); res.add(author);
} }
return res; return res;
} }
private List<StructuredProperty> preparePids(final Document doc, final DataInfo info) { private List<StructuredProperty> preparePids(final Document doc, final DataInfo info) {
final List<StructuredProperty> res = new ArrayList<>(); final List<StructuredProperty> res = new ArrayList<>();
for (final Object o : doc.selectNodes("./datacite:nameIdentifier")) { for (final Object o : doc.selectNodes("./datacite:nameIdentifier")) {
res res.add(
.add( structuredProperty(
structuredProperty( ((Node) o).getText(),
((Node) o).getText(), prepareQualifier(
prepareQualifier( (Node) o, "./@nameIdentifierScheme", "dnet:pid_types", "dnet:pid_types"),
(Node) o, "./@nameIdentifierScheme", "dnet:pid_types", "dnet:pid_types"), info));
info)); }
} return res;
return res; }
}
@Override @Override
protected List<Instance> prepareInstances( protected List<Instance> prepareInstances(
final Document doc, final Document doc,
final DataInfo info, final DataInfo info,
final KeyValue collectedfrom, final KeyValue collectedfrom,
final KeyValue hostedby) { final KeyValue hostedby) {
final Instance instance = new Instance(); final Instance instance = new Instance();
instance.setUrl(new ArrayList<>()); instance.setUrl(new ArrayList<>());
instance instance.setInstancetype(
.setInstancetype( prepareQualifier(
prepareQualifier( doc, "//dr:CobjCategory", "dnet:publication_resource", "dnet:publication_resource"));
doc, "//dr:CobjCategory", "dnet:publication_resource", "dnet:publication_resource")); instance.setCollectedfrom(collectedfrom);
instance.setCollectedfrom(collectedfrom); instance.setHostedby(hostedby);
instance.setHostedby(hostedby); instance.setDateofacceptance(field(doc.valueOf("//oaf:dateAccepted"), info));
instance.setDateofacceptance(field(doc.valueOf("//oaf:dateAccepted"), info)); instance.setDistributionlocation(doc.valueOf("//oaf:distributionlocation"));
instance.setDistributionlocation(doc.valueOf("//oaf:distributionlocation")); instance.setAccessright(
instance prepareQualifier(doc, "//oaf:accessrights", "dnet:access_modes", "dnet:access_modes"));
.setAccessright( instance.setLicense(field(doc.valueOf("//oaf:license"), info));
prepareQualifier(doc, "//oaf:accessrights", "dnet:access_modes", "dnet:access_modes")); instance.setRefereed(field(doc.valueOf("//oaf:refereed"), info));
instance.setLicense(field(doc.valueOf("//oaf:license"), info)); instance.setProcessingchargeamount(field(doc.valueOf("//oaf:processingchargeamount"), info));
instance.setRefereed(field(doc.valueOf("//oaf:refereed"), info)); instance.setProcessingchargecurrency(
instance.setProcessingchargeamount(field(doc.valueOf("//oaf:processingchargeamount"), info)); field(doc.valueOf("//oaf:processingchargeamount/@currency"), info));
instance
.setProcessingchargecurrency(
field(doc.valueOf("//oaf:processingchargeamount/@currency"), info));
for (final Object o : doc.selectNodes("//datacite:alternateIdentifier[@alternateIdentifierType='URL']")) { for (final Object o :
instance.getUrl().add(((Node) o).getText().trim()); doc.selectNodes("//datacite:alternateIdentifier[@alternateIdentifierType='URL']")) {
} instance.getUrl().add(((Node) o).getText().trim());
for (final Object o : doc.selectNodes("//datacite:identifier[@identifierType='URL']")) { }
instance.getUrl().add(((Node) o).getText().trim()); for (final Object o : doc.selectNodes("//datacite:identifier[@identifierType='URL']")) {
} instance.getUrl().add(((Node) o).getText().trim());
for (final Object o : doc.selectNodes("//datacite:alternateIdentifier[@alternateIdentifierType='DOI']")) { }
instance.getUrl().add("http://dx.doi.org/" + ((Node) o).getText().trim()); for (final Object o :
} doc.selectNodes("//datacite:alternateIdentifier[@alternateIdentifierType='DOI']")) {
for (final Object o : doc.selectNodes("//datacite:identifier[@identifierType='DOI']")) { instance.getUrl().add("http://dx.doi.org/" + ((Node) o).getText().trim());
instance.getUrl().add("http://dx.doi.org/" + ((Node) o).getText().trim()); }
} for (final Object o : doc.selectNodes("//datacite:identifier[@identifierType='DOI']")) {
return Arrays.asList(instance); instance.getUrl().add("http://dx.doi.org/" + ((Node) o).getText().trim());
} }
return Arrays.asList(instance);
}
@Override @Override
protected List<Field<String>> prepareSources(final Document doc, final DataInfo info) { protected List<Field<String>> prepareSources(final Document doc, final DataInfo info) {
return new ArrayList<>(); // Not present in ODF ??? return new ArrayList<>(); // Not present in ODF ???
} }
@Override @Override
protected List<StructuredProperty> prepareRelevantDates(final Document doc, final DataInfo info) { protected List<StructuredProperty> prepareRelevantDates(final Document doc, final DataInfo info) {
final List<StructuredProperty> res = new ArrayList<>(); final List<StructuredProperty> res = new ArrayList<>();
for (final Object o : doc.selectNodes("//datacite:date")) { for (final Object o : doc.selectNodes("//datacite:date")) {
final String dateType = ((Node) o).valueOf("@dateType"); final String dateType = ((Node) o).valueOf("@dateType");
if (StringUtils.isBlank(dateType) if (StringUtils.isBlank(dateType)
&& !dateType.equalsIgnoreCase("Accepted") && !dateType.equalsIgnoreCase("Accepted")
&& !dateType.equalsIgnoreCase("Issued") && !dateType.equalsIgnoreCase("Issued")
&& !dateType.equalsIgnoreCase("Updated") && !dateType.equalsIgnoreCase("Updated")
&& !dateType.equalsIgnoreCase("Available")) { && !dateType.equalsIgnoreCase("Available")) {
res res.add(
.add( structuredProperty(
structuredProperty( ((Node) o).getText(),
((Node) o).getText(), "UNKNOWN",
"UNKNOWN", "UNKNOWN",
"UNKNOWN", "dnet:dataCite_date",
"dnet:dataCite_date", "dnet:dataCite_date",
"dnet:dataCite_date", info));
info)); }
} }
} return res;
return res; }
}
@Override @Override
protected List<Field<String>> prepareCoverages(final Document doc, final DataInfo info) { protected List<Field<String>> prepareCoverages(final Document doc, final DataInfo info) {
return new ArrayList<>(); // Not present in ODF ??? return new ArrayList<>(); // Not present in ODF ???
} }
@Override @Override
protected List<Field<String>> prepareContributors(final Document doc, final DataInfo info) { protected List<Field<String>> prepareContributors(final Document doc, final DataInfo info) {
return prepareListFields(doc, "//datacite:contributorName", info); return prepareListFields(doc, "//datacite:contributorName", info);
} }
@Override @Override
protected List<Field<String>> prepareFormats(final Document doc, final DataInfo info) { protected List<Field<String>> prepareFormats(final Document doc, final DataInfo info) {
return prepareListFields(doc, "//datacite:format", info); return prepareListFields(doc, "//datacite:format", info);
} }
@Override @Override
protected Field<String> preparePublisher(final Document doc, final DataInfo info) { protected Field<String> preparePublisher(final Document doc, final DataInfo info) {
return prepareField(doc, "//datacite:publisher", info); return prepareField(doc, "//datacite:publisher", info);
} }
@Override @Override
protected List<Field<String>> prepareDescriptions(final Document doc, final DataInfo info) { protected List<Field<String>> prepareDescriptions(final Document doc, final DataInfo info) {
return prepareListFields(doc, "//datacite:description[@descriptionType='Abstract']", info); return prepareListFields(doc, "//datacite:description[@descriptionType='Abstract']", info);
} }
@Override @Override
protected List<StructuredProperty> prepareSubjects(final Document doc, final DataInfo info) { protected List<StructuredProperty> prepareSubjects(final Document doc, final DataInfo info) {
return prepareListStructProps(doc, "//datacite:subject", info); return prepareListStructProps(doc, "//datacite:subject", info);
} }
@Override @Override
protected Qualifier prepareLanguages(final Document doc) { protected Qualifier prepareLanguages(final Document doc) {
return prepareQualifier(doc, "//datacite:language", "dnet:languages", "dnet:languages"); return prepareQualifier(doc, "//datacite:language", "dnet:languages", "dnet:languages");
} }
@Override @Override
protected List<Field<String>> prepareOtherResearchProductTools( protected List<Field<String>> prepareOtherResearchProductTools(
final Document doc, final DataInfo info) { final Document doc, final DataInfo info) {
return new ArrayList<>(); // Not present in ODF ??? return new ArrayList<>(); // Not present in ODF ???
} }
@Override @Override
protected List<Field<String>> prepareOtherResearchProductContactGroups( protected List<Field<String>> prepareOtherResearchProductContactGroups(
final Document doc, final DataInfo info) { final Document doc, final DataInfo info) {
return prepareListFields( return prepareListFields(
doc, doc,
"//datacite:contributor[@contributorType='ContactGroup']/datacite:contributorName", "//datacite:contributor[@contributorType='ContactGroup']/datacite:contributorName",
info); info);
} }
@Override @Override
protected List<Field<String>> prepareOtherResearchProductContactPersons( protected List<Field<String>> prepareOtherResearchProductContactPersons(
final Document doc, final DataInfo info) { final Document doc, final DataInfo info) {
return prepareListFields( return prepareListFields(
doc, doc,
"//datacite:contributor[@contributorType='ContactPerson']/datacite:contributorName", "//datacite:contributor[@contributorType='ContactPerson']/datacite:contributorName",
info); info);
} }
@Override @Override
protected Qualifier prepareSoftwareProgrammingLanguage(final Document doc, final DataInfo info) { protected Qualifier prepareSoftwareProgrammingLanguage(final Document doc, final DataInfo info) {
return prepareQualifier( return prepareQualifier(
doc, "//datacite:format", "dnet:programming_languages", "dnet:programming_languages"); doc, "//datacite:format", "dnet:programming_languages", "dnet:programming_languages");
} }
@Override @Override
protected Field<String> prepareSoftwareCodeRepositoryUrl( protected Field<String> prepareSoftwareCodeRepositoryUrl(
final Document doc, final DataInfo info) { final Document doc, final DataInfo info) {
return null; // Not present in ODF ??? return null; // Not present in ODF ???
} }
@Override @Override
protected List<StructuredProperty> prepareSoftwareLicenses( protected List<StructuredProperty> prepareSoftwareLicenses(
final Document doc, final DataInfo info) { final Document doc, final DataInfo info) {
return new ArrayList<>(); // Not present in ODF ??? return new ArrayList<>(); // Not present in ODF ???
} }
@Override @Override
protected List<Field<String>> prepareSoftwareDocumentationUrls( protected List<Field<String>> prepareSoftwareDocumentationUrls(
final Document doc, final DataInfo info) { final Document doc, final DataInfo info) {
return prepareListFields( return prepareListFields(
doc, doc,
"//datacite:relatedIdentifier[@relatedIdentifierType='URL' and @relationType='IsDocumentedBy']", "//datacite:relatedIdentifier[@relatedIdentifierType='URL' and @relationType='IsDocumentedBy']",
info); info);
} }
// DATASETS // DATASETS
@Override @Override
protected List<GeoLocation> prepareDatasetGeoLocations(final Document doc, final DataInfo info) { protected List<GeoLocation> prepareDatasetGeoLocations(final Document doc, final DataInfo info) {
final List<GeoLocation> res = new ArrayList<>(); final List<GeoLocation> res = new ArrayList<>();
for (final Object o : doc.selectNodes("//datacite:geoLocation")) { for (final Object o : doc.selectNodes("//datacite:geoLocation")) {
final GeoLocation loc = new GeoLocation(); final GeoLocation loc = new GeoLocation();
loc.setBox(((Node) o).valueOf("./datacite:geoLocationBox")); loc.setBox(((Node) o).valueOf("./datacite:geoLocationBox"));
loc.setPlace(((Node) o).valueOf("./datacite:geoLocationPlace")); loc.setPlace(((Node) o).valueOf("./datacite:geoLocationPlace"));
loc.setPoint(((Node) o).valueOf("./datacite:geoLocationPoint")); loc.setPoint(((Node) o).valueOf("./datacite:geoLocationPoint"));
res.add(loc); res.add(loc);
} }
return res; return res;
} }
@Override @Override
protected Field<String> prepareDatasetMetadataVersionNumber( protected Field<String> prepareDatasetMetadataVersionNumber(
final Document doc, final DataInfo info) { final Document doc, final DataInfo info) {
return null; // Not present in ODF ??? return null; // Not present in ODF ???
} }
@Override @Override
protected Field<String> prepareDatasetLastMetadataUpdate( protected Field<String> prepareDatasetLastMetadataUpdate(
final Document doc, final DataInfo info) { final Document doc, final DataInfo info) {
return prepareField(doc, "//datacite:date[@dateType='Updated']", info); return prepareField(doc, "//datacite:date[@dateType='Updated']", info);
} }
@Override @Override
protected Field<String> prepareDatasetVersion(final Document doc, final DataInfo info) { protected Field<String> prepareDatasetVersion(final Document doc, final DataInfo info) {
return prepareField(doc, "//datacite:version", info); return prepareField(doc, "//datacite:version", info);
} }
@Override @Override
protected Field<String> prepareDatasetSize(final Document doc, final DataInfo info) { protected Field<String> prepareDatasetSize(final Document doc, final DataInfo info) {
return prepareField(doc, "//datacite:size", info); return prepareField(doc, "//datacite:size", info);
} }
@Override @Override
protected Field<String> prepareDatasetDevice(final Document doc, final DataInfo info) { protected Field<String> prepareDatasetDevice(final Document doc, final DataInfo info) {
return null; // Not present in ODF ??? return null; // Not present in ODF ???
} }
@Override @Override
protected Field<String> prepareDatasetStorageDate(final Document doc, final DataInfo info) { protected Field<String> prepareDatasetStorageDate(final Document doc, final DataInfo info) {
return prepareField(doc, "//datacite:date[@dateType='Issued']", info); return prepareField(doc, "//datacite:date[@dateType='Issued']", info);
} }
@Override @Override
protected List<Oaf> addOtherResultRels( protected List<Oaf> addOtherResultRels(
final Document doc, final Document doc,
final KeyValue collectedFrom, final KeyValue collectedFrom,
final DataInfo info, final DataInfo info,
final long lastUpdateTimestamp) { final long lastUpdateTimestamp) {
final String docId = createOpenaireId(50, doc.valueOf("//dri:objIdentifier"), false); final String docId = createOpenaireId(50, doc.valueOf("//dri:objIdentifier"), false);
final List<Oaf> res = new ArrayList<>(); final List<Oaf> res = new ArrayList<>();
for (final Object o : doc.selectNodes("//datacite:relatedIdentifier[@relatedIdentifierType='OPENAIRE']")) { for (final Object o :
final String otherId = createOpenaireId(50, ((Node) o).getText(), false); doc.selectNodes("//datacite:relatedIdentifier[@relatedIdentifierType='OPENAIRE']")) {
final String type = ((Node) o).valueOf("@relationType");
if (type.equals("IsSupplementTo")) { final String originalId = ((Node) o).getText();
res
.add(
prepareOtherResultRel(
collectedFrom,
info,
lastUpdateTimestamp,
docId,
otherId,
"supplement",
"isSupplementTo"));
res
.add(
prepareOtherResultRel(
collectedFrom,
info,
lastUpdateTimestamp,
otherId,
docId,
"supplement",
"isSupplementedBy"));
} else if (type.equals("IsPartOf")) {
res
.add(
prepareOtherResultRel(
collectedFrom, info, lastUpdateTimestamp, docId, otherId, "part", "IsPartOf"));
res
.add(
prepareOtherResultRel(
collectedFrom, info, lastUpdateTimestamp, otherId, docId, "part", "HasParts"));
} else {
}
}
return res;
}
private Relation prepareOtherResultRel( if (StringUtils.isNotBlank(originalId)) {
final KeyValue collectedFrom, final String otherId = createOpenaireId(50, originalId, false);
final DataInfo info, final String type = ((Node) o).valueOf("@relationType");
final long lastUpdateTimestamp,
final String source,
final String target,
final String subRelType,
final String relClass) {
final Relation r = new Relation();
r.setRelType("resultResult");
r.setSubRelType(subRelType);
r.setRelClass(relClass);
r.setSource(source);
r.setTarget(target);
r.setCollectedfrom(Arrays.asList(collectedFrom));
r.setDataInfo(info);
r.setLastupdatetimestamp(lastUpdateTimestamp);
return r;
}
@Override if (type.equals("IsSupplementTo")) {
protected Qualifier prepareResourceType(final Document doc, final DataInfo info) { res.add(
return prepareQualifier( prepareOtherResultRel(
doc, collectedFrom,
"//*[local-name() = 'resource']//*[local-name() = 'resourceType']", info,
"dnet:dataCite_resource", lastUpdateTimestamp,
"dnet:dataCite_resource"); docId,
} otherId,
"supplement",
"isSupplementTo"));
res.add(
prepareOtherResultRel(
collectedFrom,
info,
lastUpdateTimestamp,
otherId,
docId,
"supplement",
"isSupplementedBy"));
} else if (type.equals("IsPartOf")) {
res.add(
prepareOtherResultRel(
collectedFrom, info, lastUpdateTimestamp, docId, otherId, "part", "IsPartOf"));
res.add(
prepareOtherResultRel(
collectedFrom, info, lastUpdateTimestamp, otherId, docId, "part", "HasParts"));
} else {
}
}
}
return res;
}
private Relation prepareOtherResultRel(
final KeyValue collectedFrom,
final DataInfo info,
final long lastUpdateTimestamp,
final String source,
final String target,
final String subRelType,
final String relClass) {
final Relation r = new Relation();
r.setRelType("resultResult");
r.setSubRelType(subRelType);
r.setRelClass(relClass);
r.setSource(source);
r.setTarget(target);
r.setCollectedfrom(Arrays.asList(collectedFrom));
r.setDataInfo(info);
r.setLastupdatetimestamp(lastUpdateTimestamp);
return r;
}
@Override
protected Qualifier prepareResourceType(final Document doc, final DataInfo info) {
return prepareQualifier(
doc,
"//*[local-name() = 'resource']//*[local-name() = 'resourceType']",
"dnet:dataCite_resource",
"dnet:dataCite_resource");
}
} }