forked from D-Net/dnet-hadoop
integrated changes from master
This commit is contained in:
commit
ac25f2d8d1
|
@ -1,4 +1,3 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.oa.graph.raw;
|
package eu.dnetlib.dhp.oa.graph.raw;
|
||||||
|
|
||||||
import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.createOpenaireId;
|
import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.createOpenaireId;
|
||||||
|
@ -46,9 +45,8 @@ public abstract class AbstractMdRecordToOafMapper {
|
||||||
|
|
||||||
protected final Map<String, String> code2name;
|
protected final Map<String, String> code2name;
|
||||||
|
|
||||||
protected static final Qualifier MAIN_TITLE_QUALIFIER = qualifier(
|
protected static final Qualifier MAIN_TITLE_QUALIFIER =
|
||||||
"main title", "main title",
|
qualifier("main title", "main title", "dnet:dataCite_title", "dnet:dataCite_title");
|
||||||
"dnet:dataCite_title", "dnet:dataCite_title");
|
|
||||||
|
|
||||||
protected AbstractMdRecordToOafMapper(final Map<String, String> code2name) {
|
protected AbstractMdRecordToOafMapper(final Map<String, String> code2name) {
|
||||||
this.code2name = code2name;
|
this.code2name = code2name;
|
||||||
|
@ -66,17 +64,18 @@ public abstract class AbstractMdRecordToOafMapper {
|
||||||
nsContext.put("datacite", "http://datacite.org/schema/kernel-3");
|
nsContext.put("datacite", "http://datacite.org/schema/kernel-3");
|
||||||
DocumentFactory.getInstance().setXPathNamespaceURIs(nsContext);
|
DocumentFactory.getInstance().setXPathNamespaceURIs(nsContext);
|
||||||
|
|
||||||
final Document doc = DocumentHelper
|
final Document doc =
|
||||||
.parseText(
|
DocumentHelper.parseText(
|
||||||
xml
|
xml.replaceAll(
|
||||||
.replaceAll(
|
|
||||||
"http://datacite.org/schema/kernel-4", "http://datacite.org/schema/kernel-3"));
|
"http://datacite.org/schema/kernel-4", "http://datacite.org/schema/kernel-3"));
|
||||||
|
|
||||||
final String type = doc.valueOf("//dr:CobjCategory/@type");
|
final String type = doc.valueOf("//dr:CobjCategory/@type");
|
||||||
final KeyValue collectedFrom = keyValue(
|
final KeyValue collectedFrom =
|
||||||
|
keyValue(
|
||||||
createOpenaireId(10, doc.valueOf("//oaf:collectedFrom/@id"), true),
|
createOpenaireId(10, doc.valueOf("//oaf:collectedFrom/@id"), true),
|
||||||
doc.valueOf("//oaf:collectedFrom/@name"));
|
doc.valueOf("//oaf:collectedFrom/@name"));
|
||||||
final KeyValue hostedBy = StringUtils.isBlank(doc.valueOf("//oaf:hostedBy/@id"))
|
final KeyValue hostedBy =
|
||||||
|
StringUtils.isBlank(doc.valueOf("//oaf:hostedBy/@id"))
|
||||||
? collectedFrom
|
? collectedFrom
|
||||||
: keyValue(
|
: keyValue(
|
||||||
createOpenaireId(10, doc.valueOf("//oaf:hostedBy/@id"), true),
|
createOpenaireId(10, doc.valueOf("//oaf:hostedBy/@id"), true),
|
||||||
|
@ -164,7 +163,11 @@ public abstract class AbstractMdRecordToOafMapper {
|
||||||
final String docId = createOpenaireId(50, doc.valueOf("//dri:objIdentifier"), false);
|
final String docId = createOpenaireId(50, doc.valueOf("//dri:objIdentifier"), false);
|
||||||
|
|
||||||
for (final Object o : doc.selectNodes("//oaf:projectid")) {
|
for (final Object o : doc.selectNodes("//oaf:projectid")) {
|
||||||
final String projectId = createOpenaireId(40, ((Node) o).getText(), true);
|
|
||||||
|
final String originalId = ((Node) o).getText();
|
||||||
|
|
||||||
|
if (StringUtils.isNotBlank(originalId)) {
|
||||||
|
final String projectId = createOpenaireId(40, originalId, true);
|
||||||
|
|
||||||
final Relation r1 = new Relation();
|
final Relation r1 = new Relation();
|
||||||
r1.setRelType("resultProject");
|
r1.setRelType("resultProject");
|
||||||
|
@ -188,6 +191,7 @@ public abstract class AbstractMdRecordToOafMapper {
|
||||||
r2.setLastupdatetimestamp(lastUpdateTimestamp);
|
r2.setLastupdatetimestamp(lastUpdateTimestamp);
|
||||||
res.add(r2);
|
res.add(r2);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
@ -210,8 +214,7 @@ public abstract class AbstractMdRecordToOafMapper {
|
||||||
r.setId(createOpenaireId(50, doc.valueOf("//dri:objIdentifier"), false));
|
r.setId(createOpenaireId(50, doc.valueOf("//dri:objIdentifier"), false));
|
||||||
r.setOriginalId(Arrays.asList(doc.valueOf("//dri:objIdentifier")));
|
r.setOriginalId(Arrays.asList(doc.valueOf("//dri:objIdentifier")));
|
||||||
r.setCollectedfrom(Arrays.asList(collectedFrom));
|
r.setCollectedfrom(Arrays.asList(collectedFrom));
|
||||||
r
|
r.setPid(
|
||||||
.setPid(
|
|
||||||
prepareListStructProps(
|
prepareListStructProps(
|
||||||
doc, "//oaf:identifier", "@identifierType", "dnet:pid_types", "dnet:pid_types", info));
|
doc, "//oaf:identifier", "@identifierType", "dnet:pid_types", "dnet:pid_types", info));
|
||||||
r.setDateofcollection(doc.valueOf("//dr:dateOfCollection"));
|
r.setDateofcollection(doc.valueOf("//dr:dateOfCollection"));
|
||||||
|
@ -382,8 +385,7 @@ public abstract class AbstractMdRecordToOafMapper {
|
||||||
final List<StructuredProperty> res = new ArrayList<>();
|
final List<StructuredProperty> res = new ArrayList<>();
|
||||||
for (final Object o : node.selectNodes(xpath)) {
|
for (final Object o : node.selectNodes(xpath)) {
|
||||||
final Node n = (Node) o;
|
final Node n = (Node) o;
|
||||||
res
|
res.add(
|
||||||
.add(
|
|
||||||
structuredProperty(
|
structuredProperty(
|
||||||
n.getText(),
|
n.getText(),
|
||||||
n.valueOf("@classid"),
|
n.valueOf("@classid"),
|
||||||
|
@ -396,7 +398,8 @@ public abstract class AbstractMdRecordToOafMapper {
|
||||||
}
|
}
|
||||||
|
|
||||||
protected OAIProvenance prepareOAIprovenance(final Document doc) {
|
protected OAIProvenance prepareOAIprovenance(final Document doc) {
|
||||||
final Node n = doc.selectSingleNode("//*[local-name()='provenance']/*[local-name()='originDescription']");
|
final Node n =
|
||||||
|
doc.selectSingleNode("//*[local-name()='provenance']/*[local-name()='originDescription']");
|
||||||
|
|
||||||
if (n == null) {
|
if (n == null) {
|
||||||
return null;
|
return null;
|
||||||
|
|
|
@ -1,15 +1,24 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.oa.graph.raw;
|
package eu.dnetlib.dhp.oa.graph.raw;
|
||||||
|
|
||||||
import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.createOpenaireId;
|
import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.createOpenaireId;
|
||||||
import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.field;
|
import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.field;
|
||||||
|
|
||||||
import eu.dnetlib.dhp.oa.graph.raw.common.PacePerson;
|
import eu.dnetlib.dhp.oa.graph.raw.common.PacePerson;
|
||||||
import eu.dnetlib.dhp.schema.oaf.*;
|
import eu.dnetlib.dhp.schema.oaf.Author;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.DataInfo;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.Field;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.GeoLocation;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.Instance;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.KeyValue;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.Oaf;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.Qualifier;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.Relation;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
import org.apache.commons.lang3.StringUtils;
|
||||||
import org.dom4j.Document;
|
import org.dom4j.Document;
|
||||||
import org.dom4j.Node;
|
import org.dom4j.Node;
|
||||||
|
|
||||||
|
@ -90,8 +99,7 @@ public class OafToOafMapper extends AbstractMdRecordToOafMapper {
|
||||||
if (url.startsWith("http")) {
|
if (url.startsWith("http")) {
|
||||||
final Instance instance = new Instance();
|
final Instance instance = new Instance();
|
||||||
instance.setUrl(Arrays.asList(url));
|
instance.setUrl(Arrays.asList(url));
|
||||||
instance
|
instance.setInstancetype(
|
||||||
.setInstancetype(
|
|
||||||
prepareQualifier(
|
prepareQualifier(
|
||||||
doc,
|
doc,
|
||||||
"//dr:CobjCategory",
|
"//dr:CobjCategory",
|
||||||
|
@ -101,16 +109,13 @@ public class OafToOafMapper extends AbstractMdRecordToOafMapper {
|
||||||
instance.setHostedby(hostedby);
|
instance.setHostedby(hostedby);
|
||||||
instance.setDateofacceptance(field(doc.valueOf("//oaf:dateAccepted"), info));
|
instance.setDateofacceptance(field(doc.valueOf("//oaf:dateAccepted"), info));
|
||||||
instance.setDistributionlocation(doc.valueOf("//oaf:distributionlocation"));
|
instance.setDistributionlocation(doc.valueOf("//oaf:distributionlocation"));
|
||||||
instance
|
instance.setAccessright(
|
||||||
.setAccessright(
|
|
||||||
prepareQualifier(doc, "//oaf:accessrights", "dnet:access_modes", "dnet:access_modes"));
|
prepareQualifier(doc, "//oaf:accessrights", "dnet:access_modes", "dnet:access_modes"));
|
||||||
instance.setLicense(field(doc.valueOf("//oaf:license"), info));
|
instance.setLicense(field(doc.valueOf("//oaf:license"), info));
|
||||||
instance.setRefereed(field(doc.valueOf("//oaf:refereed"), info));
|
instance.setRefereed(field(doc.valueOf("//oaf:refereed"), info));
|
||||||
instance
|
instance.setProcessingchargeamount(
|
||||||
.setProcessingchargeamount(
|
|
||||||
field(doc.valueOf("//oaf:processingchargeamount"), info));
|
field(doc.valueOf("//oaf:processingchargeamount"), info));
|
||||||
instance
|
instance.setProcessingchargecurrency(
|
||||||
.setProcessingchargecurrency(
|
|
||||||
field(doc.valueOf("//oaf:processingchargeamount/@currency"), info));
|
field(doc.valueOf("//oaf:processingchargeamount/@currency"), info));
|
||||||
res.add(instance);
|
res.add(instance);
|
||||||
}
|
}
|
||||||
|
@ -222,7 +227,12 @@ public class OafToOafMapper extends AbstractMdRecordToOafMapper {
|
||||||
final List<Oaf> res = new ArrayList<>();
|
final List<Oaf> res = new ArrayList<>();
|
||||||
|
|
||||||
for (final Object o : doc.selectNodes("//*[local-name()='relatedDataset']")) {
|
for (final Object o : doc.selectNodes("//*[local-name()='relatedDataset']")) {
|
||||||
final String otherId = createOpenaireId(50, ((Node) o).getText(), false);
|
|
||||||
|
final String originalId = ((Node) o).getText();
|
||||||
|
|
||||||
|
if (StringUtils.isNotBlank(originalId)) {
|
||||||
|
|
||||||
|
final String otherId = createOpenaireId(50, originalId, false);
|
||||||
|
|
||||||
final Relation r1 = new Relation();
|
final Relation r1 = new Relation();
|
||||||
r1.setRelType("resultResult");
|
r1.setRelType("resultResult");
|
||||||
|
@ -246,6 +256,7 @@ public class OafToOafMapper extends AbstractMdRecordToOafMapper {
|
||||||
r2.setLastupdatetimestamp(lastUpdateTimestamp);
|
r2.setLastupdatetimestamp(lastUpdateTimestamp);
|
||||||
res.add(r2);
|
res.add(r2);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -1,4 +1,3 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.oa.graph.raw;
|
package eu.dnetlib.dhp.oa.graph.raw;
|
||||||
|
|
||||||
import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.createOpenaireId;
|
import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.createOpenaireId;
|
||||||
|
@ -55,8 +54,7 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper {
|
||||||
private List<StructuredProperty> preparePids(final Document doc, final DataInfo info) {
|
private List<StructuredProperty> preparePids(final Document doc, final DataInfo info) {
|
||||||
final List<StructuredProperty> res = new ArrayList<>();
|
final List<StructuredProperty> res = new ArrayList<>();
|
||||||
for (final Object o : doc.selectNodes("./datacite:nameIdentifier")) {
|
for (final Object o : doc.selectNodes("./datacite:nameIdentifier")) {
|
||||||
res
|
res.add(
|
||||||
.add(
|
|
||||||
structuredProperty(
|
structuredProperty(
|
||||||
((Node) o).getText(),
|
((Node) o).getText(),
|
||||||
prepareQualifier(
|
prepareQualifier(
|
||||||
|
@ -75,31 +73,30 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper {
|
||||||
|
|
||||||
final Instance instance = new Instance();
|
final Instance instance = new Instance();
|
||||||
instance.setUrl(new ArrayList<>());
|
instance.setUrl(new ArrayList<>());
|
||||||
instance
|
instance.setInstancetype(
|
||||||
.setInstancetype(
|
|
||||||
prepareQualifier(
|
prepareQualifier(
|
||||||
doc, "//dr:CobjCategory", "dnet:publication_resource", "dnet:publication_resource"));
|
doc, "//dr:CobjCategory", "dnet:publication_resource", "dnet:publication_resource"));
|
||||||
instance.setCollectedfrom(collectedfrom);
|
instance.setCollectedfrom(collectedfrom);
|
||||||
instance.setHostedby(hostedby);
|
instance.setHostedby(hostedby);
|
||||||
instance.setDateofacceptance(field(doc.valueOf("//oaf:dateAccepted"), info));
|
instance.setDateofacceptance(field(doc.valueOf("//oaf:dateAccepted"), info));
|
||||||
instance.setDistributionlocation(doc.valueOf("//oaf:distributionlocation"));
|
instance.setDistributionlocation(doc.valueOf("//oaf:distributionlocation"));
|
||||||
instance
|
instance.setAccessright(
|
||||||
.setAccessright(
|
|
||||||
prepareQualifier(doc, "//oaf:accessrights", "dnet:access_modes", "dnet:access_modes"));
|
prepareQualifier(doc, "//oaf:accessrights", "dnet:access_modes", "dnet:access_modes"));
|
||||||
instance.setLicense(field(doc.valueOf("//oaf:license"), info));
|
instance.setLicense(field(doc.valueOf("//oaf:license"), info));
|
||||||
instance.setRefereed(field(doc.valueOf("//oaf:refereed"), info));
|
instance.setRefereed(field(doc.valueOf("//oaf:refereed"), info));
|
||||||
instance.setProcessingchargeamount(field(doc.valueOf("//oaf:processingchargeamount"), info));
|
instance.setProcessingchargeamount(field(doc.valueOf("//oaf:processingchargeamount"), info));
|
||||||
instance
|
instance.setProcessingchargecurrency(
|
||||||
.setProcessingchargecurrency(
|
|
||||||
field(doc.valueOf("//oaf:processingchargeamount/@currency"), info));
|
field(doc.valueOf("//oaf:processingchargeamount/@currency"), info));
|
||||||
|
|
||||||
for (final Object o : doc.selectNodes("//datacite:alternateIdentifier[@alternateIdentifierType='URL']")) {
|
for (final Object o :
|
||||||
|
doc.selectNodes("//datacite:alternateIdentifier[@alternateIdentifierType='URL']")) {
|
||||||
instance.getUrl().add(((Node) o).getText().trim());
|
instance.getUrl().add(((Node) o).getText().trim());
|
||||||
}
|
}
|
||||||
for (final Object o : doc.selectNodes("//datacite:identifier[@identifierType='URL']")) {
|
for (final Object o : doc.selectNodes("//datacite:identifier[@identifierType='URL']")) {
|
||||||
instance.getUrl().add(((Node) o).getText().trim());
|
instance.getUrl().add(((Node) o).getText().trim());
|
||||||
}
|
}
|
||||||
for (final Object o : doc.selectNodes("//datacite:alternateIdentifier[@alternateIdentifierType='DOI']")) {
|
for (final Object o :
|
||||||
|
doc.selectNodes("//datacite:alternateIdentifier[@alternateIdentifierType='DOI']")) {
|
||||||
instance.getUrl().add("http://dx.doi.org/" + ((Node) o).getText().trim());
|
instance.getUrl().add("http://dx.doi.org/" + ((Node) o).getText().trim());
|
||||||
}
|
}
|
||||||
for (final Object o : doc.selectNodes("//datacite:identifier[@identifierType='DOI']")) {
|
for (final Object o : doc.selectNodes("//datacite:identifier[@identifierType='DOI']")) {
|
||||||
|
@ -123,8 +120,7 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper {
|
||||||
&& !dateType.equalsIgnoreCase("Issued")
|
&& !dateType.equalsIgnoreCase("Issued")
|
||||||
&& !dateType.equalsIgnoreCase("Updated")
|
&& !dateType.equalsIgnoreCase("Updated")
|
||||||
&& !dateType.equalsIgnoreCase("Available")) {
|
&& !dateType.equalsIgnoreCase("Available")) {
|
||||||
res
|
res.add(
|
||||||
.add(
|
|
||||||
structuredProperty(
|
structuredProperty(
|
||||||
((Node) o).getText(),
|
((Node) o).getText(),
|
||||||
"UNKNOWN",
|
"UNKNOWN",
|
||||||
|
@ -282,13 +278,17 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper {
|
||||||
|
|
||||||
final List<Oaf> res = new ArrayList<>();
|
final List<Oaf> res = new ArrayList<>();
|
||||||
|
|
||||||
for (final Object o : doc.selectNodes("//datacite:relatedIdentifier[@relatedIdentifierType='OPENAIRE']")) {
|
for (final Object o :
|
||||||
final String otherId = createOpenaireId(50, ((Node) o).getText(), false);
|
doc.selectNodes("//datacite:relatedIdentifier[@relatedIdentifierType='OPENAIRE']")) {
|
||||||
|
|
||||||
|
final String originalId = ((Node) o).getText();
|
||||||
|
|
||||||
|
if (StringUtils.isNotBlank(originalId)) {
|
||||||
|
final String otherId = createOpenaireId(50, originalId, false);
|
||||||
final String type = ((Node) o).valueOf("@relationType");
|
final String type = ((Node) o).valueOf("@relationType");
|
||||||
|
|
||||||
if (type.equals("IsSupplementTo")) {
|
if (type.equals("IsSupplementTo")) {
|
||||||
res
|
res.add(
|
||||||
.add(
|
|
||||||
prepareOtherResultRel(
|
prepareOtherResultRel(
|
||||||
collectedFrom,
|
collectedFrom,
|
||||||
info,
|
info,
|
||||||
|
@ -297,8 +297,7 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper {
|
||||||
otherId,
|
otherId,
|
||||||
"supplement",
|
"supplement",
|
||||||
"isSupplementTo"));
|
"isSupplementTo"));
|
||||||
res
|
res.add(
|
||||||
.add(
|
|
||||||
prepareOtherResultRel(
|
prepareOtherResultRel(
|
||||||
collectedFrom,
|
collectedFrom,
|
||||||
info,
|
info,
|
||||||
|
@ -308,17 +307,16 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper {
|
||||||
"supplement",
|
"supplement",
|
||||||
"isSupplementedBy"));
|
"isSupplementedBy"));
|
||||||
} else if (type.equals("IsPartOf")) {
|
} else if (type.equals("IsPartOf")) {
|
||||||
res
|
res.add(
|
||||||
.add(
|
|
||||||
prepareOtherResultRel(
|
prepareOtherResultRel(
|
||||||
collectedFrom, info, lastUpdateTimestamp, docId, otherId, "part", "IsPartOf"));
|
collectedFrom, info, lastUpdateTimestamp, docId, otherId, "part", "IsPartOf"));
|
||||||
res
|
res.add(
|
||||||
.add(
|
|
||||||
prepareOtherResultRel(
|
prepareOtherResultRel(
|
||||||
collectedFrom, info, lastUpdateTimestamp, otherId, docId, "part", "HasParts"));
|
collectedFrom, info, lastUpdateTimestamp, otherId, docId, "part", "HasParts"));
|
||||||
} else {
|
} else {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue