forked from D-Net/dnet-hadoop
mapped oaf:country from results
This commit is contained in:
parent
88fef367b9
commit
30167aa882
|
@ -1,16 +1,50 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.oa.graph.raw;
|
package eu.dnetlib.dhp.oa.graph.raw;
|
||||||
|
|
||||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.*;
|
import static eu.dnetlib.dhp.schema.common.ModelConstants.AFFILIATION;
|
||||||
import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.*;
|
import static eu.dnetlib.dhp.schema.common.ModelConstants.DNET_PID_TYPES;
|
||||||
|
import static eu.dnetlib.dhp.schema.common.ModelConstants.HAS_AUTHOR_INSTITUTION;
|
||||||
|
import static eu.dnetlib.dhp.schema.common.ModelConstants.IS_AUTHOR_INSTITUTION_OF;
|
||||||
|
import static eu.dnetlib.dhp.schema.common.ModelConstants.IS_PRODUCED_BY;
|
||||||
|
import static eu.dnetlib.dhp.schema.common.ModelConstants.OPENAIRE_COAR_RESOURCE_TYPES_3_1;
|
||||||
|
import static eu.dnetlib.dhp.schema.common.ModelConstants.OUTCOME;
|
||||||
|
import static eu.dnetlib.dhp.schema.common.ModelConstants.PRODUCES;
|
||||||
|
import static eu.dnetlib.dhp.schema.common.ModelConstants.REPOSITORY_PROVENANCE_ACTIONS;
|
||||||
|
import static eu.dnetlib.dhp.schema.common.ModelConstants.RESULT_ORGANIZATION;
|
||||||
|
import static eu.dnetlib.dhp.schema.common.ModelConstants.RESULT_PROJECT;
|
||||||
|
import static eu.dnetlib.dhp.schema.common.ModelConstants.UNKNOWN;
|
||||||
import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.createOpenaireId;
|
import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.createOpenaireId;
|
||||||
|
import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.dataInfo;
|
||||||
|
import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.field;
|
||||||
|
import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.journal;
|
||||||
|
import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.keyValue;
|
||||||
|
import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.listFields;
|
||||||
|
import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.oaiIProvenance;
|
||||||
|
import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.qualifier;
|
||||||
|
import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.structuredProperty;
|
||||||
|
import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.subject;
|
||||||
|
|
||||||
import java.util.*;
|
import java.util.ArrayList;
|
||||||
|
import java.util.Arrays;
|
||||||
|
import java.util.Collection;
|
||||||
|
import java.util.Date;
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.HashSet;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.Objects;
|
||||||
|
import java.util.Optional;
|
||||||
|
import java.util.Set;
|
||||||
import java.util.stream.Collectors;
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
import org.apache.commons.lang3.StringUtils;
|
import org.apache.commons.lang3.StringUtils;
|
||||||
import org.apache.commons.validator.routines.UrlValidator;
|
import org.apache.commons.validator.routines.UrlValidator;
|
||||||
import org.dom4j.*;
|
import org.dom4j.Document;
|
||||||
|
import org.dom4j.DocumentException;
|
||||||
|
import org.dom4j.DocumentFactory;
|
||||||
|
import org.dom4j.DocumentHelper;
|
||||||
|
import org.dom4j.Element;
|
||||||
|
import org.dom4j.Node;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
@ -21,7 +55,29 @@ import eu.dnetlib.dhp.common.Constants;
|
||||||
import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup;
|
import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup;
|
||||||
import eu.dnetlib.dhp.schema.common.ModelConstants;
|
import eu.dnetlib.dhp.schema.common.ModelConstants;
|
||||||
import eu.dnetlib.dhp.schema.common.ModelSupport;
|
import eu.dnetlib.dhp.schema.common.ModelSupport;
|
||||||
import eu.dnetlib.dhp.schema.oaf.*;
|
import eu.dnetlib.dhp.schema.oaf.AccessRight;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.Author;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.Context;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.Country;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.DataInfo;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.Dataset;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.EoscIfGuidelines;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.Field;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.GeoLocation;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.Instance;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.InstanceTypeMapping;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.Journal;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.KeyValue;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.OAIProvenance;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.Oaf;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.OafEntity;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.OtherResearchProduct;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.Publication;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.Qualifier;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.Result;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.Software;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.Subject;
|
||||||
import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory;
|
import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory;
|
||||||
import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils;
|
import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils;
|
||||||
|
|
||||||
|
@ -42,12 +98,8 @@ public abstract class AbstractMdRecordToOafMapper {
|
||||||
protected static final String DATACITE_SCHEMA_KERNEL_3 = "http://datacite.org/schema/kernel-3";
|
protected static final String DATACITE_SCHEMA_KERNEL_3 = "http://datacite.org/schema/kernel-3";
|
||||||
protected static final String DATACITE_SCHEMA_KERNEL_3_SLASH = "http://datacite.org/schema/kernel-3/";
|
protected static final String DATACITE_SCHEMA_KERNEL_3_SLASH = "http://datacite.org/schema/kernel-3/";
|
||||||
|
|
||||||
protected static final Qualifier ORCID_PID_TYPE = qualifier(
|
protected static final Qualifier ORCID_PID_TYPE = qualifier(ModelConstants.ORCID_PENDING, ModelConstants.ORCID_CLASSNAME, DNET_PID_TYPES, DNET_PID_TYPES);
|
||||||
ModelConstants.ORCID_PENDING,
|
protected static final Qualifier MAG_PID_TYPE = qualifier("MAGIdentifier", "Microsoft Academic Graph Identifier", DNET_PID_TYPES, DNET_PID_TYPES);
|
||||||
ModelConstants.ORCID_CLASSNAME,
|
|
||||||
DNET_PID_TYPES, DNET_PID_TYPES);
|
|
||||||
protected static final Qualifier MAG_PID_TYPE = qualifier(
|
|
||||||
"MAGIdentifier", "Microsoft Academic Graph Identifier", DNET_PID_TYPES, DNET_PID_TYPES);
|
|
||||||
|
|
||||||
protected static final String DEFAULT_TRUST_FOR_VALIDATED_RELS = "0.999";
|
protected static final String DEFAULT_TRUST_FOR_VALIDATED_RELS = "0.999";
|
||||||
|
|
||||||
|
@ -97,28 +149,22 @@ public abstract class AbstractMdRecordToOafMapper {
|
||||||
DocumentFactory.getInstance().setXPathNamespaceURIs(nsContext);
|
DocumentFactory.getInstance().setXPathNamespaceURIs(nsContext);
|
||||||
try {
|
try {
|
||||||
final Document doc = DocumentHelper
|
final Document doc = DocumentHelper
|
||||||
.parseText(
|
.parseText(xml
|
||||||
xml
|
|
||||||
.replaceAll(DATACITE_SCHEMA_KERNEL_4, DATACITE_SCHEMA_KERNEL_3)
|
.replaceAll(DATACITE_SCHEMA_KERNEL_4, DATACITE_SCHEMA_KERNEL_3)
|
||||||
.replaceAll(DATACITE_SCHEMA_KERNEL_4_SLASH, DATACITE_SCHEMA_KERNEL_3)
|
.replaceAll(DATACITE_SCHEMA_KERNEL_4_SLASH, DATACITE_SCHEMA_KERNEL_3)
|
||||||
.replaceAll(DATACITE_SCHEMA_KERNEL_3_SLASH, DATACITE_SCHEMA_KERNEL_3));
|
.replaceAll(DATACITE_SCHEMA_KERNEL_3_SLASH, DATACITE_SCHEMA_KERNEL_3));
|
||||||
|
|
||||||
final KeyValue collectedFrom = getProvenanceDatasource(
|
final KeyValue collectedFrom = getProvenanceDatasource(doc, "//oaf:collectedFrom/@id", "//oaf:collectedFrom/@name");
|
||||||
doc, "//oaf:collectedFrom/@id", "//oaf:collectedFrom/@name");
|
|
||||||
|
|
||||||
if (collectedFrom == null) {
|
if (collectedFrom == null) { return Lists.newArrayList(); }
|
||||||
return Lists.newArrayList();
|
|
||||||
}
|
|
||||||
|
|
||||||
final KeyValue hostedBy = StringUtils.isBlank(doc.valueOf("//oaf:hostedBy/@id"))
|
final KeyValue hostedBy = StringUtils.isBlank(doc.valueOf("//oaf:hostedBy/@id"))
|
||||||
? collectedFrom
|
? collectedFrom
|
||||||
: getProvenanceDatasource(doc, "//oaf:hostedBy/@id", "//oaf:hostedBy/@name");
|
: getProvenanceDatasource(doc, "//oaf:hostedBy/@id", "//oaf:hostedBy/@name");
|
||||||
|
|
||||||
if (hostedBy == null) {
|
if (hostedBy == null) { return Lists.newArrayList(); }
|
||||||
return Lists.newArrayList();
|
|
||||||
}
|
|
||||||
|
|
||||||
final DataInfo entityInfo = prepareDataInfo(doc, invisible);
|
final DataInfo entityInfo = prepareDataInfo(doc, this.invisible);
|
||||||
final long lastUpdateTimestamp = new Date().getTime();
|
final long lastUpdateTimestamp = new Date().getTime();
|
||||||
|
|
||||||
final List<Instance> instances = prepareInstances(doc, entityInfo, collectedFrom, hostedBy);
|
final List<Instance> instances = prepareInstances(doc, entityInfo, collectedFrom, hostedBy);
|
||||||
|
@ -126,7 +172,7 @@ public abstract class AbstractMdRecordToOafMapper {
|
||||||
final String type = getResultType(doc, instances);
|
final String type = getResultType(doc, instances);
|
||||||
|
|
||||||
return createOafs(doc, type, instances, collectedFrom, entityInfo, lastUpdateTimestamp);
|
return createOafs(doc, type, instances, collectedFrom, entityInfo, lastUpdateTimestamp);
|
||||||
} catch (DocumentException e) {
|
} catch (final DocumentException e) {
|
||||||
log.error("Error with record:\n" + xml);
|
log.error("Error with record:\n" + xml);
|
||||||
return Lists.newArrayList();
|
return Lists.newArrayList();
|
||||||
}
|
}
|
||||||
|
@ -135,7 +181,7 @@ public abstract class AbstractMdRecordToOafMapper {
|
||||||
protected String getResultType(final Document doc, final List<Instance> instances) {
|
protected String getResultType(final Document doc, final List<Instance> instances) {
|
||||||
final String type = doc.valueOf("//dr:CobjCategory/@type");
|
final String type = doc.valueOf("//dr:CobjCategory/@type");
|
||||||
|
|
||||||
if (StringUtils.isBlank(type) && vocs.vocabularyExists(ModelConstants.DNET_RESULT_TYPOLOGIES)) {
|
if (StringUtils.isBlank(type) && this.vocs.vocabularyExists(ModelConstants.DNET_RESULT_TYPOLOGIES)) {
|
||||||
final String instanceType = instances
|
final String instanceType = instances
|
||||||
.stream()
|
.stream()
|
||||||
.map(i -> i.getInstancetype().getClassid())
|
.map(i -> i.getInstancetype().getClassid())
|
||||||
|
@ -143,7 +189,7 @@ public abstract class AbstractMdRecordToOafMapper {
|
||||||
.filter(s -> !UNKNOWN.equalsIgnoreCase(s))
|
.filter(s -> !UNKNOWN.equalsIgnoreCase(s))
|
||||||
.orElse("0000"); // Unknown
|
.orElse("0000"); // Unknown
|
||||||
return Optional
|
return Optional
|
||||||
.ofNullable(vocs.getSynonymAsQualifier(ModelConstants.DNET_RESULT_TYPOLOGIES, instanceType))
|
.ofNullable(this.vocs.getSynonymAsQualifier(ModelConstants.DNET_RESULT_TYPOLOGIES, instanceType))
|
||||||
.map(Qualifier::getClassid)
|
.map(Qualifier::getClassid)
|
||||||
.orElse("0000");
|
.orElse("0000");
|
||||||
}
|
}
|
||||||
|
@ -155,9 +201,7 @@ public abstract class AbstractMdRecordToOafMapper {
|
||||||
final String dsId = doc.valueOf(xpathId);
|
final String dsId = doc.valueOf(xpathId);
|
||||||
final String dsName = doc.valueOf(xpathName);
|
final String dsName = doc.valueOf(xpathName);
|
||||||
|
|
||||||
if (StringUtils.isBlank(dsId) || StringUtils.isBlank(dsName)) {
|
if (StringUtils.isBlank(dsId) || StringUtils.isBlank(dsName)) { return null; }
|
||||||
return null;
|
|
||||||
}
|
|
||||||
|
|
||||||
return keyValue(createOpenaireId(10, dsId, true), dsName);
|
return keyValue(createOpenaireId(10, dsId, true), dsName);
|
||||||
}
|
}
|
||||||
|
@ -170,15 +214,14 @@ public abstract class AbstractMdRecordToOafMapper {
|
||||||
final DataInfo info,
|
final DataInfo info,
|
||||||
final long lastUpdateTimestamp) {
|
final long lastUpdateTimestamp) {
|
||||||
|
|
||||||
final OafEntity entity = createEntity(
|
final OafEntity entity = createEntity(doc, type, instances, collectedFrom, info, lastUpdateTimestamp);
|
||||||
doc, type, instances, collectedFrom, info, lastUpdateTimestamp);
|
|
||||||
|
|
||||||
final Set<String> originalId = Sets.newHashSet(entity.getOriginalId());
|
final Set<String> originalId = Sets.newHashSet(entity.getOriginalId());
|
||||||
originalId.add(entity.getId());
|
originalId.add(entity.getId());
|
||||||
entity.setOriginalId(Lists.newArrayList(originalId));
|
entity.setOriginalId(Lists.newArrayList(originalId));
|
||||||
|
|
||||||
if (!forceOriginalId) {
|
if (!this.forceOriginalId) {
|
||||||
final String id = IdentifierFactory.createIdentifier(entity, shouldHashId);
|
final String id = IdentifierFactory.createIdentifier(entity, this.shouldHashId);
|
||||||
if (!id.equals(entity.getId())) {
|
if (!id.equals(entity.getId())) {
|
||||||
entity.setId(id);
|
entity.setId(id);
|
||||||
}
|
}
|
||||||
|
@ -189,7 +232,7 @@ public abstract class AbstractMdRecordToOafMapper {
|
||||||
final DataInfo relationInfo = prepareDataInfo(doc, false);
|
final DataInfo relationInfo = prepareDataInfo(doc, false);
|
||||||
|
|
||||||
if (!oafs.isEmpty()) {
|
if (!oafs.isEmpty()) {
|
||||||
Set<Oaf> rels = Sets.newHashSet();
|
final Set<Oaf> rels = Sets.newHashSet();
|
||||||
|
|
||||||
rels.addAll(addProjectRels(doc, entity, relationInfo));
|
rels.addAll(addProjectRels(doc, entity, relationInfo));
|
||||||
rels.addAll(addOtherResultRels(doc, entity, relationInfo));
|
rels.addAll(addOtherResultRels(doc, entity, relationInfo));
|
||||||
|
@ -247,7 +290,8 @@ public abstract class AbstractMdRecordToOafMapper {
|
||||||
|
|
||||||
private List<Oaf> addProjectRels(
|
private List<Oaf> addProjectRels(
|
||||||
final Document doc,
|
final Document doc,
|
||||||
final OafEntity entity, DataInfo info) {
|
final OafEntity entity,
|
||||||
|
final DataInfo info) {
|
||||||
|
|
||||||
final List<Oaf> res = new ArrayList<>();
|
final List<Oaf> res = new ArrayList<>();
|
||||||
|
|
||||||
|
@ -263,29 +307,25 @@ public abstract class AbstractMdRecordToOafMapper {
|
||||||
final String projectId = createOpenaireId(40, originalId, true);
|
final String projectId = createOpenaireId(40, originalId, true);
|
||||||
|
|
||||||
res
|
res
|
||||||
.add(
|
.add(OafMapperUtils
|
||||||
OafMapperUtils
|
.getRelation(docId, projectId, RESULT_PROJECT, OUTCOME, IS_PRODUCED_BY, entity.getCollectedfrom(), info, entity
|
||||||
.getRelation(
|
.getLastupdatetimestamp(), validationdDate, null));
|
||||||
docId, projectId, RESULT_PROJECT, OUTCOME, IS_PRODUCED_BY, entity.getCollectedfrom(),
|
|
||||||
info, entity.getLastupdatetimestamp(), validationdDate, null));
|
|
||||||
res
|
res
|
||||||
.add(
|
.add(OafMapperUtils
|
||||||
OafMapperUtils
|
.getRelation(projectId, docId, RESULT_PROJECT, OUTCOME, PRODUCES, entity.getCollectedfrom(), info, entity
|
||||||
.getRelation(
|
.getLastupdatetimestamp(), validationdDate, null));
|
||||||
projectId, docId, RESULT_PROJECT, OUTCOME, PRODUCES, entity.getCollectedfrom(), info,
|
|
||||||
entity.getLastupdatetimestamp(), validationdDate, null));
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
private List<Oaf> addRelations(Document doc, OafEntity entity, DataInfo info) {
|
private List<Oaf> addRelations(final Document doc, final OafEntity entity, final DataInfo info) {
|
||||||
|
|
||||||
final List<Oaf> rels = Lists.newArrayList();
|
final List<Oaf> rels = Lists.newArrayList();
|
||||||
|
|
||||||
for (Object o : doc.selectNodes("//oaf:relation")) {
|
for (final Object o : doc.selectNodes("//oaf:relation")) {
|
||||||
Element element = (Element) o;
|
final Element element = (Element) o;
|
||||||
|
|
||||||
final String target = StringUtils.trim(element.getText());
|
final String target = StringUtils.trim(element.getText());
|
||||||
final String relType = element.attributeValue("relType");
|
final String relType = element.attributeValue("relType");
|
||||||
|
@ -305,19 +345,13 @@ public abstract class AbstractMdRecordToOafMapper {
|
||||||
if (StringUtils.isNotBlank(targetType)) {
|
if (StringUtils.isNotBlank(targetType)) {
|
||||||
final String targetId = createOpenaireId(targetType, target, true);
|
final String targetId = createOpenaireId(targetType, target, true);
|
||||||
rels
|
rels
|
||||||
.add(
|
.add(OafMapperUtils
|
||||||
OafMapperUtils
|
.getRelation(entity.getId(), targetId, relType, subRelType, relClass, entity.getCollectedfrom(), info, entity
|
||||||
.getRelation(
|
.getLastupdatetimestamp(), validationDate, null));
|
||||||
entity.getId(), targetId, relType, subRelType, relClass,
|
|
||||||
entity.getCollectedfrom(), info,
|
|
||||||
entity.getLastupdatetimestamp(), validationDate, null));
|
|
||||||
rels
|
rels
|
||||||
.add(
|
.add(OafMapperUtils
|
||||||
OafMapperUtils
|
.getRelation(targetId, entity.getId(), relType, subRelType, relClassInverse, entity.getCollectedfrom(), info, entity
|
||||||
.getRelation(
|
.getLastupdatetimestamp(), validationDate, null));
|
||||||
targetId, entity.getId(), relType, subRelType, relClassInverse,
|
|
||||||
entity.getCollectedfrom(), info,
|
|
||||||
entity.getLastupdatetimestamp(), validationDate, null));
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -325,24 +359,24 @@ public abstract class AbstractMdRecordToOafMapper {
|
||||||
return rels;
|
return rels;
|
||||||
}
|
}
|
||||||
|
|
||||||
private List<Oaf> addAffiliations(Document doc, OafEntity entity, DataInfo info) {
|
private List<Oaf> addAffiliations(final Document doc, final OafEntity entity, final DataInfo info) {
|
||||||
final List<Oaf> rels = Lists.newArrayList();
|
final List<Oaf> rels = Lists.newArrayList();
|
||||||
|
|
||||||
for (Object o : doc.selectNodes("//datacite:affiliation[@affiliationIdentifierScheme='ROR']")) {
|
for (final Object o : doc.selectNodes("//datacite:affiliation[@affiliationIdentifierScheme='ROR']")) {
|
||||||
Element element = (Element) o;
|
final Element element = (Element) o;
|
||||||
|
|
||||||
String rorId = element.attributeValue("affiliationIdentifier");
|
final String rorId = element.attributeValue("affiliationIdentifier");
|
||||||
if (StringUtils.isNotBlank(rorId)) {
|
if (StringUtils.isNotBlank(rorId)) {
|
||||||
|
|
||||||
String fullRorId = Constants.ROR_NS_PREFIX + "::" + rorId;
|
final String fullRorId = Constants.ROR_NS_PREFIX + "::" + rorId;
|
||||||
|
|
||||||
String resultId = entity.getId();
|
final String resultId = entity.getId();
|
||||||
String orgId = createOpenaireId("organization", fullRorId, true);
|
final String orgId = createOpenaireId("organization", fullRorId, true);
|
||||||
|
|
||||||
List<KeyValue> properties = Lists.newArrayList();
|
final List<KeyValue> properties = Lists.newArrayList();
|
||||||
|
|
||||||
String apcAmount = doc.valueOf("//oaf:processingchargeamount");
|
final String apcAmount = doc.valueOf("//oaf:processingchargeamount");
|
||||||
String apcCurrency = doc.valueOf("//oaf:processingchargeamount/@currency");
|
final String apcCurrency = doc.valueOf("//oaf:processingchargeamount/@currency");
|
||||||
|
|
||||||
if (StringUtils.isNotBlank(apcAmount) && StringUtils.isNotBlank(apcCurrency)) {
|
if (StringUtils.isNotBlank(apcAmount) && StringUtils.isNotBlank(apcCurrency)) {
|
||||||
properties.add(OafMapperUtils.keyValue("apc_amount", apcAmount));
|
properties.add(OafMapperUtils.keyValue("apc_amount", apcAmount));
|
||||||
|
@ -350,19 +384,13 @@ public abstract class AbstractMdRecordToOafMapper {
|
||||||
}
|
}
|
||||||
|
|
||||||
rels
|
rels
|
||||||
.add(
|
.add(OafMapperUtils
|
||||||
OafMapperUtils
|
.getRelation(resultId, orgId, RESULT_ORGANIZATION, AFFILIATION, HAS_AUTHOR_INSTITUTION, entity.getCollectedfrom(), info, entity
|
||||||
.getRelation(
|
.getLastupdatetimestamp(), null, properties));
|
||||||
resultId, orgId, RESULT_ORGANIZATION, AFFILIATION, HAS_AUTHOR_INSTITUTION,
|
|
||||||
entity.getCollectedfrom(), info, entity.getLastupdatetimestamp(), null,
|
|
||||||
properties));
|
|
||||||
rels
|
rels
|
||||||
.add(
|
.add(OafMapperUtils
|
||||||
OafMapperUtils
|
.getRelation(orgId, resultId, RESULT_ORGANIZATION, AFFILIATION, IS_AUTHOR_INSTITUTION_OF, entity
|
||||||
.getRelation(
|
.getCollectedfrom(), info, entity.getLastupdatetimestamp(), null, properties));
|
||||||
orgId, resultId, RESULT_ORGANIZATION, AFFILIATION, IS_AUTHOR_INSTITUTION_OF,
|
|
||||||
entity.getCollectedfrom(), info, entity.getLastupdatetimestamp(), null,
|
|
||||||
properties));
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return rels;
|
return rels;
|
||||||
|
@ -370,7 +398,8 @@ public abstract class AbstractMdRecordToOafMapper {
|
||||||
|
|
||||||
protected abstract List<Oaf> addOtherResultRels(
|
protected abstract List<Oaf> addOtherResultRels(
|
||||||
final Document doc,
|
final Document doc,
|
||||||
final OafEntity entity, DataInfo info);
|
final OafEntity entity,
|
||||||
|
DataInfo info);
|
||||||
|
|
||||||
private void populateResultFields(
|
private void populateResultFields(
|
||||||
final Result r,
|
final Result r,
|
||||||
|
@ -391,7 +420,7 @@ public abstract class AbstractMdRecordToOafMapper {
|
||||||
r.setOaiprovenance(prepareOAIprovenance(doc));
|
r.setOaiprovenance(prepareOAIprovenance(doc));
|
||||||
r.setAuthor(prepareAuthors(doc, info));
|
r.setAuthor(prepareAuthors(doc, info));
|
||||||
r.setLanguage(prepareLanguages(doc));
|
r.setLanguage(prepareLanguages(doc));
|
||||||
r.setCountry(new ArrayList<>()); // NOT PRESENT IN MDSTORES
|
r.setCountry(prepareCountries(doc, info));
|
||||||
r.setSubject(prepareSubjects(doc, info));
|
r.setSubject(prepareSubjects(doc, info));
|
||||||
r.setTitle(prepareTitles(doc, info));
|
r.setTitle(prepareTitles(doc, info));
|
||||||
r.setRelevantdate(prepareRelevantDates(doc, info));
|
r.setRelevantdate(prepareRelevantDates(doc, info));
|
||||||
|
@ -407,16 +436,31 @@ public abstract class AbstractMdRecordToOafMapper {
|
||||||
r.setCoverage(prepareCoverages(doc, info));
|
r.setCoverage(prepareCoverages(doc, info));
|
||||||
r.setContext(prepareContexts(doc, info));
|
r.setContext(prepareContexts(doc, info));
|
||||||
r.setExternalReference(new ArrayList<>()); // NOT PRESENT IN MDSTORES
|
r.setExternalReference(new ArrayList<>()); // NOT PRESENT IN MDSTORES
|
||||||
r
|
r.setProcessingchargeamount(field(doc.valueOf("//oaf:processingchargeamount"), info));
|
||||||
.setProcessingchargeamount(field(doc.valueOf("//oaf:processingchargeamount"), info));
|
r.setProcessingchargecurrency(field(doc.valueOf("//oaf:processingchargeamount/@currency"), info));
|
||||||
r
|
|
||||||
.setProcessingchargecurrency(field(doc.valueOf("//oaf:processingchargeamount/@currency"), info));
|
|
||||||
|
|
||||||
r.setInstance(instances);
|
r.setInstance(instances);
|
||||||
r.setBestaccessright(OafMapperUtils.createBestAccessRights(instances));
|
r.setBestaccessright(OafMapperUtils.createBestAccessRights(instances));
|
||||||
r.setEoscifguidelines(prepareEOSCIfGuidelines(doc, info));
|
r.setEoscifguidelines(prepareEOSCIfGuidelines(doc, info));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Collects the countries declared in the given document.
 *
 * Every node matched by the XPath {@code //oaf:country} is read; its trimmed text is
 * treated as a code and, when not blank, resolved through
 * {@code vocs.getTermAsQualifier} against {@code ModelConstants.DNET_COUNTRY_TYPE}.
 * The qualifier's classid, classname, schemeid and schemename are copied onto a new
 * {@link Country}, which also receives the supplied {@code info}.
 *
 * No de-duplication is performed: a code that occurs in several nodes yields several entries.
 *
 * @param doc the document queried for {@code //oaf:country} nodes
 * @param info the DataInfo set on every Country that is created
 * @return the countries in the order the nodes are iterated; empty when no node carries a non-blank code
 */
private List<Country> prepareCountries(final Document doc, final DataInfo info) {
|
||||||
|
final List<Country> list = new ArrayList<>();
|
||||||
|
for (final Object n : doc.selectNodes("//oaf:country")) {
|
||||||
|
final String code = ((Node) n).getText().trim();
|
||||||
|
if (StringUtils.isNotBlank(code)) {
|
||||||
|
// NOTE(review): q is dereferenced below without a null check - confirm that
|
||||||
|
// getTermAsQualifier never returns null for a code missing from the vocabulary.
final Qualifier q = this.vocs.getTermAsQualifier(ModelConstants.DNET_COUNTRY_TYPE, code);
|
||||||
|
final Country country = new Country();
|
||||||
|
country.setClassid(q.getClassid());
|
||||||
|
country.setClassname(q.getClassname());
|
||||||
|
country.setSchemeid(q.getSchemeid());
|
||||||
|
country.setSchemename(q.getSchemename());
|
||||||
|
country.setDataInfo(info);
|
||||||
|
list.add(country);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return list;
|
||||||
|
}
|
||||||
|
|
||||||
protected abstract List<StructuredProperty> prepareResultPids(Document doc, DataInfo info);
|
protected abstract List<StructuredProperty> prepareResultPids(Document doc, DataInfo info);
|
||||||
|
|
||||||
private List<Context> prepareContexts(final Document doc, final DataInfo info) {
|
private List<Context> prepareContexts(final Document doc, final DataInfo info) {
|
||||||
|
@ -433,7 +477,7 @@ public abstract class AbstractMdRecordToOafMapper {
|
||||||
return list;
|
return list;
|
||||||
}
|
}
|
||||||
|
|
||||||
private List<EoscIfGuidelines> prepareEOSCIfGuidelines(Document doc, DataInfo info) {
|
private List<EoscIfGuidelines> prepareEOSCIfGuidelines(final Document doc, final DataInfo info) {
|
||||||
final Set<EoscIfGuidelines> set = Sets.newHashSet();
|
final Set<EoscIfGuidelines> set = Sets.newHashSet();
|
||||||
for (final Object o : doc.selectNodes("//oaf:eoscifguidelines")) {
|
for (final Object o : doc.selectNodes("//oaf:eoscifguidelines")) {
|
||||||
final String code = ((Node) o).valueOf("@code");
|
final String code = ((Node) o).valueOf("@code");
|
||||||
|
@ -520,7 +564,7 @@ public abstract class AbstractMdRecordToOafMapper {
|
||||||
|
|
||||||
protected abstract String findOriginalType(Document doc);
|
protected abstract String findOriginalType(Document doc);
|
||||||
|
|
||||||
protected List<InstanceTypeMapping> prepareInstanceTypeMapping(Document doc) {
|
protected List<InstanceTypeMapping> prepareInstanceTypeMapping(final Document doc) {
|
||||||
return Optional
|
return Optional
|
||||||
.ofNullable(findOriginalType(doc))
|
.ofNullable(findOriginalType(doc))
|
||||||
.map(originalType -> {
|
.map(originalType -> {
|
||||||
|
@ -543,9 +587,7 @@ public abstract class AbstractMdRecordToOafMapper {
|
||||||
final String sp = n.valueOf("@sp");
|
final String sp = n.valueOf("@sp");
|
||||||
final String vol = n.valueOf("@vol");
|
final String vol = n.valueOf("@vol");
|
||||||
final String edition = n.valueOf("@edition");
|
final String edition = n.valueOf("@edition");
|
||||||
if (StringUtils.isNotBlank(name)) {
|
if (StringUtils.isNotBlank(name)) { return journal(name, issnPrinted, issnOnline, issnLinking, ep, iss, sp, vol, edition, null, null, info); }
|
||||||
return journal(name, issnPrinted, issnOnline, issnLinking, ep, iss, sp, vol, edition, null, null, info);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
@ -554,18 +596,13 @@ public abstract class AbstractMdRecordToOafMapper {
|
||||||
final Node n = doc.selectSingleNode("//*[local-name()='provenance']/*[local-name()='originDescription']");
|
final Node n = doc.selectSingleNode("//*[local-name()='provenance']/*[local-name()='originDescription']");
|
||||||
if (n != null) {
|
if (n != null) {
|
||||||
final String id = n.valueOf("./*[local-name()='identifier']");
|
final String id = n.valueOf("./*[local-name()='identifier']");
|
||||||
if (StringUtils.isNotBlank(id)) {
|
if (StringUtils.isNotBlank(id)) { return Lists.newArrayList(id); }
|
||||||
return Lists.newArrayList(id);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
final List<String> idList = doc
|
final List<String> idList = doc
|
||||||
.selectNodes(
|
.selectNodes("normalize-space(//*[local-name()='header']/*[local-name()='identifier' or local-name()='recordIdentifier']/text())");
|
||||||
"normalize-space(//*[local-name()='header']/*[local-name()='identifier' or local-name()='recordIdentifier']/text())");
|
|
||||||
final Set<String> originalIds = Sets.newHashSet(idList);
|
final Set<String> originalIds = Sets.newHashSet(idList);
|
||||||
|
|
||||||
if (originalIds.isEmpty()) {
|
if (originalIds.isEmpty()) { throw new IllegalStateException("missing originalID on " + doc.asXML()); }
|
||||||
throw new IllegalStateException("missing originalID on " + doc.asXML());
|
|
||||||
}
|
|
||||||
return Lists.newArrayList(originalIds);
|
return Lists.newArrayList(originalIds);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -587,7 +624,7 @@ public abstract class AbstractMdRecordToOafMapper {
|
||||||
}
|
}
|
||||||
|
|
||||||
protected Qualifier prepareQualifier(final String classId, final String schemeId) {
|
protected Qualifier prepareQualifier(final String classId, final String schemeId) {
|
||||||
return vocs.getTermAsQualifier(schemeId, classId);
|
return this.vocs.getTermAsQualifier(schemeId, classId);
|
||||||
}
|
}
|
||||||
|
|
||||||
protected List<StructuredProperty> prepareListStructPropsWithValidQualifier(
|
protected List<StructuredProperty> prepareListStructPropsWithValidQualifier(
|
||||||
|
@ -601,8 +638,8 @@ public abstract class AbstractMdRecordToOafMapper {
|
||||||
for (final Object o : node.selectNodes(xpath)) {
|
for (final Object o : node.selectNodes(xpath)) {
|
||||||
final Node n = (Node) o;
|
final Node n = (Node) o;
|
||||||
final String classId = n.valueOf(xpathClassId).trim();
|
final String classId = n.valueOf(xpathClassId).trim();
|
||||||
if (vocs.termExists(schemeId, classId)) {
|
if (this.vocs.termExists(schemeId, classId)) {
|
||||||
res.add(structuredProperty(n.getText(), vocs.getTermAsQualifier(schemeId, classId), info));
|
res.add(structuredProperty(n.getText(), this.vocs.getTermAsQualifier(schemeId, classId), info));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return res;
|
return res;
|
||||||
|
@ -629,10 +666,8 @@ public abstract class AbstractMdRecordToOafMapper {
|
||||||
for (final Object o : node.selectNodes(xpath)) {
|
for (final Object o : node.selectNodes(xpath)) {
|
||||||
final Node n = (Node) o;
|
final Node n = (Node) o;
|
||||||
res
|
res
|
||||||
.add(
|
.add(structuredProperty(n.getText(), n.valueOf("@classid"), n.valueOf("@classname"), n.valueOf("@schemeid"), n
|
||||||
structuredProperty(
|
.valueOf("@schemename"), info));
|
||||||
n.getText(), n.valueOf("@classid"), n.valueOf("@classname"), n.valueOf("@schemeid"),
|
|
||||||
n.valueOf("@schemename"), info));
|
|
||||||
}
|
}
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
@ -645,10 +680,7 @@ public abstract class AbstractMdRecordToOafMapper {
|
||||||
for (final Object o : node.selectNodes(xpath)) {
|
for (final Object o : node.selectNodes(xpath)) {
|
||||||
final Node n = (Node) o;
|
final Node n = (Node) o;
|
||||||
res
|
res
|
||||||
.add(
|
.add(subject(n.getText(), n.valueOf("@classid"), n.valueOf("@classname"), n.valueOf("@schemeid"), n.valueOf("@schemename"), info));
|
||||||
subject(
|
|
||||||
n.getText(), n.valueOf("@classid"), n.valueOf("@classname"), n.valueOf("@schemeid"),
|
|
||||||
n.valueOf("@schemename"), info));
|
|
||||||
}
|
}
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
@ -656,14 +688,12 @@ public abstract class AbstractMdRecordToOafMapper {
|
||||||
protected OAIProvenance prepareOAIprovenance(final Document doc) {
|
protected OAIProvenance prepareOAIprovenance(final Document doc) {
|
||||||
final Node n = doc.selectSingleNode("//*[local-name()='provenance']/*[local-name()='originDescription']");
|
final Node n = doc.selectSingleNode("//*[local-name()='provenance']/*[local-name()='originDescription']");
|
||||||
|
|
||||||
if (n == null) {
|
if (n == null) { return null; }
|
||||||
return null;
|
|
||||||
}
|
|
||||||
|
|
||||||
final String identifier = n.valueOf("./*[local-name()='identifier']");
|
final String identifier = n.valueOf("./*[local-name()='identifier']");
|
||||||
final String baseURL = n.valueOf("./*[local-name()='baseURL']");
|
final String baseURL = n.valueOf("./*[local-name()='baseURL']");
|
||||||
final String metadataNamespace = n.valueOf("./*[local-name()='metadataNamespace']");
|
final String metadataNamespace = n.valueOf("./*[local-name()='metadataNamespace']");
|
||||||
final boolean altered = n.valueOf("@altered").equalsIgnoreCase("true");
|
final boolean altered = "true".equalsIgnoreCase(n.valueOf("@altered"));
|
||||||
final String datestamp = n.valueOf("./*[local-name()='datestamp']");
|
final String datestamp = n.valueOf("./*[local-name()='datestamp']");
|
||||||
final String harvestDate = n.valueOf("@harvestDate");
|
final String harvestDate = n.valueOf("@harvestDate");
|
||||||
|
|
||||||
|
@ -673,9 +703,7 @@ public abstract class AbstractMdRecordToOafMapper {
|
||||||
protected DataInfo prepareDataInfo(final Document doc, final boolean invisible) {
|
protected DataInfo prepareDataInfo(final Document doc, final boolean invisible) {
|
||||||
final Node n = doc.selectSingleNode("//oaf:datainfo");
|
final Node n = doc.selectSingleNode("//oaf:datainfo");
|
||||||
|
|
||||||
if (n == null) {
|
if (n == null) { return dataInfo(false, null, false, invisible, REPOSITORY_PROVENANCE_ACTIONS, "0.9"); }
|
||||||
return dataInfo(false, null, false, invisible, REPOSITORY_PROVENANCE_ACTIONS, "0.9");
|
|
||||||
}
|
|
||||||
|
|
||||||
final String paClassId = n.valueOf("./oaf:provenanceaction/@classid");
|
final String paClassId = n.valueOf("./oaf:provenanceaction/@classid");
|
||||||
final String paClassName = n.valueOf("./oaf:provenanceaction/@classname");
|
final String paClassName = n.valueOf("./oaf:provenanceaction/@classname");
|
||||||
|
@ -687,14 +715,11 @@ public abstract class AbstractMdRecordToOafMapper {
|
||||||
final Boolean inferred = Boolean.parseBoolean(n.valueOf("./oaf:inferred"));
|
final Boolean inferred = Boolean.parseBoolean(n.valueOf("./oaf:inferred"));
|
||||||
final String trust = n.valueOf("./oaf:trust");
|
final String trust = n.valueOf("./oaf:trust");
|
||||||
|
|
||||||
return dataInfo(
|
return dataInfo(deletedbyinference, inferenceprovenance, inferred, invisible, qualifier(paClassId, paClassName, paSchemeId, paSchemeName), trust);
|
||||||
deletedbyinference, inferenceprovenance, inferred, invisible,
|
|
||||||
qualifier(paClassId, paClassName, paSchemeId, paSchemeName), trust);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
protected List<Field<String>> prepareListURL(final Node node, final String xpath, final DataInfo info) {
|
protected List<Field<String>> prepareListURL(final Node node, final String xpath, final DataInfo info) {
|
||||||
return listFields(
|
return listFields(info, prepareListString(node, xpath)
|
||||||
info, prepareListString(node, xpath)
|
|
||||||
.stream()
|
.stream()
|
||||||
.filter(URL_VALIDATOR::isValid)
|
.filter(URL_VALIDATOR::isValid)
|
||||||
.collect(Collectors.toList()));
|
.collect(Collectors.toList()));
|
||||||
|
@ -722,11 +747,9 @@ public abstract class AbstractMdRecordToOafMapper {
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
protected Set<String> validateUrl(Collection<String> url) {
|
protected Set<String> validateUrl(final Collection<String> url) {
|
||||||
|
|
||||||
if (Objects.isNull(url)) {
|
if (Objects.isNull(url)) { return new HashSet<>(); }
|
||||||
return new HashSet<>();
|
|
||||||
}
|
|
||||||
return url
|
return url
|
||||||
.stream()
|
.stream()
|
||||||
.filter(URL_VALIDATOR::isValid)
|
.filter(URL_VALIDATOR::isValid)
|
||||||
|
|
|
@ -196,6 +196,12 @@ class MappersTest {
|
||||||
|
|
||||||
assertEquals(aff1.getSource(), aff2.getTarget());
|
assertEquals(aff1.getSource(), aff2.getTarget());
|
||||||
assertEquals(aff2.getSource(), aff1.getTarget());
|
assertEquals(aff2.getSource(), aff1.getTarget());
|
||||||
|
|
||||||
|
// COUNTRIES
|
||||||
|
assertEquals(3, p.getCountry().size());
|
||||||
|
assertEquals("IT", p.getCountry().get(0).getClassid());
|
||||||
|
assertEquals("FR", p.getCountry().get(1).getClassid());
|
||||||
|
assertEquals("DE", p.getCountry().get(2).getClassid());
|
||||||
}
|
}
|
||||||
|
|
||||||
private void verifyRelation(Relation r) {
|
private void verifyRelation(Relation r) {
|
||||||
|
@ -867,6 +873,12 @@ class MappersTest {
|
||||||
assertValidId(p.getCollectedfrom().get(0).getKey());
|
assertValidId(p.getCollectedfrom().get(0).getKey());
|
||||||
System.out.println(p.getTitle().get(0).getValue());
|
System.out.println(p.getTitle().get(0).getValue());
|
||||||
assertTrue(StringUtils.isNotBlank(p.getTitle().get(0).getValue()));
|
assertTrue(StringUtils.isNotBlank(p.getTitle().get(0).getValue()));
|
||||||
|
|
||||||
|
// COUNTRIES
|
||||||
|
assertEquals(3, p.getCountry().size());
|
||||||
|
assertEquals("IT", p.getCountry().get(0).getClassid());
|
||||||
|
assertEquals("FR", p.getCountry().get(1).getClassid());
|
||||||
|
assertEquals("DE", p.getCountry().get(2).getClassid());
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
|
|
|
@ -71,6 +71,9 @@
|
||||||
subRelType="outcome"
|
subRelType="outcome"
|
||||||
targetType="project"
|
targetType="project"
|
||||||
validationDate="2020-01-01">corda_______::226852</oaf:relation>
|
validationDate="2020-01-01">corda_______::226852</oaf:relation>
|
||||||
|
<oaf:country>IT</oaf:country>
|
||||||
|
<oaf:country>FR</oaf:country>
|
||||||
|
<oaf:country>DE</oaf:country>
|
||||||
</metadata>
|
</metadata>
|
||||||
<about xmlns:oai="http://www.openarchives.org/OAI/2.0/">
|
<about xmlns:oai="http://www.openarchives.org/OAI/2.0/">
|
||||||
<provenance xmlns="http://www.openarchives.org/OAI/2.0/provenance" xsi:schemaLocation="http://www.openarchives.org/OAI/2.0/provenance http://www.openarchives.org/OAI/2.0/provenance.xsd">
|
<provenance xmlns="http://www.openarchives.org/OAI/2.0/provenance" xsi:schemaLocation="http://www.openarchives.org/OAI/2.0/provenance http://www.openarchives.org/OAI/2.0/provenance.xsd">
|
||||||
|
|
|
@ -78,6 +78,9 @@
|
||||||
<oaf:projectid>corda_______::630786</oaf:projectid>
|
<oaf:projectid>corda_______::630786</oaf:projectid>
|
||||||
<oaf:hostedBy id="re3data_____::r3d100010386" name="LINDAT/CLARIN repository"/>
|
<oaf:hostedBy id="re3data_____::r3d100010386" name="LINDAT/CLARIN repository"/>
|
||||||
<oaf:collectedFrom id="re3data_____::r3d100010386" name="LINDAT/CLARIN repository"/>
|
<oaf:collectedFrom id="re3data_____::r3d100010386" name="LINDAT/CLARIN repository"/>
|
||||||
|
<oaf:country>IT</oaf:country>
|
||||||
|
<oaf:country>FR</oaf:country>
|
||||||
|
<oaf:country>DE</oaf:country>
|
||||||
</metadata>
|
</metadata>
|
||||||
<about xmlns:dc="http://purl.org/dc/elements/1.1/"
|
<about xmlns:dc="http://purl.org/dc/elements/1.1/"
|
||||||
xmlns:dri="http://www.driver-repository.eu/namespace/dri" xmlns:prov="http://www.openarchives.org/OAI/2.0/provenance">
|
xmlns:dri="http://www.driver-repository.eu/namespace/dri" xmlns:prov="http://www.openarchives.org/OAI/2.0/provenance">
|
||||||
|
|
Loading…
Reference in New Issue