forked from D-Net/dnet-hadoop
author fullnames
This commit is contained in:
parent
c7ca3cf35b
commit
c5f7e17348
|
@ -4,16 +4,32 @@ package eu.dnetlib.dhp.oa.graph.raw;
|
||||||
import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.createOpenaireId;
|
import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.createOpenaireId;
|
||||||
import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.field;
|
import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.field;
|
||||||
import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.structuredProperty;
|
import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.structuredProperty;
|
||||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.*;
|
import static eu.dnetlib.dhp.schema.common.ModelConstants.DNET_ACCESS_MODES;
|
||||||
|
import static eu.dnetlib.dhp.schema.common.ModelConstants.DNET_DATA_CITE_DATE;
|
||||||
|
import static eu.dnetlib.dhp.schema.common.ModelConstants.DNET_DATA_CITE_RESOURCE;
|
||||||
|
import static eu.dnetlib.dhp.schema.common.ModelConstants.DNET_LANGUAGES;
|
||||||
|
import static eu.dnetlib.dhp.schema.common.ModelConstants.DNET_PID_TYPES;
|
||||||
|
import static eu.dnetlib.dhp.schema.common.ModelConstants.DNET_PUBLICATION_RESOURCE;
|
||||||
|
import static eu.dnetlib.dhp.schema.common.ModelConstants.HAS_PARTS;
|
||||||
|
import static eu.dnetlib.dhp.schema.common.ModelConstants.IS_PART_OF;
|
||||||
|
import static eu.dnetlib.dhp.schema.common.ModelConstants.IS_SUPPLEMENTED_BY;
|
||||||
|
import static eu.dnetlib.dhp.schema.common.ModelConstants.IS_SUPPLEMENT_TO;
|
||||||
|
import static eu.dnetlib.dhp.schema.common.ModelConstants.PART;
|
||||||
|
import static eu.dnetlib.dhp.schema.common.ModelConstants.RESULT_RESULT;
|
||||||
|
import static eu.dnetlib.dhp.schema.common.ModelConstants.SUPPLEMENT;
|
||||||
|
|
||||||
import java.util.*;
|
import java.util.ArrayList;
|
||||||
|
import java.util.Arrays;
|
||||||
|
import java.util.HashSet;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.Set;
|
||||||
|
|
||||||
import org.apache.commons.lang3.StringUtils;
|
import org.apache.commons.lang3.StringUtils;
|
||||||
import org.dom4j.Document;
|
import org.dom4j.Document;
|
||||||
import org.dom4j.Node;
|
import org.dom4j.Node;
|
||||||
|
|
||||||
import eu.dnetlib.dhp.oa.graph.raw.common.PacePerson;
|
import eu.dnetlib.dhp.oa.graph.raw.common.PacePerson;
|
||||||
import eu.dnetlib.dhp.schema.common.ModelConstants;
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.Author;
|
import eu.dnetlib.dhp.schema.oaf.Author;
|
||||||
import eu.dnetlib.dhp.schema.oaf.DataInfo;
|
import eu.dnetlib.dhp.schema.oaf.DataInfo;
|
||||||
import eu.dnetlib.dhp.schema.oaf.Field;
|
import eu.dnetlib.dhp.schema.oaf.Field;
|
||||||
|
@ -22,7 +38,6 @@ import eu.dnetlib.dhp.schema.oaf.Instance;
|
||||||
import eu.dnetlib.dhp.schema.oaf.KeyValue;
|
import eu.dnetlib.dhp.schema.oaf.KeyValue;
|
||||||
import eu.dnetlib.dhp.schema.oaf.Oaf;
|
import eu.dnetlib.dhp.schema.oaf.Oaf;
|
||||||
import eu.dnetlib.dhp.schema.oaf.Qualifier;
|
import eu.dnetlib.dhp.schema.oaf.Qualifier;
|
||||||
import eu.dnetlib.dhp.schema.oaf.Relation;
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
|
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
|
||||||
|
|
||||||
public class OdfToOafMapper extends AbstractMdRecordToOafMapper {
|
public class OdfToOafMapper extends AbstractMdRecordToOafMapper {
|
||||||
|
@ -48,7 +63,7 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper {
|
||||||
final String fullname = n.valueOf("./datacite:creatorName");
|
final String fullname = n.valueOf("./datacite:creatorName");
|
||||||
author.setFullname(fullname);
|
author.setFullname(fullname);
|
||||||
|
|
||||||
PacePerson pp = new PacePerson(fullname, false);
|
final PacePerson pp = new PacePerson(fullname, false);
|
||||||
final String name = n.valueOf("./datacite:givenName");
|
final String name = n.valueOf("./datacite:givenName");
|
||||||
if (StringUtils.isBlank(name) & pp.isAccurate()) {
|
if (StringUtils.isBlank(name) & pp.isAccurate()) {
|
||||||
author.setName(pp.getNormalisedFirstName());
|
author.setName(pp.getNormalisedFirstName());
|
||||||
|
@ -63,6 +78,10 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper {
|
||||||
author.setSurname(surname);
|
author.setSurname(surname);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (StringUtils.isBlank(author.getFullname())) {
|
||||||
|
author.setFullname(String.format("%s, %s", author.getSurname(), author.getName()));
|
||||||
|
}
|
||||||
|
|
||||||
author.setAffiliation(prepareListFields(n, "./datacite:affiliation", info));
|
author.setAffiliation(prepareListFields(n, "./datacite:affiliation", info));
|
||||||
author.setPid(preparePids(n, info));
|
author.setPid(preparePids(n, info));
|
||||||
author.setRank(pos++);
|
author.setRank(pos++);
|
||||||
|
@ -75,12 +94,7 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper {
|
||||||
final List<StructuredProperty> res = new ArrayList<>();
|
final List<StructuredProperty> res = new ArrayList<>();
|
||||||
for (final Object o : n.selectNodes("./datacite:nameIdentifier")) {
|
for (final Object o : n.selectNodes("./datacite:nameIdentifier")) {
|
||||||
res
|
res
|
||||||
.add(
|
.add(structuredProperty(((Node) o).getText(), prepareQualifier((Node) o, "./@nameIdentifierScheme", DNET_PID_TYPES, DNET_PID_TYPES), info));
|
||||||
structuredProperty(
|
|
||||||
((Node) o).getText(),
|
|
||||||
prepareQualifier(
|
|
||||||
(Node) o, "./@nameIdentifierScheme", DNET_PID_TYPES, DNET_PID_TYPES),
|
|
||||||
info));
|
|
||||||
}
|
}
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
@ -94,22 +108,18 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper {
|
||||||
|
|
||||||
final Instance instance = new Instance();
|
final Instance instance = new Instance();
|
||||||
instance
|
instance
|
||||||
.setInstancetype(
|
.setInstancetype(prepareQualifier(doc, "//dr:CobjCategory", DNET_PUBLICATION_RESOURCE, DNET_PUBLICATION_RESOURCE));
|
||||||
prepareQualifier(
|
|
||||||
doc, "//dr:CobjCategory", DNET_PUBLICATION_RESOURCE, DNET_PUBLICATION_RESOURCE));
|
|
||||||
instance.setCollectedfrom(collectedfrom);
|
instance.setCollectedfrom(collectedfrom);
|
||||||
instance.setHostedby(hostedby);
|
instance.setHostedby(hostedby);
|
||||||
instance.setDateofacceptance(field(doc.valueOf("//oaf:dateAccepted"), info));
|
instance.setDateofacceptance(field(doc.valueOf("//oaf:dateAccepted"), info));
|
||||||
instance.setDistributionlocation(doc.valueOf("//oaf:distributionlocation"));
|
instance.setDistributionlocation(doc.valueOf("//oaf:distributionlocation"));
|
||||||
instance
|
instance
|
||||||
.setAccessright(
|
.setAccessright(prepareQualifier(doc, "//oaf:accessrights", DNET_ACCESS_MODES, DNET_ACCESS_MODES));
|
||||||
prepareQualifier(doc, "//oaf:accessrights", DNET_ACCESS_MODES, DNET_ACCESS_MODES));
|
|
||||||
instance.setLicense(field(doc.valueOf("//oaf:license"), info));
|
instance.setLicense(field(doc.valueOf("//oaf:license"), info));
|
||||||
instance.setRefereed(field(doc.valueOf("//oaf:refereed"), info));
|
instance.setRefereed(field(doc.valueOf("//oaf:refereed"), info));
|
||||||
instance.setProcessingchargeamount(field(doc.valueOf("//oaf:processingchargeamount"), info));
|
instance.setProcessingchargeamount(field(doc.valueOf("//oaf:processingchargeamount"), info));
|
||||||
instance
|
instance
|
||||||
.setProcessingchargecurrency(
|
.setProcessingchargecurrency(field(doc.valueOf("//oaf:processingchargeamount/@currency"), info));
|
||||||
field(doc.valueOf("//oaf:processingchargeamount/@currency"), info));
|
|
||||||
|
|
||||||
final Set<String> url = new HashSet<>();
|
final Set<String> url = new HashSet<>();
|
||||||
for (final Object o : doc.selectNodes("//datacite:alternateIdentifier[@alternateIdentifierType='URL']")) {
|
for (final Object o : doc.selectNodes("//datacite:alternateIdentifier[@alternateIdentifierType='URL']")) {
|
||||||
|
@ -147,14 +157,7 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper {
|
||||||
&& !dateType.equalsIgnoreCase("Updated")
|
&& !dateType.equalsIgnoreCase("Updated")
|
||||||
&& !dateType.equalsIgnoreCase("Available")) {
|
&& !dateType.equalsIgnoreCase("Available")) {
|
||||||
res
|
res
|
||||||
.add(
|
.add(structuredProperty(((Node) o).getText(), "UNKNOWN", "UNKNOWN", DNET_DATA_CITE_DATE, DNET_DATA_CITE_DATE, info));
|
||||||
structuredProperty(
|
|
||||||
((Node) o).getText(),
|
|
||||||
"UNKNOWN",
|
|
||||||
"UNKNOWN",
|
|
||||||
DNET_DATA_CITE_DATE,
|
|
||||||
DNET_DATA_CITE_DATE,
|
|
||||||
info));
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return res;
|
return res;
|
||||||
|
@ -197,53 +200,49 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected List<Field<String>> prepareOtherResearchProductTools(
|
protected List<Field<String>> prepareOtherResearchProductTools(
|
||||||
final Document doc, final DataInfo info) {
|
final Document doc,
|
||||||
|
final DataInfo info) {
|
||||||
return new ArrayList<>(); // Not present in ODF ???
|
return new ArrayList<>(); // Not present in ODF ???
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected List<Field<String>> prepareOtherResearchProductContactGroups(
|
protected List<Field<String>> prepareOtherResearchProductContactGroups(
|
||||||
final Document doc, final DataInfo info) {
|
final Document doc,
|
||||||
return prepareListFields(
|
final DataInfo info) {
|
||||||
doc,
|
return prepareListFields(doc, "//datacite:contributor[@contributorType='ContactGroup']/datacite:contributorName", info);
|
||||||
"//datacite:contributor[@contributorType='ContactGroup']/datacite:contributorName",
|
|
||||||
info);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected List<Field<String>> prepareOtherResearchProductContactPersons(
|
protected List<Field<String>> prepareOtherResearchProductContactPersons(
|
||||||
final Document doc, final DataInfo info) {
|
final Document doc,
|
||||||
return prepareListFields(
|
final DataInfo info) {
|
||||||
doc,
|
return prepareListFields(doc, "//datacite:contributor[@contributorType='ContactPerson']/datacite:contributorName", info);
|
||||||
"//datacite:contributor[@contributorType='ContactPerson']/datacite:contributorName",
|
|
||||||
info);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected Qualifier prepareSoftwareProgrammingLanguage(final Document doc, final DataInfo info) {
|
protected Qualifier prepareSoftwareProgrammingLanguage(final Document doc, final DataInfo info) {
|
||||||
return prepareQualifier(
|
return prepareQualifier(doc, "//datacite:format", "dnet:programming_languages", "dnet:programming_languages");
|
||||||
doc, "//datacite:format", "dnet:programming_languages", "dnet:programming_languages");
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected Field<String> prepareSoftwareCodeRepositoryUrl(
|
protected Field<String> prepareSoftwareCodeRepositoryUrl(
|
||||||
final Document doc, final DataInfo info) {
|
final Document doc,
|
||||||
|
final DataInfo info) {
|
||||||
return null; // Not present in ODF ???
|
return null; // Not present in ODF ???
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected List<StructuredProperty> prepareSoftwareLicenses(
|
protected List<StructuredProperty> prepareSoftwareLicenses(
|
||||||
final Document doc, final DataInfo info) {
|
final Document doc,
|
||||||
|
final DataInfo info) {
|
||||||
return new ArrayList<>(); // Not present in ODF ???
|
return new ArrayList<>(); // Not present in ODF ???
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected List<Field<String>> prepareSoftwareDocumentationUrls(
|
protected List<Field<String>> prepareSoftwareDocumentationUrls(
|
||||||
final Document doc, final DataInfo info) {
|
final Document doc,
|
||||||
return prepareListFields(
|
final DataInfo info) {
|
||||||
doc,
|
return prepareListFields(doc, "//datacite:relatedIdentifier[@relatedIdentifierType='URL' and @relationType='IsDocumentedBy']", info);
|
||||||
"//datacite:relatedIdentifier[@relatedIdentifierType='URL' and @relationType='IsDocumentedBy']",
|
|
||||||
info);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// DATASETS
|
// DATASETS
|
||||||
|
@ -264,13 +263,15 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected Field<String> prepareDatasetMetadataVersionNumber(
|
protected Field<String> prepareDatasetMetadataVersionNumber(
|
||||||
final Document doc, final DataInfo info) {
|
final Document doc,
|
||||||
|
final DataInfo info) {
|
||||||
return null; // Not present in ODF ???
|
return null; // Not present in ODF ???
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected Field<String> prepareDatasetLastMetadataUpdate(
|
protected Field<String> prepareDatasetLastMetadataUpdate(
|
||||||
final Document doc, final DataInfo info) {
|
final Document doc,
|
||||||
|
final DataInfo info) {
|
||||||
return prepareField(doc, "//datacite:date[@dateType='Updated']", info);
|
return prepareField(doc, "//datacite:date[@dateType='Updated']", info);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -315,29 +316,16 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper {
|
||||||
|
|
||||||
if (type.equalsIgnoreCase("IsSupplementTo")) {
|
if (type.equalsIgnoreCase("IsSupplementTo")) {
|
||||||
res
|
res
|
||||||
.add(
|
.add(getRelation(docId, otherId, RESULT_RESULT, SUPPLEMENT, IS_SUPPLEMENT_TO, collectedFrom, info, lastUpdateTimestamp));
|
||||||
getRelation(
|
|
||||||
docId, otherId, RESULT_RESULT, SUPPLEMENT, IS_SUPPLEMENT_TO, collectedFrom, info,
|
|
||||||
lastUpdateTimestamp));
|
|
||||||
res
|
res
|
||||||
.add(
|
.add(getRelation(otherId, docId, RESULT_RESULT, SUPPLEMENT, IS_SUPPLEMENTED_BY, collectedFrom, info, lastUpdateTimestamp));
|
||||||
getRelation(
|
|
||||||
otherId, docId, RESULT_RESULT, SUPPLEMENT, IS_SUPPLEMENTED_BY, collectedFrom, info,
|
|
||||||
lastUpdateTimestamp));
|
|
||||||
} else if (type.equals("IsPartOf")) {
|
} else if (type.equals("IsPartOf")) {
|
||||||
|
|
||||||
res
|
res
|
||||||
.add(
|
.add(getRelation(docId, otherId, RESULT_RESULT, PART, IS_PART_OF, collectedFrom, info, lastUpdateTimestamp));
|
||||||
getRelation(
|
|
||||||
docId, otherId, RESULT_RESULT, PART, IS_PART_OF, collectedFrom, info,
|
|
||||||
lastUpdateTimestamp));
|
|
||||||
res
|
res
|
||||||
.add(
|
.add(getRelation(otherId, docId, RESULT_RESULT, PART, HAS_PARTS, collectedFrom, info, lastUpdateTimestamp));
|
||||||
getRelation(
|
} else {}
|
||||||
otherId, docId, RESULT_RESULT, PART, HAS_PARTS, collectedFrom, info,
|
|
||||||
lastUpdateTimestamp));
|
|
||||||
} else {
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return res;
|
return res;
|
||||||
|
@ -345,10 +333,6 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected Qualifier prepareResourceType(final Document doc, final DataInfo info) {
|
protected Qualifier prepareResourceType(final Document doc, final DataInfo info) {
|
||||||
return prepareQualifier(
|
return prepareQualifier(doc, "//*[local-name() = 'resource']//*[local-name() = 'resourceType']", DNET_DATA_CITE_RESOURCE, DNET_DATA_CITE_RESOURCE);
|
||||||
doc,
|
|
||||||
"//*[local-name() = 'resource']//*[local-name() = 'resourceType']",
|
|
||||||
DNET_DATA_CITE_RESOURCE,
|
|
||||||
DNET_DATA_CITE_RESOURCE);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue