author fullnames

This commit is contained in:
Michele Artini 2020-05-22 10:08:02 +02:00
parent c7ca3cf35b
commit c5f7e17348
1 changed files with 55 additions and 71 deletions

View File

@ -4,16 +4,32 @@ package eu.dnetlib.dhp.oa.graph.raw;
import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.createOpenaireId; import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.createOpenaireId;
import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.field; import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.field;
import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.structuredProperty; import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.structuredProperty;
import static eu.dnetlib.dhp.schema.common.ModelConstants.*; import static eu.dnetlib.dhp.schema.common.ModelConstants.DNET_ACCESS_MODES;
import static eu.dnetlib.dhp.schema.common.ModelConstants.DNET_DATA_CITE_DATE;
import static eu.dnetlib.dhp.schema.common.ModelConstants.DNET_DATA_CITE_RESOURCE;
import static eu.dnetlib.dhp.schema.common.ModelConstants.DNET_LANGUAGES;
import static eu.dnetlib.dhp.schema.common.ModelConstants.DNET_PID_TYPES;
import static eu.dnetlib.dhp.schema.common.ModelConstants.DNET_PUBLICATION_RESOURCE;
import static eu.dnetlib.dhp.schema.common.ModelConstants.HAS_PARTS;
import static eu.dnetlib.dhp.schema.common.ModelConstants.IS_PART_OF;
import static eu.dnetlib.dhp.schema.common.ModelConstants.IS_SUPPLEMENTED_BY;
import static eu.dnetlib.dhp.schema.common.ModelConstants.IS_SUPPLEMENT_TO;
import static eu.dnetlib.dhp.schema.common.ModelConstants.PART;
import static eu.dnetlib.dhp.schema.common.ModelConstants.RESULT_RESULT;
import static eu.dnetlib.dhp.schema.common.ModelConstants.SUPPLEMENT;
import java.util.*; import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.StringUtils;
import org.dom4j.Document; import org.dom4j.Document;
import org.dom4j.Node; import org.dom4j.Node;
import eu.dnetlib.dhp.oa.graph.raw.common.PacePerson; import eu.dnetlib.dhp.oa.graph.raw.common.PacePerson;
import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.oaf.Author; import eu.dnetlib.dhp.schema.oaf.Author;
import eu.dnetlib.dhp.schema.oaf.DataInfo; import eu.dnetlib.dhp.schema.oaf.DataInfo;
import eu.dnetlib.dhp.schema.oaf.Field; import eu.dnetlib.dhp.schema.oaf.Field;
@ -22,7 +38,6 @@ import eu.dnetlib.dhp.schema.oaf.Instance;
import eu.dnetlib.dhp.schema.oaf.KeyValue; import eu.dnetlib.dhp.schema.oaf.KeyValue;
import eu.dnetlib.dhp.schema.oaf.Oaf; import eu.dnetlib.dhp.schema.oaf.Oaf;
import eu.dnetlib.dhp.schema.oaf.Qualifier; import eu.dnetlib.dhp.schema.oaf.Qualifier;
import eu.dnetlib.dhp.schema.oaf.Relation;
import eu.dnetlib.dhp.schema.oaf.StructuredProperty; import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
public class OdfToOafMapper extends AbstractMdRecordToOafMapper { public class OdfToOafMapper extends AbstractMdRecordToOafMapper {
@ -48,7 +63,7 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper {
final String fullname = n.valueOf("./datacite:creatorName"); final String fullname = n.valueOf("./datacite:creatorName");
author.setFullname(fullname); author.setFullname(fullname);
PacePerson pp = new PacePerson(fullname, false); final PacePerson pp = new PacePerson(fullname, false);
final String name = n.valueOf("./datacite:givenName"); final String name = n.valueOf("./datacite:givenName");
if (StringUtils.isBlank(name) & pp.isAccurate()) { if (StringUtils.isBlank(name) & pp.isAccurate()) {
author.setName(pp.getNormalisedFirstName()); author.setName(pp.getNormalisedFirstName());
@ -63,6 +78,10 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper {
author.setSurname(surname); author.setSurname(surname);
} }
if (StringUtils.isBlank(author.getFullname())) {
author.setFullname(String.format("%s, %s", author.getSurname(), author.getName()));
}
author.setAffiliation(prepareListFields(n, "./datacite:affiliation", info)); author.setAffiliation(prepareListFields(n, "./datacite:affiliation", info));
author.setPid(preparePids(n, info)); author.setPid(preparePids(n, info));
author.setRank(pos++); author.setRank(pos++);
@ -75,12 +94,7 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper {
final List<StructuredProperty> res = new ArrayList<>(); final List<StructuredProperty> res = new ArrayList<>();
for (final Object o : n.selectNodes("./datacite:nameIdentifier")) { for (final Object o : n.selectNodes("./datacite:nameIdentifier")) {
res res
.add( .add(structuredProperty(((Node) o).getText(), prepareQualifier((Node) o, "./@nameIdentifierScheme", DNET_PID_TYPES, DNET_PID_TYPES), info));
structuredProperty(
((Node) o).getText(),
prepareQualifier(
(Node) o, "./@nameIdentifierScheme", DNET_PID_TYPES, DNET_PID_TYPES),
info));
} }
return res; return res;
} }
@ -94,22 +108,18 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper {
final Instance instance = new Instance(); final Instance instance = new Instance();
instance instance
.setInstancetype( .setInstancetype(prepareQualifier(doc, "//dr:CobjCategory", DNET_PUBLICATION_RESOURCE, DNET_PUBLICATION_RESOURCE));
prepareQualifier(
doc, "//dr:CobjCategory", DNET_PUBLICATION_RESOURCE, DNET_PUBLICATION_RESOURCE));
instance.setCollectedfrom(collectedfrom); instance.setCollectedfrom(collectedfrom);
instance.setHostedby(hostedby); instance.setHostedby(hostedby);
instance.setDateofacceptance(field(doc.valueOf("//oaf:dateAccepted"), info)); instance.setDateofacceptance(field(doc.valueOf("//oaf:dateAccepted"), info));
instance.setDistributionlocation(doc.valueOf("//oaf:distributionlocation")); instance.setDistributionlocation(doc.valueOf("//oaf:distributionlocation"));
instance instance
.setAccessright( .setAccessright(prepareQualifier(doc, "//oaf:accessrights", DNET_ACCESS_MODES, DNET_ACCESS_MODES));
prepareQualifier(doc, "//oaf:accessrights", DNET_ACCESS_MODES, DNET_ACCESS_MODES));
instance.setLicense(field(doc.valueOf("//oaf:license"), info)); instance.setLicense(field(doc.valueOf("//oaf:license"), info));
instance.setRefereed(field(doc.valueOf("//oaf:refereed"), info)); instance.setRefereed(field(doc.valueOf("//oaf:refereed"), info));
instance.setProcessingchargeamount(field(doc.valueOf("//oaf:processingchargeamount"), info)); instance.setProcessingchargeamount(field(doc.valueOf("//oaf:processingchargeamount"), info));
instance instance
.setProcessingchargecurrency( .setProcessingchargecurrency(field(doc.valueOf("//oaf:processingchargeamount/@currency"), info));
field(doc.valueOf("//oaf:processingchargeamount/@currency"), info));
final Set<String> url = new HashSet<>(); final Set<String> url = new HashSet<>();
for (final Object o : doc.selectNodes("//datacite:alternateIdentifier[@alternateIdentifierType='URL']")) { for (final Object o : doc.selectNodes("//datacite:alternateIdentifier[@alternateIdentifierType='URL']")) {
@ -147,14 +157,7 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper {
&& !dateType.equalsIgnoreCase("Updated") && !dateType.equalsIgnoreCase("Updated")
&& !dateType.equalsIgnoreCase("Available")) { && !dateType.equalsIgnoreCase("Available")) {
res res
.add( .add(structuredProperty(((Node) o).getText(), "UNKNOWN", "UNKNOWN", DNET_DATA_CITE_DATE, DNET_DATA_CITE_DATE, info));
structuredProperty(
((Node) o).getText(),
"UNKNOWN",
"UNKNOWN",
DNET_DATA_CITE_DATE,
DNET_DATA_CITE_DATE,
info));
} }
} }
return res; return res;
@ -197,53 +200,49 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper {
@Override @Override
protected List<Field<String>> prepareOtherResearchProductTools( protected List<Field<String>> prepareOtherResearchProductTools(
final Document doc, final DataInfo info) { final Document doc,
final DataInfo info) {
return new ArrayList<>(); // Not present in ODF ??? return new ArrayList<>(); // Not present in ODF ???
} }
@Override @Override
protected List<Field<String>> prepareOtherResearchProductContactGroups( protected List<Field<String>> prepareOtherResearchProductContactGroups(
final Document doc, final DataInfo info) { final Document doc,
return prepareListFields( final DataInfo info) {
doc, return prepareListFields(doc, "//datacite:contributor[@contributorType='ContactGroup']/datacite:contributorName", info);
"//datacite:contributor[@contributorType='ContactGroup']/datacite:contributorName",
info);
} }
@Override @Override
protected List<Field<String>> prepareOtherResearchProductContactPersons( protected List<Field<String>> prepareOtherResearchProductContactPersons(
final Document doc, final DataInfo info) { final Document doc,
return prepareListFields( final DataInfo info) {
doc, return prepareListFields(doc, "//datacite:contributor[@contributorType='ContactPerson']/datacite:contributorName", info);
"//datacite:contributor[@contributorType='ContactPerson']/datacite:contributorName",
info);
} }
@Override @Override
protected Qualifier prepareSoftwareProgrammingLanguage(final Document doc, final DataInfo info) { protected Qualifier prepareSoftwareProgrammingLanguage(final Document doc, final DataInfo info) {
return prepareQualifier( return prepareQualifier(doc, "//datacite:format", "dnet:programming_languages", "dnet:programming_languages");
doc, "//datacite:format", "dnet:programming_languages", "dnet:programming_languages");
} }
@Override @Override
protected Field<String> prepareSoftwareCodeRepositoryUrl( protected Field<String> prepareSoftwareCodeRepositoryUrl(
final Document doc, final DataInfo info) { final Document doc,
final DataInfo info) {
return null; // Not present in ODF ??? return null; // Not present in ODF ???
} }
@Override @Override
protected List<StructuredProperty> prepareSoftwareLicenses( protected List<StructuredProperty> prepareSoftwareLicenses(
final Document doc, final DataInfo info) { final Document doc,
final DataInfo info) {
return new ArrayList<>(); // Not present in ODF ??? return new ArrayList<>(); // Not present in ODF ???
} }
@Override @Override
protected List<Field<String>> prepareSoftwareDocumentationUrls( protected List<Field<String>> prepareSoftwareDocumentationUrls(
final Document doc, final DataInfo info) { final Document doc,
return prepareListFields( final DataInfo info) {
doc, return prepareListFields(doc, "//datacite:relatedIdentifier[@relatedIdentifierType='URL' and @relationType='IsDocumentedBy']", info);
"//datacite:relatedIdentifier[@relatedIdentifierType='URL' and @relationType='IsDocumentedBy']",
info);
} }
// DATASETS // DATASETS
@ -264,13 +263,15 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper {
@Override @Override
protected Field<String> prepareDatasetMetadataVersionNumber( protected Field<String> prepareDatasetMetadataVersionNumber(
final Document doc, final DataInfo info) { final Document doc,
final DataInfo info) {
return null; // Not present in ODF ??? return null; // Not present in ODF ???
} }
@Override @Override
protected Field<String> prepareDatasetLastMetadataUpdate( protected Field<String> prepareDatasetLastMetadataUpdate(
final Document doc, final DataInfo info) { final Document doc,
final DataInfo info) {
return prepareField(doc, "//datacite:date[@dateType='Updated']", info); return prepareField(doc, "//datacite:date[@dateType='Updated']", info);
} }
@ -315,29 +316,16 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper {
if (type.equalsIgnoreCase("IsSupplementTo")) { if (type.equalsIgnoreCase("IsSupplementTo")) {
res res
.add( .add(getRelation(docId, otherId, RESULT_RESULT, SUPPLEMENT, IS_SUPPLEMENT_TO, collectedFrom, info, lastUpdateTimestamp));
getRelation(
docId, otherId, RESULT_RESULT, SUPPLEMENT, IS_SUPPLEMENT_TO, collectedFrom, info,
lastUpdateTimestamp));
res res
.add( .add(getRelation(otherId, docId, RESULT_RESULT, SUPPLEMENT, IS_SUPPLEMENTED_BY, collectedFrom, info, lastUpdateTimestamp));
getRelation(
otherId, docId, RESULT_RESULT, SUPPLEMENT, IS_SUPPLEMENTED_BY, collectedFrom, info,
lastUpdateTimestamp));
} else if (type.equals("IsPartOf")) { } else if (type.equals("IsPartOf")) {
res res
.add( .add(getRelation(docId, otherId, RESULT_RESULT, PART, IS_PART_OF, collectedFrom, info, lastUpdateTimestamp));
getRelation(
docId, otherId, RESULT_RESULT, PART, IS_PART_OF, collectedFrom, info,
lastUpdateTimestamp));
res res
.add( .add(getRelation(otherId, docId, RESULT_RESULT, PART, HAS_PARTS, collectedFrom, info, lastUpdateTimestamp));
getRelation( } else {}
otherId, docId, RESULT_RESULT, PART, HAS_PARTS, collectedFrom, info,
lastUpdateTimestamp));
} else {
}
} }
} }
return res; return res;
@ -345,10 +333,6 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper {
@Override @Override
protected Qualifier prepareResourceType(final Document doc, final DataInfo info) { protected Qualifier prepareResourceType(final Document doc, final DataInfo info) {
return prepareQualifier( return prepareQualifier(doc, "//*[local-name() = 'resource']//*[local-name() = 'resourceType']", DNET_DATA_CITE_RESOURCE, DNET_DATA_CITE_RESOURCE);
doc,
"//*[local-name() = 'resource']//*[local-name() = 'resourceType']",
DNET_DATA_CITE_RESOURCE,
DNET_DATA_CITE_RESOURCE);
} }
} }