From f044d093156c3c29cf00a4a9b498459885ebcdd0 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Thu, 14 May 2020 15:07:24 +0200 Subject: [PATCH] revised mapping: more accurate mapping for name/surname from datacite format; improved mapping of null values --- .../migration/ProtoConverter.java | 27 ++++++++++++----- .../raw/MigrateDbEntitiesApplication.java | 20 +++++-------- .../dhp/oa/graph/raw/OdfToOafMapper.java | 30 +++++++++++++++---- .../dhp/oa/graph/raw/common/PacePerson.java | 1 - 4 files changed, 51 insertions(+), 27 deletions(-) diff --git a/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/migration/ProtoConverter.java b/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/migration/ProtoConverter.java index 90d573ac0..e55c0eb7b 100644 --- a/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/migration/ProtoConverter.java +++ b/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/migration/ProtoConverter.java @@ -523,7 +523,9 @@ public class ProtoConverter implements Serializable { } private static Context mapContext(ResultProtos.Result.Context context) { - + if (context == null || StringUtils.isBlank(context.getId())) { + return null; + } final Context entity = new Context(); entity.setId(context.getId()); entity @@ -537,6 +539,10 @@ public class ProtoConverter implements Serializable { } public static KeyValue mapKV(FieldTypeProtos.KeyValue kv) { + if (kv == null || StringUtils.isBlank(kv.getKey()) & StringUtils.isBlank(kv.getValue())) { + return null; + } + final KeyValue keyValue = new KeyValue(); keyValue.setKey(kv.getKey()); keyValue.setValue(kv.getValue()); @@ -575,6 +581,10 @@ public class ProtoConverter implements Serializable { } public static StructuredProperty mapStructuredProperty(FieldTypeProtos.StructuredProperty sp) { + if (sp == null | StringUtils.isBlank(sp.getValue())) { + return null; + } + final StructuredProperty structuredProperty = new StructuredProperty(); structuredProperty.setValue(sp.getValue()); structuredProperty.setQualifier(mapQualifier(sp.getQualifier())); @@ -611,6 +621,10 @@ public class ProtoConverter implements Serializable { } public static Field mapStringField(FieldTypeProtos.StringField s) { + if (s == null || StringUtils.isBlank(s.getValue())) { + return null; + } + final Field stringField = new Field<>(); stringField.setValue(s.getValue()); stringField.setDataInfo(mapDataInfo(s.getDataInfo())); @@ -618,19 +632,16 @@ public class ProtoConverter implements Serializable { } public static Field mapBoolField(FieldTypeProtos.BoolField b) { + if (b == null) { + return null; + } + final Field booleanField = new Field<>(); booleanField.setValue(b.getValue()); booleanField.setDataInfo(mapDataInfo(b.getDataInfo())); return booleanField; } - public static Field mapIntField(FieldTypeProtos.IntField b) { - final Field entity = new Field<>(); - entity.setValue(b.getValue()); - entity.setDataInfo(mapDataInfo(b.getDataInfo())); - return entity; - } - public static Journal mapJournal(FieldTypeProtos.Journal j) { final Journal journal = new Journal(); journal.setConferencedate(j.getConferencedate()); diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplication.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplication.java index e5e348642..ebe2b703b 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplication.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplication.java @@ -50,8 +50,7 @@ import eu.dnetlib.dhp.schema.oaf.Result; import eu.dnetlib.dhp.schema.oaf.Software; import eu.dnetlib.dhp.schema.oaf.StructuredProperty; -public class MigrateDbEntitiesApplication extends AbstractMigrationApplication - implements Closeable { +public class MigrateDbEntitiesApplication extends AbstractMigrationApplication implements Closeable { private static final Log log = LogFactory.getLog(MigrateDbEntitiesApplication.class); @@ -128,9 +127,7 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication } public List processDatasource(final ResultSet rs) { - try { - final DataInfo info = prepareDataInfo(rs); final Datasource ds = new Datasource(); @@ -194,7 +191,6 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication public List processProject(final ResultSet rs) { try { - final DataInfo info = prepareDataInfo(rs); final Project p = new Project(); @@ -249,9 +245,7 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication } public List processOrganization(final ResultSet rs) { - try { - final DataInfo info = prepareDataInfo(rs); final Organization o = new Organization(); @@ -370,14 +364,12 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication final DataInfo info = dataInfo( false, null, false, false, - qualifier(USER_CLAIM, USER_CLAIM, DNET_PROVENANCE_ACTIONS, DNET_PROVENANCE_ACTIONS), "0.9"); final List collectedFrom = listKeyValues( createOpenaireId(10, "infrastruct_::openaire", true), "OpenAIRE"); try { - if (rs.getString(SOURCE_TYPE).equals("context")) { final Result r; @@ -461,9 +453,12 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication final Boolean inferred = rs.getBoolean("inferred"); final String trust = rs.getString("trust"); return dataInfo( - - deletedbyinference, inferenceprovenance, inferred, false, ENTITYREGISTRY_PROVENANCE_ACTION, trust); - + deletedbyinference, + inferenceprovenance, + inferred, + false, + ENTITYREGISTRY_PROVENANCE_ACTION, + trust); } private Qualifier prepareQualifierSplitting(final String s) { @@ -535,4 +530,5 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication super.close(); dbClient.close(); } + } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java index 04984d008..5baac12fd 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java @@ -12,6 +12,7 @@ import org.apache.commons.lang3.StringUtils; import org.dom4j.Document; import org.dom4j.Node; +import eu.dnetlib.dhp.oa.graph.raw.common.PacePerson; import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.oaf.Author; import eu.dnetlib.dhp.schema.oaf.DataInfo; @@ -44,9 +45,24 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper { for (final Object o : doc.selectNodes("//datacite:creator")) { final Node n = (Node) o; final Author author = new Author(); - author.setFullname(n.valueOf("./datacite:creatorName")); - author.setName(n.valueOf("./datacite:givenName")); - author.setSurname(n.valueOf("./datacite:familyName")); + final String fullname = n.valueOf("./datacite:creatorName"); + author.setFullname(fullname); + + PacePerson pp = new PacePerson(fullname, false); + final String name = n.valueOf("./datacite:givenName"); + if (StringUtils.isBlank(name) & pp.isAccurate()) { + author.setName(pp.getNormalisedFirstName()); + } else { + author.setName(name); + } + + final String surname = n.valueOf("./datacite:familyName"); + if (StringUtils.isBlank(surname) & pp.isAccurate()) { + author.setSurname(pp.getNormalisedSurname()); + } else { + author.setSurname(surname); + } + author.setAffiliation(prepareListFields(doc, "./datacite:affiliation", info)); author.setPid(preparePids(doc, info)); author.setRank(pos++); @@ -77,8 +93,6 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper { final KeyValue hostedby) { final Instance instance = new Instance(); - final Set url = new HashSet<>(); - instance.setUrl(new ArrayList<>()); instance .setInstancetype( prepareQualifier( @@ -97,6 +111,7 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper { .setProcessingchargecurrency( field(doc.valueOf("//oaf:processingchargeamount/@currency"), info)); + final Set url = new HashSet<>(); for (final Object o : doc.selectNodes("//datacite:alternateIdentifier[@alternateIdentifierType='URL']")) { url.add(((Node) o).getText().trim()); } @@ -109,7 +124,10 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper { for (final Object o : doc.selectNodes("//datacite:identifier[@identifierType='DOI']")) { url.add(HTTP_DX_DOI_PREIFX + ((Node) o).getText().trim()); } - instance.getUrl().addAll(url); + if (!url.isEmpty()) { + instance.setUrl(new ArrayList<>()); + instance.getUrl().addAll(url); + } return Arrays.asList(instance); } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common/PacePerson.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common/PacePerson.java index d1c615dcd..6e474f2f3 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common/PacePerson.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common/PacePerson.java @@ -1,7 +1,6 @@ package eu.dnetlib.dhp.oa.graph.raw.common; -import java.nio.charset.Charset; import java.nio.charset.StandardCharsets; import java.text.Normalizer; import java.util.HashSet;