revised mapping: more accurate mapping for name/surname from datacite format; improved mapping of null values

This commit is contained in:
Claudio Atzori 2020-05-14 15:07:24 +02:00
parent ab37953332
commit f044d09315
4 changed files with 51 additions and 27 deletions

View File

@ -523,7 +523,9 @@ public class ProtoConverter implements Serializable {
} }
private static Context mapContext(ResultProtos.Result.Context context) { private static Context mapContext(ResultProtos.Result.Context context) {
if (context == null || StringUtils.isBlank(context.getId())) {
return null;
}
final Context entity = new Context(); final Context entity = new Context();
entity.setId(context.getId()); entity.setId(context.getId());
entity entity
@ -537,6 +539,10 @@ public class ProtoConverter implements Serializable {
} }
public static KeyValue mapKV(FieldTypeProtos.KeyValue kv) { public static KeyValue mapKV(FieldTypeProtos.KeyValue kv) {
if (kv == null || StringUtils.isBlank(kv.getKey()) & StringUtils.isBlank(kv.getValue())) {
return null;
}
final KeyValue keyValue = new KeyValue(); final KeyValue keyValue = new KeyValue();
keyValue.setKey(kv.getKey()); keyValue.setKey(kv.getKey());
keyValue.setValue(kv.getValue()); keyValue.setValue(kv.getValue());
@ -575,6 +581,10 @@ public class ProtoConverter implements Serializable {
} }
public static StructuredProperty mapStructuredProperty(FieldTypeProtos.StructuredProperty sp) { public static StructuredProperty mapStructuredProperty(FieldTypeProtos.StructuredProperty sp) {
if (sp == null | StringUtils.isBlank(sp.getValue())) {
return null;
}
final StructuredProperty structuredProperty = new StructuredProperty(); final StructuredProperty structuredProperty = new StructuredProperty();
structuredProperty.setValue(sp.getValue()); structuredProperty.setValue(sp.getValue());
structuredProperty.setQualifier(mapQualifier(sp.getQualifier())); structuredProperty.setQualifier(mapQualifier(sp.getQualifier()));
@ -611,6 +621,10 @@ public class ProtoConverter implements Serializable {
} }
public static Field<String> mapStringField(FieldTypeProtos.StringField s) { public static Field<String> mapStringField(FieldTypeProtos.StringField s) {
if (s == null || StringUtils.isBlank(s.getValue())) {
return null;
}
final Field<String> stringField = new Field<>(); final Field<String> stringField = new Field<>();
stringField.setValue(s.getValue()); stringField.setValue(s.getValue());
stringField.setDataInfo(mapDataInfo(s.getDataInfo())); stringField.setDataInfo(mapDataInfo(s.getDataInfo()));
@ -618,19 +632,16 @@ public class ProtoConverter implements Serializable {
} }
public static Field<Boolean> mapBoolField(FieldTypeProtos.BoolField b) { public static Field<Boolean> mapBoolField(FieldTypeProtos.BoolField b) {
if (b == null) {
return null;
}
final Field<Boolean> booleanField = new Field<>(); final Field<Boolean> booleanField = new Field<>();
booleanField.setValue(b.getValue()); booleanField.setValue(b.getValue());
booleanField.setDataInfo(mapDataInfo(b.getDataInfo())); booleanField.setDataInfo(mapDataInfo(b.getDataInfo()));
return booleanField; return booleanField;
} }
public static Field<Integer> mapIntField(FieldTypeProtos.IntField b) {
final Field<Integer> entity = new Field<>();
entity.setValue(b.getValue());
entity.setDataInfo(mapDataInfo(b.getDataInfo()));
return entity;
}
public static Journal mapJournal(FieldTypeProtos.Journal j) { public static Journal mapJournal(FieldTypeProtos.Journal j) {
final Journal journal = new Journal(); final Journal journal = new Journal();
journal.setConferencedate(j.getConferencedate()); journal.setConferencedate(j.getConferencedate());

View File

@ -50,8 +50,7 @@ import eu.dnetlib.dhp.schema.oaf.Result;
import eu.dnetlib.dhp.schema.oaf.Software; import eu.dnetlib.dhp.schema.oaf.Software;
import eu.dnetlib.dhp.schema.oaf.StructuredProperty; import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
public class MigrateDbEntitiesApplication extends AbstractMigrationApplication public class MigrateDbEntitiesApplication extends AbstractMigrationApplication implements Closeable {
implements Closeable {
private static final Log log = LogFactory.getLog(MigrateDbEntitiesApplication.class); private static final Log log = LogFactory.getLog(MigrateDbEntitiesApplication.class);
@ -128,9 +127,7 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication
} }
public List<Oaf> processDatasource(final ResultSet rs) { public List<Oaf> processDatasource(final ResultSet rs) {
try { try {
final DataInfo info = prepareDataInfo(rs); final DataInfo info = prepareDataInfo(rs);
final Datasource ds = new Datasource(); final Datasource ds = new Datasource();
@ -194,7 +191,6 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication
public List<Oaf> processProject(final ResultSet rs) { public List<Oaf> processProject(final ResultSet rs) {
try { try {
final DataInfo info = prepareDataInfo(rs); final DataInfo info = prepareDataInfo(rs);
final Project p = new Project(); final Project p = new Project();
@ -249,9 +245,7 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication
} }
public List<Oaf> processOrganization(final ResultSet rs) { public List<Oaf> processOrganization(final ResultSet rs) {
try { try {
final DataInfo info = prepareDataInfo(rs); final DataInfo info = prepareDataInfo(rs);
final Organization o = new Organization(); final Organization o = new Organization();
@ -370,14 +364,12 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication
final DataInfo info = dataInfo( final DataInfo info = dataInfo(
false, null, false, false, false, null, false, false,
qualifier(USER_CLAIM, USER_CLAIM, DNET_PROVENANCE_ACTIONS, DNET_PROVENANCE_ACTIONS), "0.9"); qualifier(USER_CLAIM, USER_CLAIM, DNET_PROVENANCE_ACTIONS, DNET_PROVENANCE_ACTIONS), "0.9");
final List<KeyValue> collectedFrom = listKeyValues( final List<KeyValue> collectedFrom = listKeyValues(
createOpenaireId(10, "infrastruct_::openaire", true), "OpenAIRE"); createOpenaireId(10, "infrastruct_::openaire", true), "OpenAIRE");
try { try {
if (rs.getString(SOURCE_TYPE).equals("context")) { if (rs.getString(SOURCE_TYPE).equals("context")) {
final Result r; final Result r;
@ -461,9 +453,12 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication
final Boolean inferred = rs.getBoolean("inferred"); final Boolean inferred = rs.getBoolean("inferred");
final String trust = rs.getString("trust"); final String trust = rs.getString("trust");
return dataInfo( return dataInfo(
deletedbyinference,
deletedbyinference, inferenceprovenance, inferred, false, ENTITYREGISTRY_PROVENANCE_ACTION, trust); inferenceprovenance,
inferred,
false,
ENTITYREGISTRY_PROVENANCE_ACTION,
trust);
} }
private Qualifier prepareQualifierSplitting(final String s) { private Qualifier prepareQualifierSplitting(final String s) {
@ -535,4 +530,5 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication
super.close(); super.close();
dbClient.close(); dbClient.close();
} }
} }

View File

@ -12,6 +12,7 @@ import org.apache.commons.lang3.StringUtils;
import org.dom4j.Document; import org.dom4j.Document;
import org.dom4j.Node; import org.dom4j.Node;
import eu.dnetlib.dhp.oa.graph.raw.common.PacePerson;
import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.oaf.Author; import eu.dnetlib.dhp.schema.oaf.Author;
import eu.dnetlib.dhp.schema.oaf.DataInfo; import eu.dnetlib.dhp.schema.oaf.DataInfo;
@ -44,9 +45,24 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper {
for (final Object o : doc.selectNodes("//datacite:creator")) { for (final Object o : doc.selectNodes("//datacite:creator")) {
final Node n = (Node) o; final Node n = (Node) o;
final Author author = new Author(); final Author author = new Author();
author.setFullname(n.valueOf("./datacite:creatorName")); final String fullname = n.valueOf("./datacite:creatorName");
author.setName(n.valueOf("./datacite:givenName")); author.setFullname(fullname);
author.setSurname(n.valueOf("./datacite:familyName"));
PacePerson pp = new PacePerson(fullname, false);
final String name = n.valueOf("./datacite:givenName");
if (StringUtils.isBlank(name) & pp.isAccurate()) {
author.setName(pp.getNormalisedFirstName());
} else {
author.setName(name);
}
final String surname = n.valueOf("./datacite:familyName");
if (StringUtils.isBlank(surname) & pp.isAccurate()) {
author.setSurname(pp.getNormalisedSurname());
} else {
author.setSurname(surname);
}
author.setAffiliation(prepareListFields(doc, "./datacite:affiliation", info)); author.setAffiliation(prepareListFields(doc, "./datacite:affiliation", info));
author.setPid(preparePids(doc, info)); author.setPid(preparePids(doc, info));
author.setRank(pos++); author.setRank(pos++);
@ -77,8 +93,6 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper {
final KeyValue hostedby) { final KeyValue hostedby) {
final Instance instance = new Instance(); final Instance instance = new Instance();
final Set<String> url = new HashSet<>();
instance.setUrl(new ArrayList<>());
instance instance
.setInstancetype( .setInstancetype(
prepareQualifier( prepareQualifier(
@ -97,6 +111,7 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper {
.setProcessingchargecurrency( .setProcessingchargecurrency(
field(doc.valueOf("//oaf:processingchargeamount/@currency"), info)); field(doc.valueOf("//oaf:processingchargeamount/@currency"), info));
final Set<String> url = new HashSet<>();
for (final Object o : doc.selectNodes("//datacite:alternateIdentifier[@alternateIdentifierType='URL']")) { for (final Object o : doc.selectNodes("//datacite:alternateIdentifier[@alternateIdentifierType='URL']")) {
url.add(((Node) o).getText().trim()); url.add(((Node) o).getText().trim());
} }
@ -109,7 +124,10 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper {
for (final Object o : doc.selectNodes("//datacite:identifier[@identifierType='DOI']")) { for (final Object o : doc.selectNodes("//datacite:identifier[@identifierType='DOI']")) {
url.add(HTTP_DX_DOI_PREIFX + ((Node) o).getText().trim()); url.add(HTTP_DX_DOI_PREIFX + ((Node) o).getText().trim());
} }
instance.getUrl().addAll(url); if (!url.isEmpty()) {
instance.setUrl(new ArrayList<>());
instance.getUrl().addAll(url);
}
return Arrays.asList(instance); return Arrays.asList(instance);
} }

View File

@ -1,7 +1,6 @@
package eu.dnetlib.dhp.oa.graph.raw.common; package eu.dnetlib.dhp.oa.graph.raw.common;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets; import java.nio.charset.StandardCharsets;
import java.text.Normalizer; import java.text.Normalizer;
import java.util.HashSet; import java.util.HashSet;