revised mapping: more accurate mapping for name/surname from datacite format; improved mapping of null values

This commit is contained in:
Claudio Atzori 2020-05-14 15:07:24 +02:00
parent ab37953332
commit f044d09315
4 changed files with 51 additions and 27 deletions

View File

@ -523,7 +523,9 @@ public class ProtoConverter implements Serializable {
}
private static Context mapContext(ResultProtos.Result.Context context) {
if (context == null || StringUtils.isBlank(context.getId())) {
return null;
}
final Context entity = new Context();
entity.setId(context.getId());
entity
@ -537,6 +539,10 @@ public class ProtoConverter implements Serializable {
}
public static KeyValue mapKV(FieldTypeProtos.KeyValue kv) {
if (kv == null || StringUtils.isBlank(kv.getKey()) & StringUtils.isBlank(kv.getValue())) {
return null;
}
final KeyValue keyValue = new KeyValue();
keyValue.setKey(kv.getKey());
keyValue.setValue(kv.getValue());
@ -575,6 +581,10 @@ public class ProtoConverter implements Serializable {
}
public static StructuredProperty mapStructuredProperty(FieldTypeProtos.StructuredProperty sp) {
if (sp == null | StringUtils.isBlank(sp.getValue())) {
return null;
}
final StructuredProperty structuredProperty = new StructuredProperty();
structuredProperty.setValue(sp.getValue());
structuredProperty.setQualifier(mapQualifier(sp.getQualifier()));
@ -611,6 +621,10 @@ public class ProtoConverter implements Serializable {
}
public static Field<String> mapStringField(FieldTypeProtos.StringField s) {
if (s == null || StringUtils.isBlank(s.getValue())) {
return null;
}
final Field<String> stringField = new Field<>();
stringField.setValue(s.getValue());
stringField.setDataInfo(mapDataInfo(s.getDataInfo()));
@ -618,19 +632,16 @@ public class ProtoConverter implements Serializable {
}
public static Field<Boolean> mapBoolField(FieldTypeProtos.BoolField b) {
if (b == null) {
return null;
}
final Field<Boolean> booleanField = new Field<>();
booleanField.setValue(b.getValue());
booleanField.setDataInfo(mapDataInfo(b.getDataInfo()));
return booleanField;
}
public static Field<Integer> mapIntField(FieldTypeProtos.IntField b) {
final Field<Integer> entity = new Field<>();
entity.setValue(b.getValue());
entity.setDataInfo(mapDataInfo(b.getDataInfo()));
return entity;
}
public static Journal mapJournal(FieldTypeProtos.Journal j) {
final Journal journal = new Journal();
journal.setConferencedate(j.getConferencedate());

View File

@ -50,8 +50,7 @@ import eu.dnetlib.dhp.schema.oaf.Result;
import eu.dnetlib.dhp.schema.oaf.Software;
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
public class MigrateDbEntitiesApplication extends AbstractMigrationApplication
implements Closeable {
public class MigrateDbEntitiesApplication extends AbstractMigrationApplication implements Closeable {
private static final Log log = LogFactory.getLog(MigrateDbEntitiesApplication.class);
@ -128,9 +127,7 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication
}
public List<Oaf> processDatasource(final ResultSet rs) {
try {
final DataInfo info = prepareDataInfo(rs);
final Datasource ds = new Datasource();
@ -194,7 +191,6 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication
public List<Oaf> processProject(final ResultSet rs) {
try {
final DataInfo info = prepareDataInfo(rs);
final Project p = new Project();
@ -249,9 +245,7 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication
}
public List<Oaf> processOrganization(final ResultSet rs) {
try {
final DataInfo info = prepareDataInfo(rs);
final Organization o = new Organization();
@ -370,14 +364,12 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication
final DataInfo info = dataInfo(
false, null, false, false,
qualifier(USER_CLAIM, USER_CLAIM, DNET_PROVENANCE_ACTIONS, DNET_PROVENANCE_ACTIONS), "0.9");
final List<KeyValue> collectedFrom = listKeyValues(
createOpenaireId(10, "infrastruct_::openaire", true), "OpenAIRE");
try {
if (rs.getString(SOURCE_TYPE).equals("context")) {
final Result r;
@ -461,9 +453,12 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication
final Boolean inferred = rs.getBoolean("inferred");
final String trust = rs.getString("trust");
return dataInfo(
deletedbyinference, inferenceprovenance, inferred, false, ENTITYREGISTRY_PROVENANCE_ACTION, trust);
deletedbyinference,
inferenceprovenance,
inferred,
false,
ENTITYREGISTRY_PROVENANCE_ACTION,
trust);
}
private Qualifier prepareQualifierSplitting(final String s) {
@ -535,4 +530,5 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication
super.close();
dbClient.close();
}
}

View File

@ -12,6 +12,7 @@ import org.apache.commons.lang3.StringUtils;
import org.dom4j.Document;
import org.dom4j.Node;
import eu.dnetlib.dhp.oa.graph.raw.common.PacePerson;
import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.oaf.Author;
import eu.dnetlib.dhp.schema.oaf.DataInfo;
@ -44,9 +45,24 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper {
for (final Object o : doc.selectNodes("//datacite:creator")) {
final Node n = (Node) o;
final Author author = new Author();
author.setFullname(n.valueOf("./datacite:creatorName"));
author.setName(n.valueOf("./datacite:givenName"));
author.setSurname(n.valueOf("./datacite:familyName"));
final String fullname = n.valueOf("./datacite:creatorName");
author.setFullname(fullname);
PacePerson pp = new PacePerson(fullname, false);
final String name = n.valueOf("./datacite:givenName");
if (StringUtils.isBlank(name) & pp.isAccurate()) {
author.setName(pp.getNormalisedFirstName());
} else {
author.setName(name);
}
final String surname = n.valueOf("./datacite:familyName");
if (StringUtils.isBlank(surname) & pp.isAccurate()) {
author.setSurname(pp.getNormalisedSurname());
} else {
author.setSurname(surname);
}
author.setAffiliation(prepareListFields(doc, "./datacite:affiliation", info));
author.setPid(preparePids(doc, info));
author.setRank(pos++);
@ -77,8 +93,6 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper {
final KeyValue hostedby) {
final Instance instance = new Instance();
final Set<String> url = new HashSet<>();
instance.setUrl(new ArrayList<>());
instance
.setInstancetype(
prepareQualifier(
@ -97,6 +111,7 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper {
.setProcessingchargecurrency(
field(doc.valueOf("//oaf:processingchargeamount/@currency"), info));
final Set<String> url = new HashSet<>();
for (final Object o : doc.selectNodes("//datacite:alternateIdentifier[@alternateIdentifierType='URL']")) {
url.add(((Node) o).getText().trim());
}
@ -109,7 +124,10 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper {
for (final Object o : doc.selectNodes("//datacite:identifier[@identifierType='DOI']")) {
url.add(HTTP_DX_DOI_PREIFX + ((Node) o).getText().trim());
}
instance.getUrl().addAll(url);
if (!url.isEmpty()) {
instance.setUrl(new ArrayList<>());
instance.getUrl().addAll(url);
}
return Arrays.asList(instance);
}

View File

@ -1,7 +1,6 @@
package eu.dnetlib.dhp.oa.graph.raw.common;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.text.Normalizer;
import java.util.HashSet;