forked from antonis.lempesis/dnet-hadoop
revised mapping: more accurate mapping for name/surname from datacite format; improved mapping of null values
This commit is contained in:
parent
ab37953332
commit
f044d09315
|
@ -523,7 +523,9 @@ public class ProtoConverter implements Serializable {
|
|||
}
|
||||
|
||||
private static Context mapContext(ResultProtos.Result.Context context) {
|
||||
|
||||
if (context == null || StringUtils.isBlank(context.getId())) {
|
||||
return null;
|
||||
}
|
||||
final Context entity = new Context();
|
||||
entity.setId(context.getId());
|
||||
entity
|
||||
|
@ -537,6 +539,10 @@ public class ProtoConverter implements Serializable {
|
|||
}
|
||||
|
||||
public static KeyValue mapKV(FieldTypeProtos.KeyValue kv) {
|
||||
// A KeyValue is dropped only when it is null or when BOTH key and value are blank.
// Short-circuit && skips the value check once the key is known to be non-blank.
if (kv == null || (StringUtils.isBlank(kv.getKey()) && StringUtils.isBlank(kv.getValue()))) {
|
||||
return null;
|
||||
}
|
||||
|
||||
final KeyValue keyValue = new KeyValue();
|
||||
keyValue.setKey(kv.getKey());
|
||||
keyValue.setValue(kv.getValue());
|
||||
|
@ -575,6 +581,10 @@ public class ProtoConverter implements Serializable {
|
|||
}
|
||||
|
||||
public static StructuredProperty mapStructuredProperty(FieldTypeProtos.StructuredProperty sp) {
|
||||
// Short-circuit || is required here: the non-short-circuit | operator always evaluates
// its right operand, so sp.getValue() would be invoked on a null sp and throw a
// NullPointerException instead of reaching the 'return null' below.
if (sp == null || StringUtils.isBlank(sp.getValue())) {
|
||||
return null;
|
||||
}
|
||||
|
||||
final StructuredProperty structuredProperty = new StructuredProperty();
|
||||
structuredProperty.setValue(sp.getValue());
|
||||
structuredProperty.setQualifier(mapQualifier(sp.getQualifier()));
|
||||
|
@ -611,6 +621,10 @@ public class ProtoConverter implements Serializable {
|
|||
}
|
||||
|
||||
public static Field<String> mapStringField(FieldTypeProtos.StringField s) {
|
||||
if (s == null || StringUtils.isBlank(s.getValue())) {
|
||||
return null;
|
||||
}
|
||||
|
||||
final Field<String> stringField = new Field<>();
|
||||
stringField.setValue(s.getValue());
|
||||
stringField.setDataInfo(mapDataInfo(s.getDataInfo()));
|
||||
|
@ -618,19 +632,16 @@ public class ProtoConverter implements Serializable {
|
|||
}
|
||||
|
||||
public static Field<Boolean> mapBoolField(FieldTypeProtos.BoolField b) {
|
||||
if (b == null) {
|
||||
return null;
|
||||
}
|
||||
|
||||
final Field<Boolean> booleanField = new Field<>();
|
||||
booleanField.setValue(b.getValue());
|
||||
booleanField.setDataInfo(mapDataInfo(b.getDataInfo()));
|
||||
return booleanField;
|
||||
}
|
||||
|
||||
public static Field<Integer> mapIntField(FieldTypeProtos.IntField b) {
|
||||
final Field<Integer> entity = new Field<>();
|
||||
entity.setValue(b.getValue());
|
||||
entity.setDataInfo(mapDataInfo(b.getDataInfo()));
|
||||
return entity;
|
||||
}
|
||||
|
||||
public static Journal mapJournal(FieldTypeProtos.Journal j) {
|
||||
final Journal journal = new Journal();
|
||||
journal.setConferencedate(j.getConferencedate());
|
||||
|
|
|
@ -50,8 +50,7 @@ import eu.dnetlib.dhp.schema.oaf.Result;
|
|||
import eu.dnetlib.dhp.schema.oaf.Software;
|
||||
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
|
||||
|
||||
public class MigrateDbEntitiesApplication extends AbstractMigrationApplication
|
||||
implements Closeable {
|
||||
public class MigrateDbEntitiesApplication extends AbstractMigrationApplication implements Closeable {
|
||||
|
||||
private static final Log log = LogFactory.getLog(MigrateDbEntitiesApplication.class);
|
||||
|
||||
|
@ -128,9 +127,7 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication
|
|||
}
|
||||
|
||||
public List<Oaf> processDatasource(final ResultSet rs) {
|
||||
|
||||
try {
|
||||
|
||||
final DataInfo info = prepareDataInfo(rs);
|
||||
|
||||
final Datasource ds = new Datasource();
|
||||
|
@ -194,7 +191,6 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication
|
|||
|
||||
public List<Oaf> processProject(final ResultSet rs) {
|
||||
try {
|
||||
|
||||
final DataInfo info = prepareDataInfo(rs);
|
||||
|
||||
final Project p = new Project();
|
||||
|
@ -249,9 +245,7 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication
|
|||
}
|
||||
|
||||
public List<Oaf> processOrganization(final ResultSet rs) {
|
||||
|
||||
try {
|
||||
|
||||
final DataInfo info = prepareDataInfo(rs);
|
||||
|
||||
final Organization o = new Organization();
|
||||
|
@ -370,14 +364,12 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication
|
|||
|
||||
final DataInfo info = dataInfo(
|
||||
false, null, false, false,
|
||||
|
||||
qualifier(USER_CLAIM, USER_CLAIM, DNET_PROVENANCE_ACTIONS, DNET_PROVENANCE_ACTIONS), "0.9");
|
||||
|
||||
final List<KeyValue> collectedFrom = listKeyValues(
|
||||
createOpenaireId(10, "infrastruct_::openaire", true), "OpenAIRE");
|
||||
|
||||
try {
|
||||
|
||||
if (rs.getString(SOURCE_TYPE).equals("context")) {
|
||||
final Result r;
|
||||
|
||||
|
@ -461,9 +453,12 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication
|
|||
final Boolean inferred = rs.getBoolean("inferred");
|
||||
final String trust = rs.getString("trust");
|
||||
return dataInfo(
|
||||
|
||||
deletedbyinference, inferenceprovenance, inferred, false, ENTITYREGISTRY_PROVENANCE_ACTION, trust);
|
||||
|
||||
deletedbyinference,
|
||||
inferenceprovenance,
|
||||
inferred,
|
||||
false,
|
||||
ENTITYREGISTRY_PROVENANCE_ACTION,
|
||||
trust);
|
||||
}
|
||||
|
||||
private Qualifier prepareQualifierSplitting(final String s) {
|
||||
|
@ -535,4 +530,5 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication
|
|||
super.close();
|
||||
dbClient.close();
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -12,6 +12,7 @@ import org.apache.commons.lang3.StringUtils;
|
|||
import org.dom4j.Document;
|
||||
import org.dom4j.Node;
|
||||
|
||||
import eu.dnetlib.dhp.oa.graph.raw.common.PacePerson;
|
||||
import eu.dnetlib.dhp.schema.common.ModelConstants;
|
||||
import eu.dnetlib.dhp.schema.oaf.Author;
|
||||
import eu.dnetlib.dhp.schema.oaf.DataInfo;
|
||||
|
@ -44,9 +45,24 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper {
|
|||
for (final Object o : doc.selectNodes("//datacite:creator")) {
|
||||
final Node n = (Node) o;
|
||||
final Author author = new Author();
|
||||
author.setFullname(n.valueOf("./datacite:creatorName"));
|
||||
author.setName(n.valueOf("./datacite:givenName"));
|
||||
author.setSurname(n.valueOf("./datacite:familyName"));
|
||||
final String fullname = n.valueOf("./datacite:creatorName");
|
||||
author.setFullname(fullname);
|
||||
|
||||
PacePerson pp = new PacePerson(fullname, false);
|
||||
final String name = n.valueOf("./datacite:givenName");
|
||||
// Use the first name derived from the full name only when no givenName is present and
// PacePerson reports the parse as accurate. Short-circuit && is the idiomatic logical AND.
if (StringUtils.isBlank(name) && pp.isAccurate()) {
|
||||
author.setName(pp.getNormalisedFirstName());
|
||||
} else {
|
||||
author.setName(name);
|
||||
}
|
||||
|
||||
final String surname = n.valueOf("./datacite:familyName");
|
||||
// Use the surname derived from the full name only when no familyName is present and
// PacePerson reports the parse as accurate. Short-circuit && is the idiomatic logical AND.
if (StringUtils.isBlank(surname) && pp.isAccurate()) {
|
||||
author.setSurname(pp.getNormalisedSurname());
|
||||
} else {
|
||||
author.setSurname(surname);
|
||||
}
|
||||
|
||||
author.setAffiliation(prepareListFields(doc, "./datacite:affiliation", info));
|
||||
author.setPid(preparePids(doc, info));
|
||||
author.setRank(pos++);
|
||||
|
@ -77,8 +93,6 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper {
|
|||
final KeyValue hostedby) {
|
||||
|
||||
final Instance instance = new Instance();
|
||||
final Set<String> url = new HashSet<>();
|
||||
instance.setUrl(new ArrayList<>());
|
||||
instance
|
||||
.setInstancetype(
|
||||
prepareQualifier(
|
||||
|
@ -97,6 +111,7 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper {
|
|||
.setProcessingchargecurrency(
|
||||
field(doc.valueOf("//oaf:processingchargeamount/@currency"), info));
|
||||
|
||||
final Set<String> url = new HashSet<>();
|
||||
for (final Object o : doc.selectNodes("//datacite:alternateIdentifier[@alternateIdentifierType='URL']")) {
|
||||
url.add(((Node) o).getText().trim());
|
||||
}
|
||||
|
@ -109,7 +124,10 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper {
|
|||
for (final Object o : doc.selectNodes("//datacite:identifier[@identifierType='DOI']")) {
|
||||
url.add(HTTP_DX_DOI_PREIFX + ((Node) o).getText().trim());
|
||||
}
|
||||
instance.getUrl().addAll(url);
|
||||
if (!url.isEmpty()) {
|
||||
instance.setUrl(new ArrayList<>());
|
||||
instance.getUrl().addAll(url);
|
||||
}
|
||||
return Arrays.asList(instance);
|
||||
}
|
||||
|
||||
|
|
|
@ -1,7 +1,6 @@
|
|||
|
||||
package eu.dnetlib.dhp.oa.graph.raw.common;
|
||||
|
||||
import java.nio.charset.Charset;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.text.Normalizer;
|
||||
import java.util.HashSet;
|
||||
|
|
Loading…
Reference in New Issue