forked from antonis.lempesis/dnet-hadoop
revised mapping: more accurate mapping for name/surname from datacite format; improved mapping of null values
This commit is contained in:
parent
ab37953332
commit
f044d09315
|
@ -523,7 +523,9 @@ public class ProtoConverter implements Serializable {
|
||||||
}
|
}
|
||||||
|
|
||||||
private static Context mapContext(ResultProtos.Result.Context context) {
|
private static Context mapContext(ResultProtos.Result.Context context) {
|
||||||
|
if (context == null || StringUtils.isBlank(context.getId())) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
final Context entity = new Context();
|
final Context entity = new Context();
|
||||||
entity.setId(context.getId());
|
entity.setId(context.getId());
|
||||||
entity
|
entity
|
||||||
|
@ -537,6 +539,10 @@ public class ProtoConverter implements Serializable {
|
||||||
}
|
}
|
||||||
|
|
||||||
public static KeyValue mapKV(FieldTypeProtos.KeyValue kv) {
|
public static KeyValue mapKV(FieldTypeProtos.KeyValue kv) {
|
||||||
|
// Nothing to map when the input is missing or carries neither a key nor a value.
// '&&' (not '&') keeps the check short-circuiting; the parentheses make the
// precedence relative to '||' explicit.
if (kv == null || (StringUtils.isBlank(kv.getKey()) && StringUtils.isBlank(kv.getValue()))) {
	return null;
}
|
||||||
|
|
||||||
final KeyValue keyValue = new KeyValue();
|
final KeyValue keyValue = new KeyValue();
|
||||||
keyValue.setKey(kv.getKey());
|
keyValue.setKey(kv.getKey());
|
||||||
keyValue.setValue(kv.getValue());
|
keyValue.setValue(kv.getValue());
|
||||||
|
@ -575,6 +581,10 @@ public class ProtoConverter implements Serializable {
|
||||||
}
|
}
|
||||||
|
|
||||||
public static StructuredProperty mapStructuredProperty(FieldTypeProtos.StructuredProperty sp) {
|
public static StructuredProperty mapStructuredProperty(FieldTypeProtos.StructuredProperty sp) {
|
||||||
|
// Nothing to map when the input is missing or has a blank value.
// The conditional-or '||' is required here: the non-short-circuit '|' evaluates
// sp.getValue() even when sp is null and throws a NullPointerException.
if (sp == null || StringUtils.isBlank(sp.getValue())) {
	return null;
}
|
||||||
|
|
||||||
final StructuredProperty structuredProperty = new StructuredProperty();
|
final StructuredProperty structuredProperty = new StructuredProperty();
|
||||||
structuredProperty.setValue(sp.getValue());
|
structuredProperty.setValue(sp.getValue());
|
||||||
structuredProperty.setQualifier(mapQualifier(sp.getQualifier()));
|
structuredProperty.setQualifier(mapQualifier(sp.getQualifier()));
|
||||||
|
@ -611,6 +621,10 @@ public class ProtoConverter implements Serializable {
|
||||||
}
|
}
|
||||||
|
|
||||||
public static Field<String> mapStringField(FieldTypeProtos.StringField s) {
|
public static Field<String> mapStringField(FieldTypeProtos.StringField s) {
|
||||||
|
if (s == null || StringUtils.isBlank(s.getValue())) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
final Field<String> stringField = new Field<>();
|
final Field<String> stringField = new Field<>();
|
||||||
stringField.setValue(s.getValue());
|
stringField.setValue(s.getValue());
|
||||||
stringField.setDataInfo(mapDataInfo(s.getDataInfo()));
|
stringField.setDataInfo(mapDataInfo(s.getDataInfo()));
|
||||||
|
@ -618,19 +632,16 @@ public class ProtoConverter implements Serializable {
|
||||||
}
|
}
|
||||||
|
|
||||||
public static Field<Boolean> mapBoolField(FieldTypeProtos.BoolField b) {
|
public static Field<Boolean> mapBoolField(FieldTypeProtos.BoolField b) {
|
||||||
|
if (b == null) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
final Field<Boolean> booleanField = new Field<>();
|
final Field<Boolean> booleanField = new Field<>();
|
||||||
booleanField.setValue(b.getValue());
|
booleanField.setValue(b.getValue());
|
||||||
booleanField.setDataInfo(mapDataInfo(b.getDataInfo()));
|
booleanField.setDataInfo(mapDataInfo(b.getDataInfo()));
|
||||||
return booleanField;
|
return booleanField;
|
||||||
}
|
}
|
||||||
|
|
||||||
public static Field<Integer> mapIntField(FieldTypeProtos.IntField b) {
|
|
||||||
final Field<Integer> entity = new Field<>();
|
|
||||||
entity.setValue(b.getValue());
|
|
||||||
entity.setDataInfo(mapDataInfo(b.getDataInfo()));
|
|
||||||
return entity;
|
|
||||||
}
|
|
||||||
|
|
||||||
public static Journal mapJournal(FieldTypeProtos.Journal j) {
|
public static Journal mapJournal(FieldTypeProtos.Journal j) {
|
||||||
final Journal journal = new Journal();
|
final Journal journal = new Journal();
|
||||||
journal.setConferencedate(j.getConferencedate());
|
journal.setConferencedate(j.getConferencedate());
|
||||||
|
|
|
@ -50,8 +50,7 @@ import eu.dnetlib.dhp.schema.oaf.Result;
|
||||||
import eu.dnetlib.dhp.schema.oaf.Software;
|
import eu.dnetlib.dhp.schema.oaf.Software;
|
||||||
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
|
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
|
||||||
|
|
||||||
public class MigrateDbEntitiesApplication extends AbstractMigrationApplication
|
public class MigrateDbEntitiesApplication extends AbstractMigrationApplication implements Closeable {
|
||||||
implements Closeable {
|
|
||||||
|
|
||||||
private static final Log log = LogFactory.getLog(MigrateDbEntitiesApplication.class);
|
private static final Log log = LogFactory.getLog(MigrateDbEntitiesApplication.class);
|
||||||
|
|
||||||
|
@ -128,9 +127,7 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication
|
||||||
}
|
}
|
||||||
|
|
||||||
public List<Oaf> processDatasource(final ResultSet rs) {
|
public List<Oaf> processDatasource(final ResultSet rs) {
|
||||||
|
|
||||||
try {
|
try {
|
||||||
|
|
||||||
final DataInfo info = prepareDataInfo(rs);
|
final DataInfo info = prepareDataInfo(rs);
|
||||||
|
|
||||||
final Datasource ds = new Datasource();
|
final Datasource ds = new Datasource();
|
||||||
|
@ -194,7 +191,6 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication
|
||||||
|
|
||||||
public List<Oaf> processProject(final ResultSet rs) {
|
public List<Oaf> processProject(final ResultSet rs) {
|
||||||
try {
|
try {
|
||||||
|
|
||||||
final DataInfo info = prepareDataInfo(rs);
|
final DataInfo info = prepareDataInfo(rs);
|
||||||
|
|
||||||
final Project p = new Project();
|
final Project p = new Project();
|
||||||
|
@ -249,9 +245,7 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication
|
||||||
}
|
}
|
||||||
|
|
||||||
public List<Oaf> processOrganization(final ResultSet rs) {
|
public List<Oaf> processOrganization(final ResultSet rs) {
|
||||||
|
|
||||||
try {
|
try {
|
||||||
|
|
||||||
final DataInfo info = prepareDataInfo(rs);
|
final DataInfo info = prepareDataInfo(rs);
|
||||||
|
|
||||||
final Organization o = new Organization();
|
final Organization o = new Organization();
|
||||||
|
@ -370,14 +364,12 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication
|
||||||
|
|
||||||
final DataInfo info = dataInfo(
|
final DataInfo info = dataInfo(
|
||||||
false, null, false, false,
|
false, null, false, false,
|
||||||
|
|
||||||
qualifier(USER_CLAIM, USER_CLAIM, DNET_PROVENANCE_ACTIONS, DNET_PROVENANCE_ACTIONS), "0.9");
|
qualifier(USER_CLAIM, USER_CLAIM, DNET_PROVENANCE_ACTIONS, DNET_PROVENANCE_ACTIONS), "0.9");
|
||||||
|
|
||||||
final List<KeyValue> collectedFrom = listKeyValues(
|
final List<KeyValue> collectedFrom = listKeyValues(
|
||||||
createOpenaireId(10, "infrastruct_::openaire", true), "OpenAIRE");
|
createOpenaireId(10, "infrastruct_::openaire", true), "OpenAIRE");
|
||||||
|
|
||||||
try {
|
try {
|
||||||
|
|
||||||
if (rs.getString(SOURCE_TYPE).equals("context")) {
|
if (rs.getString(SOURCE_TYPE).equals("context")) {
|
||||||
final Result r;
|
final Result r;
|
||||||
|
|
||||||
|
@ -461,9 +453,12 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication
|
||||||
final Boolean inferred = rs.getBoolean("inferred");
|
final Boolean inferred = rs.getBoolean("inferred");
|
||||||
final String trust = rs.getString("trust");
|
final String trust = rs.getString("trust");
|
||||||
return dataInfo(
|
return dataInfo(
|
||||||
|
deletedbyinference,
|
||||||
deletedbyinference, inferenceprovenance, inferred, false, ENTITYREGISTRY_PROVENANCE_ACTION, trust);
|
inferenceprovenance,
|
||||||
|
inferred,
|
||||||
|
false,
|
||||||
|
ENTITYREGISTRY_PROVENANCE_ACTION,
|
||||||
|
trust);
|
||||||
}
|
}
|
||||||
|
|
||||||
private Qualifier prepareQualifierSplitting(final String s) {
|
private Qualifier prepareQualifierSplitting(final String s) {
|
||||||
|
@ -535,4 +530,5 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication
|
||||||
super.close();
|
super.close();
|
||||||
dbClient.close();
|
dbClient.close();
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -12,6 +12,7 @@ import org.apache.commons.lang3.StringUtils;
|
||||||
import org.dom4j.Document;
|
import org.dom4j.Document;
|
||||||
import org.dom4j.Node;
|
import org.dom4j.Node;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.oa.graph.raw.common.PacePerson;
|
||||||
import eu.dnetlib.dhp.schema.common.ModelConstants;
|
import eu.dnetlib.dhp.schema.common.ModelConstants;
|
||||||
import eu.dnetlib.dhp.schema.oaf.Author;
|
import eu.dnetlib.dhp.schema.oaf.Author;
|
||||||
import eu.dnetlib.dhp.schema.oaf.DataInfo;
|
import eu.dnetlib.dhp.schema.oaf.DataInfo;
|
||||||
|
@ -44,9 +45,24 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper {
|
||||||
for (final Object o : doc.selectNodes("//datacite:creator")) {
|
for (final Object o : doc.selectNodes("//datacite:creator")) {
|
||||||
final Node n = (Node) o;
|
final Node n = (Node) o;
|
||||||
final Author author = new Author();
|
final Author author = new Author();
|
||||||
author.setFullname(n.valueOf("./datacite:creatorName"));
|
final String fullname = n.valueOf("./datacite:creatorName");
|
||||||
author.setName(n.valueOf("./datacite:givenName"));
|
author.setFullname(fullname);
|
||||||
author.setSurname(n.valueOf("./datacite:familyName"));
|
|
||||||
|
PacePerson pp = new PacePerson(fullname, false);
|
||||||
|
final String name = n.valueOf("./datacite:givenName");
|
||||||
|
// Use the first name parsed from the full name only when datacite:givenName is
// blank and the parse is flagged accurate; otherwise keep the datacite:givenName
// value as-is. '&&' skips the isAccurate() call when a given name is present.
if (StringUtils.isBlank(name) && pp.isAccurate()) {
	author.setName(pp.getNormalisedFirstName());
} else {
	author.setName(name);
}
|
||||||
|
|
||||||
|
final String surname = n.valueOf("./datacite:familyName");
|
||||||
|
// Use the surname parsed from the full name only when datacite:familyName is
// blank and the parse is flagged accurate; otherwise keep the datacite:familyName
// value as-is. '&&' skips the isAccurate() call when a family name is present.
if (StringUtils.isBlank(surname) && pp.isAccurate()) {
	author.setSurname(pp.getNormalisedSurname());
} else {
	author.setSurname(surname);
}
|
||||||
|
|
||||||
author.setAffiliation(prepareListFields(doc, "./datacite:affiliation", info));
|
author.setAffiliation(prepareListFields(doc, "./datacite:affiliation", info));
|
||||||
author.setPid(preparePids(doc, info));
|
author.setPid(preparePids(doc, info));
|
||||||
author.setRank(pos++);
|
author.setRank(pos++);
|
||||||
|
@ -77,8 +93,6 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper {
|
||||||
final KeyValue hostedby) {
|
final KeyValue hostedby) {
|
||||||
|
|
||||||
final Instance instance = new Instance();
|
final Instance instance = new Instance();
|
||||||
final Set<String> url = new HashSet<>();
|
|
||||||
instance.setUrl(new ArrayList<>());
|
|
||||||
instance
|
instance
|
||||||
.setInstancetype(
|
.setInstancetype(
|
||||||
prepareQualifier(
|
prepareQualifier(
|
||||||
|
@ -97,6 +111,7 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper {
|
||||||
.setProcessingchargecurrency(
|
.setProcessingchargecurrency(
|
||||||
field(doc.valueOf("//oaf:processingchargeamount/@currency"), info));
|
field(doc.valueOf("//oaf:processingchargeamount/@currency"), info));
|
||||||
|
|
||||||
|
final Set<String> url = new HashSet<>();
|
||||||
for (final Object o : doc.selectNodes("//datacite:alternateIdentifier[@alternateIdentifierType='URL']")) {
|
for (final Object o : doc.selectNodes("//datacite:alternateIdentifier[@alternateIdentifierType='URL']")) {
|
||||||
url.add(((Node) o).getText().trim());
|
url.add(((Node) o).getText().trim());
|
||||||
}
|
}
|
||||||
|
@ -109,7 +124,10 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper {
|
||||||
for (final Object o : doc.selectNodes("//datacite:identifier[@identifierType='DOI']")) {
|
for (final Object o : doc.selectNodes("//datacite:identifier[@identifierType='DOI']")) {
|
||||||
url.add(HTTP_DX_DOI_PREIFX + ((Node) o).getText().trim());
|
url.add(HTTP_DX_DOI_PREIFX + ((Node) o).getText().trim());
|
||||||
}
|
}
|
||||||
instance.getUrl().addAll(url);
|
if (!url.isEmpty()) {
|
||||||
|
instance.setUrl(new ArrayList<>());
|
||||||
|
instance.getUrl().addAll(url);
|
||||||
|
}
|
||||||
return Arrays.asList(instance);
|
return Arrays.asList(instance);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -1,7 +1,6 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.oa.graph.raw.common;
|
package eu.dnetlib.dhp.oa.graph.raw.common;
|
||||||
|
|
||||||
import java.nio.charset.Charset;
|
|
||||||
import java.nio.charset.StandardCharsets;
|
import java.nio.charset.StandardCharsets;
|
||||||
import java.text.Normalizer;
|
import java.text.Normalizer;
|
||||||
import java.util.HashSet;
|
import java.util.HashSet;
|
||||||
|
|
Loading…
Reference in New Issue