Merge branch 'master' into provision_indexing

This commit is contained in:
Claudio Atzori 2020-05-22 12:35:41 +02:00
commit 946598cfba
7 changed files with 367 additions and 294 deletions

View File

@ -1,159 +1,164 @@
package eu.dnetlib.dhp.oa.dedup;
import com.wcohen.ss.JaroWinkler; package eu.dnetlib.dhp.oa.dedup;
import eu.dnetlib.dhp.schema.oaf.Author;
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
import eu.dnetlib.pace.model.Person;
import org.apache.commons.lang3.StringUtils;
import scala.Tuple2;
import java.text.Normalizer; import java.text.Normalizer;
import java.util.*; import java.util.*;
import java.util.stream.Collectors; import java.util.stream.Collectors;
import org.apache.commons.lang3.StringUtils;
import com.wcohen.ss.JaroWinkler;
import eu.dnetlib.dhp.schema.oaf.Author;
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
import eu.dnetlib.pace.model.Person;
import scala.Tuple2;
public class AuthorMerger { public class AuthorMerger {
private static final Double THRESHOLD = 0.95; private static final Double THRESHOLD = 0.95;
public static List<Author> merge(List<List<Author>> authors){ public static List<Author> merge(List<List<Author>> authors) {
authors.sort(new Comparator<List<Author>>() { authors.sort(new Comparator<List<Author>>() {
@Override @Override
public int compare(List<Author> o1, List<Author> o2) { public int compare(List<Author> o1, List<Author> o2) {
return -Integer.compare(countAuthorsPids(o1), countAuthorsPids(o2)); return -Integer.compare(countAuthorsPids(o1), countAuthorsPids(o2));
} }
}); });
List<Author> author = new ArrayList<>(); List<Author> author = new ArrayList<>();
for(List<Author> a : authors){ for (List<Author> a : authors) {
author = mergeAuthor(author, a); author = mergeAuthor(author, a);
} }
return author; return author;
} }
public static List<Author> mergeAuthor(final List<Author> a, final List<Author> b) { public static List<Author> mergeAuthor(final List<Author> a, final List<Author> b) {
int pa = countAuthorsPids(a); int pa = countAuthorsPids(a);
int pb = countAuthorsPids(b); int pb = countAuthorsPids(b);
List<Author> base, enrich; List<Author> base, enrich;
int sa = authorsSize(a); int sa = authorsSize(a);
int sb = authorsSize(b); int sb = authorsSize(b);
if (pa == pb) { if (pa == pb) {
base = sa > sb ? a : b; base = sa > sb ? a : b;
enrich = sa > sb ? b : a; enrich = sa > sb ? b : a;
} else { } else {
base = pa > pb ? a : b; base = pa > pb ? a : b;
enrich = pa > pb ? b : a; enrich = pa > pb ? b : a;
} }
enrichPidFromList(base, enrich); enrichPidFromList(base, enrich);
return base; return base;
} }
private static void enrichPidFromList(List<Author> base, List<Author> enrich) { private static void enrichPidFromList(List<Author> base, List<Author> enrich) {
if (base == null || enrich == null) if (base == null || enrich == null)
return; return;
final Map<String, Author> basePidAuthorMap = base final Map<String, Author> basePidAuthorMap = base
.stream() .stream()
.filter(a -> a.getPid() != null && a.getPid().size() > 0) .filter(a -> a.getPid() != null && a.getPid().size() > 0)
.flatMap( .flatMap(
a -> a a -> a
.getPid() .getPid()
.stream() .stream()
.map(p -> new Tuple2<>(pidToComparableString(p), a))) .map(p -> new Tuple2<>(pidToComparableString(p), a)))
.collect(Collectors.toMap(Tuple2::_1, Tuple2::_2, (x1, x2) -> x1)); .collect(Collectors.toMap(Tuple2::_1, Tuple2::_2, (x1, x2) -> x1));
final List<Tuple2<StructuredProperty, Author>> pidToEnrich = enrich final List<Tuple2<StructuredProperty, Author>> pidToEnrich = enrich
.stream() .stream()
.filter(a -> a.getPid() != null && a.getPid().size() > 0) .filter(a -> a.getPid() != null && a.getPid().size() > 0)
.flatMap( .flatMap(
a -> a a -> a
.getPid() .getPid()
.stream() .stream()
.filter(p -> !basePidAuthorMap.containsKey(pidToComparableString(p))) .filter(p -> !basePidAuthorMap.containsKey(pidToComparableString(p)))
.map(p -> new Tuple2<>(p, a))) .map(p -> new Tuple2<>(p, a)))
.collect(Collectors.toList()); .collect(Collectors.toList());
pidToEnrich pidToEnrich
.forEach( .forEach(
a -> { a -> {
Optional<Tuple2<Double, Author>> simAuthor = base Optional<Tuple2<Double, Author>> simAuthor = base
.stream() .stream()
.map(ba -> new Tuple2<>(sim(ba, a._2()), ba)) .map(ba -> new Tuple2<>(sim(ba, a._2()), ba))
.max(Comparator.comparing(Tuple2::_1)); .max(Comparator.comparing(Tuple2::_1));
if (simAuthor.isPresent() && simAuthor.get()._1() > THRESHOLD) { if (simAuthor.isPresent() && simAuthor.get()._1() > THRESHOLD) {
Author r = simAuthor.get()._2(); Author r = simAuthor.get()._2();
if (r.getPid() == null) { if (r.getPid() == null) {
r.setPid(new ArrayList<>()); r.setPid(new ArrayList<>());
} }
r.getPid().add(a._1()); r.getPid().add(a._1());
} }
}); });
} }
public static String pidToComparableString(StructuredProperty pid){ public static String pidToComparableString(StructuredProperty pid) {
return (pid.getQualifier()!=null? pid.getQualifier().getClassid()!=null?pid.getQualifier().getClassid().toLowerCase():"":"") + (pid.getValue()!=null? pid.getValue().toLowerCase():""); return (pid.getQualifier() != null
} ? pid.getQualifier().getClassid() != null ? pid.getQualifier().getClassid().toLowerCase() : ""
: "") + (pid.getValue() != null ? pid.getValue().toLowerCase() : "");
}
public static int countAuthorsPids(List<Author> authors) { public static int countAuthorsPids(List<Author> authors) {
if (authors == null) if (authors == null)
return 0; return 0;
return (int) authors.stream().filter(AuthorMerger::hasPid).count(); return (int) authors.stream().filter(AuthorMerger::hasPid).count();
} }
private static int authorsSize(List<Author> authors) { private static int authorsSize(List<Author> authors) {
if (authors == null) if (authors == null)
return 0; return 0;
return authors.size(); return authors.size();
} }
private static Double sim(Author a, Author b) { private static Double sim(Author a, Author b) {
final Person pa = parse(a); final Person pa = parse(a);
final Person pb = parse(b); final Person pb = parse(b);
if (pa.isAccurate() & pb.isAccurate()) { if (pa.isAccurate() & pb.isAccurate()) {
return new JaroWinkler() return new JaroWinkler()
.score(normalize(pa.getSurnameString()), normalize(pb.getSurnameString())); .score(normalize(pa.getSurnameString()), normalize(pb.getSurnameString()));
} else { } else {
return new JaroWinkler() return new JaroWinkler()
.score(normalize(pa.getNormalisedFullname()), normalize(pb.getNormalisedFullname())); .score(normalize(pa.getNormalisedFullname()), normalize(pb.getNormalisedFullname()));
} }
} }
private static boolean hasPid(Author a) { private static boolean hasPid(Author a) {
if (a == null || a.getPid() == null || a.getPid().size() == 0) if (a == null || a.getPid() == null || a.getPid().size() == 0)
return false; return false;
return a.getPid().stream().anyMatch(p -> p != null && StringUtils.isNotBlank(p.getValue())); return a.getPid().stream().anyMatch(p -> p != null && StringUtils.isNotBlank(p.getValue()));
} }
private static Person parse(Author author) { private static Person parse(Author author) {
if (StringUtils.isNotBlank(author.getSurname())) { if (StringUtils.isNotBlank(author.getSurname())) {
return new Person(author.getSurname() + ", " + author.getName(), false); return new Person(author.getSurname() + ", " + author.getName(), false);
} else { } else {
return new Person(author.getFullname(), false); return new Person(author.getFullname(), false);
} }
} }
private static String normalize(final String s) { private static String normalize(final String s) {
return nfd(s) return nfd(s)
.toLowerCase() .toLowerCase()
// do not compact the regexes in a single expression, would cause StackOverflowError // do not compact the regexes in a single expression, would cause StackOverflowError
// in case // in case
// of large input strings // of large input strings
.replaceAll("(\\W)+", " ") .replaceAll("(\\W)+", " ")
.replaceAll("(\\p{InCombiningDiacriticalMarks})+", " ") .replaceAll("(\\p{InCombiningDiacriticalMarks})+", " ")
.replaceAll("(\\p{Punct})+", " ") .replaceAll("(\\p{Punct})+", " ")
.replaceAll("(\\d)+", " ") .replaceAll("(\\d)+", " ")
.replaceAll("(\\n)+", " ") .replaceAll("(\\n)+", " ")
.trim(); .trim();
} }
private static String nfd(final String s) { private static String nfd(final String s) {
return Normalizer.normalize(s, Normalizer.Form.NFD); return Normalizer.normalize(s, Normalizer.Form.NFD);
} }
} }

View File

@ -1,3 +1,4 @@
package eu.dnetlib.dhp.oa.dedup; package eu.dnetlib.dhp.oa.dedup;
import java.io.Serializable; import java.io.Serializable;
@ -73,7 +74,8 @@ public class DedupRecordFactory {
} }
public static <T extends OafEntity> T entityMerger( public static <T extends OafEntity> T entityMerger(
String id, Iterator<Tuple2<String, T>> entities, long ts, DataInfo dataInfo, Class<T> clazz) throws IllegalAccessException, InstantiationException { String id, Iterator<Tuple2<String, T>> entities, long ts, DataInfo dataInfo, Class<T> clazz)
throws IllegalAccessException, InstantiationException {
T entity = clazz.newInstance(); T entity = clazz.newInstance();
@ -87,14 +89,14 @@ public class DedupRecordFactory {
entity.mergeFrom(duplicate); entity.mergeFrom(duplicate);
if (ModelSupport.isSubClass(duplicate, Result.class)) { if (ModelSupport.isSubClass(duplicate, Result.class)) {
Result r1 = (Result) duplicate; Result r1 = (Result) duplicate;
if (r1.getAuthor() != null && r1.getAuthor().size()>0) if (r1.getAuthor() != null && r1.getAuthor().size() > 0)
authors.add(r1.getAuthor()); authors.add(r1.getAuthor());
if (r1.getDateofacceptance() != null) if (r1.getDateofacceptance() != null)
dates.add(r1.getDateofacceptance().getValue()); dates.add(r1.getDateofacceptance().getValue());
} }
}); });
//set authors and date // set authors and date
if (ModelSupport.isSubClass(entity, Result.class)) { if (ModelSupport.isSubClass(entity, Result.class)) {
((Result) entity).setDateofacceptance(DatePicker.pick(dates)); ((Result) entity).setDateofacceptance(DatePicker.pick(dates));
((Result) entity).setAuthor(AuthorMerger.merge(authors)); ((Result) entity).setAuthor(AuthorMerger.merge(authors));

View File

@ -1,5 +1,8 @@
package eu.dnetlib.dhp.oa.dedup; package eu.dnetlib.dhp.oa.dedup;
import static org.junit.jupiter.api.Assertions.assertEquals;
import java.io.BufferedReader; import java.io.BufferedReader;
import java.io.FileReader; import java.io.FileReader;
import java.io.IOException; import java.io.IOException;
@ -7,15 +10,13 @@ import java.io.Serializable;
import java.nio.file.Paths; import java.nio.file.Paths;
import java.util.*; import java.util.*;
import eu.dnetlib.dhp.schema.oaf.*;
import eu.dnetlib.pace.util.MapDocumentUtil;
import org.codehaus.jackson.map.ObjectMapper; import org.codehaus.jackson.map.ObjectMapper;
import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test; import org.junit.jupiter.api.Test;
import scala.Tuple2;
import static org.junit.jupiter.api.Assertions.assertEquals; import eu.dnetlib.dhp.schema.oaf.*;
import eu.dnetlib.pace.util.MapDocumentUtil;
import scala.Tuple2;
public class EntityMergerTest implements Serializable { public class EntityMergerTest implements Serializable {
@ -30,9 +31,9 @@ public class EntityMergerTest implements Serializable {
public void setUp() throws Exception { public void setUp() throws Exception {
testEntityBasePath = Paths testEntityBasePath = Paths
.get(SparkDedupTest.class.getResource("/eu/dnetlib/dhp/dedup/json").toURI()) .get(SparkDedupTest.class.getResource("/eu/dnetlib/dhp/dedup/json").toURI())
.toFile() .toFile()
.getAbsolutePath(); .getAbsolutePath();
publications = readSample(testEntityBasePath + "/publication_merge.json", Publication.class); publications = readSample(testEntityBasePath + "/publication_merge.json", Publication.class);
@ -45,7 +46,8 @@ public class EntityMergerTest implements Serializable {
@Test @Test
public void publicationMergerTest() throws InstantiationException, IllegalAccessException { public void publicationMergerTest() throws InstantiationException, IllegalAccessException {
Publication pub_merged = DedupRecordFactory.entityMerger(dedupId, publications.iterator(), 0, dataInfo, Publication.class); Publication pub_merged = DedupRecordFactory
.entityMerger(dedupId, publications.iterator(), 0, dataInfo, Publication.class);
assertEquals(dedupId, pub_merged.getId()); assertEquals(dedupId, pub_merged.getId());
@ -59,36 +61,36 @@ public class EntityMergerTest implements Serializable {
assertEquals(pub_merged.getDateoftransformation(), pub_top.getDateoftransformation()); assertEquals(pub_merged.getDateoftransformation(), pub_top.getDateoftransformation());
assertEquals(pub_merged.getOaiprovenance(), pub_top.getOaiprovenance()); assertEquals(pub_merged.getOaiprovenance(), pub_top.getOaiprovenance());
assertEquals(pub_merged.getDateofcollection(), pub_top.getDateofcollection()); assertEquals(pub_merged.getDateofcollection(), pub_top.getDateofcollection());
assertEquals(pub_merged.getInstance().size(),3); assertEquals(pub_merged.getInstance().size(), 3);
assertEquals(pub_merged.getCountry().size(), 2); assertEquals(pub_merged.getCountry().size(), 2);
assertEquals(pub_merged.getSubject().size(), 0); assertEquals(pub_merged.getSubject().size(), 0);
assertEquals(pub_merged.getTitle().size(), 2); assertEquals(pub_merged.getTitle().size(), 2);
assertEquals(pub_merged.getRelevantdate().size(),0); assertEquals(pub_merged.getRelevantdate().size(), 0);
assertEquals(pub_merged.getDescription().size(),0); assertEquals(pub_merged.getDescription().size(), 0);
assertEquals(pub_merged.getSource().size(),0); assertEquals(pub_merged.getSource().size(), 0);
assertEquals(pub_merged.getFulltext().size(),0); assertEquals(pub_merged.getFulltext().size(), 0);
assertEquals(pub_merged.getFormat().size(),0); assertEquals(pub_merged.getFormat().size(), 0);
assertEquals(pub_merged.getContributor().size(),0); assertEquals(pub_merged.getContributor().size(), 0);
assertEquals(pub_merged.getCoverage().size(),0); assertEquals(pub_merged.getCoverage().size(), 0);
assertEquals(pub_merged.getContext().size(),0); assertEquals(pub_merged.getContext().size(), 0);
assertEquals(pub_merged.getExternalReference().size(),0); assertEquals(pub_merged.getExternalReference().size(), 0);
assertEquals(pub_merged.getOriginalId().size(),3); assertEquals(pub_merged.getOriginalId().size(), 3);
assertEquals(pub_merged.getCollectedfrom().size(),3); assertEquals(pub_merged.getCollectedfrom().size(), 3);
assertEquals(pub_merged.getPid().size(),1); assertEquals(pub_merged.getPid().size(), 1);
assertEquals(pub_merged.getExtraInfo().size(),0); assertEquals(pub_merged.getExtraInfo().size(), 0);
//verify datainfo // verify datainfo
assertEquals(pub_merged.getDataInfo(), dataInfo); assertEquals(pub_merged.getDataInfo(), dataInfo);
//verify datepicker // verify datepicker
assertEquals(pub_merged.getDateofacceptance().getValue(), "2018-09-30"); assertEquals(pub_merged.getDateofacceptance().getValue(), "2018-09-30");
//verify authors // verify authors
assertEquals(pub_merged.getAuthor().size(), 9); assertEquals(pub_merged.getAuthor().size(), 9);
assertEquals(AuthorMerger.countAuthorsPids(pub_merged.getAuthor()), 4); assertEquals(AuthorMerger.countAuthorsPids(pub_merged.getAuthor()), 4);
} }
public DataInfo setDI(){ public DataInfo setDI() {
DataInfo dataInfo = new DataInfo(); DataInfo dataInfo = new DataInfo();
dataInfo.setTrust("0.9"); dataInfo.setTrust("0.9");
dataInfo.setDeletedbyinference(false); dataInfo.setDeletedbyinference(false);
@ -97,13 +99,13 @@ public class EntityMergerTest implements Serializable {
return dataInfo; return dataInfo;
} }
public Publication getTopPub(List<Tuple2<String, Publication>> publications){ public Publication getTopPub(List<Tuple2<String, Publication>> publications) {
Double maxTrust = 0.0; Double maxTrust = 0.0;
Publication maxPub = new Publication(); Publication maxPub = new Publication();
for (Tuple2<String, Publication> publication : publications) { for (Tuple2<String, Publication> publication : publications) {
Double pubTrust = Double.parseDouble(publication._2().getDataInfo().getTrust()); Double pubTrust = Double.parseDouble(publication._2().getDataInfo().getTrust());
if(pubTrust > maxTrust){ if (pubTrust > maxTrust) {
maxTrust = pubTrust; maxTrust = pubTrust;
maxPub = publication._2(); maxPub = publication._2();
} }
@ -118,11 +120,11 @@ public class EntityMergerTest implements Serializable {
reader = new BufferedReader(new FileReader(path)); reader = new BufferedReader(new FileReader(path));
String line = reader.readLine(); String line = reader.readLine();
while (line != null) { while (line != null) {
res.add( res
.add(
new Tuple2<>( new Tuple2<>(
MapDocumentUtil.getJPathString("$.id", line), MapDocumentUtil.getJPathString("$.id", line),
new ObjectMapper().readValue(line, clazz)) new ObjectMapper().readValue(line, clazz)));
);
// read next line // read next line
line = reader.readLine(); line = reader.readLine();
} }
@ -134,5 +136,4 @@ public class EntityMergerTest implements Serializable {
return res; return res;
} }
} }

View File

@ -10,7 +10,16 @@ import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.listFields;
import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.oaiIProvenance; import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.oaiIProvenance;
import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.qualifier; import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.qualifier;
import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.structuredProperty; import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.structuredProperty;
import static eu.dnetlib.dhp.schema.common.ModelConstants.*; import static eu.dnetlib.dhp.schema.common.ModelConstants.DATASET_DEFAULT_RESULTTYPE;
import static eu.dnetlib.dhp.schema.common.ModelConstants.DNET_PID_TYPES;
import static eu.dnetlib.dhp.schema.common.ModelConstants.IS_PRODUCED_BY;
import static eu.dnetlib.dhp.schema.common.ModelConstants.ORP_DEFAULT_RESULTTYPE;
import static eu.dnetlib.dhp.schema.common.ModelConstants.OUTCOME;
import static eu.dnetlib.dhp.schema.common.ModelConstants.PRODUCES;
import static eu.dnetlib.dhp.schema.common.ModelConstants.PUBLICATION_DEFAULT_RESULTTYPE;
import static eu.dnetlib.dhp.schema.common.ModelConstants.REPOSITORY_PROVENANCE_ACTIONS;
import static eu.dnetlib.dhp.schema.common.ModelConstants.RESULT_PROJECT;
import static eu.dnetlib.dhp.schema.common.ModelConstants.SOFTWARE_DEFAULT_RESULTTYPE;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Arrays; import java.util.Arrays;
@ -50,6 +59,10 @@ public abstract class AbstractMdRecordToOafMapper {
protected static final String DATACITE_SCHEMA_KERNEL_4 = "http://datacite.org/schema/kernel-4"; protected static final String DATACITE_SCHEMA_KERNEL_4 = "http://datacite.org/schema/kernel-4";
protected static final String DATACITE_SCHEMA_KERNEL_3 = "http://datacite.org/schema/kernel-3"; protected static final String DATACITE_SCHEMA_KERNEL_3 = "http://datacite.org/schema/kernel-3";
protected static final Qualifier ORCID_PID_TYPE = qualifier(
"ORCID", "Open Researcher and Contributor ID", DNET_PID_TYPES, DNET_PID_TYPES);
protected static final Qualifier MAG_PID_TYPE = qualifier(
"MAGIdentifier", "Microsoft Academic Graph Identifier", DNET_PID_TYPES, DNET_PID_TYPES);
protected static final Map<String, String> nsContext = new HashMap<>(); protected static final Map<String, String> nsContext = new HashMap<>();
@ -75,8 +88,7 @@ public abstract class AbstractMdRecordToOafMapper {
DocumentFactory.getInstance().setXPathNamespaceURIs(nsContext); DocumentFactory.getInstance().setXPathNamespaceURIs(nsContext);
final Document doc = DocumentHelper final Document doc = DocumentHelper
.parseText( .parseText(xml.replaceAll(DATACITE_SCHEMA_KERNEL_4, DATACITE_SCHEMA_KERNEL_3));
xml.replaceAll(DATACITE_SCHEMA_KERNEL_4, DATACITE_SCHEMA_KERNEL_3));
final String type = doc.valueOf("//dr:CobjCategory/@type"); final String type = doc.valueOf("//dr:CobjCategory/@type");
final KeyValue collectedFrom = getProvenanceDatasource( final KeyValue collectedFrom = getProvenanceDatasource(
@ -103,7 +115,7 @@ public abstract class AbstractMdRecordToOafMapper {
} }
} }
private KeyValue getProvenanceDatasource(Document doc, String xpathId, String xpathName) { private KeyValue getProvenanceDatasource(final Document doc, final String xpathId, final String xpathName) {
final String dsId = doc.valueOf(xpathId); final String dsId = doc.valueOf(xpathId);
final String dsName = doc.valueOf(xpathName); final String dsName = doc.valueOf(xpathName);
@ -111,9 +123,7 @@ public abstract class AbstractMdRecordToOafMapper {
return null; return null;
} }
return keyValue( return keyValue(createOpenaireId(10, dsId, true), dsName);
createOpenaireId(10, dsId, true),
dsName);
} }
protected List<Oaf> createOafs( protected List<Oaf> createOafs(
@ -211,8 +221,14 @@ public abstract class AbstractMdRecordToOafMapper {
return res; return res;
} }
protected Relation getRelation(String source, String target, String relType, String subRelType, String relClass, protected Relation getRelation(final String source,
KeyValue collectedFrom, DataInfo info, long lastUpdateTimestamp) { final String target,
final String relType,
final String subRelType,
final String relClass,
final KeyValue collectedFrom,
final DataInfo info,
final long lastUpdateTimestamp) {
final Relation rel = new Relation(); final Relation rel = new Relation();
rel.setRelType(relType); rel.setRelType(relType);
rel.setSubRelType(subRelType); rel.setSubRelType(subRelType);
@ -289,7 +305,10 @@ public abstract class AbstractMdRecordToOafMapper {
protected abstract Qualifier prepareResourceType(Document doc, DataInfo info); protected abstract Qualifier prepareResourceType(Document doc, DataInfo info);
protected abstract List<Instance> prepareInstances( protected abstract List<Instance> prepareInstances(
Document doc, DataInfo info, KeyValue collectedfrom, KeyValue hostedby); Document doc,
DataInfo info,
KeyValue collectedfrom,
KeyValue hostedby);
protected abstract List<Field<String>> prepareSources(Document doc, DataInfo info); protected abstract List<Field<String>> prepareSources(Document doc, DataInfo info);
@ -314,13 +333,16 @@ public abstract class AbstractMdRecordToOafMapper {
protected abstract List<Author> prepareAuthors(Document doc, DataInfo info); protected abstract List<Author> prepareAuthors(Document doc, DataInfo info);
protected abstract List<Field<String>> prepareOtherResearchProductTools( protected abstract List<Field<String>> prepareOtherResearchProductTools(
Document doc, DataInfo info); Document doc,
DataInfo info);
protected abstract List<Field<String>> prepareOtherResearchProductContactGroups( protected abstract List<Field<String>> prepareOtherResearchProductContactGroups(
Document doc, DataInfo info); Document doc,
DataInfo info);
protected abstract List<Field<String>> prepareOtherResearchProductContactPersons( protected abstract List<Field<String>> prepareOtherResearchProductContactPersons(
Document doc, DataInfo info); Document doc,
DataInfo info);
protected abstract Qualifier prepareSoftwareProgrammingLanguage(Document doc, DataInfo info); protected abstract Qualifier prepareSoftwareProgrammingLanguage(Document doc, DataInfo info);
@ -329,7 +351,8 @@ public abstract class AbstractMdRecordToOafMapper {
protected abstract List<StructuredProperty> prepareSoftwareLicenses(Document doc, DataInfo info); protected abstract List<StructuredProperty> prepareSoftwareLicenses(Document doc, DataInfo info);
protected abstract List<Field<String>> prepareSoftwareDocumentationUrls( protected abstract List<Field<String>> prepareSoftwareDocumentationUrls(
Document doc, DataInfo info); Document doc,
DataInfo info);
protected abstract List<GeoLocation> prepareDatasetGeoLocations(Document doc, DataInfo info); protected abstract List<GeoLocation> prepareDatasetGeoLocations(Document doc, DataInfo info);
@ -358,26 +381,17 @@ public abstract class AbstractMdRecordToOafMapper {
final String vol = n.valueOf("@vol"); final String vol = n.valueOf("@vol");
final String edition = n.valueOf("@edition"); final String edition = n.valueOf("@edition");
if (StringUtils.isNotBlank(name)) { if (StringUtils.isNotBlank(name)) {
return journal( return journal(name, issnPrinted, issnOnline, issnLinking, ep, iss, sp, vol, edition, null, null, info);
name,
issnPrinted,
issnOnline,
issnLinking,
ep,
iss,
sp,
vol,
edition,
null,
null,
info);
} }
} }
return null; return null;
} }
protected Qualifier prepareQualifier( protected Qualifier prepareQualifier(
final Node node, final String xpath, final String schemeId, final String schemeName) { final Node node,
final String xpath,
final String schemeId,
final String schemeName) {
final String classId = node.valueOf(xpath); final String classId = node.valueOf(xpath);
final String className = code2name.get(classId); final String className = code2name.get(classId);
return qualifier(classId, className, schemeId, schemeName); return qualifier(classId, className, schemeId, schemeName);
@ -401,7 +415,10 @@ public abstract class AbstractMdRecordToOafMapper {
} }
protected List<StructuredProperty> prepareListStructProps( protected List<StructuredProperty> prepareListStructProps(
final Node node, final String xpath, final Qualifier qualifier, final DataInfo info) { final Node node,
final String xpath,
final Qualifier qualifier,
final DataInfo info) {
final List<StructuredProperty> res = new ArrayList<>(); final List<StructuredProperty> res = new ArrayList<>();
for (final Object o : node.selectNodes(xpath)) { for (final Object o : node.selectNodes(xpath)) {
final Node n = (Node) o; final Node n = (Node) o;
@ -411,19 +428,17 @@ public abstract class AbstractMdRecordToOafMapper {
} }
protected List<StructuredProperty> prepareListStructProps( protected List<StructuredProperty> prepareListStructProps(
final Node node, final String xpath, final DataInfo info) { final Node node,
final String xpath,
final DataInfo info) {
final List<StructuredProperty> res = new ArrayList<>(); final List<StructuredProperty> res = new ArrayList<>();
for (final Object o : node.selectNodes(xpath)) { for (final Object o : node.selectNodes(xpath)) {
final Node n = (Node) o; final Node n = (Node) o;
res res
.add( .add(
structuredProperty( structuredProperty(
n.getText(), n.getText(), n.valueOf("@classid"), n.valueOf("@classname"), n.valueOf("@schemeid"),
n.valueOf("@classid"), n.valueOf("@schemename"), info));
n.valueOf("@classname"),
n.valueOf("@schemeid"),
n.valueOf("@schemename"),
info));
} }
return res; return res;
} }
@ -449,8 +464,7 @@ public abstract class AbstractMdRecordToOafMapper {
final Node n = doc.selectSingleNode("//oaf:datainfo"); final Node n = doc.selectSingleNode("//oaf:datainfo");
if (n == null) { if (n == null) {
return dataInfo( return dataInfo(false, null, false, false, REPOSITORY_PROVENANCE_ACTIONS, "0.9");
false, null, false, false, REPOSITORY_PROVENANCE_ACTIONS, "0.9");
} }
final String paClassId = n.valueOf("./oaf:provenanceaction/@classid"); final String paClassId = n.valueOf("./oaf:provenanceaction/@classid");
@ -464,12 +478,8 @@ public abstract class AbstractMdRecordToOafMapper {
final String trust = n.valueOf("./oaf:trust"); final String trust = n.valueOf("./oaf:trust");
return dataInfo( return dataInfo(
deletedbyinference, deletedbyinference, inferenceprovenance, inferred, false,
inferenceprovenance, qualifier(paClassId, paClassName, paSchemeId, paSchemeName), trust);
inferred,
false,
qualifier(paClassId, paClassName, paSchemeId, paSchemeName),
trust);
} }
protected Field<String> prepareField(final Node node, final String xpath, final DataInfo info) { protected Field<String> prepareField(final Node node, final String xpath, final DataInfo info) {
@ -477,7 +487,9 @@ public abstract class AbstractMdRecordToOafMapper {
} }
protected List<Field<String>> prepareListFields( protected List<Field<String>> prepareListFields(
final Node node, final String xpath, final DataInfo info) { final Node node,
final String xpath,
final DataInfo info) {
return listFields(info, prepareListString(node, xpath)); return listFields(info, prepareListString(node, xpath));
} }

View File

@ -1,10 +1,19 @@
package eu.dnetlib.dhp.oa.graph.raw; package eu.dnetlib.dhp.oa.graph.raw;
import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.*; import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.createOpenaireId;
import static eu.dnetlib.dhp.schema.common.ModelConstants.*; import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.field;
import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.structuredProperty;
import static eu.dnetlib.dhp.schema.common.ModelConstants.DNET_ACCESS_MODES;
import static eu.dnetlib.dhp.schema.common.ModelConstants.DNET_LANGUAGES;
import static eu.dnetlib.dhp.schema.common.ModelConstants.DNET_PUBLICATION_RESOURCE;
import static eu.dnetlib.dhp.schema.common.ModelConstants.IS_RELATED_TO;
import static eu.dnetlib.dhp.schema.common.ModelConstants.PUBLICATION_DATASET;
import static eu.dnetlib.dhp.schema.common.ModelConstants.RESULT_RESULT;
import java.util.*; import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors; import java.util.stream.Collectors;
import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.StringUtils;
@ -15,8 +24,15 @@ import org.dom4j.Node;
import com.google.common.collect.Lists; import com.google.common.collect.Lists;
import eu.dnetlib.dhp.oa.graph.raw.common.PacePerson; import eu.dnetlib.dhp.oa.graph.raw.common.PacePerson;
import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.oaf.Author;
import eu.dnetlib.dhp.schema.oaf.*; import eu.dnetlib.dhp.schema.oaf.DataInfo;
import eu.dnetlib.dhp.schema.oaf.Field;
import eu.dnetlib.dhp.schema.oaf.GeoLocation;
import eu.dnetlib.dhp.schema.oaf.Instance;
import eu.dnetlib.dhp.schema.oaf.KeyValue;
import eu.dnetlib.dhp.schema.oaf.Oaf;
import eu.dnetlib.dhp.schema.oaf.Qualifier;
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
public class OafToOafMapper extends AbstractMdRecordToOafMapper { public class OafToOafMapper extends AbstractMdRecordToOafMapper {
@ -39,14 +55,25 @@ public class OafToOafMapper extends AbstractMdRecordToOafMapper {
author.setSurname(p.getNormalisedSurname()); author.setSurname(p.getNormalisedSurname());
} }
final String pid = e.attributeValue("nameIdentifier"); final String pid = e.valueOf("./@nameIdentifier");
final String pidType = e.attributeValue("nameIdentifierScheme"); final String type = e
.valueOf("./@nameIdentifierScheme")
.trim()
.toUpperCase()
.replaceAll(" ", "")
.replaceAll("_", "");
author.setPid(new ArrayList<>()); author.setPid(new ArrayList<>());
if (StringUtils.isNotBlank(pid) && StringUtils.isNotBlank(pidType)) {
author if (StringUtils.isNotBlank(pid)) {
.getPid() if (type.startsWith("ORCID")) {
.add(structuredProperty(pid, qualifier(pidType, pidType, DNET_PID_TYPES, DNET_PID_TYPES), info)); final String cleanedId = pid
.replaceAll("http://orcid.org/", "")
.replaceAll("https://orcid.org/", "");
author.getPid().add(structuredProperty(cleanedId, ORCID_PID_TYPE, info));
} else if (type.startsWith("MAGID")) {
author.getPid().add(structuredProperty(pid, MAG_PID_TYPE, info));
}
} }
res.add(author); res.add(author);
@ -104,28 +131,21 @@ public class OafToOafMapper extends AbstractMdRecordToOafMapper {
final Instance instance = new Instance(); final Instance instance = new Instance();
instance instance
.setInstancetype( .setInstancetype(
prepareQualifier( prepareQualifier(doc, "//dr:CobjCategory", DNET_PUBLICATION_RESOURCE, DNET_PUBLICATION_RESOURCE));
doc,
"//dr:CobjCategory",
DNET_PUBLICATION_RESOURCE,
DNET_PUBLICATION_RESOURCE));
instance.setCollectedfrom(collectedfrom); instance.setCollectedfrom(collectedfrom);
instance.setHostedby(hostedby); instance.setHostedby(hostedby);
instance.setDateofacceptance(field(doc.valueOf("//oaf:dateAccepted"), info)); instance.setDateofacceptance(field(doc.valueOf("//oaf:dateAccepted"), info));
instance.setDistributionlocation(doc.valueOf("//oaf:distributionlocation")); instance.setDistributionlocation(doc.valueOf("//oaf:distributionlocation"));
instance instance
.setAccessright( .setAccessright(prepareQualifier(doc, "//oaf:accessrights", DNET_ACCESS_MODES, DNET_ACCESS_MODES));
prepareQualifier(doc, "//oaf:accessrights", DNET_ACCESS_MODES, DNET_ACCESS_MODES));
instance.setLicense(field(doc.valueOf("//oaf:license"), info)); instance.setLicense(field(doc.valueOf("//oaf:license"), info));
instance.setRefereed(field(doc.valueOf("//oaf:refereed"), info)); instance.setRefereed(field(doc.valueOf("//oaf:refereed"), info));
instance instance
.setProcessingchargeamount( .setProcessingchargeamount(field(doc.valueOf("//oaf:processingchargeamount"), info));
field(doc.valueOf("//oaf:processingchargeamount"), info));
instance instance
.setProcessingchargecurrency( .setProcessingchargecurrency(field(doc.valueOf("//oaf:processingchargeamount/@currency"), info));
field(doc.valueOf("//oaf:processingchargeamount/@currency"), info));
List<Node> nodes = Lists.newArrayList(doc.selectNodes("//dc:identifier")); final List<Node> nodes = Lists.newArrayList(doc.selectNodes("//dc:identifier"));
instance instance
.setUrl( .setUrl(
nodes nodes
@ -158,19 +178,22 @@ public class OafToOafMapper extends AbstractMdRecordToOafMapper {
@Override @Override
protected Field<String> prepareSoftwareCodeRepositoryUrl( protected Field<String> prepareSoftwareCodeRepositoryUrl(
final Document doc, final DataInfo info) { final Document doc,
final DataInfo info) {
return null; // NOT PRESENT IN OAF return null; // NOT PRESENT IN OAF
} }
@Override @Override
protected List<StructuredProperty> prepareSoftwareLicenses( protected List<StructuredProperty> prepareSoftwareLicenses(
final Document doc, final DataInfo info) { final Document doc,
final DataInfo info) {
return new ArrayList<>(); // NOT PRESENT IN OAF return new ArrayList<>(); // NOT PRESENT IN OAF
} }
@Override @Override
protected List<Field<String>> prepareSoftwareDocumentationUrls( protected List<Field<String>> prepareSoftwareDocumentationUrls(
final Document doc, final DataInfo info) { final Document doc,
final DataInfo info) {
return new ArrayList<>(); // NOT PRESENT IN OAF return new ArrayList<>(); // NOT PRESENT IN OAF
} }
@ -182,13 +205,15 @@ public class OafToOafMapper extends AbstractMdRecordToOafMapper {
@Override @Override
protected Field<String> prepareDatasetMetadataVersionNumber( protected Field<String> prepareDatasetMetadataVersionNumber(
final Document doc, final DataInfo info) { final Document doc,
final DataInfo info) {
return null; // NOT PRESENT IN OAF return null; // NOT PRESENT IN OAF
} }
@Override @Override
protected Field<String> prepareDatasetLastMetadataUpdate( protected Field<String> prepareDatasetLastMetadataUpdate(
final Document doc, final DataInfo info) { final Document doc,
final DataInfo info) {
return null; // NOT PRESENT IN OAF return null; // NOT PRESENT IN OAF
} }
@ -216,19 +241,22 @@ public class OafToOafMapper extends AbstractMdRecordToOafMapper {
@Override @Override
protected List<Field<String>> prepareOtherResearchProductTools( protected List<Field<String>> prepareOtherResearchProductTools(
final Document doc, final DataInfo info) { final Document doc,
final DataInfo info) {
return new ArrayList<>(); // NOT PRESENT IN OAF return new ArrayList<>(); // NOT PRESENT IN OAF
} }
@Override @Override
protected List<Field<String>> prepareOtherResearchProductContactGroups( protected List<Field<String>> prepareOtherResearchProductContactGroups(
final Document doc, final DataInfo info) { final Document doc,
final DataInfo info) {
return new ArrayList<>(); // NOT PRESENT IN OAF return new ArrayList<>(); // NOT PRESENT IN OAF
} }
@Override @Override
protected List<Field<String>> prepareOtherResearchProductContactPersons( protected List<Field<String>> prepareOtherResearchProductContactPersons(
final Document doc, final DataInfo info) { final Document doc,
final DataInfo info) {
return new ArrayList<>(); // NOT PRESENT IN OAF return new ArrayList<>(); // NOT PRESENT IN OAF
} }

View File

@ -4,16 +4,31 @@ package eu.dnetlib.dhp.oa.graph.raw;
import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.createOpenaireId; import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.createOpenaireId;
import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.field; import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.field;
import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.structuredProperty; import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.structuredProperty;
import static eu.dnetlib.dhp.schema.common.ModelConstants.*; import static eu.dnetlib.dhp.schema.common.ModelConstants.DNET_ACCESS_MODES;
import static eu.dnetlib.dhp.schema.common.ModelConstants.DNET_DATA_CITE_DATE;
import static eu.dnetlib.dhp.schema.common.ModelConstants.DNET_DATA_CITE_RESOURCE;
import static eu.dnetlib.dhp.schema.common.ModelConstants.DNET_LANGUAGES;
import static eu.dnetlib.dhp.schema.common.ModelConstants.DNET_PUBLICATION_RESOURCE;
import static eu.dnetlib.dhp.schema.common.ModelConstants.HAS_PARTS;
import static eu.dnetlib.dhp.schema.common.ModelConstants.IS_PART_OF;
import static eu.dnetlib.dhp.schema.common.ModelConstants.IS_SUPPLEMENTED_BY;
import static eu.dnetlib.dhp.schema.common.ModelConstants.IS_SUPPLEMENT_TO;
import static eu.dnetlib.dhp.schema.common.ModelConstants.PART;
import static eu.dnetlib.dhp.schema.common.ModelConstants.RESULT_RESULT;
import static eu.dnetlib.dhp.schema.common.ModelConstants.SUPPLEMENT;
import java.util.*; import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.StringUtils;
import org.dom4j.Document; import org.dom4j.Document;
import org.dom4j.Node; import org.dom4j.Node;
import eu.dnetlib.dhp.oa.graph.raw.common.PacePerson; import eu.dnetlib.dhp.oa.graph.raw.common.PacePerson;
import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.oaf.Author; import eu.dnetlib.dhp.schema.oaf.Author;
import eu.dnetlib.dhp.schema.oaf.DataInfo; import eu.dnetlib.dhp.schema.oaf.DataInfo;
import eu.dnetlib.dhp.schema.oaf.Field; import eu.dnetlib.dhp.schema.oaf.Field;
@ -22,7 +37,6 @@ import eu.dnetlib.dhp.schema.oaf.Instance;
import eu.dnetlib.dhp.schema.oaf.KeyValue; import eu.dnetlib.dhp.schema.oaf.KeyValue;
import eu.dnetlib.dhp.schema.oaf.Oaf; import eu.dnetlib.dhp.schema.oaf.Oaf;
import eu.dnetlib.dhp.schema.oaf.Qualifier; import eu.dnetlib.dhp.schema.oaf.Qualifier;
import eu.dnetlib.dhp.schema.oaf.Relation;
import eu.dnetlib.dhp.schema.oaf.StructuredProperty; import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
public class OdfToOafMapper extends AbstractMdRecordToOafMapper { public class OdfToOafMapper extends AbstractMdRecordToOafMapper {
@ -48,7 +62,7 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper {
final String fullname = n.valueOf("./datacite:creatorName"); final String fullname = n.valueOf("./datacite:creatorName");
author.setFullname(fullname); author.setFullname(fullname);
PacePerson pp = new PacePerson(fullname, false); final PacePerson pp = new PacePerson(fullname, false);
final String name = n.valueOf("./datacite:givenName"); final String name = n.valueOf("./datacite:givenName");
if (StringUtils.isBlank(name) & pp.isAccurate()) { if (StringUtils.isBlank(name) & pp.isAccurate()) {
author.setName(pp.getNormalisedFirstName()); author.setName(pp.getNormalisedFirstName());
@ -63,6 +77,10 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper {
author.setSurname(surname); author.setSurname(surname);
} }
if (StringUtils.isBlank(author.getFullname())) {
author.setFullname(String.format("%s, %s", author.getSurname(), author.getName()));
}
author.setAffiliation(prepareListFields(n, "./datacite:affiliation", info)); author.setAffiliation(prepareListFields(n, "./datacite:affiliation", info));
author.setPid(preparePids(n, info)); author.setPid(preparePids(n, info));
author.setRank(pos++); author.setRank(pos++);
@ -74,13 +92,21 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper {
private List<StructuredProperty> preparePids(final Node n, final DataInfo info) { private List<StructuredProperty> preparePids(final Node n, final DataInfo info) {
final List<StructuredProperty> res = new ArrayList<>(); final List<StructuredProperty> res = new ArrayList<>();
for (final Object o : n.selectNodes("./datacite:nameIdentifier")) { for (final Object o : n.selectNodes("./datacite:nameIdentifier")) {
res
.add( final String id = ((Node) o).getText();
structuredProperty( final String type = ((Node) o)
((Node) o).getText(), .valueOf("./@nameIdentifierScheme")
prepareQualifier( .trim()
(Node) o, "./@nameIdentifierScheme", DNET_PID_TYPES, DNET_PID_TYPES), .toUpperCase()
info)); .replaceAll(" ", "")
.replaceAll("_", "");
if (type.startsWith("ORCID")) {
final String cleanedId = id.replaceAll("http://orcid.org/", "").replaceAll("https://orcid.org/", "");
res.add(structuredProperty(cleanedId, ORCID_PID_TYPE, info));
} else if (type.startsWith("MAGID")) {
res.add(structuredProperty(id, MAG_PID_TYPE, info));
}
} }
return res; return res;
} }
@ -95,21 +121,18 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper {
final Instance instance = new Instance(); final Instance instance = new Instance();
instance instance
.setInstancetype( .setInstancetype(
prepareQualifier( prepareQualifier(doc, "//dr:CobjCategory", DNET_PUBLICATION_RESOURCE, DNET_PUBLICATION_RESOURCE));
doc, "//dr:CobjCategory", DNET_PUBLICATION_RESOURCE, DNET_PUBLICATION_RESOURCE));
instance.setCollectedfrom(collectedfrom); instance.setCollectedfrom(collectedfrom);
instance.setHostedby(hostedby); instance.setHostedby(hostedby);
instance.setDateofacceptance(field(doc.valueOf("//oaf:dateAccepted"), info)); instance.setDateofacceptance(field(doc.valueOf("//oaf:dateAccepted"), info));
instance.setDistributionlocation(doc.valueOf("//oaf:distributionlocation")); instance.setDistributionlocation(doc.valueOf("//oaf:distributionlocation"));
instance instance
.setAccessright( .setAccessright(prepareQualifier(doc, "//oaf:accessrights", DNET_ACCESS_MODES, DNET_ACCESS_MODES));
prepareQualifier(doc, "//oaf:accessrights", DNET_ACCESS_MODES, DNET_ACCESS_MODES));
instance.setLicense(field(doc.valueOf("//oaf:license"), info)); instance.setLicense(field(doc.valueOf("//oaf:license"), info));
instance.setRefereed(field(doc.valueOf("//oaf:refereed"), info)); instance.setRefereed(field(doc.valueOf("//oaf:refereed"), info));
instance.setProcessingchargeamount(field(doc.valueOf("//oaf:processingchargeamount"), info)); instance.setProcessingchargeamount(field(doc.valueOf("//oaf:processingchargeamount"), info));
instance instance
.setProcessingchargecurrency( .setProcessingchargecurrency(field(doc.valueOf("//oaf:processingchargeamount/@currency"), info));
field(doc.valueOf("//oaf:processingchargeamount/@currency"), info));
final Set<String> url = new HashSet<>(); final Set<String> url = new HashSet<>();
for (final Object o : doc.selectNodes("//datacite:alternateIdentifier[@alternateIdentifierType='URL']")) { for (final Object o : doc.selectNodes("//datacite:alternateIdentifier[@alternateIdentifierType='URL']")) {
@ -149,11 +172,7 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper {
res res
.add( .add(
structuredProperty( structuredProperty(
((Node) o).getText(), ((Node) o).getText(), "UNKNOWN", "UNKNOWN", DNET_DATA_CITE_DATE, DNET_DATA_CITE_DATE,
"UNKNOWN",
"UNKNOWN",
DNET_DATA_CITE_DATE,
DNET_DATA_CITE_DATE,
info)); info));
} }
} }
@ -197,53 +216,52 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper {
@Override @Override
protected List<Field<String>> prepareOtherResearchProductTools( protected List<Field<String>> prepareOtherResearchProductTools(
final Document doc, final DataInfo info) { final Document doc,
final DataInfo info) {
return new ArrayList<>(); // Not present in ODF ??? return new ArrayList<>(); // Not present in ODF ???
} }
@Override @Override
protected List<Field<String>> prepareOtherResearchProductContactGroups( protected List<Field<String>> prepareOtherResearchProductContactGroups(
final Document doc, final DataInfo info) { final Document doc,
final DataInfo info) {
return prepareListFields( return prepareListFields(
doc, doc, "//datacite:contributor[@contributorType='ContactGroup']/datacite:contributorName", info);
"//datacite:contributor[@contributorType='ContactGroup']/datacite:contributorName",
info);
} }
@Override @Override
protected List<Field<String>> prepareOtherResearchProductContactPersons( protected List<Field<String>> prepareOtherResearchProductContactPersons(
final Document doc, final DataInfo info) { final Document doc,
final DataInfo info) {
return prepareListFields( return prepareListFields(
doc, doc, "//datacite:contributor[@contributorType='ContactPerson']/datacite:contributorName", info);
"//datacite:contributor[@contributorType='ContactPerson']/datacite:contributorName",
info);
} }
@Override @Override
protected Qualifier prepareSoftwareProgrammingLanguage(final Document doc, final DataInfo info) { protected Qualifier prepareSoftwareProgrammingLanguage(final Document doc, final DataInfo info) {
return prepareQualifier( return prepareQualifier(doc, "//datacite:format", "dnet:programming_languages", "dnet:programming_languages");
doc, "//datacite:format", "dnet:programming_languages", "dnet:programming_languages");
} }
@Override @Override
protected Field<String> prepareSoftwareCodeRepositoryUrl( protected Field<String> prepareSoftwareCodeRepositoryUrl(
final Document doc, final DataInfo info) { final Document doc,
final DataInfo info) {
return null; // Not present in ODF ??? return null; // Not present in ODF ???
} }
@Override @Override
protected List<StructuredProperty> prepareSoftwareLicenses( protected List<StructuredProperty> prepareSoftwareLicenses(
final Document doc, final DataInfo info) { final Document doc,
final DataInfo info) {
return new ArrayList<>(); // Not present in ODF ??? return new ArrayList<>(); // Not present in ODF ???
} }
@Override @Override
protected List<Field<String>> prepareSoftwareDocumentationUrls( protected List<Field<String>> prepareSoftwareDocumentationUrls(
final Document doc, final DataInfo info) { final Document doc,
final DataInfo info) {
return prepareListFields( return prepareListFields(
doc, doc, "//datacite:relatedIdentifier[@relatedIdentifierType='URL' and @relationType='IsDocumentedBy']", info);
"//datacite:relatedIdentifier[@relatedIdentifierType='URL' and @relationType='IsDocumentedBy']",
info);
} }
// DATASETS // DATASETS
@ -264,13 +282,15 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper {
@Override @Override
protected Field<String> prepareDatasetMetadataVersionNumber( protected Field<String> prepareDatasetMetadataVersionNumber(
final Document doc, final DataInfo info) { final Document doc,
final DataInfo info) {
return null; // Not present in ODF ??? return null; // Not present in ODF ???
} }
@Override @Override
protected Field<String> prepareDatasetLastMetadataUpdate( protected Field<String> prepareDatasetLastMetadataUpdate(
final Document doc, final DataInfo info) { final Document doc,
final DataInfo info) {
return prepareField(doc, "//datacite:date[@dateType='Updated']", info); return prepareField(doc, "//datacite:date[@dateType='Updated']", info);
} }
@ -346,9 +366,7 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper {
@Override @Override
protected Qualifier prepareResourceType(final Document doc, final DataInfo info) { protected Qualifier prepareResourceType(final Document doc, final DataInfo info) {
return prepareQualifier( return prepareQualifier(
doc, doc, "//*[local-name() = 'resource']//*[local-name() = 'resourceType']", DNET_DATA_CITE_RESOURCE,
"//*[local-name() = 'resource']//*[local-name() = 'resourceType']",
DNET_DATA_CITE_RESOURCE,
DNET_DATA_CITE_RESOURCE); DNET_DATA_CITE_RESOURCE);
} }
} }

View File

@ -21,7 +21,14 @@ import org.mockito.Mock;
import org.mockito.junit.jupiter.MockitoExtension; import org.mockito.junit.jupiter.MockitoExtension;
import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.oaf.*; import eu.dnetlib.dhp.schema.oaf.Author;
import eu.dnetlib.dhp.schema.oaf.Dataset;
import eu.dnetlib.dhp.schema.oaf.Field;
import eu.dnetlib.dhp.schema.oaf.Oaf;
import eu.dnetlib.dhp.schema.oaf.Publication;
import eu.dnetlib.dhp.schema.oaf.Relation;
import eu.dnetlib.dhp.schema.oaf.Software;
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
@ExtendWith(MockitoExtension.class) @ExtendWith(MockitoExtension.class)
public class MappersTest { public class MappersTest {
@ -54,13 +61,13 @@ public class MappersTest {
assertTrue(StringUtils.isNotBlank(p.getTitle().get(0).getValue())); assertTrue(StringUtils.isNotBlank(p.getTitle().get(0).getValue()));
assertTrue(p.getAuthor().size() > 0); assertTrue(p.getAuthor().size() > 0);
Optional<Author> author = p final Optional<Author> author = p
.getAuthor() .getAuthor()
.stream() .stream()
.filter(a -> a.getPid() != null && !a.getPid().isEmpty()) .filter(a -> a.getPid() != null && !a.getPid().isEmpty())
.findFirst(); .findFirst();
assertTrue(author.isPresent()); assertTrue(author.isPresent());
StructuredProperty pid = author final StructuredProperty pid = author
.get() .get()
.getPid() .getPid()
.stream() .stream()
@ -68,7 +75,7 @@ public class MappersTest {
.get(); .get();
assertEquals("0000-0001-6651-1178", pid.getValue()); assertEquals("0000-0001-6651-1178", pid.getValue());
assertEquals("ORCID", pid.getQualifier().getClassid()); assertEquals("ORCID", pid.getQualifier().getClassid());
assertEquals("ORCID", pid.getQualifier().getClassname()); assertEquals("Open Researcher and Contributor ID", pid.getQualifier().getClassname());
assertEquals(ModelConstants.DNET_PID_TYPES, pid.getQualifier().getSchemeid()); assertEquals(ModelConstants.DNET_PID_TYPES, pid.getQualifier().getSchemeid());
assertEquals(ModelConstants.DNET_PID_TYPES, pid.getQualifier().getSchemename()); assertEquals(ModelConstants.DNET_PID_TYPES, pid.getQualifier().getSchemename());
assertEquals("Votsi,Nefta", author.get().getFullname()); assertEquals("Votsi,Nefta", author.get().getFullname());
@ -121,13 +128,13 @@ public class MappersTest {
assertTrue(StringUtils.isNotBlank(d.getTitle().get(0).getValue())); assertTrue(StringUtils.isNotBlank(d.getTitle().get(0).getValue()));
assertTrue(d.getAuthor().size() > 0); assertTrue(d.getAuthor().size() > 0);
Optional<Author> author = d final Optional<Author> author = d
.getAuthor() .getAuthor()
.stream() .stream()
.filter(a -> a.getPid() != null && !a.getPid().isEmpty()) .filter(a -> a.getPid() != null && !a.getPid().isEmpty())
.findFirst(); .findFirst();
assertTrue(author.isPresent()); assertTrue(author.isPresent());
StructuredProperty pid = author final StructuredProperty pid = author
.get() .get()
.getPid() .getPid()
.stream() .stream()
@ -135,7 +142,7 @@ public class MappersTest {
.get(); .get();
assertEquals("0000-0001-9074-1619", pid.getValue()); assertEquals("0000-0001-9074-1619", pid.getValue());
assertEquals("ORCID", pid.getQualifier().getClassid()); assertEquals("ORCID", pid.getQualifier().getClassid());
assertEquals("ORCID", pid.getQualifier().getClassname()); assertEquals("Open Researcher and Contributor ID", pid.getQualifier().getClassname());
assertEquals(ModelConstants.DNET_PID_TYPES, pid.getQualifier().getSchemeid()); assertEquals(ModelConstants.DNET_PID_TYPES, pid.getQualifier().getSchemeid());
assertEquals(ModelConstants.DNET_PID_TYPES, pid.getQualifier().getSchemename()); assertEquals(ModelConstants.DNET_PID_TYPES, pid.getQualifier().getSchemename());
assertEquals("Baracchini, Theo", author.get().getFullname()); assertEquals("Baracchini, Theo", author.get().getFullname());
@ -143,13 +150,13 @@ public class MappersTest {
assertEquals("Theo", author.get().getName()); assertEquals("Theo", author.get().getName());
assertEquals(1, author.get().getAffiliation().size()); assertEquals(1, author.get().getAffiliation().size());
Optional<Field<String>> opAff = author final Optional<Field<String>> opAff = author
.get() .get()
.getAffiliation() .getAffiliation()
.stream() .stream()
.findFirst(); .findFirst();
assertTrue(opAff.isPresent()); assertTrue(opAff.isPresent());
Field<String> affiliation = opAff.get(); final Field<String> affiliation = opAff.get();
assertEquals("ISTI-CNR", affiliation.getValue()); assertEquals("ISTI-CNR", affiliation.getValue());
assertTrue(d.getSubject().size() > 0); assertTrue(d.getSubject().size() > 0);