forked from D-Net/dnet-hadoop
Merge branch 'master' into provision_indexing
This commit is contained in:
commit
946598cfba
|
@ -1,21 +1,24 @@
|
||||||
package eu.dnetlib.dhp.oa.dedup;
|
|
||||||
|
|
||||||
import com.wcohen.ss.JaroWinkler;
|
package eu.dnetlib.dhp.oa.dedup;
|
||||||
import eu.dnetlib.dhp.schema.oaf.Author;
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
|
|
||||||
import eu.dnetlib.pace.model.Person;
|
|
||||||
import org.apache.commons.lang3.StringUtils;
|
|
||||||
import scala.Tuple2;
|
|
||||||
|
|
||||||
import java.text.Normalizer;
|
import java.text.Normalizer;
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
import java.util.stream.Collectors;
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
|
import org.apache.commons.lang3.StringUtils;
|
||||||
|
|
||||||
|
import com.wcohen.ss.JaroWinkler;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.Author;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
|
||||||
|
import eu.dnetlib.pace.model.Person;
|
||||||
|
import scala.Tuple2;
|
||||||
|
|
||||||
public class AuthorMerger {
|
public class AuthorMerger {
|
||||||
|
|
||||||
private static final Double THRESHOLD = 0.95;
|
private static final Double THRESHOLD = 0.95;
|
||||||
|
|
||||||
public static List<Author> merge(List<List<Author>> authors){
|
public static List<Author> merge(List<List<Author>> authors) {
|
||||||
|
|
||||||
authors.sort(new Comparator<List<Author>>() {
|
authors.sort(new Comparator<List<Author>>() {
|
||||||
@Override
|
@Override
|
||||||
|
@ -26,7 +29,7 @@ public class AuthorMerger {
|
||||||
|
|
||||||
List<Author> author = new ArrayList<>();
|
List<Author> author = new ArrayList<>();
|
||||||
|
|
||||||
for(List<Author> a : authors){
|
for (List<Author> a : authors) {
|
||||||
author = mergeAuthor(author, a);
|
author = mergeAuthor(author, a);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -93,8 +96,10 @@ public class AuthorMerger {
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
public static String pidToComparableString(StructuredProperty pid){
|
public static String pidToComparableString(StructuredProperty pid) {
|
||||||
return (pid.getQualifier()!=null? pid.getQualifier().getClassid()!=null?pid.getQualifier().getClassid().toLowerCase():"":"") + (pid.getValue()!=null? pid.getValue().toLowerCase():"");
|
return (pid.getQualifier() != null
|
||||||
|
? pid.getQualifier().getClassid() != null ? pid.getQualifier().getClassid().toLowerCase() : ""
|
||||||
|
: "") + (pid.getValue() != null ? pid.getValue().toLowerCase() : "");
|
||||||
}
|
}
|
||||||
|
|
||||||
public static int countAuthorsPids(List<Author> authors) {
|
public static int countAuthorsPids(List<Author> authors) {
|
||||||
|
|
|
@ -1,3 +1,4 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.oa.dedup;
|
package eu.dnetlib.dhp.oa.dedup;
|
||||||
|
|
||||||
import java.io.Serializable;
|
import java.io.Serializable;
|
||||||
|
@ -73,7 +74,8 @@ public class DedupRecordFactory {
|
||||||
}
|
}
|
||||||
|
|
||||||
public static <T extends OafEntity> T entityMerger(
|
public static <T extends OafEntity> T entityMerger(
|
||||||
String id, Iterator<Tuple2<String, T>> entities, long ts, DataInfo dataInfo, Class<T> clazz) throws IllegalAccessException, InstantiationException {
|
String id, Iterator<Tuple2<String, T>> entities, long ts, DataInfo dataInfo, Class<T> clazz)
|
||||||
|
throws IllegalAccessException, InstantiationException {
|
||||||
|
|
||||||
T entity = clazz.newInstance();
|
T entity = clazz.newInstance();
|
||||||
|
|
||||||
|
@ -87,14 +89,14 @@ public class DedupRecordFactory {
|
||||||
entity.mergeFrom(duplicate);
|
entity.mergeFrom(duplicate);
|
||||||
if (ModelSupport.isSubClass(duplicate, Result.class)) {
|
if (ModelSupport.isSubClass(duplicate, Result.class)) {
|
||||||
Result r1 = (Result) duplicate;
|
Result r1 = (Result) duplicate;
|
||||||
if (r1.getAuthor() != null && r1.getAuthor().size()>0)
|
if (r1.getAuthor() != null && r1.getAuthor().size() > 0)
|
||||||
authors.add(r1.getAuthor());
|
authors.add(r1.getAuthor());
|
||||||
if (r1.getDateofacceptance() != null)
|
if (r1.getDateofacceptance() != null)
|
||||||
dates.add(r1.getDateofacceptance().getValue());
|
dates.add(r1.getDateofacceptance().getValue());
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
//set authors and date
|
// set authors and date
|
||||||
if (ModelSupport.isSubClass(entity, Result.class)) {
|
if (ModelSupport.isSubClass(entity, Result.class)) {
|
||||||
((Result) entity).setDateofacceptance(DatePicker.pick(dates));
|
((Result) entity).setDateofacceptance(DatePicker.pick(dates));
|
||||||
((Result) entity).setAuthor(AuthorMerger.merge(authors));
|
((Result) entity).setAuthor(AuthorMerger.merge(authors));
|
||||||
|
|
|
@ -1,5 +1,8 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.oa.dedup;
|
package eu.dnetlib.dhp.oa.dedup;
|
||||||
|
|
||||||
|
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||||
|
|
||||||
import java.io.BufferedReader;
|
import java.io.BufferedReader;
|
||||||
import java.io.FileReader;
|
import java.io.FileReader;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
@ -7,15 +10,13 @@ import java.io.Serializable;
|
||||||
import java.nio.file.Paths;
|
import java.nio.file.Paths;
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.*;
|
|
||||||
import eu.dnetlib.pace.util.MapDocumentUtil;
|
|
||||||
import org.codehaus.jackson.map.ObjectMapper;
|
import org.codehaus.jackson.map.ObjectMapper;
|
||||||
import org.junit.jupiter.api.BeforeEach;
|
import org.junit.jupiter.api.BeforeEach;
|
||||||
|
|
||||||
import org.junit.jupiter.api.Test;
|
import org.junit.jupiter.api.Test;
|
||||||
import scala.Tuple2;
|
|
||||||
|
|
||||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
import eu.dnetlib.dhp.schema.oaf.*;
|
||||||
|
import eu.dnetlib.pace.util.MapDocumentUtil;
|
||||||
|
import scala.Tuple2;
|
||||||
|
|
||||||
public class EntityMergerTest implements Serializable {
|
public class EntityMergerTest implements Serializable {
|
||||||
|
|
||||||
|
@ -45,7 +46,8 @@ public class EntityMergerTest implements Serializable {
|
||||||
@Test
|
@Test
|
||||||
public void publicationMergerTest() throws InstantiationException, IllegalAccessException {
|
public void publicationMergerTest() throws InstantiationException, IllegalAccessException {
|
||||||
|
|
||||||
Publication pub_merged = DedupRecordFactory.entityMerger(dedupId, publications.iterator(), 0, dataInfo, Publication.class);
|
Publication pub_merged = DedupRecordFactory
|
||||||
|
.entityMerger(dedupId, publications.iterator(), 0, dataInfo, Publication.class);
|
||||||
|
|
||||||
assertEquals(dedupId, pub_merged.getId());
|
assertEquals(dedupId, pub_merged.getId());
|
||||||
|
|
||||||
|
@ -59,36 +61,36 @@ public class EntityMergerTest implements Serializable {
|
||||||
assertEquals(pub_merged.getDateoftransformation(), pub_top.getDateoftransformation());
|
assertEquals(pub_merged.getDateoftransformation(), pub_top.getDateoftransformation());
|
||||||
assertEquals(pub_merged.getOaiprovenance(), pub_top.getOaiprovenance());
|
assertEquals(pub_merged.getOaiprovenance(), pub_top.getOaiprovenance());
|
||||||
assertEquals(pub_merged.getDateofcollection(), pub_top.getDateofcollection());
|
assertEquals(pub_merged.getDateofcollection(), pub_top.getDateofcollection());
|
||||||
assertEquals(pub_merged.getInstance().size(),3);
|
assertEquals(pub_merged.getInstance().size(), 3);
|
||||||
assertEquals(pub_merged.getCountry().size(), 2);
|
assertEquals(pub_merged.getCountry().size(), 2);
|
||||||
assertEquals(pub_merged.getSubject().size(), 0);
|
assertEquals(pub_merged.getSubject().size(), 0);
|
||||||
assertEquals(pub_merged.getTitle().size(), 2);
|
assertEquals(pub_merged.getTitle().size(), 2);
|
||||||
assertEquals(pub_merged.getRelevantdate().size(),0);
|
assertEquals(pub_merged.getRelevantdate().size(), 0);
|
||||||
assertEquals(pub_merged.getDescription().size(),0);
|
assertEquals(pub_merged.getDescription().size(), 0);
|
||||||
assertEquals(pub_merged.getSource().size(),0);
|
assertEquals(pub_merged.getSource().size(), 0);
|
||||||
assertEquals(pub_merged.getFulltext().size(),0);
|
assertEquals(pub_merged.getFulltext().size(), 0);
|
||||||
assertEquals(pub_merged.getFormat().size(),0);
|
assertEquals(pub_merged.getFormat().size(), 0);
|
||||||
assertEquals(pub_merged.getContributor().size(),0);
|
assertEquals(pub_merged.getContributor().size(), 0);
|
||||||
assertEquals(pub_merged.getCoverage().size(),0);
|
assertEquals(pub_merged.getCoverage().size(), 0);
|
||||||
assertEquals(pub_merged.getContext().size(),0);
|
assertEquals(pub_merged.getContext().size(), 0);
|
||||||
assertEquals(pub_merged.getExternalReference().size(),0);
|
assertEquals(pub_merged.getExternalReference().size(), 0);
|
||||||
assertEquals(pub_merged.getOriginalId().size(),3);
|
assertEquals(pub_merged.getOriginalId().size(), 3);
|
||||||
assertEquals(pub_merged.getCollectedfrom().size(),3);
|
assertEquals(pub_merged.getCollectedfrom().size(), 3);
|
||||||
assertEquals(pub_merged.getPid().size(),1);
|
assertEquals(pub_merged.getPid().size(), 1);
|
||||||
assertEquals(pub_merged.getExtraInfo().size(),0);
|
assertEquals(pub_merged.getExtraInfo().size(), 0);
|
||||||
|
|
||||||
//verify datainfo
|
// verify datainfo
|
||||||
assertEquals(pub_merged.getDataInfo(), dataInfo);
|
assertEquals(pub_merged.getDataInfo(), dataInfo);
|
||||||
|
|
||||||
//verify datepicker
|
// verify datepicker
|
||||||
assertEquals(pub_merged.getDateofacceptance().getValue(), "2018-09-30");
|
assertEquals(pub_merged.getDateofacceptance().getValue(), "2018-09-30");
|
||||||
|
|
||||||
//verify authors
|
// verify authors
|
||||||
assertEquals(pub_merged.getAuthor().size(), 9);
|
assertEquals(pub_merged.getAuthor().size(), 9);
|
||||||
assertEquals(AuthorMerger.countAuthorsPids(pub_merged.getAuthor()), 4);
|
assertEquals(AuthorMerger.countAuthorsPids(pub_merged.getAuthor()), 4);
|
||||||
}
|
}
|
||||||
|
|
||||||
public DataInfo setDI(){
|
public DataInfo setDI() {
|
||||||
DataInfo dataInfo = new DataInfo();
|
DataInfo dataInfo = new DataInfo();
|
||||||
dataInfo.setTrust("0.9");
|
dataInfo.setTrust("0.9");
|
||||||
dataInfo.setDeletedbyinference(false);
|
dataInfo.setDeletedbyinference(false);
|
||||||
|
@ -97,13 +99,13 @@ public class EntityMergerTest implements Serializable {
|
||||||
return dataInfo;
|
return dataInfo;
|
||||||
}
|
}
|
||||||
|
|
||||||
public Publication getTopPub(List<Tuple2<String, Publication>> publications){
|
public Publication getTopPub(List<Tuple2<String, Publication>> publications) {
|
||||||
|
|
||||||
Double maxTrust = 0.0;
|
Double maxTrust = 0.0;
|
||||||
Publication maxPub = new Publication();
|
Publication maxPub = new Publication();
|
||||||
for (Tuple2<String, Publication> publication : publications) {
|
for (Tuple2<String, Publication> publication : publications) {
|
||||||
Double pubTrust = Double.parseDouble(publication._2().getDataInfo().getTrust());
|
Double pubTrust = Double.parseDouble(publication._2().getDataInfo().getTrust());
|
||||||
if(pubTrust > maxTrust){
|
if (pubTrust > maxTrust) {
|
||||||
maxTrust = pubTrust;
|
maxTrust = pubTrust;
|
||||||
maxPub = publication._2();
|
maxPub = publication._2();
|
||||||
}
|
}
|
||||||
|
@ -118,11 +120,11 @@ public class EntityMergerTest implements Serializable {
|
||||||
reader = new BufferedReader(new FileReader(path));
|
reader = new BufferedReader(new FileReader(path));
|
||||||
String line = reader.readLine();
|
String line = reader.readLine();
|
||||||
while (line != null) {
|
while (line != null) {
|
||||||
res.add(
|
res
|
||||||
|
.add(
|
||||||
new Tuple2<>(
|
new Tuple2<>(
|
||||||
MapDocumentUtil.getJPathString("$.id", line),
|
MapDocumentUtil.getJPathString("$.id", line),
|
||||||
new ObjectMapper().readValue(line, clazz))
|
new ObjectMapper().readValue(line, clazz)));
|
||||||
);
|
|
||||||
// read next line
|
// read next line
|
||||||
line = reader.readLine();
|
line = reader.readLine();
|
||||||
}
|
}
|
||||||
|
@ -134,5 +136,4 @@ public class EntityMergerTest implements Serializable {
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -10,7 +10,16 @@ import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.listFields;
|
||||||
import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.oaiIProvenance;
|
import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.oaiIProvenance;
|
||||||
import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.qualifier;
|
import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.qualifier;
|
||||||
import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.structuredProperty;
|
import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.structuredProperty;
|
||||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.*;
|
import static eu.dnetlib.dhp.schema.common.ModelConstants.DATASET_DEFAULT_RESULTTYPE;
|
||||||
|
import static eu.dnetlib.dhp.schema.common.ModelConstants.DNET_PID_TYPES;
|
||||||
|
import static eu.dnetlib.dhp.schema.common.ModelConstants.IS_PRODUCED_BY;
|
||||||
|
import static eu.dnetlib.dhp.schema.common.ModelConstants.ORP_DEFAULT_RESULTTYPE;
|
||||||
|
import static eu.dnetlib.dhp.schema.common.ModelConstants.OUTCOME;
|
||||||
|
import static eu.dnetlib.dhp.schema.common.ModelConstants.PRODUCES;
|
||||||
|
import static eu.dnetlib.dhp.schema.common.ModelConstants.PUBLICATION_DEFAULT_RESULTTYPE;
|
||||||
|
import static eu.dnetlib.dhp.schema.common.ModelConstants.REPOSITORY_PROVENANCE_ACTIONS;
|
||||||
|
import static eu.dnetlib.dhp.schema.common.ModelConstants.RESULT_PROJECT;
|
||||||
|
import static eu.dnetlib.dhp.schema.common.ModelConstants.SOFTWARE_DEFAULT_RESULTTYPE;
|
||||||
|
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
|
@ -50,6 +59,10 @@ public abstract class AbstractMdRecordToOafMapper {
|
||||||
|
|
||||||
protected static final String DATACITE_SCHEMA_KERNEL_4 = "http://datacite.org/schema/kernel-4";
|
protected static final String DATACITE_SCHEMA_KERNEL_4 = "http://datacite.org/schema/kernel-4";
|
||||||
protected static final String DATACITE_SCHEMA_KERNEL_3 = "http://datacite.org/schema/kernel-3";
|
protected static final String DATACITE_SCHEMA_KERNEL_3 = "http://datacite.org/schema/kernel-3";
|
||||||
|
protected static final Qualifier ORCID_PID_TYPE = qualifier(
|
||||||
|
"ORCID", "Open Researcher and Contributor ID", DNET_PID_TYPES, DNET_PID_TYPES);
|
||||||
|
protected static final Qualifier MAG_PID_TYPE = qualifier(
|
||||||
|
"MAGIdentifier", "Microsoft Academic Graph Identifier", DNET_PID_TYPES, DNET_PID_TYPES);
|
||||||
|
|
||||||
protected static final Map<String, String> nsContext = new HashMap<>();
|
protected static final Map<String, String> nsContext = new HashMap<>();
|
||||||
|
|
||||||
|
@ -75,8 +88,7 @@ public abstract class AbstractMdRecordToOafMapper {
|
||||||
DocumentFactory.getInstance().setXPathNamespaceURIs(nsContext);
|
DocumentFactory.getInstance().setXPathNamespaceURIs(nsContext);
|
||||||
|
|
||||||
final Document doc = DocumentHelper
|
final Document doc = DocumentHelper
|
||||||
.parseText(
|
.parseText(xml.replaceAll(DATACITE_SCHEMA_KERNEL_4, DATACITE_SCHEMA_KERNEL_3));
|
||||||
xml.replaceAll(DATACITE_SCHEMA_KERNEL_4, DATACITE_SCHEMA_KERNEL_3));
|
|
||||||
|
|
||||||
final String type = doc.valueOf("//dr:CobjCategory/@type");
|
final String type = doc.valueOf("//dr:CobjCategory/@type");
|
||||||
final KeyValue collectedFrom = getProvenanceDatasource(
|
final KeyValue collectedFrom = getProvenanceDatasource(
|
||||||
|
@ -103,7 +115,7 @@ public abstract class AbstractMdRecordToOafMapper {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private KeyValue getProvenanceDatasource(Document doc, String xpathId, String xpathName) {
|
private KeyValue getProvenanceDatasource(final Document doc, final String xpathId, final String xpathName) {
|
||||||
final String dsId = doc.valueOf(xpathId);
|
final String dsId = doc.valueOf(xpathId);
|
||||||
final String dsName = doc.valueOf(xpathName);
|
final String dsName = doc.valueOf(xpathName);
|
||||||
|
|
||||||
|
@ -111,9 +123,7 @@ public abstract class AbstractMdRecordToOafMapper {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
return keyValue(
|
return keyValue(createOpenaireId(10, dsId, true), dsName);
|
||||||
createOpenaireId(10, dsId, true),
|
|
||||||
dsName);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
protected List<Oaf> createOafs(
|
protected List<Oaf> createOafs(
|
||||||
|
@ -211,8 +221,14 @@ public abstract class AbstractMdRecordToOafMapper {
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
protected Relation getRelation(String source, String target, String relType, String subRelType, String relClass,
|
protected Relation getRelation(final String source,
|
||||||
KeyValue collectedFrom, DataInfo info, long lastUpdateTimestamp) {
|
final String target,
|
||||||
|
final String relType,
|
||||||
|
final String subRelType,
|
||||||
|
final String relClass,
|
||||||
|
final KeyValue collectedFrom,
|
||||||
|
final DataInfo info,
|
||||||
|
final long lastUpdateTimestamp) {
|
||||||
final Relation rel = new Relation();
|
final Relation rel = new Relation();
|
||||||
rel.setRelType(relType);
|
rel.setRelType(relType);
|
||||||
rel.setSubRelType(subRelType);
|
rel.setSubRelType(subRelType);
|
||||||
|
@ -289,7 +305,10 @@ public abstract class AbstractMdRecordToOafMapper {
|
||||||
protected abstract Qualifier prepareResourceType(Document doc, DataInfo info);
|
protected abstract Qualifier prepareResourceType(Document doc, DataInfo info);
|
||||||
|
|
||||||
protected abstract List<Instance> prepareInstances(
|
protected abstract List<Instance> prepareInstances(
|
||||||
Document doc, DataInfo info, KeyValue collectedfrom, KeyValue hostedby);
|
Document doc,
|
||||||
|
DataInfo info,
|
||||||
|
KeyValue collectedfrom,
|
||||||
|
KeyValue hostedby);
|
||||||
|
|
||||||
protected abstract List<Field<String>> prepareSources(Document doc, DataInfo info);
|
protected abstract List<Field<String>> prepareSources(Document doc, DataInfo info);
|
||||||
|
|
||||||
|
@ -314,13 +333,16 @@ public abstract class AbstractMdRecordToOafMapper {
|
||||||
protected abstract List<Author> prepareAuthors(Document doc, DataInfo info);
|
protected abstract List<Author> prepareAuthors(Document doc, DataInfo info);
|
||||||
|
|
||||||
protected abstract List<Field<String>> prepareOtherResearchProductTools(
|
protected abstract List<Field<String>> prepareOtherResearchProductTools(
|
||||||
Document doc, DataInfo info);
|
Document doc,
|
||||||
|
DataInfo info);
|
||||||
|
|
||||||
protected abstract List<Field<String>> prepareOtherResearchProductContactGroups(
|
protected abstract List<Field<String>> prepareOtherResearchProductContactGroups(
|
||||||
Document doc, DataInfo info);
|
Document doc,
|
||||||
|
DataInfo info);
|
||||||
|
|
||||||
protected abstract List<Field<String>> prepareOtherResearchProductContactPersons(
|
protected abstract List<Field<String>> prepareOtherResearchProductContactPersons(
|
||||||
Document doc, DataInfo info);
|
Document doc,
|
||||||
|
DataInfo info);
|
||||||
|
|
||||||
protected abstract Qualifier prepareSoftwareProgrammingLanguage(Document doc, DataInfo info);
|
protected abstract Qualifier prepareSoftwareProgrammingLanguage(Document doc, DataInfo info);
|
||||||
|
|
||||||
|
@ -329,7 +351,8 @@ public abstract class AbstractMdRecordToOafMapper {
|
||||||
protected abstract List<StructuredProperty> prepareSoftwareLicenses(Document doc, DataInfo info);
|
protected abstract List<StructuredProperty> prepareSoftwareLicenses(Document doc, DataInfo info);
|
||||||
|
|
||||||
protected abstract List<Field<String>> prepareSoftwareDocumentationUrls(
|
protected abstract List<Field<String>> prepareSoftwareDocumentationUrls(
|
||||||
Document doc, DataInfo info);
|
Document doc,
|
||||||
|
DataInfo info);
|
||||||
|
|
||||||
protected abstract List<GeoLocation> prepareDatasetGeoLocations(Document doc, DataInfo info);
|
protected abstract List<GeoLocation> prepareDatasetGeoLocations(Document doc, DataInfo info);
|
||||||
|
|
||||||
|
@ -358,26 +381,17 @@ public abstract class AbstractMdRecordToOafMapper {
|
||||||
final String vol = n.valueOf("@vol");
|
final String vol = n.valueOf("@vol");
|
||||||
final String edition = n.valueOf("@edition");
|
final String edition = n.valueOf("@edition");
|
||||||
if (StringUtils.isNotBlank(name)) {
|
if (StringUtils.isNotBlank(name)) {
|
||||||
return journal(
|
return journal(name, issnPrinted, issnOnline, issnLinking, ep, iss, sp, vol, edition, null, null, info);
|
||||||
name,
|
|
||||||
issnPrinted,
|
|
||||||
issnOnline,
|
|
||||||
issnLinking,
|
|
||||||
ep,
|
|
||||||
iss,
|
|
||||||
sp,
|
|
||||||
vol,
|
|
||||||
edition,
|
|
||||||
null,
|
|
||||||
null,
|
|
||||||
info);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
protected Qualifier prepareQualifier(
|
protected Qualifier prepareQualifier(
|
||||||
final Node node, final String xpath, final String schemeId, final String schemeName) {
|
final Node node,
|
||||||
|
final String xpath,
|
||||||
|
final String schemeId,
|
||||||
|
final String schemeName) {
|
||||||
final String classId = node.valueOf(xpath);
|
final String classId = node.valueOf(xpath);
|
||||||
final String className = code2name.get(classId);
|
final String className = code2name.get(classId);
|
||||||
return qualifier(classId, className, schemeId, schemeName);
|
return qualifier(classId, className, schemeId, schemeName);
|
||||||
|
@ -401,7 +415,10 @@ public abstract class AbstractMdRecordToOafMapper {
|
||||||
}
|
}
|
||||||
|
|
||||||
protected List<StructuredProperty> prepareListStructProps(
|
protected List<StructuredProperty> prepareListStructProps(
|
||||||
final Node node, final String xpath, final Qualifier qualifier, final DataInfo info) {
|
final Node node,
|
||||||
|
final String xpath,
|
||||||
|
final Qualifier qualifier,
|
||||||
|
final DataInfo info) {
|
||||||
final List<StructuredProperty> res = new ArrayList<>();
|
final List<StructuredProperty> res = new ArrayList<>();
|
||||||
for (final Object o : node.selectNodes(xpath)) {
|
for (final Object o : node.selectNodes(xpath)) {
|
||||||
final Node n = (Node) o;
|
final Node n = (Node) o;
|
||||||
|
@ -411,19 +428,17 @@ public abstract class AbstractMdRecordToOafMapper {
|
||||||
}
|
}
|
||||||
|
|
||||||
protected List<StructuredProperty> prepareListStructProps(
|
protected List<StructuredProperty> prepareListStructProps(
|
||||||
final Node node, final String xpath, final DataInfo info) {
|
final Node node,
|
||||||
|
final String xpath,
|
||||||
|
final DataInfo info) {
|
||||||
final List<StructuredProperty> res = new ArrayList<>();
|
final List<StructuredProperty> res = new ArrayList<>();
|
||||||
for (final Object o : node.selectNodes(xpath)) {
|
for (final Object o : node.selectNodes(xpath)) {
|
||||||
final Node n = (Node) o;
|
final Node n = (Node) o;
|
||||||
res
|
res
|
||||||
.add(
|
.add(
|
||||||
structuredProperty(
|
structuredProperty(
|
||||||
n.getText(),
|
n.getText(), n.valueOf("@classid"), n.valueOf("@classname"), n.valueOf("@schemeid"),
|
||||||
n.valueOf("@classid"),
|
n.valueOf("@schemename"), info));
|
||||||
n.valueOf("@classname"),
|
|
||||||
n.valueOf("@schemeid"),
|
|
||||||
n.valueOf("@schemename"),
|
|
||||||
info));
|
|
||||||
}
|
}
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
@ -449,8 +464,7 @@ public abstract class AbstractMdRecordToOafMapper {
|
||||||
final Node n = doc.selectSingleNode("//oaf:datainfo");
|
final Node n = doc.selectSingleNode("//oaf:datainfo");
|
||||||
|
|
||||||
if (n == null) {
|
if (n == null) {
|
||||||
return dataInfo(
|
return dataInfo(false, null, false, false, REPOSITORY_PROVENANCE_ACTIONS, "0.9");
|
||||||
false, null, false, false, REPOSITORY_PROVENANCE_ACTIONS, "0.9");
|
|
||||||
}
|
}
|
||||||
|
|
||||||
final String paClassId = n.valueOf("./oaf:provenanceaction/@classid");
|
final String paClassId = n.valueOf("./oaf:provenanceaction/@classid");
|
||||||
|
@ -464,12 +478,8 @@ public abstract class AbstractMdRecordToOafMapper {
|
||||||
final String trust = n.valueOf("./oaf:trust");
|
final String trust = n.valueOf("./oaf:trust");
|
||||||
|
|
||||||
return dataInfo(
|
return dataInfo(
|
||||||
deletedbyinference,
|
deletedbyinference, inferenceprovenance, inferred, false,
|
||||||
inferenceprovenance,
|
qualifier(paClassId, paClassName, paSchemeId, paSchemeName), trust);
|
||||||
inferred,
|
|
||||||
false,
|
|
||||||
qualifier(paClassId, paClassName, paSchemeId, paSchemeName),
|
|
||||||
trust);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
protected Field<String> prepareField(final Node node, final String xpath, final DataInfo info) {
|
protected Field<String> prepareField(final Node node, final String xpath, final DataInfo info) {
|
||||||
|
@ -477,7 +487,9 @@ public abstract class AbstractMdRecordToOafMapper {
|
||||||
}
|
}
|
||||||
|
|
||||||
protected List<Field<String>> prepareListFields(
|
protected List<Field<String>> prepareListFields(
|
||||||
final Node node, final String xpath, final DataInfo info) {
|
final Node node,
|
||||||
|
final String xpath,
|
||||||
|
final DataInfo info) {
|
||||||
return listFields(info, prepareListString(node, xpath));
|
return listFields(info, prepareListString(node, xpath));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -1,10 +1,19 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.oa.graph.raw;
|
package eu.dnetlib.dhp.oa.graph.raw;
|
||||||
|
|
||||||
import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.*;
|
import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.createOpenaireId;
|
||||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.*;
|
import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.field;
|
||||||
|
import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.structuredProperty;
|
||||||
|
import static eu.dnetlib.dhp.schema.common.ModelConstants.DNET_ACCESS_MODES;
|
||||||
|
import static eu.dnetlib.dhp.schema.common.ModelConstants.DNET_LANGUAGES;
|
||||||
|
import static eu.dnetlib.dhp.schema.common.ModelConstants.DNET_PUBLICATION_RESOURCE;
|
||||||
|
import static eu.dnetlib.dhp.schema.common.ModelConstants.IS_RELATED_TO;
|
||||||
|
import static eu.dnetlib.dhp.schema.common.ModelConstants.PUBLICATION_DATASET;
|
||||||
|
import static eu.dnetlib.dhp.schema.common.ModelConstants.RESULT_RESULT;
|
||||||
|
|
||||||
import java.util.*;
|
import java.util.ArrayList;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
import java.util.stream.Collectors;
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
import org.apache.commons.lang3.StringUtils;
|
import org.apache.commons.lang3.StringUtils;
|
||||||
|
@ -15,8 +24,15 @@ import org.dom4j.Node;
|
||||||
import com.google.common.collect.Lists;
|
import com.google.common.collect.Lists;
|
||||||
|
|
||||||
import eu.dnetlib.dhp.oa.graph.raw.common.PacePerson;
|
import eu.dnetlib.dhp.oa.graph.raw.common.PacePerson;
|
||||||
import eu.dnetlib.dhp.schema.common.ModelConstants;
|
import eu.dnetlib.dhp.schema.oaf.Author;
|
||||||
import eu.dnetlib.dhp.schema.oaf.*;
|
import eu.dnetlib.dhp.schema.oaf.DataInfo;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.Field;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.GeoLocation;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.Instance;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.KeyValue;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.Oaf;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.Qualifier;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
|
||||||
|
|
||||||
public class OafToOafMapper extends AbstractMdRecordToOafMapper {
|
public class OafToOafMapper extends AbstractMdRecordToOafMapper {
|
||||||
|
|
||||||
|
@ -39,14 +55,25 @@ public class OafToOafMapper extends AbstractMdRecordToOafMapper {
|
||||||
author.setSurname(p.getNormalisedSurname());
|
author.setSurname(p.getNormalisedSurname());
|
||||||
}
|
}
|
||||||
|
|
||||||
final String pid = e.attributeValue("nameIdentifier");
|
final String pid = e.valueOf("./@nameIdentifier");
|
||||||
final String pidType = e.attributeValue("nameIdentifierScheme");
|
final String type = e
|
||||||
|
.valueOf("./@nameIdentifierScheme")
|
||||||
|
.trim()
|
||||||
|
.toUpperCase()
|
||||||
|
.replaceAll(" ", "")
|
||||||
|
.replaceAll("_", "");
|
||||||
|
|
||||||
author.setPid(new ArrayList<>());
|
author.setPid(new ArrayList<>());
|
||||||
if (StringUtils.isNotBlank(pid) && StringUtils.isNotBlank(pidType)) {
|
|
||||||
author
|
if (StringUtils.isNotBlank(pid)) {
|
||||||
.getPid()
|
if (type.startsWith("ORCID")) {
|
||||||
.add(structuredProperty(pid, qualifier(pidType, pidType, DNET_PID_TYPES, DNET_PID_TYPES), info));
|
final String cleanedId = pid
|
||||||
|
.replaceAll("http://orcid.org/", "")
|
||||||
|
.replaceAll("https://orcid.org/", "");
|
||||||
|
author.getPid().add(structuredProperty(cleanedId, ORCID_PID_TYPE, info));
|
||||||
|
} else if (type.startsWith("MAGID")) {
|
||||||
|
author.getPid().add(structuredProperty(pid, MAG_PID_TYPE, info));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
res.add(author);
|
res.add(author);
|
||||||
|
@ -104,28 +131,21 @@ public class OafToOafMapper extends AbstractMdRecordToOafMapper {
|
||||||
final Instance instance = new Instance();
|
final Instance instance = new Instance();
|
||||||
instance
|
instance
|
||||||
.setInstancetype(
|
.setInstancetype(
|
||||||
prepareQualifier(
|
prepareQualifier(doc, "//dr:CobjCategory", DNET_PUBLICATION_RESOURCE, DNET_PUBLICATION_RESOURCE));
|
||||||
doc,
|
|
||||||
"//dr:CobjCategory",
|
|
||||||
DNET_PUBLICATION_RESOURCE,
|
|
||||||
DNET_PUBLICATION_RESOURCE));
|
|
||||||
instance.setCollectedfrom(collectedfrom);
|
instance.setCollectedfrom(collectedfrom);
|
||||||
instance.setHostedby(hostedby);
|
instance.setHostedby(hostedby);
|
||||||
instance.setDateofacceptance(field(doc.valueOf("//oaf:dateAccepted"), info));
|
instance.setDateofacceptance(field(doc.valueOf("//oaf:dateAccepted"), info));
|
||||||
instance.setDistributionlocation(doc.valueOf("//oaf:distributionlocation"));
|
instance.setDistributionlocation(doc.valueOf("//oaf:distributionlocation"));
|
||||||
instance
|
instance
|
||||||
.setAccessright(
|
.setAccessright(prepareQualifier(doc, "//oaf:accessrights", DNET_ACCESS_MODES, DNET_ACCESS_MODES));
|
||||||
prepareQualifier(doc, "//oaf:accessrights", DNET_ACCESS_MODES, DNET_ACCESS_MODES));
|
|
||||||
instance.setLicense(field(doc.valueOf("//oaf:license"), info));
|
instance.setLicense(field(doc.valueOf("//oaf:license"), info));
|
||||||
instance.setRefereed(field(doc.valueOf("//oaf:refereed"), info));
|
instance.setRefereed(field(doc.valueOf("//oaf:refereed"), info));
|
||||||
instance
|
instance
|
||||||
.setProcessingchargeamount(
|
.setProcessingchargeamount(field(doc.valueOf("//oaf:processingchargeamount"), info));
|
||||||
field(doc.valueOf("//oaf:processingchargeamount"), info));
|
|
||||||
instance
|
instance
|
||||||
.setProcessingchargecurrency(
|
.setProcessingchargecurrency(field(doc.valueOf("//oaf:processingchargeamount/@currency"), info));
|
||||||
field(doc.valueOf("//oaf:processingchargeamount/@currency"), info));
|
|
||||||
|
|
||||||
List<Node> nodes = Lists.newArrayList(doc.selectNodes("//dc:identifier"));
|
final List<Node> nodes = Lists.newArrayList(doc.selectNodes("//dc:identifier"));
|
||||||
instance
|
instance
|
||||||
.setUrl(
|
.setUrl(
|
||||||
nodes
|
nodes
|
||||||
|
@ -158,19 +178,22 @@ public class OafToOafMapper extends AbstractMdRecordToOafMapper {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected Field<String> prepareSoftwareCodeRepositoryUrl(
|
protected Field<String> prepareSoftwareCodeRepositoryUrl(
|
||||||
final Document doc, final DataInfo info) {
|
final Document doc,
|
||||||
|
final DataInfo info) {
|
||||||
return null; // NOT PRESENT IN OAF
|
return null; // NOT PRESENT IN OAF
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected List<StructuredProperty> prepareSoftwareLicenses(
|
protected List<StructuredProperty> prepareSoftwareLicenses(
|
||||||
final Document doc, final DataInfo info) {
|
final Document doc,
|
||||||
|
final DataInfo info) {
|
||||||
return new ArrayList<>(); // NOT PRESENT IN OAF
|
return new ArrayList<>(); // NOT PRESENT IN OAF
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected List<Field<String>> prepareSoftwareDocumentationUrls(
|
protected List<Field<String>> prepareSoftwareDocumentationUrls(
|
||||||
final Document doc, final DataInfo info) {
|
final Document doc,
|
||||||
|
final DataInfo info) {
|
||||||
return new ArrayList<>(); // NOT PRESENT IN OAF
|
return new ArrayList<>(); // NOT PRESENT IN OAF
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -182,13 +205,15 @@ public class OafToOafMapper extends AbstractMdRecordToOafMapper {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected Field<String> prepareDatasetMetadataVersionNumber(
|
protected Field<String> prepareDatasetMetadataVersionNumber(
|
||||||
final Document doc, final DataInfo info) {
|
final Document doc,
|
||||||
|
final DataInfo info) {
|
||||||
return null; // NOT PRESENT IN OAF
|
return null; // NOT PRESENT IN OAF
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected Field<String> prepareDatasetLastMetadataUpdate(
|
protected Field<String> prepareDatasetLastMetadataUpdate(
|
||||||
final Document doc, final DataInfo info) {
|
final Document doc,
|
||||||
|
final DataInfo info) {
|
||||||
return null; // NOT PRESENT IN OAF
|
return null; // NOT PRESENT IN OAF
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -216,19 +241,22 @@ public class OafToOafMapper extends AbstractMdRecordToOafMapper {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected List<Field<String>> prepareOtherResearchProductTools(
|
protected List<Field<String>> prepareOtherResearchProductTools(
|
||||||
final Document doc, final DataInfo info) {
|
final Document doc,
|
||||||
|
final DataInfo info) {
|
||||||
return new ArrayList<>(); // NOT PRESENT IN OAF
|
return new ArrayList<>(); // NOT PRESENT IN OAF
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected List<Field<String>> prepareOtherResearchProductContactGroups(
|
protected List<Field<String>> prepareOtherResearchProductContactGroups(
|
||||||
final Document doc, final DataInfo info) {
|
final Document doc,
|
||||||
|
final DataInfo info) {
|
||||||
return new ArrayList<>(); // NOT PRESENT IN OAF
|
return new ArrayList<>(); // NOT PRESENT IN OAF
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected List<Field<String>> prepareOtherResearchProductContactPersons(
|
protected List<Field<String>> prepareOtherResearchProductContactPersons(
|
||||||
final Document doc, final DataInfo info) {
|
final Document doc,
|
||||||
|
final DataInfo info) {
|
||||||
return new ArrayList<>(); // NOT PRESENT IN OAF
|
return new ArrayList<>(); // NOT PRESENT IN OAF
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -4,16 +4,31 @@ package eu.dnetlib.dhp.oa.graph.raw;
|
||||||
import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.createOpenaireId;
|
import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.createOpenaireId;
|
||||||
import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.field;
|
import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.field;
|
||||||
import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.structuredProperty;
|
import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.structuredProperty;
|
||||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.*;
|
import static eu.dnetlib.dhp.schema.common.ModelConstants.DNET_ACCESS_MODES;
|
||||||
|
import static eu.dnetlib.dhp.schema.common.ModelConstants.DNET_DATA_CITE_DATE;
|
||||||
|
import static eu.dnetlib.dhp.schema.common.ModelConstants.DNET_DATA_CITE_RESOURCE;
|
||||||
|
import static eu.dnetlib.dhp.schema.common.ModelConstants.DNET_LANGUAGES;
|
||||||
|
import static eu.dnetlib.dhp.schema.common.ModelConstants.DNET_PUBLICATION_RESOURCE;
|
||||||
|
import static eu.dnetlib.dhp.schema.common.ModelConstants.HAS_PARTS;
|
||||||
|
import static eu.dnetlib.dhp.schema.common.ModelConstants.IS_PART_OF;
|
||||||
|
import static eu.dnetlib.dhp.schema.common.ModelConstants.IS_SUPPLEMENTED_BY;
|
||||||
|
import static eu.dnetlib.dhp.schema.common.ModelConstants.IS_SUPPLEMENT_TO;
|
||||||
|
import static eu.dnetlib.dhp.schema.common.ModelConstants.PART;
|
||||||
|
import static eu.dnetlib.dhp.schema.common.ModelConstants.RESULT_RESULT;
|
||||||
|
import static eu.dnetlib.dhp.schema.common.ModelConstants.SUPPLEMENT;
|
||||||
|
|
||||||
import java.util.*;
|
import java.util.ArrayList;
|
||||||
|
import java.util.Arrays;
|
||||||
|
import java.util.HashSet;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.Set;
|
||||||
|
|
||||||
import org.apache.commons.lang3.StringUtils;
|
import org.apache.commons.lang3.StringUtils;
|
||||||
import org.dom4j.Document;
|
import org.dom4j.Document;
|
||||||
import org.dom4j.Node;
|
import org.dom4j.Node;
|
||||||
|
|
||||||
import eu.dnetlib.dhp.oa.graph.raw.common.PacePerson;
|
import eu.dnetlib.dhp.oa.graph.raw.common.PacePerson;
|
||||||
import eu.dnetlib.dhp.schema.common.ModelConstants;
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.Author;
|
import eu.dnetlib.dhp.schema.oaf.Author;
|
||||||
import eu.dnetlib.dhp.schema.oaf.DataInfo;
|
import eu.dnetlib.dhp.schema.oaf.DataInfo;
|
||||||
import eu.dnetlib.dhp.schema.oaf.Field;
|
import eu.dnetlib.dhp.schema.oaf.Field;
|
||||||
|
@ -22,7 +37,6 @@ import eu.dnetlib.dhp.schema.oaf.Instance;
|
||||||
import eu.dnetlib.dhp.schema.oaf.KeyValue;
|
import eu.dnetlib.dhp.schema.oaf.KeyValue;
|
||||||
import eu.dnetlib.dhp.schema.oaf.Oaf;
|
import eu.dnetlib.dhp.schema.oaf.Oaf;
|
||||||
import eu.dnetlib.dhp.schema.oaf.Qualifier;
|
import eu.dnetlib.dhp.schema.oaf.Qualifier;
|
||||||
import eu.dnetlib.dhp.schema.oaf.Relation;
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
|
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
|
||||||
|
|
||||||
public class OdfToOafMapper extends AbstractMdRecordToOafMapper {
|
public class OdfToOafMapper extends AbstractMdRecordToOafMapper {
|
||||||
|
@ -48,7 +62,7 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper {
|
||||||
final String fullname = n.valueOf("./datacite:creatorName");
|
final String fullname = n.valueOf("./datacite:creatorName");
|
||||||
author.setFullname(fullname);
|
author.setFullname(fullname);
|
||||||
|
|
||||||
PacePerson pp = new PacePerson(fullname, false);
|
final PacePerson pp = new PacePerson(fullname, false);
|
||||||
final String name = n.valueOf("./datacite:givenName");
|
final String name = n.valueOf("./datacite:givenName");
|
||||||
if (StringUtils.isBlank(name) & pp.isAccurate()) {
|
if (StringUtils.isBlank(name) & pp.isAccurate()) {
|
||||||
author.setName(pp.getNormalisedFirstName());
|
author.setName(pp.getNormalisedFirstName());
|
||||||
|
@ -63,6 +77,10 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper {
|
||||||
author.setSurname(surname);
|
author.setSurname(surname);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (StringUtils.isBlank(author.getFullname())) {
|
||||||
|
author.setFullname(String.format("%s, %s", author.getSurname(), author.getName()));
|
||||||
|
}
|
||||||
|
|
||||||
author.setAffiliation(prepareListFields(n, "./datacite:affiliation", info));
|
author.setAffiliation(prepareListFields(n, "./datacite:affiliation", info));
|
||||||
author.setPid(preparePids(n, info));
|
author.setPid(preparePids(n, info));
|
||||||
author.setRank(pos++);
|
author.setRank(pos++);
|
||||||
|
@ -74,13 +92,21 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper {
|
||||||
private List<StructuredProperty> preparePids(final Node n, final DataInfo info) {
|
private List<StructuredProperty> preparePids(final Node n, final DataInfo info) {
|
||||||
final List<StructuredProperty> res = new ArrayList<>();
|
final List<StructuredProperty> res = new ArrayList<>();
|
||||||
for (final Object o : n.selectNodes("./datacite:nameIdentifier")) {
|
for (final Object o : n.selectNodes("./datacite:nameIdentifier")) {
|
||||||
res
|
|
||||||
.add(
|
final String id = ((Node) o).getText();
|
||||||
structuredProperty(
|
final String type = ((Node) o)
|
||||||
((Node) o).getText(),
|
.valueOf("./@nameIdentifierScheme")
|
||||||
prepareQualifier(
|
.trim()
|
||||||
(Node) o, "./@nameIdentifierScheme", DNET_PID_TYPES, DNET_PID_TYPES),
|
.toUpperCase()
|
||||||
info));
|
.replaceAll(" ", "")
|
||||||
|
.replaceAll("_", "");
|
||||||
|
|
||||||
|
if (type.startsWith("ORCID")) {
|
||||||
|
final String cleanedId = id.replaceAll("http://orcid.org/", "").replaceAll("https://orcid.org/", "");
|
||||||
|
res.add(structuredProperty(cleanedId, ORCID_PID_TYPE, info));
|
||||||
|
} else if (type.startsWith("MAGID")) {
|
||||||
|
res.add(structuredProperty(id, MAG_PID_TYPE, info));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
@ -95,21 +121,18 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper {
|
||||||
final Instance instance = new Instance();
|
final Instance instance = new Instance();
|
||||||
instance
|
instance
|
||||||
.setInstancetype(
|
.setInstancetype(
|
||||||
prepareQualifier(
|
prepareQualifier(doc, "//dr:CobjCategory", DNET_PUBLICATION_RESOURCE, DNET_PUBLICATION_RESOURCE));
|
||||||
doc, "//dr:CobjCategory", DNET_PUBLICATION_RESOURCE, DNET_PUBLICATION_RESOURCE));
|
|
||||||
instance.setCollectedfrom(collectedfrom);
|
instance.setCollectedfrom(collectedfrom);
|
||||||
instance.setHostedby(hostedby);
|
instance.setHostedby(hostedby);
|
||||||
instance.setDateofacceptance(field(doc.valueOf("//oaf:dateAccepted"), info));
|
instance.setDateofacceptance(field(doc.valueOf("//oaf:dateAccepted"), info));
|
||||||
instance.setDistributionlocation(doc.valueOf("//oaf:distributionlocation"));
|
instance.setDistributionlocation(doc.valueOf("//oaf:distributionlocation"));
|
||||||
instance
|
instance
|
||||||
.setAccessright(
|
.setAccessright(prepareQualifier(doc, "//oaf:accessrights", DNET_ACCESS_MODES, DNET_ACCESS_MODES));
|
||||||
prepareQualifier(doc, "//oaf:accessrights", DNET_ACCESS_MODES, DNET_ACCESS_MODES));
|
|
||||||
instance.setLicense(field(doc.valueOf("//oaf:license"), info));
|
instance.setLicense(field(doc.valueOf("//oaf:license"), info));
|
||||||
instance.setRefereed(field(doc.valueOf("//oaf:refereed"), info));
|
instance.setRefereed(field(doc.valueOf("//oaf:refereed"), info));
|
||||||
instance.setProcessingchargeamount(field(doc.valueOf("//oaf:processingchargeamount"), info));
|
instance.setProcessingchargeamount(field(doc.valueOf("//oaf:processingchargeamount"), info));
|
||||||
instance
|
instance
|
||||||
.setProcessingchargecurrency(
|
.setProcessingchargecurrency(field(doc.valueOf("//oaf:processingchargeamount/@currency"), info));
|
||||||
field(doc.valueOf("//oaf:processingchargeamount/@currency"), info));
|
|
||||||
|
|
||||||
final Set<String> url = new HashSet<>();
|
final Set<String> url = new HashSet<>();
|
||||||
for (final Object o : doc.selectNodes("//datacite:alternateIdentifier[@alternateIdentifierType='URL']")) {
|
for (final Object o : doc.selectNodes("//datacite:alternateIdentifier[@alternateIdentifierType='URL']")) {
|
||||||
|
@ -149,11 +172,7 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper {
|
||||||
res
|
res
|
||||||
.add(
|
.add(
|
||||||
structuredProperty(
|
structuredProperty(
|
||||||
((Node) o).getText(),
|
((Node) o).getText(), "UNKNOWN", "UNKNOWN", DNET_DATA_CITE_DATE, DNET_DATA_CITE_DATE,
|
||||||
"UNKNOWN",
|
|
||||||
"UNKNOWN",
|
|
||||||
DNET_DATA_CITE_DATE,
|
|
||||||
DNET_DATA_CITE_DATE,
|
|
||||||
info));
|
info));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -197,53 +216,52 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected List<Field<String>> prepareOtherResearchProductTools(
|
protected List<Field<String>> prepareOtherResearchProductTools(
|
||||||
final Document doc, final DataInfo info) {
|
final Document doc,
|
||||||
|
final DataInfo info) {
|
||||||
return new ArrayList<>(); // Not present in ODF ???
|
return new ArrayList<>(); // Not present in ODF ???
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected List<Field<String>> prepareOtherResearchProductContactGroups(
|
protected List<Field<String>> prepareOtherResearchProductContactGroups(
|
||||||
final Document doc, final DataInfo info) {
|
final Document doc,
|
||||||
|
final DataInfo info) {
|
||||||
return prepareListFields(
|
return prepareListFields(
|
||||||
doc,
|
doc, "//datacite:contributor[@contributorType='ContactGroup']/datacite:contributorName", info);
|
||||||
"//datacite:contributor[@contributorType='ContactGroup']/datacite:contributorName",
|
|
||||||
info);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected List<Field<String>> prepareOtherResearchProductContactPersons(
|
protected List<Field<String>> prepareOtherResearchProductContactPersons(
|
||||||
final Document doc, final DataInfo info) {
|
final Document doc,
|
||||||
|
final DataInfo info) {
|
||||||
return prepareListFields(
|
return prepareListFields(
|
||||||
doc,
|
doc, "//datacite:contributor[@contributorType='ContactPerson']/datacite:contributorName", info);
|
||||||
"//datacite:contributor[@contributorType='ContactPerson']/datacite:contributorName",
|
|
||||||
info);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected Qualifier prepareSoftwareProgrammingLanguage(final Document doc, final DataInfo info) {
|
protected Qualifier prepareSoftwareProgrammingLanguage(final Document doc, final DataInfo info) {
|
||||||
return prepareQualifier(
|
return prepareQualifier(doc, "//datacite:format", "dnet:programming_languages", "dnet:programming_languages");
|
||||||
doc, "//datacite:format", "dnet:programming_languages", "dnet:programming_languages");
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected Field<String> prepareSoftwareCodeRepositoryUrl(
|
protected Field<String> prepareSoftwareCodeRepositoryUrl(
|
||||||
final Document doc, final DataInfo info) {
|
final Document doc,
|
||||||
|
final DataInfo info) {
|
||||||
return null; // Not present in ODF ???
|
return null; // Not present in ODF ???
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected List<StructuredProperty> prepareSoftwareLicenses(
|
protected List<StructuredProperty> prepareSoftwareLicenses(
|
||||||
final Document doc, final DataInfo info) {
|
final Document doc,
|
||||||
|
final DataInfo info) {
|
||||||
return new ArrayList<>(); // Not present in ODF ???
|
return new ArrayList<>(); // Not present in ODF ???
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected List<Field<String>> prepareSoftwareDocumentationUrls(
|
protected List<Field<String>> prepareSoftwareDocumentationUrls(
|
||||||
final Document doc, final DataInfo info) {
|
final Document doc,
|
||||||
|
final DataInfo info) {
|
||||||
return prepareListFields(
|
return prepareListFields(
|
||||||
doc,
|
doc, "//datacite:relatedIdentifier[@relatedIdentifierType='URL' and @relationType='IsDocumentedBy']", info);
|
||||||
"//datacite:relatedIdentifier[@relatedIdentifierType='URL' and @relationType='IsDocumentedBy']",
|
|
||||||
info);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// DATASETS
|
// DATASETS
|
||||||
|
@ -264,13 +282,15 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected Field<String> prepareDatasetMetadataVersionNumber(
|
protected Field<String> prepareDatasetMetadataVersionNumber(
|
||||||
final Document doc, final DataInfo info) {
|
final Document doc,
|
||||||
|
final DataInfo info) {
|
||||||
return null; // Not present in ODF ???
|
return null; // Not present in ODF ???
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected Field<String> prepareDatasetLastMetadataUpdate(
|
protected Field<String> prepareDatasetLastMetadataUpdate(
|
||||||
final Document doc, final DataInfo info) {
|
final Document doc,
|
||||||
|
final DataInfo info) {
|
||||||
return prepareField(doc, "//datacite:date[@dateType='Updated']", info);
|
return prepareField(doc, "//datacite:date[@dateType='Updated']", info);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -346,9 +366,7 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper {
|
||||||
@Override
|
@Override
|
||||||
protected Qualifier prepareResourceType(final Document doc, final DataInfo info) {
|
protected Qualifier prepareResourceType(final Document doc, final DataInfo info) {
|
||||||
return prepareQualifier(
|
return prepareQualifier(
|
||||||
doc,
|
doc, "//*[local-name() = 'resource']//*[local-name() = 'resourceType']", DNET_DATA_CITE_RESOURCE,
|
||||||
"//*[local-name() = 'resource']//*[local-name() = 'resourceType']",
|
|
||||||
DNET_DATA_CITE_RESOURCE,
|
|
||||||
DNET_DATA_CITE_RESOURCE);
|
DNET_DATA_CITE_RESOURCE);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -21,7 +21,14 @@ import org.mockito.Mock;
|
||||||
import org.mockito.junit.jupiter.MockitoExtension;
|
import org.mockito.junit.jupiter.MockitoExtension;
|
||||||
|
|
||||||
import eu.dnetlib.dhp.schema.common.ModelConstants;
|
import eu.dnetlib.dhp.schema.common.ModelConstants;
|
||||||
import eu.dnetlib.dhp.schema.oaf.*;
|
import eu.dnetlib.dhp.schema.oaf.Author;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.Dataset;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.Field;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.Oaf;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.Publication;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.Relation;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.Software;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
|
||||||
|
|
||||||
@ExtendWith(MockitoExtension.class)
|
@ExtendWith(MockitoExtension.class)
|
||||||
public class MappersTest {
|
public class MappersTest {
|
||||||
|
@ -54,13 +61,13 @@ public class MappersTest {
|
||||||
assertTrue(StringUtils.isNotBlank(p.getTitle().get(0).getValue()));
|
assertTrue(StringUtils.isNotBlank(p.getTitle().get(0).getValue()));
|
||||||
|
|
||||||
assertTrue(p.getAuthor().size() > 0);
|
assertTrue(p.getAuthor().size() > 0);
|
||||||
Optional<Author> author = p
|
final Optional<Author> author = p
|
||||||
.getAuthor()
|
.getAuthor()
|
||||||
.stream()
|
.stream()
|
||||||
.filter(a -> a.getPid() != null && !a.getPid().isEmpty())
|
.filter(a -> a.getPid() != null && !a.getPid().isEmpty())
|
||||||
.findFirst();
|
.findFirst();
|
||||||
assertTrue(author.isPresent());
|
assertTrue(author.isPresent());
|
||||||
StructuredProperty pid = author
|
final StructuredProperty pid = author
|
||||||
.get()
|
.get()
|
||||||
.getPid()
|
.getPid()
|
||||||
.stream()
|
.stream()
|
||||||
|
@ -68,7 +75,7 @@ public class MappersTest {
|
||||||
.get();
|
.get();
|
||||||
assertEquals("0000-0001-6651-1178", pid.getValue());
|
assertEquals("0000-0001-6651-1178", pid.getValue());
|
||||||
assertEquals("ORCID", pid.getQualifier().getClassid());
|
assertEquals("ORCID", pid.getQualifier().getClassid());
|
||||||
assertEquals("ORCID", pid.getQualifier().getClassname());
|
assertEquals("Open Researcher and Contributor ID", pid.getQualifier().getClassname());
|
||||||
assertEquals(ModelConstants.DNET_PID_TYPES, pid.getQualifier().getSchemeid());
|
assertEquals(ModelConstants.DNET_PID_TYPES, pid.getQualifier().getSchemeid());
|
||||||
assertEquals(ModelConstants.DNET_PID_TYPES, pid.getQualifier().getSchemename());
|
assertEquals(ModelConstants.DNET_PID_TYPES, pid.getQualifier().getSchemename());
|
||||||
assertEquals("Votsi,Nefta", author.get().getFullname());
|
assertEquals("Votsi,Nefta", author.get().getFullname());
|
||||||
|
@ -121,13 +128,13 @@ public class MappersTest {
|
||||||
assertTrue(StringUtils.isNotBlank(d.getTitle().get(0).getValue()));
|
assertTrue(StringUtils.isNotBlank(d.getTitle().get(0).getValue()));
|
||||||
assertTrue(d.getAuthor().size() > 0);
|
assertTrue(d.getAuthor().size() > 0);
|
||||||
|
|
||||||
Optional<Author> author = d
|
final Optional<Author> author = d
|
||||||
.getAuthor()
|
.getAuthor()
|
||||||
.stream()
|
.stream()
|
||||||
.filter(a -> a.getPid() != null && !a.getPid().isEmpty())
|
.filter(a -> a.getPid() != null && !a.getPid().isEmpty())
|
||||||
.findFirst();
|
.findFirst();
|
||||||
assertTrue(author.isPresent());
|
assertTrue(author.isPresent());
|
||||||
StructuredProperty pid = author
|
final StructuredProperty pid = author
|
||||||
.get()
|
.get()
|
||||||
.getPid()
|
.getPid()
|
||||||
.stream()
|
.stream()
|
||||||
|
@ -135,7 +142,7 @@ public class MappersTest {
|
||||||
.get();
|
.get();
|
||||||
assertEquals("0000-0001-9074-1619", pid.getValue());
|
assertEquals("0000-0001-9074-1619", pid.getValue());
|
||||||
assertEquals("ORCID", pid.getQualifier().getClassid());
|
assertEquals("ORCID", pid.getQualifier().getClassid());
|
||||||
assertEquals("ORCID", pid.getQualifier().getClassname());
|
assertEquals("Open Researcher and Contributor ID", pid.getQualifier().getClassname());
|
||||||
assertEquals(ModelConstants.DNET_PID_TYPES, pid.getQualifier().getSchemeid());
|
assertEquals(ModelConstants.DNET_PID_TYPES, pid.getQualifier().getSchemeid());
|
||||||
assertEquals(ModelConstants.DNET_PID_TYPES, pid.getQualifier().getSchemename());
|
assertEquals(ModelConstants.DNET_PID_TYPES, pid.getQualifier().getSchemename());
|
||||||
assertEquals("Baracchini, Theo", author.get().getFullname());
|
assertEquals("Baracchini, Theo", author.get().getFullname());
|
||||||
|
@ -143,13 +150,13 @@ public class MappersTest {
|
||||||
assertEquals("Theo", author.get().getName());
|
assertEquals("Theo", author.get().getName());
|
||||||
|
|
||||||
assertEquals(1, author.get().getAffiliation().size());
|
assertEquals(1, author.get().getAffiliation().size());
|
||||||
Optional<Field<String>> opAff = author
|
final Optional<Field<String>> opAff = author
|
||||||
.get()
|
.get()
|
||||||
.getAffiliation()
|
.getAffiliation()
|
||||||
.stream()
|
.stream()
|
||||||
.findFirst();
|
.findFirst();
|
||||||
assertTrue(opAff.isPresent());
|
assertTrue(opAff.isPresent());
|
||||||
Field<String> affiliation = opAff.get();
|
final Field<String> affiliation = opAff.get();
|
||||||
assertEquals("ISTI-CNR", affiliation.getValue());
|
assertEquals("ISTI-CNR", affiliation.getValue());
|
||||||
|
|
||||||
assertTrue(d.getSubject().size() > 0);
|
assertTrue(d.getSubject().size() > 0);
|
||||||
|
|
Loading…
Reference in New Issue