forked from D-Net/dnet-hadoop

commit 3cf2796ac6: code formatting
parent dc4621b3cb
AuthorMerger.java
@@ -1,159 +1,164 @@
+
 package eu.dnetlib.dhp.oa.dedup;
 
-import com.wcohen.ss.JaroWinkler;
-import eu.dnetlib.dhp.schema.oaf.Author;
-import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
-import eu.dnetlib.pace.model.Person;
-import org.apache.commons.lang3.StringUtils;
-import scala.Tuple2;
-
 import java.text.Normalizer;
 import java.util.*;
 import java.util.stream.Collectors;
 
+import org.apache.commons.lang3.StringUtils;
+
+import com.wcohen.ss.JaroWinkler;
+
+import eu.dnetlib.dhp.schema.oaf.Author;
+import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
+import eu.dnetlib.pace.model.Person;
+import scala.Tuple2;
+
 public class AuthorMerger {
 
 	private static final Double THRESHOLD = 0.95;
 
-	public static List<Author> merge(List<List<Author>> authors){
+	public static List<Author> merge(List<List<Author>> authors) {
 
 		authors.sort(new Comparator<List<Author>>() {
 			@Override
 			public int compare(List<Author> o1, List<Author> o2) {
 				return -Integer.compare(countAuthorsPids(o1), countAuthorsPids(o2));
 			}
 		});
 
 		List<Author> author = new ArrayList<>();
 
-		for(List<Author> a : authors){
+		for (List<Author> a : authors) {
 			author = mergeAuthor(author, a);
 		}
 
 		return author;
 
 	}
 
 	public static List<Author> mergeAuthor(final List<Author> a, final List<Author> b) {
 		int pa = countAuthorsPids(a);
 		int pb = countAuthorsPids(b);
 		List<Author> base, enrich;
 		int sa = authorsSize(a);
 		int sb = authorsSize(b);
 
 		if (pa == pb) {
 			base = sa > sb ? a : b;
 			enrich = sa > sb ? b : a;
 		} else {
 			base = pa > pb ? a : b;
 			enrich = pa > pb ? b : a;
 		}
 		enrichPidFromList(base, enrich);
 		return base;
 	}
 
 	private static void enrichPidFromList(List<Author> base, List<Author> enrich) {
 		if (base == null || enrich == null)
 			return;
 		final Map<String, Author> basePidAuthorMap = base
 			.stream()
 			.filter(a -> a.getPid() != null && a.getPid().size() > 0)
 			.flatMap(
 				a -> a
 					.getPid()
 					.stream()
 					.map(p -> new Tuple2<>(pidToComparableString(p), a)))
 			.collect(Collectors.toMap(Tuple2::_1, Tuple2::_2, (x1, x2) -> x1));
 
 		final List<Tuple2<StructuredProperty, Author>> pidToEnrich = enrich
 			.stream()
 			.filter(a -> a.getPid() != null && a.getPid().size() > 0)
 			.flatMap(
 				a -> a
 					.getPid()
 					.stream()
 					.filter(p -> !basePidAuthorMap.containsKey(pidToComparableString(p)))
 					.map(p -> new Tuple2<>(p, a)))
 			.collect(Collectors.toList());
 
 		pidToEnrich
 			.forEach(
 				a -> {
 					Optional<Tuple2<Double, Author>> simAuthor = base
 						.stream()
 						.map(ba -> new Tuple2<>(sim(ba, a._2()), ba))
 						.max(Comparator.comparing(Tuple2::_1));
 					if (simAuthor.isPresent() && simAuthor.get()._1() > THRESHOLD) {
 						Author r = simAuthor.get()._2();
 						if (r.getPid() == null) {
 							r.setPid(new ArrayList<>());
 						}
 						r.getPid().add(a._1());
 					}
 				});
 	}
 
-	public static String pidToComparableString(StructuredProperty pid){
-		return (pid.getQualifier()!=null? pid.getQualifier().getClassid()!=null?pid.getQualifier().getClassid().toLowerCase():"":"") + (pid.getValue()!=null? pid.getValue().toLowerCase():"");
-	}
+	public static String pidToComparableString(StructuredProperty pid) {
+		return (pid.getQualifier() != null
+			? pid.getQualifier().getClassid() != null ? pid.getQualifier().getClassid().toLowerCase() : ""
+			: "") + (pid.getValue() != null ? pid.getValue().toLowerCase() : "");
+	}
 
 	public static int countAuthorsPids(List<Author> authors) {
 		if (authors == null)
 			return 0;
 
 		return (int) authors.stream().filter(AuthorMerger::hasPid).count();
 	}
 
	private static int authorsSize(List<Author> authors) {
 		if (authors == null)
 			return 0;
 		return authors.size();
 	}
 
 	private static Double sim(Author a, Author b) {
 
 		final Person pa = parse(a);
 		final Person pb = parse(b);
 
 		if (pa.isAccurate() & pb.isAccurate()) {
 			return new JaroWinkler()
 				.score(normalize(pa.getSurnameString()), normalize(pb.getSurnameString()));
 		} else {
 			return new JaroWinkler()
 				.score(normalize(pa.getNormalisedFullname()), normalize(pb.getNormalisedFullname()));
 		}
 	}
 
 	private static boolean hasPid(Author a) {
 		if (a == null || a.getPid() == null || a.getPid().size() == 0)
 			return false;
 		return a.getPid().stream().anyMatch(p -> p != null && StringUtils.isNotBlank(p.getValue()));
 	}
 
 	private static Person parse(Author author) {
 		if (StringUtils.isNotBlank(author.getSurname())) {
 			return new Person(author.getSurname() + ", " + author.getName(), false);
 		} else {
 			return new Person(author.getFullname(), false);
 		}
 	}
 
 	private static String normalize(final String s) {
 		return nfd(s)
 			.toLowerCase()
 			// do not compact the regexes in a single expression, would cause StackOverflowError
 			// in case
 			// of large input strings
 			.replaceAll("(\\W)+", " ")
 			.replaceAll("(\\p{InCombiningDiacriticalMarks})+", " ")
 			.replaceAll("(\\p{Punct})+", " ")
 			.replaceAll("(\\d)+", " ")
 			.replaceAll("(\\n)+", " ")
 			.trim();
 	}
 
 	private static String nfd(final String s) {
 		return Normalizer.normalize(s, Normalizer.Form.NFD);
 	}
 
 }
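Note (not part of the commit): a minimal usage sketch of the AuthorMerger above. It assumes the standard bean setters of the eu.dnetlib.dhp.schema.oaf model (setFullname, setPid, setValue), mirroring the getters the class itself uses; the names and PID values are illustrative.

import java.util.*;

import eu.dnetlib.dhp.schema.oaf.Author;
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;

public class AuthorMergerSketch {

	public static void main(String[] args) {
		// base list: two authors, only one of them already carries a PID
		Author johnNoPid = author("John Smith");
		Author jane = author("Jane Doe");
		jane.setPid(new ArrayList<>(Collections.singletonList(pid("0000-0001-0000-0001"))));
		List<Author> base = new ArrayList<>(Arrays.asList(johnNoPid, jane));

		// duplicate record: the same John Smith, this time with an ORCID attached
		Author johnWithPid = author("John Smith");
		johnWithPid.setPid(new ArrayList<>(Collections.singletonList(pid("0000-0002-0000-0002"))));
		List<Author> enrich = new ArrayList<>(Collections.singletonList(johnWithPid));

		// mergeAuthor keeps the list with more PIDs (the larger one on a tie) as
		// base, then copies unseen PIDs onto base authors whose JaroWinkler name
		// similarity exceeds THRESHOLD (0.95)
		List<Author> merged = AuthorMerger.mergeAuthor(base, enrich);

		// John was enriched with the ORCID taken from the duplicate
		System.out.println(AuthorMerger.countAuthorsPids(merged)); // expected: 2
	}

	private static Author author(String fullname) {
		Author a = new Author();
		a.setFullname(fullname);
		return a;
	}

	private static StructuredProperty pid(String value) {
		StructuredProperty sp = new StructuredProperty();
		sp.setValue(value);
		return sp;
	}
}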
DedupRecordFactory.java
@@ -1,3 +1,4 @@
+
 package eu.dnetlib.dhp.oa.dedup;
 
 import java.io.Serializable;
@@ -73,7 +74,8 @@ public class DedupRecordFactory {
 	}
 
 	public static <T extends OafEntity> T entityMerger(
-		String id, Iterator<Tuple2<String, T>> entities, long ts, DataInfo dataInfo, Class<T> clazz) throws IllegalAccessException, InstantiationException {
+		String id, Iterator<Tuple2<String, T>> entities, long ts, DataInfo dataInfo, Class<T> clazz)
+		throws IllegalAccessException, InstantiationException {
 
 		T entity = clazz.newInstance();
 
@@ -87,14 +89,14 @@ public class DedupRecordFactory {
 			entity.mergeFrom(duplicate);
 			if (ModelSupport.isSubClass(duplicate, Result.class)) {
 				Result r1 = (Result) duplicate;
-				if (r1.getAuthor() != null && r1.getAuthor().size()>0)
+				if (r1.getAuthor() != null && r1.getAuthor().size() > 0)
 					authors.add(r1.getAuthor());
 				if (r1.getDateofacceptance() != null)
 					dates.add(r1.getDateofacceptance().getValue());
 			}
 		});
 
-		//set authors and date
+		// set authors and date
 		if (ModelSupport.isSubClass(entity, Result.class)) {
 			((Result) entity).setDateofacceptance(DatePicker.pick(dates));
 			((Result) entity).setAuthor(AuthorMerger.merge(authors));
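For context, the entityMerger entry point reshaped above is exercised by the EntityMergerTest further down; a sketch of the call, using only names visible in this diff:

// Sketch: merge duplicate publications grouped under one dedup id, as in
// EntityMergerTest below (dedupId, publications and dataInfo are fixtures).
static Publication mergeDuplicates(String dedupId, List<Tuple2<String, Publication>> publications,
	DataInfo dataInfo) throws IllegalAccessException, InstantiationException {
	return DedupRecordFactory.entityMerger(dedupId, publications.iterator(), 0, dataInfo, Publication.class);
}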
EntityMergerTest.java
@@ -1,5 +1,8 @@
+
 package eu.dnetlib.dhp.oa.dedup;
 
+import static org.junit.jupiter.api.Assertions.assertEquals;
+
 import java.io.BufferedReader;
 import java.io.FileReader;
 import java.io.IOException;
@@ -7,15 +10,13 @@ import java.io.Serializable;
 import java.nio.file.Paths;
 import java.util.*;
 
-import eu.dnetlib.dhp.schema.oaf.*;
-import eu.dnetlib.pace.util.MapDocumentUtil;
 import org.codehaus.jackson.map.ObjectMapper;
 import org.junit.jupiter.api.BeforeEach;
 import org.junit.jupiter.api.Test;
-import scala.Tuple2;
 
-import static org.junit.jupiter.api.Assertions.assertEquals;
+import eu.dnetlib.dhp.schema.oaf.*;
+import eu.dnetlib.pace.util.MapDocumentUtil;
+import scala.Tuple2;
 
 public class EntityMergerTest implements Serializable {
 
@@ -30,9 +31,9 @@ public class EntityMergerTest implements Serializable {
 	public void setUp() throws Exception {
 
 		testEntityBasePath = Paths
 			.get(SparkDedupTest.class.getResource("/eu/dnetlib/dhp/dedup/json").toURI())
 			.toFile()
 			.getAbsolutePath();
 
 		publications = readSample(testEntityBasePath + "/publication_merge.json", Publication.class);
 
@@ -45,7 +46,8 @@ public class EntityMergerTest implements Serializable {
 	@Test
 	public void publicationMergerTest() throws InstantiationException, IllegalAccessException {
 
-		Publication pub_merged = DedupRecordFactory.entityMerger(dedupId, publications.iterator(), 0, dataInfo, Publication.class);
+		Publication pub_merged = DedupRecordFactory
+			.entityMerger(dedupId, publications.iterator(), 0, dataInfo, Publication.class);
 
 		assertEquals(dedupId, pub_merged.getId());
 
@@ -59,36 +61,36 @@ public class EntityMergerTest implements Serializable {
 		assertEquals(pub_merged.getDateoftransformation(), pub_top.getDateoftransformation());
 		assertEquals(pub_merged.getOaiprovenance(), pub_top.getOaiprovenance());
 		assertEquals(pub_merged.getDateofcollection(), pub_top.getDateofcollection());
-		assertEquals(pub_merged.getInstance().size(),3);
+		assertEquals(pub_merged.getInstance().size(), 3);
 		assertEquals(pub_merged.getCountry().size(), 2);
 		assertEquals(pub_merged.getSubject().size(), 0);
 		assertEquals(pub_merged.getTitle().size(), 2);
-		assertEquals(pub_merged.getRelevantdate().size(),0);
-		assertEquals(pub_merged.getDescription().size(),0);
-		assertEquals(pub_merged.getSource().size(),0);
-		assertEquals(pub_merged.getFulltext().size(),0);
-		assertEquals(pub_merged.getFormat().size(),0);
-		assertEquals(pub_merged.getContributor().size(),0);
-		assertEquals(pub_merged.getCoverage().size(),0);
-		assertEquals(pub_merged.getContext().size(),0);
-		assertEquals(pub_merged.getExternalReference().size(),0);
-		assertEquals(pub_merged.getOriginalId().size(),3);
-		assertEquals(pub_merged.getCollectedfrom().size(),3);
-		assertEquals(pub_merged.getPid().size(),1);
-		assertEquals(pub_merged.getExtraInfo().size(),0);
+		assertEquals(pub_merged.getRelevantdate().size(), 0);
+		assertEquals(pub_merged.getDescription().size(), 0);
+		assertEquals(pub_merged.getSource().size(), 0);
+		assertEquals(pub_merged.getFulltext().size(), 0);
+		assertEquals(pub_merged.getFormat().size(), 0);
+		assertEquals(pub_merged.getContributor().size(), 0);
+		assertEquals(pub_merged.getCoverage().size(), 0);
+		assertEquals(pub_merged.getContext().size(), 0);
+		assertEquals(pub_merged.getExternalReference().size(), 0);
+		assertEquals(pub_merged.getOriginalId().size(), 3);
+		assertEquals(pub_merged.getCollectedfrom().size(), 3);
+		assertEquals(pub_merged.getPid().size(), 1);
+		assertEquals(pub_merged.getExtraInfo().size(), 0);
 
-		//verify datainfo
+		// verify datainfo
 		assertEquals(pub_merged.getDataInfo(), dataInfo);
 
-		//verify datepicker
+		// verify datepicker
 		assertEquals(pub_merged.getDateofacceptance().getValue(), "2018-09-30");
 
-		//verify authors
+		// verify authors
 		assertEquals(pub_merged.getAuthor().size(), 9);
 		assertEquals(AuthorMerger.countAuthorsPids(pub_merged.getAuthor()), 4);
 	}
 
-	public DataInfo setDI(){
+	public DataInfo setDI() {
 		DataInfo dataInfo = new DataInfo();
 		dataInfo.setTrust("0.9");
 		dataInfo.setDeletedbyinference(false);
@@ -97,13 +99,13 @@ public class EntityMergerTest implements Serializable {
 		return dataInfo;
 	}
 
-	public Publication getTopPub(List<Tuple2<String, Publication>> publications){
+	public Publication getTopPub(List<Tuple2<String, Publication>> publications) {
 
 		Double maxTrust = 0.0;
 		Publication maxPub = new Publication();
 		for (Tuple2<String, Publication> publication : publications) {
 			Double pubTrust = Double.parseDouble(publication._2().getDataInfo().getTrust());
-			if(pubTrust > maxTrust){
+			if (pubTrust > maxTrust) {
 				maxTrust = pubTrust;
 				maxPub = publication._2();
 			}
@@ -118,11 +120,11 @@ public class EntityMergerTest implements Serializable {
 			reader = new BufferedReader(new FileReader(path));
 			String line = reader.readLine();
 			while (line != null) {
-				res.add(
+				res
+					.add(
 						new Tuple2<>(
 							MapDocumentUtil.getJPathString("$.id", line),
-							new ObjectMapper().readValue(line, clazz))
-				);
+							new ObjectMapper().readValue(line, clazz)));
 				// read next line
 				line = reader.readLine();
 			}
@@ -134,5 +136,4 @@ public class EntityMergerTest implements Serializable {
 		return res;
 	}
 
-
 }
AbstractMdRecordToOafMapper.java
@@ -59,8 +59,10 @@ public abstract class AbstractMdRecordToOafMapper {
 
 	protected static final String DATACITE_SCHEMA_KERNEL_4 = "http://datacite.org/schema/kernel-4";
 	protected static final String DATACITE_SCHEMA_KERNEL_3 = "http://datacite.org/schema/kernel-3";
-	protected static final Qualifier ORCID_PID_TYPE = qualifier("ORCID", "Open Researcher and Contributor ID", DNET_PID_TYPES, DNET_PID_TYPES);
-	protected static final Qualifier MAG_PID_TYPE = qualifier("MAGIdentifier", "Microsoft Academic Graph Identifier", DNET_PID_TYPES, DNET_PID_TYPES);
+	protected static final Qualifier ORCID_PID_TYPE = qualifier(
+		"ORCID", "Open Researcher and Contributor ID", DNET_PID_TYPES, DNET_PID_TYPES);
+	protected static final Qualifier MAG_PID_TYPE = qualifier(
+		"MAGIdentifier", "Microsoft Academic Graph Identifier", DNET_PID_TYPES, DNET_PID_TYPES);
 
 	protected static final Map<String, String> nsContext = new HashMap<>();
 
@@ -74,7 +76,8 @@ public abstract class AbstractMdRecordToOafMapper {
 		nsContext.put("datacite", DATACITE_SCHEMA_KERNEL_3);
 	}
 
-	protected static final Qualifier MAIN_TITLE_QUALIFIER = qualifier("main title", "main title", "dnet:dataCite_title", "dnet:dataCite_title");
+	protected static final Qualifier MAIN_TITLE_QUALIFIER = qualifier(
+		"main title", "main title", "dnet:dataCite_title", "dnet:dataCite_title");
 
 	protected AbstractMdRecordToOafMapper(final Map<String, String> code2name) {
 		this.code2name = code2name;
@@ -88,15 +91,20 @@ public abstract class AbstractMdRecordToOafMapper {
 				.parseText(xml.replaceAll(DATACITE_SCHEMA_KERNEL_4, DATACITE_SCHEMA_KERNEL_3));
 
 			final String type = doc.valueOf("//dr:CobjCategory/@type");
-			final KeyValue collectedFrom = getProvenanceDatasource(doc, "//oaf:collectedFrom/@id", "//oaf:collectedFrom/@name");
+			final KeyValue collectedFrom = getProvenanceDatasource(
+				doc, "//oaf:collectedFrom/@id", "//oaf:collectedFrom/@name");
 
-			if (collectedFrom == null) { return null; }
+			if (collectedFrom == null) {
+				return null;
+			}
 
 			final KeyValue hostedBy = StringUtils.isBlank(doc.valueOf("//oaf:hostedBy/@id"))
 				? collectedFrom
 				: getProvenanceDatasource(doc, "//oaf:hostedBy/@id", "//oaf:hostedBy/@name");
 
-			if (hostedBy == null) { return null; }
+			if (hostedBy == null) {
+				return null;
+			}
 
 			final DataInfo info = prepareDataInfo(doc);
 			final long lastUpdateTimestamp = new Date().getTime();
@@ -111,7 +119,9 @@ public abstract class AbstractMdRecordToOafMapper {
 		final String dsId = doc.valueOf(xpathId);
 		final String dsName = doc.valueOf(xpathName);
 
-		if (StringUtils.isBlank(dsId) | StringUtils.isBlank(dsName)) { return null; }
+		if (StringUtils.isBlank(dsId) | StringUtils.isBlank(dsName)) {
+			return null;
+		}
 
 		return keyValue(createOpenaireId(10, dsId, true), dsName);
 	}
@@ -127,47 +137,47 @@ public abstract class AbstractMdRecordToOafMapper {
 		final List<Oaf> oafs = new ArrayList<>();
 
 		switch (type.toLowerCase()) {
 			case "publication":
 				final Publication p = new Publication();
 				populateResultFields(p, doc, collectedFrom, hostedBy, info, lastUpdateTimestamp);
 				p.setResulttype(PUBLICATION_DEFAULT_RESULTTYPE);
 				p.setJournal(prepareJournal(doc, info));
 				oafs.add(p);
 				break;
 			case "dataset":
 				final Dataset d = new Dataset();
 				populateResultFields(d, doc, collectedFrom, hostedBy, info, lastUpdateTimestamp);
 				d.setResulttype(DATASET_DEFAULT_RESULTTYPE);
 				d.setStoragedate(prepareDatasetStorageDate(doc, info));
 				d.setDevice(prepareDatasetDevice(doc, info));
 				d.setSize(prepareDatasetSize(doc, info));
 				d.setVersion(prepareDatasetVersion(doc, info));
 				d.setLastmetadataupdate(prepareDatasetLastMetadataUpdate(doc, info));
 				d.setMetadataversionnumber(prepareDatasetMetadataVersionNumber(doc, info));
 				d.setGeolocation(prepareDatasetGeoLocations(doc, info));
 				oafs.add(d);
 				break;
 			case "software":
 				final Software s = new Software();
 				populateResultFields(s, doc, collectedFrom, hostedBy, info, lastUpdateTimestamp);
 				s.setResulttype(SOFTWARE_DEFAULT_RESULTTYPE);
 				s.setDocumentationUrl(prepareSoftwareDocumentationUrls(doc, info));
 				s.setLicense(prepareSoftwareLicenses(doc, info));
 				s.setCodeRepositoryUrl(prepareSoftwareCodeRepositoryUrl(doc, info));
 				s.setProgrammingLanguage(prepareSoftwareProgrammingLanguage(doc, info));
 				oafs.add(s);
 				break;
 			case "":
 			case "otherresearchproducts":
 			default:
 				final OtherResearchProduct o = new OtherResearchProduct();
 				populateResultFields(o, doc, collectedFrom, hostedBy, info, lastUpdateTimestamp);
 				o.setResulttype(ORP_DEFAULT_RESULTTYPE);
 				o.setContactperson(prepareOtherResearchProductContactPersons(doc, info));
 				o.setContactgroup(prepareOtherResearchProductContactGroups(doc, info));
 				o.setTool(prepareOtherResearchProductTools(doc, info));
 				oafs.add(o);
 				break;
 		}
 
 		if (!oafs.isEmpty()) {
@@ -196,9 +206,15 @@ public abstract class AbstractMdRecordToOafMapper {
 			final String projectId = createOpenaireId(40, originalId, true);
 
 			res
-				.add(getRelation(docId, projectId, RESULT_PROJECT, OUTCOME, IS_PRODUCED_BY, collectedFrom, info, lastUpdateTimestamp));
+				.add(
+					getRelation(
+						docId, projectId, RESULT_PROJECT, OUTCOME, IS_PRODUCED_BY, collectedFrom, info,
+						lastUpdateTimestamp));
 			res
-				.add(getRelation(projectId, docId, RESULT_PROJECT, OUTCOME, PRODUCES, collectedFrom, info, lastUpdateTimestamp));
+				.add(
+					getRelation(
+						projectId, docId, RESULT_PROJECT, OUTCOME, PRODUCES, collectedFrom, info,
+						lastUpdateTimestamp));
 		}
 	}
 
@@ -244,7 +260,9 @@ public abstract class AbstractMdRecordToOafMapper {
 		r.setOriginalId(Arrays.asList(doc.valueOf("//dri:objIdentifier")));
 		r.setCollectedfrom(Arrays.asList(collectedFrom));
 		r
-			.setPid(prepareListStructProps(doc, "//oaf:identifier", "@identifierType", "dnet:pid_types", "dnet:pid_types", info));
+			.setPid(
+				prepareListStructProps(
+					doc, "//oaf:identifier", "@identifierType", "dnet:pid_types", "dnet:pid_types", info));
 		r.setDateofcollection(doc.valueOf("//dr:dateOfCollection"));
 		r.setDateoftransformation(doc.valueOf("//dr:dateOfTransformation"));
 		r.setExtraInfo(new ArrayList<>()); // NOT PRESENT IN MDSTORES
@@ -362,7 +380,9 @@ public abstract class AbstractMdRecordToOafMapper {
 			final String sp = n.valueOf("@sp");
 			final String vol = n.valueOf("@vol");
 			final String edition = n.valueOf("@edition");
-			if (StringUtils.isNotBlank(name)) { return journal(name, issnPrinted, issnOnline, issnLinking, ep, iss, sp, vol, edition, null, null, info); }
+			if (StringUtils.isNotBlank(name)) {
+				return journal(name, issnPrinted, issnOnline, issnLinking, ep, iss, sp, vol, edition, null, null, info);
+			}
 		}
 		return null;
 	}
@@ -415,7 +435,10 @@ public abstract class AbstractMdRecordToOafMapper {
 		for (final Object o : node.selectNodes(xpath)) {
 			final Node n = (Node) o;
 			res
-				.add(structuredProperty(n.getText(), n.valueOf("@classid"), n.valueOf("@classname"), n.valueOf("@schemeid"), n.valueOf("@schemename"), info));
+				.add(
+					structuredProperty(
+						n.getText(), n.valueOf("@classid"), n.valueOf("@classname"), n.valueOf("@schemeid"),
+						n.valueOf("@schemename"), info));
 		}
 		return res;
 	}
@@ -423,7 +446,9 @@ public abstract class AbstractMdRecordToOafMapper {
 	protected OAIProvenance prepareOAIprovenance(final Document doc) {
 		final Node n = doc.selectSingleNode("//*[local-name()='provenance']/*[local-name()='originDescription']");
 
-		if (n == null) { return null; }
+		if (n == null) {
+			return null;
+		}
 
 		final String identifier = n.valueOf("./*[local-name()='identifier']");
 		final String baseURL = n.valueOf("./*[local-name()='baseURL']");
@@ -438,7 +463,9 @@ public abstract class AbstractMdRecordToOafMapper {
 	protected DataInfo prepareDataInfo(final Document doc) {
 		final Node n = doc.selectSingleNode("//oaf:datainfo");
 
-		if (n == null) { return dataInfo(false, null, false, false, REPOSITORY_PROVENANCE_ACTIONS, "0.9"); }
+		if (n == null) {
+			return dataInfo(false, null, false, false, REPOSITORY_PROVENANCE_ACTIONS, "0.9");
+		}
 
 		final String paClassId = n.valueOf("./oaf:provenanceaction/@classid");
 		final String paClassName = n.valueOf("./oaf:provenanceaction/@classname");
@@ -450,7 +477,9 @@ public abstract class AbstractMdRecordToOafMapper {
 		final Boolean inferred = Boolean.parseBoolean(n.valueOf("./oaf:inferred"));
 		final String trust = n.valueOf("./oaf:trust");
 
-		return dataInfo(deletedbyinference, inferenceprovenance, inferred, false, qualifier(paClassId, paClassName, paSchemeId, paSchemeName), trust);
+		return dataInfo(
+			deletedbyinference, inferenceprovenance, inferred, false,
+			qualifier(paClassId, paClassName, paSchemeId, paSchemeName), trust);
 	}
 
 	protected Field<String> prepareField(final Node node, final String xpath, final DataInfo info) {
OafToOafMapper.java
@@ -56,7 +56,8 @@ public class OafToOafMapper extends AbstractMdRecordToOafMapper {
 			}
 
 			final String pid = e.valueOf("./@nameIdentifier");
-			final String type = e.valueOf("./@nameIdentifierScheme")
+			final String type = e
+				.valueOf("./@nameIdentifierScheme")
 				.trim()
 				.toUpperCase()
 				.replaceAll(" ", "")
@@ -66,7 +67,9 @@ public class OafToOafMapper extends AbstractMdRecordToOafMapper {
 
 			if (StringUtils.isNotBlank(pid)) {
 				if (type.startsWith("ORCID")) {
-					final String cleanedId = pid.replaceAll("http://orcid.org/", "").replaceAll("https://orcid.org/", "");
+					final String cleanedId = pid
+						.replaceAll("http://orcid.org/", "")
+						.replaceAll("https://orcid.org/", "");
 					author.getPid().add(structuredProperty(cleanedId, ORCID_PID_TYPE, info));
 				} else if (type.startsWith("MAGID")) {
 					author.getPid().add(structuredProperty(pid, MAG_PID_TYPE, info));
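The ORCID branch above only strips the resolver prefix from the identifier; a sketch of its effect (the input value is illustrative):

// Illustrative input; the chained replaceAll calls come straight from the diff.
String pid = "https://orcid.org/0000-0002-1825-0097";
String cleanedId = pid
	.replaceAll("http://orcid.org/", "")
	.replaceAll("https://orcid.org/", "");
// cleanedId == "0000-0002-1825-0097"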
@@ -127,7 +130,8 @@ public class OafToOafMapper extends AbstractMdRecordToOafMapper {
 
 		final Instance instance = new Instance();
 		instance
-			.setInstancetype(prepareQualifier(doc, "//dr:CobjCategory", DNET_PUBLICATION_RESOURCE, DNET_PUBLICATION_RESOURCE));
+			.setInstancetype(
+				prepareQualifier(doc, "//dr:CobjCategory", DNET_PUBLICATION_RESOURCE, DNET_PUBLICATION_RESOURCE));
 		instance.setCollectedfrom(collectedfrom);
 		instance.setHostedby(hostedby);
 		instance.setDateofacceptance(field(doc.valueOf("//oaf:dateAccepted"), info));
@@ -143,13 +147,14 @@ public class OafToOafMapper extends AbstractMdRecordToOafMapper {
 
 		final List<Node> nodes = Lists.newArrayList(doc.selectNodes("//dc:identifier"));
 		instance
-			.setUrl(nodes
-				.stream()
-				.filter(n -> StringUtils.isNotBlank(n.getText()))
-				.map(n -> n.getText().trim())
-				.filter(u -> u.startsWith("http"))
-				.distinct()
-				.collect(Collectors.toCollection(ArrayList::new)));
+			.setUrl(
+				nodes
+					.stream()
+					.filter(n -> StringUtils.isNotBlank(n.getText()))
+					.map(n -> n.getText().trim())
+					.filter(u -> u.startsWith("http"))
+					.distinct()
+					.collect(Collectors.toCollection(ArrayList::new)));
 
 		return Lists.newArrayList(instance);
 	}
@@ -274,9 +279,15 @@ public class OafToOafMapper extends AbstractMdRecordToOafMapper {
 			final String otherId = createOpenaireId(50, originalId, false);
 
 			res
-				.add(getRelation(docId, otherId, RESULT_RESULT, PUBLICATION_DATASET, IS_RELATED_TO, collectedFrom, info, lastUpdateTimestamp));
+				.add(
+					getRelation(
+						docId, otherId, RESULT_RESULT, PUBLICATION_DATASET, IS_RELATED_TO, collectedFrom, info,
+						lastUpdateTimestamp));
 			res
-				.add(getRelation(otherId, docId, RESULT_RESULT, PUBLICATION_DATASET, IS_RELATED_TO, collectedFrom, info, lastUpdateTimestamp));
+				.add(
+					getRelation(
+						otherId, docId, RESULT_RESULT, PUBLICATION_DATASET, IS_RELATED_TO, collectedFrom, info,
+						lastUpdateTimestamp));
 		}
 	}
 	return res;
OdfToOafMapper.java
@@ -94,7 +94,8 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper {
 		for (final Object o : n.selectNodes("./datacite:nameIdentifier")) {
 
 			final String id = ((Node) o).getText();
-			final String type = ((Node) o).valueOf("./@nameIdentifierScheme")
+			final String type = ((Node) o)
+				.valueOf("./@nameIdentifierScheme")
 				.trim()
 				.toUpperCase()
 				.replaceAll(" ", "")
@@ -119,7 +120,8 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper {
 
 		final Instance instance = new Instance();
 		instance
-			.setInstancetype(prepareQualifier(doc, "//dr:CobjCategory", DNET_PUBLICATION_RESOURCE, DNET_PUBLICATION_RESOURCE));
+			.setInstancetype(
+				prepareQualifier(doc, "//dr:CobjCategory", DNET_PUBLICATION_RESOURCE, DNET_PUBLICATION_RESOURCE));
 		instance.setCollectedfrom(collectedfrom);
 		instance.setHostedby(hostedby);
 		instance.setDateofacceptance(field(doc.valueOf("//oaf:dateAccepted"), info));
@@ -168,7 +170,10 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper {
 				&& !dateType.equalsIgnoreCase("Updated")
 				&& !dateType.equalsIgnoreCase("Available")) {
 				res
-					.add(structuredProperty(((Node) o).getText(), "UNKNOWN", "UNKNOWN", DNET_DATA_CITE_DATE, DNET_DATA_CITE_DATE, info));
+					.add(
+						structuredProperty(
+							((Node) o).getText(), "UNKNOWN", "UNKNOWN", DNET_DATA_CITE_DATE, DNET_DATA_CITE_DATE,
+							info));
 			}
 		}
 		return res;
@@ -220,14 +225,16 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper {
 	protected List<Field<String>> prepareOtherResearchProductContactGroups(
 		final Document doc,
 		final DataInfo info) {
-		return prepareListFields(doc, "//datacite:contributor[@contributorType='ContactGroup']/datacite:contributorName", info);
+		return prepareListFields(
+			doc, "//datacite:contributor[@contributorType='ContactGroup']/datacite:contributorName", info);
 	}
 
 	@Override
 	protected List<Field<String>> prepareOtherResearchProductContactPersons(
 		final Document doc,
 		final DataInfo info) {
-		return prepareListFields(doc, "//datacite:contributor[@contributorType='ContactPerson']/datacite:contributorName", info);
+		return prepareListFields(
+			doc, "//datacite:contributor[@contributorType='ContactPerson']/datacite:contributorName", info);
 	}
 
 	@Override
@@ -253,7 +260,8 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper {
 	protected List<Field<String>> prepareSoftwareDocumentationUrls(
 		final Document doc,
 		final DataInfo info) {
-		return prepareListFields(doc, "//datacite:relatedIdentifier[@relatedIdentifierType='URL' and @relationType='IsDocumentedBy']", info);
+		return prepareListFields(
+			doc, "//datacite:relatedIdentifier[@relatedIdentifierType='URL' and @relationType='IsDocumentedBy']", info);
 	}
 
 	// DATASETS
@@ -327,16 +335,29 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper {
 
 			if (type.equalsIgnoreCase("IsSupplementTo")) {
 				res
-					.add(getRelation(docId, otherId, RESULT_RESULT, SUPPLEMENT, IS_SUPPLEMENT_TO, collectedFrom, info, lastUpdateTimestamp));
+					.add(
+						getRelation(
+							docId, otherId, RESULT_RESULT, SUPPLEMENT, IS_SUPPLEMENT_TO, collectedFrom, info,
+							lastUpdateTimestamp));
 				res
-					.add(getRelation(otherId, docId, RESULT_RESULT, SUPPLEMENT, IS_SUPPLEMENTED_BY, collectedFrom, info, lastUpdateTimestamp));
+					.add(
+						getRelation(
+							otherId, docId, RESULT_RESULT, SUPPLEMENT, IS_SUPPLEMENTED_BY, collectedFrom, info,
+							lastUpdateTimestamp));
 			} else if (type.equals("IsPartOf")) {
 
 				res
-					.add(getRelation(docId, otherId, RESULT_RESULT, PART, IS_PART_OF, collectedFrom, info, lastUpdateTimestamp));
+					.add(
+						getRelation(
+							docId, otherId, RESULT_RESULT, PART, IS_PART_OF, collectedFrom, info,
+							lastUpdateTimestamp));
 				res
-					.add(getRelation(otherId, docId, RESULT_RESULT, PART, HAS_PARTS, collectedFrom, info, lastUpdateTimestamp));
-			} else {}
+					.add(
+						getRelation(
+							otherId, docId, RESULT_RESULT, PART, HAS_PARTS, collectedFrom, info,
+							lastUpdateTimestamp));
+			} else {
+			}
 		}
 	}
 	return res;
@@ -344,6 +365,8 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper {
 
 	@Override
 	protected Qualifier prepareResourceType(final Document doc, final DataInfo info) {
-		return prepareQualifier(doc, "//*[local-name() = 'resource']//*[local-name() = 'resourceType']", DNET_DATA_CITE_RESOURCE, DNET_DATA_CITE_RESOURCE);
+		return prepareQualifier(
+			doc, "//*[local-name() = 'resource']//*[local-name() = 'resourceType']", DNET_DATA_CITE_RESOURCE,
+			DNET_DATA_CITE_RESOURCE);
 	}
 }