forked from D-Net/dnet-hadoop
Merge branch 'master' of code-repo.d4science.org:D-Net/dnet-hadoop
This commit is contained in:
commit
d6aada4957
|
@ -6,7 +6,7 @@
|
||||||
<parent>
|
<parent>
|
||||||
<groupId>eu.dnetlib.dhp</groupId>
|
<groupId>eu.dnetlib.dhp</groupId>
|
||||||
<artifactId>dhp-build</artifactId>
|
<artifactId>dhp-build</artifactId>
|
||||||
<version>1.2.1-SNAPSHOT</version>
|
<version>1.2.2-SNAPSHOT</version>
|
||||||
</parent>
|
</parent>
|
||||||
|
|
||||||
<artifactId>dhp-build-assembly-resources</artifactId>
|
<artifactId>dhp-build-assembly-resources</artifactId>
|
||||||
|
|
|
@ -6,7 +6,7 @@
|
||||||
<parent>
|
<parent>
|
||||||
<groupId>eu.dnetlib.dhp</groupId>
|
<groupId>eu.dnetlib.dhp</groupId>
|
||||||
<artifactId>dhp-build</artifactId>
|
<artifactId>dhp-build</artifactId>
|
||||||
<version>1.2.1-SNAPSHOT</version>
|
<version>1.2.2-SNAPSHOT</version>
|
||||||
</parent>
|
</parent>
|
||||||
|
|
||||||
<artifactId>dhp-build-properties-maven-plugin</artifactId>
|
<artifactId>dhp-build-properties-maven-plugin</artifactId>
|
||||||
|
|
|
@ -5,7 +5,7 @@
|
||||||
|
|
||||||
<groupId>eu.dnetlib.dhp</groupId>
|
<groupId>eu.dnetlib.dhp</groupId>
|
||||||
<artifactId>dhp-code-style</artifactId>
|
<artifactId>dhp-code-style</artifactId>
|
||||||
<version>1.2.1-SNAPSHOT</version>
|
<version>1.2.2-SNAPSHOT</version>
|
||||||
|
|
||||||
<packaging>jar</packaging>
|
<packaging>jar</packaging>
|
||||||
|
|
||||||
|
|
|
@ -4,7 +4,7 @@
|
||||||
<parent>
|
<parent>
|
||||||
<groupId>eu.dnetlib.dhp</groupId>
|
<groupId>eu.dnetlib.dhp</groupId>
|
||||||
<artifactId>dhp</artifactId>
|
<artifactId>dhp</artifactId>
|
||||||
<version>1.2.1-SNAPSHOT</version>
|
<version>1.2.2-SNAPSHOT</version>
|
||||||
</parent>
|
</parent>
|
||||||
<artifactId>dhp-build</artifactId>
|
<artifactId>dhp-build</artifactId>
|
||||||
<packaging>pom</packaging>
|
<packaging>pom</packaging>
|
||||||
|
|
|
@ -5,7 +5,7 @@
|
||||||
<parent>
|
<parent>
|
||||||
<groupId>eu.dnetlib.dhp</groupId>
|
<groupId>eu.dnetlib.dhp</groupId>
|
||||||
<artifactId>dhp</artifactId>
|
<artifactId>dhp</artifactId>
|
||||||
<version>1.2.1-SNAPSHOT</version>
|
<version>1.2.2-SNAPSHOT</version>
|
||||||
<relativePath>../</relativePath>
|
<relativePath>../</relativePath>
|
||||||
</parent>
|
</parent>
|
||||||
|
|
||||||
|
|
|
@ -5,7 +5,7 @@
|
||||||
<parent>
|
<parent>
|
||||||
<groupId>eu.dnetlib.dhp</groupId>
|
<groupId>eu.dnetlib.dhp</groupId>
|
||||||
<artifactId>dhp</artifactId>
|
<artifactId>dhp</artifactId>
|
||||||
<version>1.2.1-SNAPSHOT</version>
|
<version>1.2.2-SNAPSHOT</version>
|
||||||
<relativePath>../</relativePath>
|
<relativePath>../</relativePath>
|
||||||
</parent>
|
</parent>
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,69 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.schema.common;
|
||||||
|
|
||||||
|
import java.util.Comparator;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.Qualifier;
|
||||||
|
|
||||||
|
public class LicenseComparator implements Comparator<Qualifier> {
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int compare(Qualifier left, Qualifier right) {
|
||||||
|
|
||||||
|
if (left == null && right == null)
|
||||||
|
return 0;
|
||||||
|
if (left == null)
|
||||||
|
return 1;
|
||||||
|
if (right == null)
|
||||||
|
return -1;
|
||||||
|
|
||||||
|
String lClass = left.getClassid();
|
||||||
|
String rClass = right.getClassid();
|
||||||
|
|
||||||
|
if (lClass.equals(rClass))
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
if (lClass.equals("OPEN SOURCE"))
|
||||||
|
return -1;
|
||||||
|
if (rClass.equals("OPEN SOURCE"))
|
||||||
|
return 1;
|
||||||
|
|
||||||
|
if (lClass.equals("OPEN"))
|
||||||
|
return -1;
|
||||||
|
if (rClass.equals("OPEN"))
|
||||||
|
return 1;
|
||||||
|
|
||||||
|
if (lClass.equals("6MONTHS"))
|
||||||
|
return -1;
|
||||||
|
if (rClass.equals("6MONTHS"))
|
||||||
|
return 1;
|
||||||
|
|
||||||
|
if (lClass.equals("12MONTHS"))
|
||||||
|
return -1;
|
||||||
|
if (rClass.equals("12MONTHS"))
|
||||||
|
return 1;
|
||||||
|
|
||||||
|
if (lClass.equals("EMBARGO"))
|
||||||
|
return -1;
|
||||||
|
if (rClass.equals("EMBARGO"))
|
||||||
|
return 1;
|
||||||
|
|
||||||
|
if (lClass.equals("RESTRICTED"))
|
||||||
|
return -1;
|
||||||
|
if (rClass.equals("RESTRICTED"))
|
||||||
|
return 1;
|
||||||
|
|
||||||
|
if (lClass.equals("CLOSED"))
|
||||||
|
return -1;
|
||||||
|
if (rClass.equals("CLOSED"))
|
||||||
|
return 1;
|
||||||
|
|
||||||
|
if (lClass.equals("UNKNOWN"))
|
||||||
|
return -1;
|
||||||
|
if (rClass.equals("UNKNOWN"))
|
||||||
|
return 1;
|
||||||
|
|
||||||
|
// Else (but unlikely), lexicographical ordering will do.
|
||||||
|
return lClass.compareTo(rClass);
|
||||||
|
}
|
||||||
|
}
|
|
@ -8,7 +8,7 @@ public class DataInfo implements Serializable {
|
||||||
|
|
||||||
private Boolean invisible = false;
|
private Boolean invisible = false;
|
||||||
private Boolean inferred;
|
private Boolean inferred;
|
||||||
private Boolean deletedbyinference;
|
private Boolean deletedbyinference = false;
|
||||||
private String trust;
|
private String trust;
|
||||||
private String inferenceprovenance;
|
private String inferenceprovenance;
|
||||||
private Qualifier provenanceaction;
|
private Qualifier provenanceaction;
|
||||||
|
|
|
@ -2,6 +2,7 @@
|
||||||
package eu.dnetlib.dhp.schema.oaf;
|
package eu.dnetlib.dhp.schema.oaf;
|
||||||
|
|
||||||
import java.io.Serializable;
|
import java.io.Serializable;
|
||||||
|
import java.util.Objects;
|
||||||
|
|
||||||
public class Field<T> implements Serializable {
|
public class Field<T> implements Serializable {
|
||||||
|
|
||||||
|
@ -39,6 +40,6 @@ public class Field<T> implements Serializable {
|
||||||
if (getClass() != obj.getClass())
|
if (getClass() != obj.getClass())
|
||||||
return false;
|
return false;
|
||||||
Field<T> other = (Field<T>) obj;
|
Field<T> other = (Field<T>) obj;
|
||||||
return getValue().equals(other.getValue());
|
return Objects.equals(getValue(), other.getValue());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -106,6 +106,7 @@ public abstract class OafEntity extends Oaf implements Serializable {
|
||||||
.stream(lists)
|
.stream(lists)
|
||||||
.filter(Objects::nonNull)
|
.filter(Objects::nonNull)
|
||||||
.flatMap(List::stream)
|
.flatMap(List::stream)
|
||||||
|
.filter(Objects::nonNull)
|
||||||
.distinct()
|
.distinct()
|
||||||
.collect(Collectors.toList());
|
.collect(Collectors.toList());
|
||||||
}
|
}
|
||||||
|
|
|
@ -244,7 +244,25 @@ public class Result extends OafEntity implements Serializable {
|
||||||
|
|
||||||
subject = mergeLists(subject, r.getSubject());
|
subject = mergeLists(subject, r.getSubject());
|
||||||
|
|
||||||
|
//merge title lists: main title with higher trust and distinct between the others
|
||||||
|
StructuredProperty baseMainTitle = null;
|
||||||
|
if(title != null) {
|
||||||
|
baseMainTitle = getMainTitle(title);
|
||||||
|
title.remove(baseMainTitle);
|
||||||
|
}
|
||||||
|
|
||||||
|
StructuredProperty newMainTitle = null;
|
||||||
|
if(r.getTitle() != null) {
|
||||||
|
newMainTitle = getMainTitle(r.getTitle());
|
||||||
|
r.getTitle().remove(newMainTitle);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (newMainTitle != null && compareTrust(this, r) < 0 )
|
||||||
|
baseMainTitle = newMainTitle;
|
||||||
|
|
||||||
title = mergeLists(title, r.getTitle());
|
title = mergeLists(title, r.getTitle());
|
||||||
|
if (title != null && baseMainTitle != null)
|
||||||
|
title.add(baseMainTitle);
|
||||||
|
|
||||||
relevantdate = mergeLists(relevantdate, r.getRelevantdate());
|
relevantdate = mergeLists(relevantdate, r.getRelevantdate());
|
||||||
|
|
||||||
|
@ -294,4 +312,14 @@ public class Result extends OafEntity implements Serializable {
|
||||||
}
|
}
|
||||||
return a.size() > b.size() ? a : b;
|
return a.size() > b.size() ? a : b;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private StructuredProperty getMainTitle(List<StructuredProperty> titles) {
|
||||||
|
//need to check if the list of titles contains more than 1 main title? (in that case, we should chose which main title select in the list)
|
||||||
|
for (StructuredProperty title: titles) {
|
||||||
|
if (title.getQualifier() != null && title.getQualifier().getClassid() != null)
|
||||||
|
if (title.getQualifier().getClassid().equals("main title"))
|
||||||
|
return title;
|
||||||
|
}
|
||||||
|
return null;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -4,7 +4,7 @@
|
||||||
<parent>
|
<parent>
|
||||||
<groupId>eu.dnetlib.dhp</groupId>
|
<groupId>eu.dnetlib.dhp</groupId>
|
||||||
<artifactId>dhp-workflows</artifactId>
|
<artifactId>dhp-workflows</artifactId>
|
||||||
<version>1.2.1-SNAPSHOT</version>
|
<version>1.2.2-SNAPSHOT</version>
|
||||||
</parent>
|
</parent>
|
||||||
<artifactId>dhp-actionmanager</artifactId>
|
<artifactId>dhp-actionmanager</artifactId>
|
||||||
|
|
||||||
|
|
|
@ -4,7 +4,7 @@
|
||||||
<parent>
|
<parent>
|
||||||
<groupId>eu.dnetlib.dhp</groupId>
|
<groupId>eu.dnetlib.dhp</groupId>
|
||||||
<artifactId>dhp-workflows</artifactId>
|
<artifactId>dhp-workflows</artifactId>
|
||||||
<version>1.2.1-SNAPSHOT</version>
|
<version>1.2.2-SNAPSHOT</version>
|
||||||
</parent>
|
</parent>
|
||||||
<artifactId>dhp-aggregation</artifactId>
|
<artifactId>dhp-aggregation</artifactId>
|
||||||
|
|
||||||
|
|
|
@ -1,11 +1,9 @@
|
||||||
<?xml version="1.0" encoding="UTF-8"?>
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
<project xmlns="http://maven.apache.org/POM/4.0.0"
|
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
|
||||||
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
|
||||||
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
|
|
||||||
<parent>
|
<parent>
|
||||||
<artifactId>dhp-workflows</artifactId>
|
<artifactId>dhp-workflows</artifactId>
|
||||||
<groupId>eu.dnetlib.dhp</groupId>
|
<groupId>eu.dnetlib.dhp</groupId>
|
||||||
<version>1.2.1-SNAPSHOT</version>
|
<version>1.2.2-SNAPSHOT</version>
|
||||||
</parent>
|
</parent>
|
||||||
<modelVersion>4.0.0</modelVersion>
|
<modelVersion>4.0.0</modelVersion>
|
||||||
|
|
||||||
|
|
|
@ -3,7 +3,7 @@
|
||||||
<parent>
|
<parent>
|
||||||
<artifactId>dhp-workflows</artifactId>
|
<artifactId>dhp-workflows</artifactId>
|
||||||
<groupId>eu.dnetlib.dhp</groupId>
|
<groupId>eu.dnetlib.dhp</groupId>
|
||||||
<version>1.2.1-SNAPSHOT</version>
|
<version>1.2.2-SNAPSHOT</version>
|
||||||
</parent>
|
</parent>
|
||||||
<modelVersion>4.0.0</modelVersion>
|
<modelVersion>4.0.0</modelVersion>
|
||||||
|
|
||||||
|
|
|
@ -3,7 +3,7 @@
|
||||||
<parent>
|
<parent>
|
||||||
<artifactId>dhp-workflows</artifactId>
|
<artifactId>dhp-workflows</artifactId>
|
||||||
<groupId>eu.dnetlib.dhp</groupId>
|
<groupId>eu.dnetlib.dhp</groupId>
|
||||||
<version>1.2.1-SNAPSHOT</version>
|
<version>1.2.2-SNAPSHOT</version>
|
||||||
</parent>
|
</parent>
|
||||||
<modelVersion>4.0.0</modelVersion>
|
<modelVersion>4.0.0</modelVersion>
|
||||||
<artifactId>dhp-dedup-openaire</artifactId>
|
<artifactId>dhp-dedup-openaire</artifactId>
|
||||||
|
|
|
@ -20,12 +20,7 @@ public class AuthorMerger {
|
||||||
|
|
||||||
public static List<Author> merge(List<List<Author>> authors) {
|
public static List<Author> merge(List<List<Author>> authors) {
|
||||||
|
|
||||||
authors.sort(new Comparator<List<Author>>() {
|
authors.sort((o1, o2) -> -Integer.compare(countAuthorsPids(o1), countAuthorsPids(o2)));
|
||||||
@Override
|
|
||||||
public int compare(List<Author> o1, List<Author> o2) {
|
|
||||||
return -Integer.compare(countAuthorsPids(o1), countAuthorsPids(o2));
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|
||||||
List<Author> author = new ArrayList<>();
|
List<Author> author = new ArrayList<>();
|
||||||
|
|
||||||
|
@ -86,20 +81,30 @@ public class AuthorMerger {
|
||||||
.stream()
|
.stream()
|
||||||
.map(ba -> new Tuple2<>(sim(ba, a._2()), ba))
|
.map(ba -> new Tuple2<>(sim(ba, a._2()), ba))
|
||||||
.max(Comparator.comparing(Tuple2::_1));
|
.max(Comparator.comparing(Tuple2::_1));
|
||||||
if (simAuthor.isPresent() && simAuthor.get()._1() > THRESHOLD) {
|
|
||||||
|
if (simAuthor.isPresent()) {
|
||||||
|
double th = THRESHOLD;
|
||||||
|
// increase the threshold if the surname is too short
|
||||||
|
if (simAuthor.get()._2().getSurname() != null
|
||||||
|
&& simAuthor.get()._2().getSurname().length() <= 3)
|
||||||
|
th = 0.99;
|
||||||
|
|
||||||
|
if (simAuthor.get()._1() > th) {
|
||||||
Author r = simAuthor.get()._2();
|
Author r = simAuthor.get()._2();
|
||||||
if (r.getPid() == null) {
|
if (r.getPid() == null) {
|
||||||
r.setPid(new ArrayList<>());
|
r.setPid(new ArrayList<>());
|
||||||
}
|
}
|
||||||
r.getPid().add(a._1());
|
r.getPid().add(a._1());
|
||||||
}
|
}
|
||||||
|
}
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
public static String pidToComparableString(StructuredProperty pid) {
|
public static String pidToComparableString(StructuredProperty pid) {
|
||||||
return (pid.getQualifier() != null
|
return (pid.getQualifier() != null
|
||||||
? pid.getQualifier().getClassid() != null ? pid.getQualifier().getClassid().toLowerCase() : ""
|
? pid.getQualifier().getClassid() != null ? pid.getQualifier().getClassid().toLowerCase() : ""
|
||||||
: "") + (pid.getValue() != null ? pid.getValue().toLowerCase() : "");
|
: "")
|
||||||
|
+ (pid.getValue() != null ? pid.getValue().toLowerCase() : "");
|
||||||
}
|
}
|
||||||
|
|
||||||
public static int countAuthorsPids(List<Author> authors) {
|
public static int countAuthorsPids(List<Author> authors) {
|
||||||
|
@ -120,9 +125,10 @@ public class AuthorMerger {
|
||||||
final Person pa = parse(a);
|
final Person pa = parse(a);
|
||||||
final Person pb = parse(b);
|
final Person pb = parse(b);
|
||||||
|
|
||||||
|
// if both are accurate (e.g. they have name and surname)
|
||||||
if (pa.isAccurate() & pb.isAccurate()) {
|
if (pa.isAccurate() & pb.isAccurate()) {
|
||||||
return new JaroWinkler()
|
return new JaroWinkler().score(normalize(pa.getSurnameString()), normalize(pb.getSurnameString())) * 0.5
|
||||||
.score(normalize(pa.getSurnameString()), normalize(pb.getSurnameString()));
|
+ new JaroWinkler().score(normalize(pa.getNameString()), normalize(pb.getNameString())) * 0.5;
|
||||||
} else {
|
} else {
|
||||||
return new JaroWinkler()
|
return new JaroWinkler()
|
||||||
.score(normalize(pa.getNormalisedFullname()), normalize(pb.getNormalisedFullname()));
|
.score(normalize(pa.getNormalisedFullname()), normalize(pb.getNormalisedFullname()));
|
||||||
|
|
|
@ -21,6 +21,7 @@ import scala.Tuple2;
|
||||||
public class EntityMergerTest implements Serializable {
|
public class EntityMergerTest implements Serializable {
|
||||||
|
|
||||||
List<Tuple2<String, Publication>> publications;
|
List<Tuple2<String, Publication>> publications;
|
||||||
|
List<Tuple2<String, Publication>> publications2;
|
||||||
|
|
||||||
String testEntityBasePath;
|
String testEntityBasePath;
|
||||||
DataInfo dataInfo;
|
DataInfo dataInfo;
|
||||||
|
@ -36,6 +37,7 @@ public class EntityMergerTest implements Serializable {
|
||||||
.getAbsolutePath();
|
.getAbsolutePath();
|
||||||
|
|
||||||
publications = readSample(testEntityBasePath + "/publication_merge.json", Publication.class);
|
publications = readSample(testEntityBasePath + "/publication_merge.json", Publication.class);
|
||||||
|
publications2 = readSample(testEntityBasePath + "/publication_merge2.json", Publication.class);
|
||||||
|
|
||||||
pub_top = getTopPub(publications);
|
pub_top = getTopPub(publications);
|
||||||
|
|
||||||
|
@ -88,6 +90,25 @@ public class EntityMergerTest implements Serializable {
|
||||||
// verify authors
|
// verify authors
|
||||||
assertEquals(pub_merged.getAuthor().size(), 9);
|
assertEquals(pub_merged.getAuthor().size(), 9);
|
||||||
assertEquals(AuthorMerger.countAuthorsPids(pub_merged.getAuthor()), 4);
|
assertEquals(AuthorMerger.countAuthorsPids(pub_merged.getAuthor()), 4);
|
||||||
|
|
||||||
|
//verify title
|
||||||
|
int count = 0;
|
||||||
|
for (StructuredProperty title: pub_merged.getTitle()){
|
||||||
|
if (title.getQualifier().getClassid().equals("main title"))
|
||||||
|
count++;
|
||||||
|
}
|
||||||
|
assertEquals(count, 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void publicationMergerTest2() throws InstantiationException, IllegalAccessException {
|
||||||
|
|
||||||
|
Publication pub_merged = DedupRecordFactory
|
||||||
|
.entityMerger(dedupId, publications2.iterator(), 0, dataInfo, Publication.class);
|
||||||
|
|
||||||
|
assertEquals(pub_merged.getAuthor().size(), 27);
|
||||||
|
// insert assertions here
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public DataInfo setDI() {
|
public DataInfo setDI() {
|
||||||
|
|
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
|
@ -3,7 +3,7 @@
|
||||||
<parent>
|
<parent>
|
||||||
<artifactId>dhp-workflows</artifactId>
|
<artifactId>dhp-workflows</artifactId>
|
||||||
<groupId>eu.dnetlib.dhp</groupId>
|
<groupId>eu.dnetlib.dhp</groupId>
|
||||||
<version>1.2.1-SNAPSHOT</version>
|
<version>1.2.2-SNAPSHOT</version>
|
||||||
</parent>
|
</parent>
|
||||||
<modelVersion>4.0.0</modelVersion>
|
<modelVersion>4.0.0</modelVersion>
|
||||||
|
|
||||||
|
|
|
@ -3,7 +3,7 @@
|
||||||
<parent>
|
<parent>
|
||||||
<artifactId>dhp-workflows</artifactId>
|
<artifactId>dhp-workflows</artifactId>
|
||||||
<groupId>eu.dnetlib.dhp</groupId>
|
<groupId>eu.dnetlib.dhp</groupId>
|
||||||
<version>1.2.1-SNAPSHOT</version>
|
<version>1.2.2-SNAPSHOT</version>
|
||||||
</parent>
|
</parent>
|
||||||
<modelVersion>4.0.0</modelVersion>
|
<modelVersion>4.0.0</modelVersion>
|
||||||
|
|
||||||
|
|
|
@ -1,11 +1,9 @@
|
||||||
<?xml version="1.0" encoding="UTF-8"?>
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
<project xmlns="http://maven.apache.org/POM/4.0.0"
|
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
|
||||||
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
|
||||||
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
|
|
||||||
<parent>
|
<parent>
|
||||||
<artifactId>dhp-workflows</artifactId>
|
<artifactId>dhp-workflows</artifactId>
|
||||||
<groupId>eu.dnetlib.dhp</groupId>
|
<groupId>eu.dnetlib.dhp</groupId>
|
||||||
<version>1.2.1-SNAPSHOT</version>
|
<version>1.2.2-SNAPSHOT</version>
|
||||||
</parent>
|
</parent>
|
||||||
<modelVersion>4.0.0</modelVersion>
|
<modelVersion>4.0.0</modelVersion>
|
||||||
|
|
||||||
|
|
|
@ -3,7 +3,7 @@
|
||||||
<parent>
|
<parent>
|
||||||
<artifactId>dhp-workflows</artifactId>
|
<artifactId>dhp-workflows</artifactId>
|
||||||
<groupId>eu.dnetlib.dhp</groupId>
|
<groupId>eu.dnetlib.dhp</groupId>
|
||||||
<version>1.2.1-SNAPSHOT</version>
|
<version>1.2.2-SNAPSHOT</version>
|
||||||
</parent>
|
</parent>
|
||||||
<modelVersion>4.0.0</modelVersion>
|
<modelVersion>4.0.0</modelVersion>
|
||||||
|
|
||||||
|
|
|
@ -10,23 +10,10 @@ import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.listFields;
|
||||||
import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.oaiIProvenance;
|
import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.oaiIProvenance;
|
||||||
import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.qualifier;
|
import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.qualifier;
|
||||||
import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.structuredProperty;
|
import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.structuredProperty;
|
||||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.DATASET_DEFAULT_RESULTTYPE;
|
import static eu.dnetlib.dhp.schema.common.ModelConstants.*;
|
||||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.DNET_PID_TYPES;
|
import static eu.dnetlib.dhp.schema.common.ModelConstants.DNET_ACCESS_MODES;
|
||||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.IS_PRODUCED_BY;
|
|
||||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.ORP_DEFAULT_RESULTTYPE;
|
|
||||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.OUTCOME;
|
|
||||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.PRODUCES;
|
|
||||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.PUBLICATION_DEFAULT_RESULTTYPE;
|
|
||||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.REPOSITORY_PROVENANCE_ACTIONS;
|
|
||||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.RESULT_PROJECT;
|
|
||||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.SOFTWARE_DEFAULT_RESULTTYPE;
|
|
||||||
|
|
||||||
import java.util.ArrayList;
|
import java.util.*;
|
||||||
import java.util.Arrays;
|
|
||||||
import java.util.Date;
|
|
||||||
import java.util.HashMap;
|
|
||||||
import java.util.List;
|
|
||||||
import java.util.Map;
|
|
||||||
|
|
||||||
import org.apache.commons.lang3.StringUtils;
|
import org.apache.commons.lang3.StringUtils;
|
||||||
import org.dom4j.Document;
|
import org.dom4j.Document;
|
||||||
|
@ -34,6 +21,7 @@ import org.dom4j.DocumentFactory;
|
||||||
import org.dom4j.DocumentHelper;
|
import org.dom4j.DocumentHelper;
|
||||||
import org.dom4j.Node;
|
import org.dom4j.Node;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.schema.common.LicenseComparator;
|
||||||
import eu.dnetlib.dhp.schema.oaf.Author;
|
import eu.dnetlib.dhp.schema.oaf.Author;
|
||||||
import eu.dnetlib.dhp.schema.oaf.Context;
|
import eu.dnetlib.dhp.schema.oaf.Context;
|
||||||
import eu.dnetlib.dhp.schema.oaf.DataInfo;
|
import eu.dnetlib.dhp.schema.oaf.DataInfo;
|
||||||
|
@ -285,7 +273,9 @@ public abstract class AbstractMdRecordToOafMapper {
|
||||||
r.setCoverage(prepareCoverages(doc, info));
|
r.setCoverage(prepareCoverages(doc, info));
|
||||||
r.setContext(prepareContexts(doc, info));
|
r.setContext(prepareContexts(doc, info));
|
||||||
r.setExternalReference(new ArrayList<>()); // NOT PRESENT IN MDSTORES
|
r.setExternalReference(new ArrayList<>()); // NOT PRESENT IN MDSTORES
|
||||||
r.setInstance(prepareInstances(doc, info, collectedFrom, hostedBy));
|
final List<Instance> instances = prepareInstances(doc, info, collectedFrom, hostedBy);
|
||||||
|
r.setInstance(instances);
|
||||||
|
r.setBestaccessright(getBestAccessRights(instances));
|
||||||
}
|
}
|
||||||
|
|
||||||
private List<Context> prepareContexts(final Document doc, final DataInfo info) {
|
private List<Context> prepareContexts(final Document doc, final DataInfo info) {
|
||||||
|
@ -368,6 +358,34 @@ public abstract class AbstractMdRecordToOafMapper {
|
||||||
|
|
||||||
protected abstract Field<String> prepareDatasetStorageDate(Document doc, DataInfo info);
|
protected abstract Field<String> prepareDatasetStorageDate(Document doc, DataInfo info);
|
||||||
|
|
||||||
|
protected static Qualifier getBestAccessRights(List<Instance> instanceList) {
|
||||||
|
if (instanceList != null) {
|
||||||
|
final Optional<Qualifier> min = instanceList
|
||||||
|
.stream()
|
||||||
|
.map(i -> i.getAccessright())
|
||||||
|
.min(new LicenseComparator());
|
||||||
|
|
||||||
|
final Qualifier rights = min.isPresent() ? min.get() : new Qualifier();
|
||||||
|
|
||||||
|
if (StringUtils.isBlank(rights.getClassid())) {
|
||||||
|
rights.setClassid(UNKNOWN);
|
||||||
|
}
|
||||||
|
if (StringUtils.isBlank(rights.getClassname())
|
||||||
|
|| UNKNOWN.equalsIgnoreCase(rights.getClassname())) {
|
||||||
|
rights.setClassname(NOT_AVAILABLE);
|
||||||
|
}
|
||||||
|
if (StringUtils.isBlank(rights.getSchemeid())) {
|
||||||
|
rights.setSchemeid(DNET_ACCESS_MODES);
|
||||||
|
}
|
||||||
|
if (StringUtils.isBlank(rights.getSchemename())) {
|
||||||
|
rights.setSchemename(DNET_ACCESS_MODES);
|
||||||
|
}
|
||||||
|
|
||||||
|
return rights;
|
||||||
|
}
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
private Journal prepareJournal(final Document doc, final DataInfo info) {
|
private Journal prepareJournal(final Document doc, final DataInfo info) {
|
||||||
final Node n = doc.selectSingleNode("//oaf:journal");
|
final Node n = doc.selectSingleNode("//oaf:journal");
|
||||||
if (n != null) {
|
if (n != null) {
|
||||||
|
|
|
@ -85,8 +85,19 @@ public class MappersTest {
|
||||||
assertTrue(p.getSubject().size() > 0);
|
assertTrue(p.getSubject().size() > 0);
|
||||||
assertTrue(StringUtils.isNotBlank(p.getJournal().getIssnOnline()));
|
assertTrue(StringUtils.isNotBlank(p.getJournal().getIssnOnline()));
|
||||||
assertTrue(StringUtils.isNotBlank(p.getJournal().getName()));
|
assertTrue(StringUtils.isNotBlank(p.getJournal().getName()));
|
||||||
assertTrue(p.getInstance().size() > 0);
|
|
||||||
|
|
||||||
|
assertNotNull(p.getInstance());
|
||||||
|
assertTrue(p.getInstance().size() > 0);
|
||||||
|
p
|
||||||
|
.getInstance()
|
||||||
|
.stream()
|
||||||
|
.forEach(i -> {
|
||||||
|
assertNotNull(i.getAccessright());
|
||||||
|
assertEquals("OPEN", i.getAccessright().getClassid());
|
||||||
|
});
|
||||||
|
|
||||||
|
assertNotNull(p.getBestaccessright());
|
||||||
|
assertEquals("OPEN", p.getBestaccessright().getClassid());
|
||||||
assertValidId(r1.getSource());
|
assertValidId(r1.getSource());
|
||||||
assertValidId(r1.getTarget());
|
assertValidId(r1.getTarget());
|
||||||
assertValidId(r2.getSource());
|
assertValidId(r2.getSource());
|
||||||
|
@ -164,6 +175,16 @@ public class MappersTest {
|
||||||
assertTrue(d.getContext().size() > 0);
|
assertTrue(d.getContext().size() > 0);
|
||||||
assertTrue(d.getContext().get(0).getId().length() > 0);
|
assertTrue(d.getContext().get(0).getId().length() > 0);
|
||||||
|
|
||||||
|
assertNotNull(d.getInstance());
|
||||||
|
assertTrue(d.getInstance().size() > 0);
|
||||||
|
d
|
||||||
|
.getInstance()
|
||||||
|
.stream()
|
||||||
|
.forEach(i -> {
|
||||||
|
assertNotNull(i.getAccessright());
|
||||||
|
assertEquals("OPEN", i.getAccessright().getClassid());
|
||||||
|
});
|
||||||
|
|
||||||
assertValidId(r1.getSource());
|
assertValidId(r1.getSource());
|
||||||
assertValidId(r1.getTarget());
|
assertValidId(r1.getTarget());
|
||||||
assertValidId(r2.getSource());
|
assertValidId(r2.getSource());
|
||||||
|
|
|
@ -3,7 +3,7 @@
|
||||||
<parent>
|
<parent>
|
||||||
<artifactId>dhp-workflows</artifactId>
|
<artifactId>dhp-workflows</artifactId>
|
||||||
<groupId>eu.dnetlib.dhp</groupId>
|
<groupId>eu.dnetlib.dhp</groupId>
|
||||||
<version>1.2.1-SNAPSHOT</version>
|
<version>1.2.2-SNAPSHOT</version>
|
||||||
</parent>
|
</parent>
|
||||||
<modelVersion>4.0.0</modelVersion>
|
<modelVersion>4.0.0</modelVersion>
|
||||||
|
|
||||||
|
|
|
@ -3,7 +3,7 @@
|
||||||
<parent>
|
<parent>
|
||||||
<artifactId>dhp-workflows</artifactId>
|
<artifactId>dhp-workflows</artifactId>
|
||||||
<groupId>eu.dnetlib.dhp</groupId>
|
<groupId>eu.dnetlib.dhp</groupId>
|
||||||
<version>1.2.1-SNAPSHOT</version>
|
<version>1.2.2-SNAPSHOT</version>
|
||||||
</parent>
|
</parent>
|
||||||
<modelVersion>4.0.0</modelVersion>
|
<modelVersion>4.0.0</modelVersion>
|
||||||
|
|
||||||
|
|
|
@ -3,7 +3,7 @@
|
||||||
<parent>
|
<parent>
|
||||||
<artifactId>dhp-workflows</artifactId>
|
<artifactId>dhp-workflows</artifactId>
|
||||||
<groupId>eu.dnetlib.dhp</groupId>
|
<groupId>eu.dnetlib.dhp</groupId>
|
||||||
<version>1.2.1-SNAPSHOT</version>
|
<version>1.2.2-SNAPSHOT</version>
|
||||||
</parent>
|
</parent>
|
||||||
<modelVersion>4.0.0</modelVersion>
|
<modelVersion>4.0.0</modelVersion>
|
||||||
<artifactId>dhp-stats-update</artifactId>
|
<artifactId>dhp-stats-update</artifactId>
|
||||||
|
|
|
@ -3,7 +3,7 @@
|
||||||
<parent>
|
<parent>
|
||||||
<artifactId>dhp-workflows</artifactId>
|
<artifactId>dhp-workflows</artifactId>
|
||||||
<groupId>eu.dnetlib.dhp</groupId>
|
<groupId>eu.dnetlib.dhp</groupId>
|
||||||
<version>1.2.1-SNAPSHOT</version>
|
<version>1.2.2-SNAPSHOT</version>
|
||||||
</parent>
|
</parent>
|
||||||
<modelVersion>4.0.0</modelVersion>
|
<modelVersion>4.0.0</modelVersion>
|
||||||
|
|
||||||
|
|
|
@ -6,7 +6,7 @@
|
||||||
<parent>
|
<parent>
|
||||||
<groupId>eu.dnetlib.dhp</groupId>
|
<groupId>eu.dnetlib.dhp</groupId>
|
||||||
<artifactId>dhp</artifactId>
|
<artifactId>dhp</artifactId>
|
||||||
<version>1.2.1-SNAPSHOT</version>
|
<version>1.2.2-SNAPSHOT</version>
|
||||||
<relativePath>../</relativePath>
|
<relativePath>../</relativePath>
|
||||||
</parent>
|
</parent>
|
||||||
|
|
||||||
|
|
2
pom.xml
2
pom.xml
|
@ -3,7 +3,7 @@
|
||||||
<modelVersion>4.0.0</modelVersion>
|
<modelVersion>4.0.0</modelVersion>
|
||||||
<groupId>eu.dnetlib.dhp</groupId>
|
<groupId>eu.dnetlib.dhp</groupId>
|
||||||
<artifactId>dhp</artifactId>
|
<artifactId>dhp</artifactId>
|
||||||
<version>1.2.1-SNAPSHOT</version>
|
<version>1.2.2-SNAPSHOT</version>
|
||||||
<packaging>pom</packaging>
|
<packaging>pom</packaging>
|
||||||
|
|
||||||
<licenses>
|
<licenses>
|
||||||
|
|
Loading…
Reference in New Issue