forked from D-Net/dnet-hadoop
some more tests for authormerger
This commit is contained in:
parent
6e987fc084
commit
1ea66e8917
|
@ -90,8 +90,6 @@ public class DoiBoostAuthorMerger {
|
||||||
|
|
||||||
|
|
||||||
private static void enrichPidFromList(List<Author> base, List<Author> enrich) {
|
private static void enrichPidFromList(List<Author> base, List<Author> enrich) {
|
||||||
if(base == null || enrich == null)
|
|
||||||
return ;
|
|
||||||
|
|
||||||
//search authors having identifiers in the enrich list
|
//search authors having identifiers in the enrich list
|
||||||
final List<Author> authorsWithPids = enrich
|
final List<Author> authorsWithPids = enrich
|
||||||
|
@ -115,11 +113,11 @@ public class DoiBoostAuthorMerger {
|
||||||
for (Tuple2<Author, Double> t : t2._2()) {
|
for (Tuple2<Author, Double> t : t2._2()) {
|
||||||
String mapEntry = DHPUtils.md5(t._1().getFullname());
|
String mapEntry = DHPUtils.md5(t._1().getFullname());
|
||||||
AuthorAssoc aa = assocMap.get(mapEntry);
|
AuthorAssoc aa = assocMap.get(mapEntry);
|
||||||
if(aa.getScore() < t._2()){
|
if(aa.getScore() < t._2() && aa.getScore() < 0.9){
|
||||||
aa.setScore(t._2());
|
aa.setScore(t._2());
|
||||||
aa.setTo_be_enriched(new ArrayList<>());
|
aa.setTo_be_enriched(new ArrayList<>());
|
||||||
aa.getTo_be_enriched().add(t2._1());
|
aa.getTo_be_enriched().add(t2._1());
|
||||||
}else if(aa.getScore() == t._2()){
|
}else if(t._2() > 0.9){
|
||||||
aa.getTo_be_enriched().add(t2._1());
|
aa.getTo_be_enriched().add(t2._1());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -307,4 +307,100 @@ public class DoiBoostAuthorMergerTest {
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void shouldMergeTest1() throws JsonProcessingException {
|
||||||
|
|
||||||
|
authors = readSample(publicationsBasePath + "/should_appear_author1.json", Publication.class)
|
||||||
|
.stream()
|
||||||
|
.map(p -> p._2().getAuthor())
|
||||||
|
.collect(Collectors.toList());
|
||||||
|
|
||||||
|
|
||||||
|
List<Author> merge = DoiBoostAuthorMerger.merge(authors, true);
|
||||||
|
|
||||||
|
Assertions.assertTrue(6 == merge.stream().filter(a -> a.getPid() !=null)
|
||||||
|
.filter(a -> a.getPid().stream().anyMatch(p -> p.getQualifier().getClassid().equals(ModelConstants.ORCID))).count());
|
||||||
|
|
||||||
|
Assertions.assertTrue(34 == merge.stream().filter(a -> a.getPid() !=null)
|
||||||
|
.filter(a -> a.getPid().stream().anyMatch(p -> p.getQualifier().getClassid().equals(ModelConstants.ORCID_PENDING))).count());
|
||||||
|
|
||||||
|
merge.stream().filter(a -> a.getRank() == 26)
|
||||||
|
.forEach(a ->
|
||||||
|
Assertions.assertTrue(a.getPid()
|
||||||
|
.stream()
|
||||||
|
.anyMatch(pid -> pid.getValue().equals("0000-0002-2445-5275")
|
||||||
|
&& pid.getQualifier().getClassid().equals(ModelConstants.ORCID)
|
||||||
|
)
|
||||||
|
)
|
||||||
|
);
|
||||||
|
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void shouldMergeTest2() throws JsonProcessingException {
|
||||||
|
|
||||||
|
authors = readSample(publicationsBasePath + "/should_appear_author2.json", Publication.class)
|
||||||
|
.stream()
|
||||||
|
.map(p -> p._2().getAuthor())
|
||||||
|
.collect(Collectors.toList());
|
||||||
|
|
||||||
|
|
||||||
|
List<Author> merge = DoiBoostAuthorMerger.merge(authors, true);
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
Assertions.assertTrue(5 == merge.stream().filter(a -> a.getPid() !=null)
|
||||||
|
.filter(a -> a.getPid().stream().anyMatch(p -> p.getQualifier().getClassid().equals(ModelConstants.ORCID))).count());
|
||||||
|
|
||||||
|
Assertions.assertTrue(34 == merge.stream().filter(a -> a.getPid() !=null)
|
||||||
|
.filter(a -> a.getPid().stream().anyMatch(p -> p.getQualifier().getClassid().equals(ModelConstants.ORCID_PENDING))).count());
|
||||||
|
|
||||||
|
merge.stream().filter(a -> a.getFullname().equals("da luz geraldo eduardo"))
|
||||||
|
.forEach(a ->
|
||||||
|
Assertions.assertTrue(a.getPid()
|
||||||
|
.stream()
|
||||||
|
.anyMatch(pid -> pid.getValue().equals("http://orcid.org/0000-0003-2434-0387")
|
||||||
|
&& pid.getQualifier().getClassid().equals(ModelConstants.ORCID_PENDING)
|
||||||
|
)
|
||||||
|
)
|
||||||
|
);
|
||||||
|
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void shouldNotMergeTest1() throws JsonProcessingException {
|
||||||
|
|
||||||
|
authors = readSample(publicationsBasePath + "/should_appear_author3.json", Publication.class)
|
||||||
|
.stream()
|
||||||
|
.map(p -> p._2().getAuthor())
|
||||||
|
.collect(Collectors.toList());
|
||||||
|
|
||||||
|
|
||||||
|
List<Author> merge = DoiBoostAuthorMerger.merge(authors, true);
|
||||||
|
|
||||||
|
System.out.println("Merge ");
|
||||||
|
for (Author author : merge) {
|
||||||
|
System.out.println(authorToString(author));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Assertions.assertTrue(5 == merge.stream().filter(a -> a.getPid() !=null)
|
||||||
|
// .filter(a -> a.getPid().stream().anyMatch(p -> p.getQualifier().getClassid().equals(ModelConstants.ORCID))).count());
|
||||||
|
//
|
||||||
|
// Assertions.assertTrue(34 == merge.stream().filter(a -> a.getPid() !=null)
|
||||||
|
// .filter(a -> a.getPid().stream().anyMatch(p -> p.getQualifier().getClassid().equals(ModelConstants.ORCID_PENDING))).count());
|
||||||
|
//
|
||||||
|
// merge.stream().filter(a -> a.getFullname().equals("da luz geraldo eduardo"))
|
||||||
|
// .forEach(a ->
|
||||||
|
// Assertions.assertTrue(a.getPid()
|
||||||
|
// .stream()
|
||||||
|
// .anyMatch(pid -> pid.getValue().equals("http://orcid.org/0000-0003-2434-0387")
|
||||||
|
// && pid.getQualifier().getClassid().equals(ModelConstants.ORCID_PENDING)
|
||||||
|
// )
|
||||||
|
// )
|
||||||
|
// );
|
||||||
|
|
||||||
|
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
Loading…
Reference in New Issue