forked from D-Net/dnet-hadoop
code formatting
This commit is contained in:
parent
7bfc35df5e
commit
d8b5f43a7e
|
@ -73,7 +73,7 @@ public class AuthorMerger {
|
||||||
if (base == null || enrich == null)
|
if (base == null || enrich == null)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
//<pidComparableString, Author> (if an Author has more than 1 pid, it appears 2 times in the list)
|
// <pidComparableString, Author> (if an Author has more than 1 pid, it appears 2 times in the list)
|
||||||
final Map<String, Author> basePidAuthorMap = base
|
final Map<String, Author> basePidAuthorMap = base
|
||||||
.stream()
|
.stream()
|
||||||
.filter(a -> a.getPid() != null && a.getPid().size() > 0)
|
.filter(a -> a.getPid() != null && a.getPid().size() > 0)
|
||||||
|
@ -84,7 +84,7 @@ public class AuthorMerger {
|
||||||
.map(p -> new Tuple2<>(pidToComparableString(p), a)))
|
.map(p -> new Tuple2<>(pidToComparableString(p), a)))
|
||||||
.collect(Collectors.toMap(Tuple2::_1, Tuple2::_2, (x1, x2) -> x1));
|
.collect(Collectors.toMap(Tuple2::_1, Tuple2::_2, (x1, x2) -> x1));
|
||||||
|
|
||||||
//<pid, Author> (list of pid that are missing in the other list)
|
// <pid, Author> (list of pid that are missing in the other list)
|
||||||
final List<Tuple2<StructuredProperty, Author>> pidToEnrich = enrich
|
final List<Tuple2<StructuredProperty, Author>> pidToEnrich = enrich
|
||||||
.stream()
|
.stream()
|
||||||
.filter(a -> a.getPid() != null && a.getPid().size() > 0)
|
.filter(a -> a.getPid() != null && a.getPid().size() > 0)
|
||||||
|
@ -96,7 +96,6 @@ public class AuthorMerger {
|
||||||
.map(p -> new Tuple2<>(p, a)))
|
.map(p -> new Tuple2<>(p, a)))
|
||||||
.collect(Collectors.toList());
|
.collect(Collectors.toList());
|
||||||
|
|
||||||
|
|
||||||
pidToEnrich
|
pidToEnrich
|
||||||
.forEach(
|
.forEach(
|
||||||
a -> {
|
a -> {
|
||||||
|
|
|
@ -1,14 +1,5 @@
|
||||||
package eu.dnetlib.dhp.oa.merge;
|
|
||||||
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.Author;
|
package eu.dnetlib.dhp.oa.merge;
|
||||||
import eu.dnetlib.dhp.schema.oaf.Publication;
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
|
|
||||||
import eu.dnetlib.pace.util.MapDocumentUtil;
|
|
||||||
import org.codehaus.jackson.map.ObjectMapper;
|
|
||||||
import org.junit.jupiter.api.Assertions;
|
|
||||||
import org.junit.jupiter.api.BeforeEach;
|
|
||||||
import org.junit.jupiter.api.Test;
|
|
||||||
import scala.Tuple2;
|
|
||||||
|
|
||||||
import java.io.BufferedReader;
|
import java.io.BufferedReader;
|
||||||
import java.io.FileReader;
|
import java.io.FileReader;
|
||||||
|
@ -18,6 +9,17 @@ import java.util.ArrayList;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.stream.Collectors;
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
|
import org.codehaus.jackson.map.ObjectMapper;
|
||||||
|
import org.junit.jupiter.api.Assertions;
|
||||||
|
import org.junit.jupiter.api.BeforeEach;
|
||||||
|
import org.junit.jupiter.api.Test;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.Author;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.Publication;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
|
||||||
|
import eu.dnetlib.pace.util.MapDocumentUtil;
|
||||||
|
import scala.Tuple2;
|
||||||
|
|
||||||
public class AuthorMergerTest {
|
public class AuthorMergerTest {
|
||||||
|
|
||||||
String publicationsBasePath;
|
String publicationsBasePath;
|
||||||
|
@ -32,19 +34,19 @@ public class AuthorMergerTest {
|
||||||
.toFile()
|
.toFile()
|
||||||
.getAbsolutePath();
|
.getAbsolutePath();
|
||||||
|
|
||||||
authors =
|
authors = readSample(publicationsBasePath + "/publications_with_authors.json", Publication.class)
|
||||||
readSample(publicationsBasePath + "/publications_with_authors.json", Publication.class)
|
|
||||||
.stream()
|
.stream()
|
||||||
.map(p -> p._2().getAuthor()).collect(Collectors.toList());
|
.map(p -> p._2().getAuthor())
|
||||||
|
.collect(Collectors.toList());
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void mergeTest() { //used in the dedup: threshold set to 0.95
|
public void mergeTest() { // used in the dedup: threshold set to 0.95
|
||||||
|
|
||||||
for (List<Author> authors1: authors){
|
for (List<Author> authors1 : authors) {
|
||||||
System.out.println("List " + (authors.indexOf(authors1)+1));
|
System.out.println("List " + (authors.indexOf(authors1) + 1));
|
||||||
for (Author author: authors1){
|
for (Author author : authors1) {
|
||||||
System.out.println(authorToString(author));
|
System.out.println(authorToString(author));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -52,7 +54,7 @@ public class AuthorMergerTest {
|
||||||
List<Author> merge = AuthorMerger.merge(authors);
|
List<Author> merge = AuthorMerger.merge(authors);
|
||||||
|
|
||||||
System.out.println("Merge ");
|
System.out.println("Merge ");
|
||||||
for (Author author: merge) {
|
for (Author author : merge) {
|
||||||
System.out.println(authorToString(author));
|
System.out.println(authorToString(author));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -83,7 +85,7 @@ public class AuthorMergerTest {
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
public String authorToString(Author a){
|
public String authorToString(Author a) {
|
||||||
|
|
||||||
String print = "Fullname = ";
|
String print = "Fullname = ";
|
||||||
print += a.getFullname() + " pid = [";
|
print += a.getFullname() + " pid = [";
|
||||||
|
|
Loading…
Reference in New Issue