1
0
Fork 0

code formatting

This commit is contained in:
Claudio Atzori 2020-12-22 14:59:03 +01:00
parent 7bfc35df5e
commit d8b5f43a7e
2 changed files with 73 additions and 72 deletions

View File

@ -73,7 +73,7 @@ public class AuthorMerger {
if (base == null || enrich == null) if (base == null || enrich == null)
return; return;
//<pidComparableString, Author> (if an Author has more than 1 pid, it appears 2 times in the list) // <pidComparableString, Author> (if an Author has more than 1 pid, it appears 2 times in the list)
final Map<String, Author> basePidAuthorMap = base final Map<String, Author> basePidAuthorMap = base
.stream() .stream()
.filter(a -> a.getPid() != null && a.getPid().size() > 0) .filter(a -> a.getPid() != null && a.getPid().size() > 0)
@ -84,7 +84,7 @@ public class AuthorMerger {
.map(p -> new Tuple2<>(pidToComparableString(p), a))) .map(p -> new Tuple2<>(pidToComparableString(p), a)))
.collect(Collectors.toMap(Tuple2::_1, Tuple2::_2, (x1, x2) -> x1)); .collect(Collectors.toMap(Tuple2::_1, Tuple2::_2, (x1, x2) -> x1));
//<pid, Author> (list of pid that are missing in the other list) // <pid, Author> (list of pid that are missing in the other list)
final List<Tuple2<StructuredProperty, Author>> pidToEnrich = enrich final List<Tuple2<StructuredProperty, Author>> pidToEnrich = enrich
.stream() .stream()
.filter(a -> a.getPid() != null && a.getPid().size() > 0) .filter(a -> a.getPid() != null && a.getPid().size() > 0)
@ -96,7 +96,6 @@ public class AuthorMerger {
.map(p -> new Tuple2<>(p, a))) .map(p -> new Tuple2<>(p, a)))
.collect(Collectors.toList()); .collect(Collectors.toList());
pidToEnrich pidToEnrich
.forEach( .forEach(
a -> { a -> {

View File

@ -1,14 +1,5 @@
package eu.dnetlib.dhp.oa.merge;
import eu.dnetlib.dhp.schema.oaf.Author; package eu.dnetlib.dhp.oa.merge;
import eu.dnetlib.dhp.schema.oaf.Publication;
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
import eu.dnetlib.pace.util.MapDocumentUtil;
import org.codehaus.jackson.map.ObjectMapper;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import scala.Tuple2;
import java.io.BufferedReader; import java.io.BufferedReader;
import java.io.FileReader; import java.io.FileReader;
@ -18,80 +9,91 @@ import java.util.ArrayList;
import java.util.List; import java.util.List;
import java.util.stream.Collectors; import java.util.stream.Collectors;
import org.codehaus.jackson.map.ObjectMapper;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import eu.dnetlib.dhp.schema.oaf.Author;
import eu.dnetlib.dhp.schema.oaf.Publication;
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
import eu.dnetlib.pace.util.MapDocumentUtil;
import scala.Tuple2;
public class AuthorMergerTest { public class AuthorMergerTest {
String publicationsBasePath; String publicationsBasePath;
List<List<Author>> authors; List<List<Author>> authors;
@BeforeEach @BeforeEach
public void setUp() throws Exception { public void setUp() throws Exception {
publicationsBasePath = Paths publicationsBasePath = Paths
.get(AuthorMergerTest.class.getResource("/eu/dnetlib/dhp/oa/merge").toURI()) .get(AuthorMergerTest.class.getResource("/eu/dnetlib/dhp/oa/merge").toURI())
.toFile() .toFile()
.getAbsolutePath(); .getAbsolutePath();
authors = authors = readSample(publicationsBasePath + "/publications_with_authors.json", Publication.class)
readSample(publicationsBasePath + "/publications_with_authors.json", Publication.class) .stream()
.stream() .map(p -> p._2().getAuthor())
.map(p -> p._2().getAuthor()).collect(Collectors.toList()); .collect(Collectors.toList());
} }
@Test @Test
public void mergeTest() { //used in the dedup: threshold set to 0.95 public void mergeTest() { // used in the dedup: threshold set to 0.95
for (List<Author> authors1: authors){ for (List<Author> authors1 : authors) {
System.out.println("List " + (authors.indexOf(authors1)+1)); System.out.println("List " + (authors.indexOf(authors1) + 1));
for (Author author: authors1){ for (Author author : authors1) {
System.out.println(authorToString(author)); System.out.println(authorToString(author));
} }
} }
List<Author> merge = AuthorMerger.merge(authors); List<Author> merge = AuthorMerger.merge(authors);
System.out.println("Merge "); System.out.println("Merge ");
for (Author author: merge) { for (Author author : merge) {
System.out.println(authorToString(author)); System.out.println(authorToString(author));
} }
Assertions.assertEquals(7, merge.size()); Assertions.assertEquals(7, merge.size());
} }
public <T> List<Tuple2<String, T>> readSample(String path, Class<T> clazz) { public <T> List<Tuple2<String, T>> readSample(String path, Class<T> clazz) {
List<Tuple2<String, T>> res = new ArrayList<>(); List<Tuple2<String, T>> res = new ArrayList<>();
BufferedReader reader; BufferedReader reader;
try { try {
reader = new BufferedReader(new FileReader(path)); reader = new BufferedReader(new FileReader(path));
String line = reader.readLine(); String line = reader.readLine();
while (line != null) { while (line != null) {
res res
.add( .add(
new Tuple2<>( new Tuple2<>(
MapDocumentUtil.getJPathString("$.id", line), MapDocumentUtil.getJPathString("$.id", line),
new ObjectMapper().readValue(line, clazz))); new ObjectMapper().readValue(line, clazz)));
// read next line // read next line
line = reader.readLine(); line = reader.readLine();
} }
reader.close(); reader.close();
} catch (IOException e) { } catch (IOException e) {
e.printStackTrace(); e.printStackTrace();
} }
return res; return res;
} }
public String authorToString(Author a){ public String authorToString(Author a) {
String print = "Fullname = "; String print = "Fullname = ";
print += a.getFullname() + " pid = ["; print += a.getFullname() + " pid = [";
if (a.getPid() != null) if (a.getPid() != null)
for (StructuredProperty sp : a.getPid()) { for (StructuredProperty sp : a.getPid()) {
print += sp.toComparableString() + " "; print += sp.toComparableString() + " ";
} }
print += "]"; print += "]";
return print; return print;
} }
} }