package eu.dnetlib.dhp.oa.dedup; import static org.junit.jupiter.api.Assertions.assertEquals; import java.io.BufferedReader; import java.io.FileReader; import java.io.IOException; import java.io.Serializable; import java.nio.file.Paths; import java.util.*; import java.util.stream.Collectors; import org.codehaus.jackson.map.ObjectMapper; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; import eu.dnetlib.dhp.oa.merge.AuthorMerger; import eu.dnetlib.dhp.schema.oaf.*; import eu.dnetlib.pace.util.MapDocumentUtil; import scala.Tuple2; public class EntityMergerTest implements Serializable { private List> publications; private List> publications2; private List> publications3; private List> publications4; private List> publications5; private String testEntityBasePath; private DataInfo dataInfo; private String dedupId = "00|dedup_id::1"; private Publication pub_top; @BeforeEach public void setUp() throws Exception { testEntityBasePath = Paths .get(SparkDedupTest.class.getResource("/eu/dnetlib/dhp/dedup/json").toURI()) .toFile() .getAbsolutePath(); publications = readSample(testEntityBasePath + "/publication_merge.json", Publication.class); publications2 = readSample(testEntityBasePath + "/publication_merge2.json", Publication.class); publications3 = readSample(testEntityBasePath + "/publication_merge3.json", Publication.class); publications4 = readSample(testEntityBasePath + "/publication_merge4.json", Publication.class); publications5 = readSample(testEntityBasePath + "/publication_merge5.json", Publication.class); pub_top = getTopPub(publications); dataInfo = setDI(); } @Test public void softwareMergerTest() throws InstantiationException, IllegalAccessException { List> softwares = readSample( testEntityBasePath + "/software_merge.json", Software.class); Software merged = DedupRecordFactory .entityMerger(dedupId, softwares.iterator(), 0, dataInfo, Software.class); assertEquals("OPEN SOURCE", merged.getBestaccessright().getClassid()); assertEquals("50|doi_dedup___::0968af610a356656706657e4f234b340", merged.getId()); } @Test public void publicationMergerTest() throws InstantiationException, IllegalAccessException { Publication pub_merged = DedupRecordFactory .entityMerger(dedupId, publications.iterator(), 0, dataInfo, Publication.class); // verify id assertEquals("50|doi_dedup___::0968af610a356656706657e4f234b340", pub_merged.getId()); assertEquals(pub_top.getJournal(), pub_merged.getJournal()); assertEquals("OPEN", pub_merged.getBestaccessright().getClassid()); assertEquals(pub_top.getResulttype(), pub_merged.getResulttype()); assertEquals(pub_top.getLanguage(), pub_merged.getLanguage()); assertEquals(pub_top.getPublisher(), pub_merged.getPublisher()); assertEquals(pub_top.getEmbargoenddate(), pub_merged.getEmbargoenddate()); assertEquals(pub_top.getResourcetype().getClassid(), ""); assertEquals(pub_top.getDateoftransformation(), pub_merged.getDateoftransformation()); assertEquals(pub_top.getOaiprovenance(), pub_merged.getOaiprovenance()); assertEquals(pub_top.getDateofcollection(), pub_merged.getDateofcollection()); assertEquals(3, pub_merged.getInstance().size()); assertEquals(2, pub_merged.getCountry().size()); assertEquals(0, pub_merged.getSubject().size()); assertEquals(2, pub_merged.getTitle().size()); assertEquals(0, pub_merged.getRelevantdate().size()); assertEquals(0, pub_merged.getDescription().size()); assertEquals(0, pub_merged.getSource().size()); assertEquals(0, pub_merged.getFulltext().size()); assertEquals(0, pub_merged.getFormat().size()); assertEquals(0, pub_merged.getContributor().size()); assertEquals(0, pub_merged.getCoverage().size()); assertEquals(0, pub_merged.getContext().size()); assertEquals(0, pub_merged.getExternalReference().size()); assertEquals(3, pub_merged.getOriginalId().size()); assertEquals(3, pub_merged.getCollectedfrom().size()); assertEquals(1, pub_merged.getPid().size()); assertEquals(0, pub_merged.getExtraInfo().size()); // verify datainfo assertEquals(dataInfo, pub_merged.getDataInfo()); // verify datepicker assertEquals("2018-09-30", pub_merged.getDateofacceptance().getValue()); // verify authors assertEquals(9, pub_merged.getAuthor().size()); assertEquals(4, AuthorMerger.countAuthorsPids(pub_merged.getAuthor())); // verify title int count = 0; for (StructuredProperty title : pub_merged.getTitle()) { if (title.getQualifier().getClassid().equals("main title")) count++; } assertEquals(1, count); } @Test public void publicationMergerTest2() throws InstantiationException, IllegalAccessException { Publication pub_merged = DedupRecordFactory .entityMerger(dedupId, publications2.iterator(), 0, dataInfo, Publication.class); // verify id assertEquals("50|doi_dedup___::0ca46ff10b2b4c756191719d85302b14", pub_merged.getId()); assertEquals(27, pub_merged.getAuthor().size()); } @Test public void publicationMergerTest3() throws InstantiationException, IllegalAccessException { Publication pub_merged = DedupRecordFactory .entityMerger(dedupId, publications3.iterator(), 0, dataInfo, Publication.class); // verify id assertEquals("50|doi_dedup___::0ca46ff10b2b4c756191719d85302b14", pub_merged.getId()); } @Test public void publicationMergerTest4() throws InstantiationException, IllegalStateException, IllegalAccessException { Publication pub_merged = DedupRecordFactory .entityMerger(dedupId, publications4.iterator(), 0, dataInfo, Publication.class); // verify id assertEquals("50|dedup_wf_001::0ca46ff10b2b4c756191719d85302b14", pub_merged.getId()); } @Test public void publicationMergerTest5() throws InstantiationException, IllegalStateException, IllegalAccessException { System.out .println( publications5 .stream() .map(p -> p._2().getId()) .collect(Collectors.toList())); Publication pub_merged = DedupRecordFactory .entityMerger(dedupId, publications5.iterator(), 0, dataInfo, Publication.class); // verify id assertEquals("50|dedup_wf_001::0ca46ff10b2b4c756191719d85302b14", pub_merged.getId()); } public DataInfo setDI() { DataInfo dataInfo = new DataInfo(); dataInfo.setTrust("0.9"); dataInfo.setDeletedbyinference(false); dataInfo.setInferenceprovenance("testing"); dataInfo.setInferred(true); return dataInfo; } public Publication getTopPub(List> publications) { Double maxTrust = 0.0; Publication maxPub = new Publication(); for (Tuple2 publication : publications) { Double pubTrust = Double.parseDouble(publication._2().getDataInfo().getTrust()); if (pubTrust > maxTrust) { maxTrust = pubTrust; maxPub = publication._2(); } } return maxPub; } public List> readSample(String path, Class clazz) { List> res = new ArrayList<>(); BufferedReader reader; try { reader = new BufferedReader(new FileReader(path)); String line = reader.readLine(); while (line != null) { res .add( new Tuple2<>( MapDocumentUtil.getJPathString("$.id", line), new ObjectMapper().readValue(line, clazz))); // read next line line = reader.readLine(); } reader.close(); } catch (IOException e) { e.printStackTrace(); } return res; } }