diff --git a/dhp-workflows/dhp-patch/src/test/java/eu/dnetlib/dhp/patchrefereed/PatchRefereedTest.java b/dhp-workflows/dhp-patch/src/test/java/eu/dnetlib/dhp/patchrefereed/PatchRefereedTest.java index 97d7de283a..e691f27da1 100644 --- a/dhp-workflows/dhp-patch/src/test/java/eu/dnetlib/dhp/patchrefereed/PatchRefereedTest.java +++ b/dhp-workflows/dhp-patch/src/test/java/eu/dnetlib/dhp/patchrefereed/PatchRefereedTest.java @@ -4,6 +4,8 @@ package eu.dnetlib.dhp.patchrefereed; import java.io.IOException; import java.nio.file.Files; import java.nio.file.Path; +import java.util.Arrays; +import java.util.List; import org.apache.commons.io.FileUtils; import org.apache.spark.SparkConf; @@ -22,6 +24,7 @@ import org.slf4j.LoggerFactory; import com.fasterxml.jackson.databind.ObjectMapper; +import eu.dnetlib.dhp.schema.oaf.KeyValue; import eu.dnetlib.dhp.schema.oaf.Publication; import jdk.nashorn.internal.ir.annotations.Ignore; @@ -75,7 +78,7 @@ public class PatchRefereedTest { "-resultTableName", "eu.dnetlib.dhp.schema.oaf.Publication", "-outputPath", workingDir.toString() + "/publication", "-preparedInfoPath", - getClass().getResource("/eu/dnetlib/dhp/patchrefereed/simpleTest/preparedInfo.json").getPath() + getClass().getResource("/eu/dnetlib/dhp/patchrefereed/simpleTest/preparedInfo3.json").getPath() }); final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); @@ -361,7 +364,7 @@ public class PatchRefereedTest { verificationDataset.createOrReplaceTempView("dataset"); - Dataset tmp1 = spark.sql("select inst.refereed from dataset lateral view explode(instance) i as inst"); + Dataset tmp1 = spark.sql("select id, inst.refereed from dataset lateral view explode(instance) i as inst"); Assertions.assertEquals(4, tmp1.count()); @@ -387,4 +390,98 @@ public class PatchRefereedTest { .count()); } + + @Test + public void test10() throws Exception { + + SparkPatchRefereed + .main( + new String[] { + "-isSparkSessionManaged", Boolean.FALSE.toString(), + "-inputPath", + getClass().getResource("/eu/dnetlib/dhp/patchrefereed/test/match3/prod_publication.json").getPath(), + "-resultTableName", "eu.dnetlib.dhp.schema.oaf.Publication", + "-outputPath", workingDir.toString() + "/publication", + "-preparedInfoPath", + getClass().getResource("/eu/dnetlib/dhp/patchrefereed/test/match3/preparedInfo.json").getPath() + }); + + final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); + + JavaRDD tmp = sc + .textFile(workingDir.toString() + "/publication") + .map(item -> OBJECT_MAPPER.readValue(item, Publication.class)); + + Assertions.assertEquals(4, tmp.count()); + + org.apache.spark.sql.Dataset verificationDataset = spark + .createDataset(tmp.rdd(), Encoders.bean(Publication.class)); + + verificationDataset.createOrReplaceTempView("dataset"); + + Dataset tmp1 = spark.sql("select id, inst.refereed from dataset lateral view explode(instance) i as inst"); + + Assertions.assertEquals(4, tmp1.count()); + + tmp1.show(false); + + Assertions.assertEquals(2, tmp1.filter("refereed.classname = 'Unknown' and refereed.classid ='0000'").count()); + + Assertions + .assertEquals( + 2, tmp1.filter("refereed.classname = 'nonPeerReviewed' and refereed.classid ='0002'").count()); + + Assertions + .assertEquals( + 1, tmp1 + .filter( + "id = '50|core_ac_uk__::05d41a3ffee162ae2561befeb718ccf7' and " + + "refereed.classname = 'Unknown' and refereed.classid ='0000' ") + .count()); + + Assertions + .assertEquals( + 1, tmp1 + .filter( + "id = '50|core_ac_uk__::b32ca62afc4299e1f43515bc3c33b58f' and " + + "refereed.classname = 'Unknown' and refereed.classid ='0000'") + .count()); + + Assertions + .assertEquals( + 1, tmp1 + .filter( + "id = '50|core_ac_uk__::127d5c16ad41fe6cf2d196abbae5596e' and " + + "refereed.classname = 'nonPeerReviewed' and refereed.classid ='0002' ") + .count()); + + Assertions + .assertEquals( + 1, tmp1 + .filter( + "id = '50|core_ac_uk__::4810ff7dc1a3db8c2568d5fe7245e067' and " + + "refereed.classname = 'nonPeerReviewed' and refereed.classid ='0002'") + .count()); + + } + + @Test + public void remove() { + KeyValue cf1 = new KeyValue(); + KeyValue cf2 = new KeyValue(); + + cf1.setKey("10|openaire____::0a836ef43dcb67bb7cbd4dd509b11b73"); + cf1.setValue("CORE (RIOXX-UK Aggregator)"); + + cf2.setKey("10|openaire____::0a836ef43dcb67bb7cbd4dd509b11b73"); + cf2.setValue("CORE (RIOXX-UK Aggregator)"); + + System.out.println(cf1.equals(cf2)); + + List url1 = Arrays.asList("http://wrap.warwick.ac.uk/66746/1/WRAP_THESIS_Ritchie_2014.pdf"); + + List url2 = Arrays.asList("http://wrap.warwick.ac.uk/66746/1/WRAP_THESIS_Ritchie_2014.pdf"); + + System.out.println(SparkPatchRefereed.equals(url1, url2)); + } } diff --git a/dhp-workflows/dhp-patch/src/test/java/eu/dnetlib/dhp/patchrefereed/PrepareInfoTest.java b/dhp-workflows/dhp-patch/src/test/java/eu/dnetlib/dhp/patchrefereed/PrepareInfoTest.java index f002595ba2..11e096287a 100644 --- a/dhp-workflows/dhp-patch/src/test/java/eu/dnetlib/dhp/patchrefereed/PrepareInfoTest.java +++ b/dhp-workflows/dhp-patch/src/test/java/eu/dnetlib/dhp/patchrefereed/PrepareInfoTest.java @@ -75,7 +75,7 @@ public class PrepareInfoTest { .textFile(workingDir.toString() + "/resultInstance") .map(item -> OBJECT_MAPPER.readValue(item, ResultInstance.class)); - Assertions.assertEquals(1, tmp.count()); + Assertions.assertEquals(0, tmp.count()); } @@ -97,7 +97,7 @@ public class PrepareInfoTest { .textFile(workingDir.toString() + "/resultInstance") .map(item -> OBJECT_MAPPER.readValue(item, ResultInstance.class)); - Assertions.assertEquals(1, tmp.count()); + Assertions.assertEquals(0, tmp.count()); } @Test @@ -118,7 +118,28 @@ public class PrepareInfoTest { .textFile(workingDir.toString() + "/resultInstance") .map(item -> OBJECT_MAPPER.readValue(item, ResultInstance.class)); - Assertions.assertEquals(3, tmp.count()); + Assertions.assertEquals(2, tmp.count()); + } + + @Test + public void test4() throws Exception { + SparkPrepareResultInstanceList + .main( + new String[] { + "-isSparkSessionManaged", Boolean.FALSE.toString(), + "-inputPath", + getClass().getResource("/eu/dnetlib/dhp/patchrefereed/prepareinfo/publication3.json").getPath(), + "-resultTableName", "eu.dnetlib.dhp.schema.oaf.Publication", + "-outputPath", workingDir.toString() + "/resultInstance" + }); + + final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); + + JavaRDD tmp = sc + .textFile(workingDir.toString() + "/resultInstance") + .map(item -> OBJECT_MAPPER.readValue(item, ResultInstance.class)); + + Assertions.assertEquals(2, tmp.count()); } }