From 4b01dc60e665d9cf596bbf890ab0fd1b943b898c Mon Sep 17 00:00:00 2001
From: "miriam.baglioni"
Date: Tue, 14 Apr 2020 15:28:00 +0200
Subject: [PATCH] unit test for result to project propagation

---
 .../ProjectPropagationJobTest.java           | 182 ++++++++++++++++++
 .../sample/relation/relation_100.json.gz     | Bin 0 -> 3639 bytes
 2 files changed, 182 insertions(+)
 create mode 100644 dhp-workflows/dhp-propagation/src/test/java/eu/dnetlib/dhp/projecttoresult/ProjectPropagationJobTest.java
 create mode 100644 dhp-workflows/dhp-propagation/src/test/resources/eu/dnetlib/dhp/projecttoresult/sample/relation/relation_100.json.gz

diff --git a/dhp-workflows/dhp-propagation/src/test/java/eu/dnetlib/dhp/projecttoresult/ProjectPropagationJobTest.java b/dhp-workflows/dhp-propagation/src/test/java/eu/dnetlib/dhp/projecttoresult/ProjectPropagationJobTest.java
new file mode 100644
index 000000000..54c7721a0
--- /dev/null
+++ b/dhp-workflows/dhp-propagation/src/test/java/eu/dnetlib/dhp/projecttoresult/ProjectPropagationJobTest.java
@@ -0,0 +1,182 @@
+package eu.dnetlib.dhp.projecttoresult;
+
+import com.fasterxml.jackson.databind.ObjectMapper;
+import eu.dnetlib.dhp.schema.oaf.Relation;
+import org.apache.commons.io.FileUtils;
+import org.apache.spark.SparkConf;
+import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.api.java.JavaSparkContext;
+import org.apache.spark.api.java.function.FilterFunction;
+import org.apache.spark.sql.Dataset;
+import org.apache.spark.sql.Encoders;
+import org.apache.spark.sql.SparkSession;
+import org.junit.jupiter.api.AfterAll;
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.BeforeAll;
+import org.junit.jupiter.api.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Path;
+
+public class ProjectPropagationJobTest {
+
+    private static final Logger log = LoggerFactory.getLogger(ProjectPropagationJobTest.class);
+
+    private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
+
+    private static final ClassLoader cl = ProjectPropagationJobTest.class.getClassLoader();
+
+    private static SparkSession spark;
+
+    private static Path workingDir;
+
+    @BeforeAll
+    public static void beforeAll() throws IOException {
+        workingDir = Files.createTempDirectory(ProjectPropagationJobTest.class.getSimpleName());
+        log.info("using work dir {}", workingDir);
+
+        SparkConf conf = new SparkConf();
+        conf.setAppName(ProjectPropagationJobTest.class.getSimpleName());
+
+        conf.setMaster("local[*]");
+        conf.set("spark.driver.host", "localhost");
+        conf.set("hive.metastore.local", "true");
+        conf.set("spark.ui.enabled", "false");
+        conf.set("spark.sql.warehouse.dir", workingDir.toString());
+        conf.set("hive.metastore.warehouse.dir", workingDir.resolve("warehouse").toString());
+
+        spark = SparkSession
+                .builder()
+                .appName(ProjectPropagationJobTest.class.getSimpleName())
+                .config(conf)
+                .getOrCreate();
+    }
+
+    @AfterAll
+    public static void afterAll() throws IOException {
+        FileUtils.deleteDirectory(workingDir.toFile());
+        spark.stop();
+    }
+
+    /**
+     * There are no new relations to be added: all the possible relations have already
+     * been linked with the project in the graph.
+     * @throws Exception
+     */
+    @Test
+    public void NoUpdateTest() throws Exception {
+        SparkResultToProjectThroughSemRelJob3.main(new String[]{
+                "-isTest", Boolean.TRUE.toString(),
+                "-isSparkSessionManaged", Boolean.FALSE.toString(),
+                "-sourcePath", getClass().getResource("/eu/dnetlib/dhp/projecttoresult/sample/relation").getPath(),
+                "-hive_metastore_uris", "",
+                "-writeUpdate", "false",
+                "-saveGraph", "true",
+                "-outputPath", workingDir.toString() + "/relation",
+                "-potentialUpdatePath", getClass().getResource("/eu/dnetlib/dhp/projecttoresult/preparedInfo/noupdates/potentialUpdates").getPath(),
+                "-alreadyLinkedPath", getClass().getResource("/eu/dnetlib/dhp/projecttoresult/preparedInfo/alreadyLinked").getPath(),
+        });
+
+        final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext());
+
+        JavaRDD<Relation> tmp = sc.textFile(workingDir.toString() + "/relation")
+                .map(item -> OBJECT_MAPPER.readValue(item, Relation.class));
+
+        Assertions.assertEquals(0, tmp.count());
+    }
+
+    /**
+     * All the possible updates will produce a new relation. No relations are already
+     * linked in the graph.
+     * @throws Exception
+     */
+    @Test
+    public void UpdateTenTest() throws Exception {
+        SparkResultToProjectThroughSemRelJob3.main(new String[]{
+                "-isTest", Boolean.TRUE.toString(),
+                "-isSparkSessionManaged", Boolean.FALSE.toString(),
+                "-sourcePath", getClass().getResource("/eu/dnetlib/dhp/projecttoresult/sample/relation").getPath(),
+                "-hive_metastore_uris", "",
+                "-writeUpdate", "false",
+                "-saveGraph", "true",
+                "-outputPath", workingDir.toString() + "/relation",
+                "-potentialUpdatePath", getClass().getResource("/eu/dnetlib/dhp/projecttoresult/preparedInfo/tenupdates/potentialUpdates").getPath(),
+                "-alreadyLinkedPath", getClass().getResource("/eu/dnetlib/dhp/projecttoresult/preparedInfo/alreadyLinked").getPath(),
+        });
+
+        final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext());
+
+        JavaRDD<Relation> tmp = sc.textFile(workingDir.toString() + "/relation")
+                .map(item -> OBJECT_MAPPER.readValue(item, Relation.class));
+
+        // 20 new relations: each of the 10 potential updates adds both a "produces" and an "isProducedBy" relation
+        Assertions.assertEquals(20, tmp.count());
+
+        Dataset<Relation> verificationDs = spark.createDataset(tmp.rdd(), Encoders.bean(Relation.class));
+
+        Assertions.assertEquals(10, verificationDs.filter("relClass = 'produces'").count());
+        Assertions.assertEquals(10, verificationDs.filter("relClass = 'isProducedBy'").count());
+
+        Assertions.assertEquals(10, verificationDs.filter((FilterFunction<Relation>) r ->
+                r.getSource().substring(0, 2).equals("50") &&
+                r.getTarget().substring(0, 2).equals("40") &&
+                r.getRelClass().equals("isProducedBy")).count());
+        Assertions.assertEquals(10, verificationDs.filter((FilterFunction<Relation>) r ->
+                r.getSource().substring(0, 2).equals("40") &&
+                r.getTarget().substring(0, 2).equals("50") &&
+                r.getRelClass().equals("produces")).count());
+
+        verificationDs.createOrReplaceTempView("temporary");
+
+        Assertions.assertEquals(20, spark.sql("SELECT * FROM temporary WHERE datainfo.inferenceprovenance = 'propagation'").count());
+    }
+
+    /**
+     * One of the relations in the possible updates is already linked to the project in the graph,
+     * all the others are not: there will be 9 new associations, leading to 18 new relations.
+     * @throws Exception
+     */
+    @Test
+    public void UpdateMixTest() throws Exception {
+        SparkResultToProjectThroughSemRelJob3.main(new String[]{
+                "-isTest", Boolean.TRUE.toString(),
+                "-isSparkSessionManaged", Boolean.FALSE.toString(),
+                "-sourcePath", getClass().getResource("/eu/dnetlib/dhp/projecttoresult/sample/relation").getPath(),
+                "-hive_metastore_uris", "",
+                "-writeUpdate", "false",
+                "-saveGraph", "true",
+                "-outputPath", workingDir.toString() + "/relation",
+                "-potentialUpdatePath", getClass().getResource("/eu/dnetlib/dhp/projecttoresult/preparedInfo/updatesmixed/potentialUpdates").getPath(),
+                "-alreadyLinkedPath", getClass().getResource("/eu/dnetlib/dhp/projecttoresult/preparedInfo/alreadyLinked").getPath(),
+        });
+
+        final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext());
+
+        JavaRDD<Relation> tmp = sc.textFile(workingDir.toString() + "/relation")
+                .map(item -> OBJECT_MAPPER.readValue(item, Relation.class));
+
+        // 18 new relations: each of the 9 new associations adds both a "produces" and an "isProducedBy" relation
+        Assertions.assertEquals(18, tmp.count());
+
+        Dataset<Relation> verificationDs = spark.createDataset(tmp.rdd(), Encoders.bean(Relation.class));
+
+        Assertions.assertEquals(9, verificationDs.filter("relClass = 'produces'").count());
+        Assertions.assertEquals(9, verificationDs.filter("relClass = 'isProducedBy'").count());
+
+        Assertions.assertEquals(9, verificationDs.filter((FilterFunction<Relation>) r ->
+                r.getSource().substring(0, 2).equals("50") &&
+                r.getTarget().substring(0, 2).equals("40") &&
+                r.getRelClass().equals("isProducedBy")).count());
+        Assertions.assertEquals(9, verificationDs.filter((FilterFunction<Relation>) r ->
equals("40") && + r.getTarget().substring(0,2).equals("50") && + r.getRelClass().equals("produces")).count()); + + verificationDs.createOrReplaceTempView("temporary"); + + Assertions.assertEquals(18, spark.sql("Select * from temporary where datainfo.inferenceprovenance = 'propagation'").count()); + + } +} diff --git a/dhp-workflows/dhp-propagation/src/test/resources/eu/dnetlib/dhp/projecttoresult/sample/relation/relation_100.json.gz b/dhp-workflows/dhp-propagation/src/test/resources/eu/dnetlib/dhp/projecttoresult/sample/relation/relation_100.json.gz new file mode 100644 index 0000000000000000000000000000000000000000..82a685ad26e1be86032ef82871fdff3058039741 GIT binary patch literal 3639 zcmX|@cQhON8^(i+qDJizMN3fxwaTs0XpOjbsjZh=yHsM-XvC^oHCiKTt5vP7EwPGF z5?jQMB8k08t@>&2J-DpY^S#Y3pZoM_ZWd z(=|BXypplZ9V^4h>SD{i{SEDu`{Un_h=s$iT2VFA@fb%?r8m5F%3-I$!H(bF=U@#U zzv5D~@mV0tVa-UOt??vr?pw^srgGJ1R6K@>QgyIkk0Da3ls{{(G0)EKkq!JX$7~+1RXIBD>k?YvID*=&eaqO!D~Hym zFZ+o3cG_)`u_{^R;4?=bhgre$HuUr~USN1jMRTMdcRn&(v6)24$~zCJA{HE@!YJ`` zhtou^!?#tR&Kpk)-A_KF@VCVKJSfg4PJwt)C}|hP7KM;ZpJz0WcMBOoIUiNKOe=5&~Wt) zL3G5*(iS~Vv0RY48PViXpahtPiMQGuA14=WR3MHXqv2GA5<6E$O=$o$l!5viV=noEfk>*=oRGz zK-86x3bW?R?$^Iw$fO%VQ(rnzzE__jO!4OGO6#t?QT`&lMNwKt(Z8W7z9Hq2Bm&W)9E0JBJf7i?d5{KmC7OdY4PHBX};ymK^#K}Yp726SdK;*MI_YU(@`D& zaYP4xyvll}aaQ{9^K`}R#w@+!6L^p^Yg8Nk=<)4Sb>0I%ReKGuef(%bKPtMJO@fM6 zZj*tU{<=QMqeOVbobIS=c}shdEmZ0cukK7n-OFj_i7mxCPehofL&GLoqMLJL?&SR% zqSRss{n4ZlkA~i6S77ds?joT^GcW{m?pvO~ti<+ipc*W9_JFxRraPJ`pMeyS9G5Bj z70z2dQp;$BATvFcIA;D~9SiRv8ikT$eZ-ERpLBkfoeQ&fFq^*_Ckg)WD%uce#W`Zd zk^aEa^=B~42+Gk9WV&yuz#U^DdFYK&wzpM9j@~kkm>j|q5UEMdGm+1YoQ#nZ5Yg$M z_k0pt3`O5@>y9^T`$;N*U#inf^oNgeAL0>|KGPI)Ml@GaDo%+(3%a8i$kP(?uoc|wF5)Cht0ULnO*qowF~HmpLu#M~0)E~BzhJwk>$$yN~Wt#8d#O$HKjehsXK~HXluw!3b zn?$6BR0yyhsK%;4cIx!+Z%K;MXx&n~H9b5P{!2`j4%LX3ZJB2kTv68(-Qg0zI5NJ?0wq(eMbYr+dmO&W%L ztaIq2yk-Md0|#aJiqroH&iZOL$@%Of2|iGt-8fVb)P4aOnTq~D8>G5k#L!-r_YJsjfVB`2ans%0+L8{-1P9enx z($c@t=TTdlLDM|Lf952KxZ7*i{gH9&q1U=PmCGTi`hGflS~IEk(^il4X4n)UWgc`pm6*lObo@q7xD@`HHlU{Cr!|@)!120%8FDUQ<8xW^VSV)* zeO?34y0BcZF~2$sz_z?Y)hJ{&TTOE@j@3irkwq|>kQ$3nN=*BRtm5vpIn7pByAaC_ z?TQ$;+d+af4Z3r%g*s0`k6=z6N_wKwe-vxV&$V48oORnE__?E!lW{8LrUrLkB&erk zm5%1gw-+RYv=4qy2+{gY;tSn`p-}XLmV&`y3l2M zzzWHc8O5(fz6u*9IqN3S)Uzb<(wi7(j1n(iCbb}ZQGRZ$7TE1_&6gp_%Z8=H@-rU| z#Y!Bt)@HBq6+&7MLPPEuDGoXF44a*EXijrm(FwQ>d)9SZ_+Ll&ROGw|W9kAG^0|=f z&=arO-jB2$77-k%=*q)=VGWuTlY*!0x|ixXU6xdtG@^6A=qLczxB2`3wR9P{Wz^6I zHCL;2@hhGWI0kcz_5#8lQjG1mB?Qpp;F6&?s(WUw-A0iQh4!XG7deAT;Ftw6pt^Gh zhBzEk_0Q$jw2e1o9wLtfZ`mnPG<(C#ctuss{+_#SDRrgs9-&bD8t#=+w{*C>1K$%y zb>Gt<;KcVIs#?E>WNf?R3nGr^M_4AAroPzhBwc)eQed_c|?7-dH5rKF}L+8^Mk1ln>4%g ze3i3}iRScK)U^d-H4i2JW!ur_k&t-1Ec-l!_9XHY*!rpyF%!FWXD4y1rn^$_nv*Ac zGM_efRZ0))puUyFyQPvo)d!Va;HOg1@=DLCw>s7wx;$Q~qheg}>=#Z1zU{i&o+;md zaT?6fw-;s6u=e;m7Aq^pnFO>Ols~K*Yf!oJ<~`=0<#efF9ci2$sDMK1Gm*l!Se5ka z4$5CGZAg_B(p9%N%E%LQDVe!%{*v1U^ipix1=lkV?t4@CR=Kplr}D8->2JANxV!x|#dOf$?!iFgv-Ba$+V!9m zl=xRKw;R4%y=U7Lt-RM0<~Nk@9AnrT7Ul4T9Z<7~U>fO6K^Mgd!j4mw@1P4*GU1BJ ziGi?@Wk}#h4QFmU)auCkJHSAWT(T zkEHmwlcV%*OVQ5Y%8wGs zu}EQz()XG|;+!Uj_V9;!`xKj7RgMh0jHvl|R*wkv3NydSYmtM@5dHrzznII_--Au+ zAEkZG$KbKf){yoXA1Yenk(JRGcMxQR&$ec2UibD$KZ9EJQkM5PHtGb5;UG@ zK(QCtJ;nwBe`_m@0Yy*jrgBDP0QslkYgd`+IfO1cQTRtfQYhiBm?qx%H zaiyBg{IC za>DVe3UMWu6(-e)2@*EEs6#yfJ*zj9PE>(sbW6YD(3Vi0j`kx#_sDD!E>iZ1eDyt} zNnyO=UaI?E&CxF4W226OJv>*z*UnewUI6lW$6suw+3aF&{bX>(_3q`!J?~y?M&-dq taWxjm&MH`kc`@W6*68G-PkwopAJ(cI