From a90bac3bc92d44f1628dd309ee5e7404b4b06e09 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Mon, 9 Aug 2021 16:36:54 +0200 Subject: [PATCH] Graph Dump - added method to test class to verify addition of validation date in projects for community result --- .../oa/graph/dump/UpdateProjectInfoTest.java | 87 +++++++++++++++++++ 1 file changed, 87 insertions(+) diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/UpdateProjectInfoTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/UpdateProjectInfoTest.java index 20a46cee08..fd34433c6e 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/UpdateProjectInfoTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/UpdateProjectInfoTest.java @@ -5,11 +5,17 @@ import java.io.IOException; import java.nio.file.Files; import java.nio.file.Path; import java.util.HashMap; +import java.util.logging.Filter; +import java.util.stream.Collectors; +import eu.dnetlib.dhp.schema.dump.oaf.community.Funder; +import eu.dnetlib.dhp.schema.dump.oaf.community.Project; import org.apache.commons.io.FileUtils; import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaSparkContext; +import org.apache.spark.api.java.function.FilterFunction; +import org.apache.spark.api.java.function.MapFunction; import org.apache.spark.sql.Encoders; import org.apache.spark.sql.Row; import org.apache.spark.sql.SparkSession; @@ -135,4 +141,85 @@ public class UpdateProjectInfoTest { resultExplodedProvenance.show(false); } + @Test + public void testValidatedRelation() throws Exception{ + final String sourcePath = getClass() + .getResource("/eu/dnetlib/dhp/oa/graph/dump/addProjectInfo") + .getPath(); + + SparkUpdateProjectInfo.main(new String[] { + "-isSparkSessionManaged", Boolean.FALSE.toString(), + "-preparedInfoPath", sourcePath + "/preparedInfoValidated", + "-outputPath", workingDir.toString() + "/result", + "-sourcePath", sourcePath + "/publication_extendedmodel" + }); + + final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); + + JavaRDD tmp = sc + .textFile(workingDir.toString() + "/result") + .map(item -> OBJECT_MAPPER.readValue(item, CommunityResult.class)); + + org.apache.spark.sql.Dataset verificationDataset = spark + .createDataset(tmp.rdd(), Encoders.bean(CommunityResult.class)); + + verificationDataset.show(false); + + Assertions.assertEquals(2, verificationDataset.count()); + verificationDataset.createOrReplaceTempView("dataset"); + + String query = "select id, MyT.code code, MyT.title title, MyT.funder.name funderName, MyT.funder.shortName funderShortName, " + + + "MyT.funder.jurisdiction funderJurisdiction, MyT.funder.fundingStream fundingStream, MyT.validated " + + "from dataset " + + "lateral view explode(projects) p as MyT "; + + org.apache.spark.sql.Dataset resultExplodedProvenance = spark.sql(query); + + Assertions.assertEquals(2, resultExplodedProvenance.count()); + resultExplodedProvenance.show(false); + + Assertions + .assertEquals( + 2, + resultExplodedProvenance.filter("id = '50|pensoft_____::00ea4a1cd53806a97d62ea6bf268f2a2'").count()); + + Assertions + .assertEquals( + 1, + resultExplodedProvenance + .filter("id = '50|pensoft_____::00ea4a1cd53806a97d62ea6bf268f2a2' and code = '123455'") + .count()); + + Assertions + .assertEquals( + 1, + resultExplodedProvenance + .filter("id = '50|pensoft_____::00ea4a1cd53806a97d62ea6bf268f2a2' and code = '119027'") + .count()); + + Project project = verificationDataset + .map((MapFunction) cr -> cr.getProjects().stream().filter(p -> p.getValidated() != null).collect(Collectors.toList()).get(0) + , Encoders.bean(Project.class)).first(); + + Assertions.assertTrue(project.getFunder().getName().equals("Academy of Finland")); + Assertions.assertTrue(project.getFunder().getShortName().equals("AKA")); + Assertions.assertTrue(project.getFunder().getJurisdiction().equals("FI")); + Assertions.assertTrue(project.getFunder().getFundingStream() == null); + Assertions.assertTrue(project.getValidated().getValidationDate().equals("2021-08-06")); + + + project = verificationDataset + .map((MapFunction) cr -> cr.getProjects().stream().filter(p -> p.getValidated() == null).collect(Collectors.toList()).get(0) + , Encoders.bean(Project.class)).first(); + + + Assertions.assertTrue(project.getFunder().getName().equals("European Commission")); + Assertions.assertTrue(project.getFunder().getShortName().equals("EC")); + Assertions.assertTrue(project.getFunder().getJurisdiction().equals("EU")); + Assertions.assertTrue(project.getFunder().getFundingStream().equals("H2020")); + + + } + }