From 5c8533d1a1de954f0da3f2ffbbb5a7bac78efd7b Mon Sep 17 00:00:00 2001
From: "miriam.baglioni"
Date: Thu, 18 Jun 2020 11:20:08 +0200
Subject: [PATCH] changes in the testing classes

---
 .../dhp/oa/graph/dump/DumpJobTest.java        |  62 ++++----
 .../oa/graph/dump/SplitForCommunityTest.java  | 148 ++++++++++++++++--
 .../oa/graph/dump/UpdateProjectInfoTest.java  |  16 +-
 .../dhp/oa/graph/dump/ZenodoUploadTest.java   |  37 ++++-
 4 files changed, 206 insertions(+), 57 deletions(-)

diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/DumpJobTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/DumpJobTest.java
index 73af66f85..eea4e9414 100644
--- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/DumpJobTest.java
+++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/DumpJobTest.java
@@ -151,10 +151,9 @@ public class DumpJobTest {
 		SparkDumpCommunityProducts.main(new String[] {
 			"-isLookUpUrl", MOCK_IS_LOOK_UP_URL,
 			"-isSparkSessionManaged", Boolean.FALSE.toString(),
-			"-outputPath", workingDir.toString() + "/dataset",
+			"-outputPath", workingDir.toString() + "/result",
 			"-sourcePath", sourcePath,
 			"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Dataset",
-			"-dumpTableName", "eu.dnetlib.dhp.schema.dump.oaf.Dataset",
 			"-communityMap", new Gson().toJson(map)
 		});
@@ -162,12 +161,12 @@ public class DumpJobTest {

 		final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());

-		JavaRDD<eu.dnetlib.dhp.schema.dump.oaf.Dataset> tmp = sc
-			.textFile(workingDir.toString() + "/dataset")
-			.map(item -> OBJECT_MAPPER.readValue(item, eu.dnetlib.dhp.schema.dump.oaf.Dataset.class));
+		JavaRDD<eu.dnetlib.dhp.schema.dump.oaf.Result> tmp = sc
+			.textFile(workingDir.toString() + "/result")
+			.map(item -> OBJECT_MAPPER.readValue(item, eu.dnetlib.dhp.schema.dump.oaf.Result.class));

-		org.apache.spark.sql.Dataset<eu.dnetlib.dhp.schema.dump.oaf.Dataset> verificationDataset = spark
-			.createDataset(tmp.rdd(), Encoders.bean(eu.dnetlib.dhp.schema.dump.oaf.Dataset.class));
+		org.apache.spark.sql.Dataset<eu.dnetlib.dhp.schema.dump.oaf.Result> verificationDataset = spark
+			.createDataset(tmp.rdd(), Encoders.bean(eu.dnetlib.dhp.schema.dump.oaf.Result.class));

 		Assertions.assertEquals(90, verificationDataset.count());
 		// verificationDataset.show(false);
@@ -198,7 +197,9 @@ public class DumpJobTest {

 		Assertions.assertTrue(verificationDataset.filter("size(context) > 0").count() == 90);

-		verificationDataset.select("instance.type").show(false);
+		Assertions.assertTrue(verificationDataset.filter("type = 'dataset'").count() == 90);
+
+		// verificationDataset.select("instance.type").show(false);

 		//TODO verify value and name of the fields for vocab related value (i.e. accessright, bestaccessright)
@@ -214,10 +215,9 @@ public class DumpJobTest {
 		SparkDumpCommunityProducts.main(new String[] {
 			"-isLookUpUrl", MOCK_IS_LOOK_UP_URL,
 			"-isSparkSessionManaged", Boolean.FALSE.toString(),
-			"-outputPath", workingDir.toString() + "/publication",
+			"-outputPath", workingDir.toString() + "/result",
 			"-sourcePath", sourcePath,
 			"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Publication",
-			"-dumpTableName", "eu.dnetlib.dhp.schema.dump.oaf.Publication",
 			"-communityMap", new Gson().toJson(map)
 		});
@@ -225,16 +225,18 @@ public class DumpJobTest {

 		final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());

-		JavaRDD<eu.dnetlib.dhp.schema.dump.oaf.Publication> tmp = sc
-			.textFile(workingDir.toString() + "/publication")
-			.map(item -> OBJECT_MAPPER.readValue(item, eu.dnetlib.dhp.schema.dump.oaf.Publication.class));
+		JavaRDD<eu.dnetlib.dhp.schema.dump.oaf.Result> tmp = sc
+			.textFile(workingDir.toString() + "/result")
+			.map(item -> OBJECT_MAPPER.readValue(item, eu.dnetlib.dhp.schema.dump.oaf.Result.class));

-		org.apache.spark.sql.Dataset<eu.dnetlib.dhp.schema.dump.oaf.Publication> verificationDataset = spark
-			.createDataset(tmp.rdd(), Encoders.bean(eu.dnetlib.dhp.schema.dump.oaf.Publication.class));
+		org.apache.spark.sql.Dataset<eu.dnetlib.dhp.schema.dump.oaf.Result> verificationDataset = spark
+			.createDataset(tmp.rdd(), Encoders.bean(eu.dnetlib.dhp.schema.dump.oaf.Result.class));

 		Assertions.assertEquals(76, verificationDataset.count());
 		verificationDataset.show(false);

+		Assertions.assertEquals(76, verificationDataset.filter("type = 'publication'").count());
+
 		//TODO verify value and name of the fields for vocab related value (i.e. accessright, bestaccessright)

 	}
@@ -249,10 +251,9 @@ public class DumpJobTest {
 		SparkDumpCommunityProducts.main(new String[] {
 			"-isLookUpUrl", MOCK_IS_LOOK_UP_URL,
 			"-isSparkSessionManaged", Boolean.FALSE.toString(),
-			"-outputPath", workingDir.toString() + "/software",
+			"-outputPath", workingDir.toString() + "/result",
 			"-sourcePath", sourcePath,
 			"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Software",
-			"-dumpTableName", "eu.dnetlib.dhp.schema.dump.oaf.Software",
 			"-communityMap", new Gson().toJson(map)
 		});
@@ -260,14 +261,16 @@ public class DumpJobTest {

 		final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());

-		JavaRDD<eu.dnetlib.dhp.schema.dump.oaf.Software> tmp = sc
-			.textFile(workingDir.toString() + "/software")
-			.map(item -> OBJECT_MAPPER.readValue(item, eu.dnetlib.dhp.schema.dump.oaf.Software.class));
+		JavaRDD<eu.dnetlib.dhp.schema.dump.oaf.Result> tmp = sc
+			.textFile(workingDir.toString() + "/result")
+			.map(item -> OBJECT_MAPPER.readValue(item, eu.dnetlib.dhp.schema.dump.oaf.Result.class));

-		org.apache.spark.sql.Dataset<eu.dnetlib.dhp.schema.dump.oaf.Software> verificationDataset = spark
-			.createDataset(tmp.rdd(), Encoders.bean(eu.dnetlib.dhp.schema.dump.oaf.Software.class));
+		org.apache.spark.sql.Dataset<eu.dnetlib.dhp.schema.dump.oaf.Result> verificationDataset = spark
+			.createDataset(tmp.rdd(), Encoders.bean(eu.dnetlib.dhp.schema.dump.oaf.Result.class));

 		Assertions.assertEquals(6, verificationDataset.count());
+
+		Assertions.assertEquals(6, verificationDataset.filter("type = 'software'").count());
 		verificationDataset.show(false);

 		//TODO verify value and name of the fields for vocab related value (i.e. accessright, bestaccessright)
@@ -284,10 +287,9 @@ public class DumpJobTest {
 		SparkDumpCommunityProducts.main(new String[] {
 			"-isLookUpUrl", MOCK_IS_LOOK_UP_URL,
 			"-isSparkSessionManaged", Boolean.FALSE.toString(),
-			"-outputPath", workingDir.toString() + "/orp",
+			"-outputPath", workingDir.toString() + "/result",
 			"-sourcePath", sourcePath,
 			"-resultTableName", "eu.dnetlib.dhp.schema.oaf.OtherResearchProduct",
-			"-dumpTableName", "eu.dnetlib.dhp.schema.dump.oaf.OtherResearchProduct",
 			"-communityMap", new Gson().toJson(map)
 		});
@@ -295,14 +297,16 @@ public class DumpJobTest {

 		final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());

-		JavaRDD<eu.dnetlib.dhp.schema.dump.oaf.OtherResearchProduct> tmp = sc
-			.textFile(workingDir.toString() + "/orp")
-			.map(item -> OBJECT_MAPPER.readValue(item, eu.dnetlib.dhp.schema.dump.oaf.OtherResearchProduct.class));
+		JavaRDD<eu.dnetlib.dhp.schema.dump.oaf.Result> tmp = sc
+			.textFile(workingDir.toString() + "/result")
+			.map(item -> OBJECT_MAPPER.readValue(item, eu.dnetlib.dhp.schema.dump.oaf.Result.class));

-		org.apache.spark.sql.Dataset<eu.dnetlib.dhp.schema.dump.oaf.OtherResearchProduct> verificationDataset = spark
-			.createDataset(tmp.rdd(), Encoders.bean(eu.dnetlib.dhp.schema.dump.oaf.OtherResearchProduct.class));
+		org.apache.spark.sql.Dataset<eu.dnetlib.dhp.schema.dump.oaf.Result> verificationDataset = spark
+			.createDataset(tmp.rdd(), Encoders.bean(eu.dnetlib.dhp.schema.dump.oaf.Result.class));

 		Assertions.assertEquals(3, verificationDataset.count());
+
+		Assertions.assertEquals(3, verificationDataset.filter("type = 'other'").count());
 		verificationDataset.show(false);

 		//TODO verify value and name of the fields for vocab related value (i.e. accessright, bestaccessright)
diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/SplitForCommunityTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/SplitForCommunityTest.java
index 4d4608889..82556b488 100644
--- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/SplitForCommunityTest.java
+++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/SplitForCommunityTest.java
@@ -24,6 +24,7 @@ import org.slf4j.LoggerFactory;
 import com.fasterxml.jackson.databind.ObjectMapper;
 import com.google.gson.Gson;

+import eu.dnetlib.dhp.schema.dump.oaf.Result;
 import eu.dnetlib.dhp.schema.dump.oaf.Software;

 public class SplitForCommunityTest {
@@ -62,7 +63,7 @@ public class SplitForCommunityTest {
 		map.put("dh-ch", "Digital Humanities and Cultural Heritage");
 		map.put("science-innovation-policy", "Science and Innovation Policy Studies");
 		map.put("covid-19", "COVID-19");
-		map.put("enrmaps", "Energy Research");
+		map.put("enermaps", "Energy Research");
 		map.put("epos", "EPOS");
 	}
@@ -149,28 +150,27 @@ public class SplitForCommunityTest {
 	public void test1() throws Exception {

 		final String sourcePath = getClass()
-			.getResource("/eu/dnetlib/dhp/oa/graph/dump/splitForCommunity/software")
+			.getResource("/eu/dnetlib/dhp/oa/graph/dump/splitForCommunity")
 			.getPath();

-		SparkSplitForCommunity.main(new String[] {
+		SparkSplitForCommunity2.main(new String[] {
 			"-isLookUpUrl", MOCK_IS_LOOK_UP_URL,
 			"-isSparkSessionManaged", Boolean.FALSE.toString(),
 			"-outputPath", workingDir.toString() + "/split",
 			"-sourcePath", sourcePath,
-			"-resultTableName", "eu.dnetlib.dhp.schema.dump.oaf.Software",
 			"-communityMap", new Gson().toJson(map)
 		});

 		final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());

-		JavaRDD<Software> tmp = sc
+		JavaRDD<Result> tmp = sc
 			.textFile(workingDir.toString() + "/split/dh-ch")
-			.map(item -> OBJECT_MAPPER.readValue(item, Software.class));
+			.map(item -> OBJECT_MAPPER.readValue(item, Result.class));

-		org.apache.spark.sql.Dataset<Software> verificationDataset = spark
-			.createDataset(tmp.rdd(), Encoders.bean(Software.class));
+		org.apache.spark.sql.Dataset<Result> verificationDataset = spark
+			.createDataset(tmp.rdd(), Encoders.bean(Result.class));

-		Assertions.assertEquals(1, verificationDataset.count());
+		Assertions.assertEquals(19, verificationDataset.count());

 		Assertions
 			.assertEquals(

 		tmp = sc
 			.textFile(workingDir.toString() + "/split/egi")
-			.map(item -> OBJECT_MAPPER.readValue(item, Software.class));
+			.map(item -> OBJECT_MAPPER.readValue(item, Result.class));

 		verificationDataset = spark
-			.createDataset(tmp.rdd(), Encoders.bean(Software.class));
+			.createDataset(tmp.rdd(), Encoders.bean(Result.class));

 		Assertions.assertEquals(1, verificationDataset.count());

 		Assertions
 			.assertEquals(

 		tmp = sc
 			.textFile(workingDir.toString() + "/split/ni")
-			.map(item -> OBJECT_MAPPER.readValue(item, Software.class));
+			.map(item -> OBJECT_MAPPER.readValue(item, Result.class));

 		verificationDataset = spark
-			.createDataset(tmp.rdd(), Encoders.bean(Software.class));
+			.createDataset(tmp.rdd(), Encoders.bean(Result.class));

-		Assertions.assertEquals(1, verificationDataset.count());
+		Assertions.assertEquals(5, verificationDataset.count());

 		Assertions
 			.assertEquals(

 		tmp = sc
 			.textFile(workingDir.toString() + "/split/science-innovation-policy")
-			.map(item -> OBJECT_MAPPER.readValue(item, Software.class));
+			.map(item -> OBJECT_MAPPER.readValue(item, Result.class));

 		verificationDataset = spark
-			.createDataset(tmp.rdd(), Encoders.bean(Software.class));
+			.createDataset(tmp.rdd(), Encoders.bean(Result.class));

-		Assertions.assertEquals(4, verificationDataset.count());
+		Assertions.assertEquals(5, verificationDataset.count());

 		Assertions
 			.assertEquals(
@@ -224,5 +224,119 @@ public class SplitForCommunityTest {
 			.assertEquals(
 				1, verificationDataset.filter("id = '50|dedup_wf_001::51b88f272ba9c3bb181af64e70255a80'").count());

+		tmp = sc
+			.textFile(workingDir.toString() + "/split/fet-fp7")
+			.map(item -> OBJECT_MAPPER.readValue(item, Result.class));
+
+		Assertions.assertEquals(0, tmp.count());
+
+		tmp = sc
+			.textFile(workingDir.toString() + "/split/fet-h2020")
+			.map(item -> OBJECT_MAPPER.readValue(item, Result.class));
+
+		Assertions.assertEquals(0, tmp.count());
+
+		tmp = sc
+			.textFile(workingDir.toString() + "/split/clarin")
+			.map(item -> OBJECT_MAPPER.readValue(item, Result.class));
+
+		Assertions.assertEquals(0, tmp.count());
+
+		tmp = sc
+			.textFile(workingDir.toString() + "/split/rda")
+			.map(item -> OBJECT_MAPPER.readValue(item, Result.class));
+
+		Assertions.assertEquals(0, tmp.count());
+
+		tmp = sc
+			.textFile(workingDir.toString() + "/split/ee")
+			.map(item -> OBJECT_MAPPER.readValue(item, Result.class));
+
+		Assertions.assertEquals(0, tmp.count());
+
+		tmp = sc
+			.textFile(workingDir.toString() + "/split/fam")
+			.map(item -> OBJECT_MAPPER.readValue(item, Result.class));
+
+		Assertions.assertEquals(0, tmp.count());
+
+		tmp = sc
+			.textFile(workingDir.toString() + "/split/mes")
+			.map(item -> OBJECT_MAPPER.readValue(item, Result.class));
+
+		Assertions.assertEquals(0, tmp.count());
+
+		tmp = sc
+			.textFile(workingDir.toString() + "/split/instruct")
+			.map(item -> OBJECT_MAPPER.readValue(item, Result.class));
+
+		Assertions.assertEquals(0, tmp.count());
+
+		tmp = sc
+			.textFile(workingDir.toString() + "/split/elixir-gr")
"/split/elixir-gr") + .map(item -> OBJECT_MAPPER.readValue(item, Result.class)); + + Assertions.assertEquals(0, tmp.count()); + + tmp = sc + .textFile(workingDir.toString() + "/split/aginfra") + .map(item -> OBJECT_MAPPER.readValue(item, Result.class)); + + Assertions.assertEquals(0, tmp.count()); + + tmp = sc + .textFile(workingDir.toString() + "/split/dariah") + .map(item -> OBJECT_MAPPER.readValue(item, Result.class)); + + Assertions.assertEquals(0, tmp.count()); + + tmp = sc + .textFile(workingDir.toString() + "/split/risis") + .map(item -> OBJECT_MAPPER.readValue(item, Result.class)); + + Assertions.assertEquals(0, tmp.count()); + + tmp = sc + .textFile(workingDir.toString() + "/split/epos") + .map(item -> OBJECT_MAPPER.readValue(item, Result.class)); + + Assertions.assertEquals(0, tmp.count()); + + tmp = sc + .textFile(workingDir.toString() + "/split/beopen") + .map(item -> OBJECT_MAPPER.readValue(item, Result.class)); + + Assertions.assertEquals(0, tmp.count()); + + tmp = sc + .textFile(workingDir.toString() + "/split/euromarine") + .map(item -> OBJECT_MAPPER.readValue(item, Result.class)); + + Assertions.assertEquals(0, tmp.count()); + + tmp = sc + .textFile(workingDir.toString() + "/split/ifremer") + .map(item -> OBJECT_MAPPER.readValue(item, Result.class)); + + Assertions.assertEquals(0, tmp.count()); + + tmp = sc + .textFile(workingDir.toString() + "/split/oa-pg") + .map(item -> OBJECT_MAPPER.readValue(item, Result.class)); + + Assertions.assertEquals(0, tmp.count()); + + tmp = sc + .textFile(workingDir.toString() + "/split/covid-19") + .map(item -> OBJECT_MAPPER.readValue(item, Result.class)); + + Assertions.assertEquals(0, tmp.count()); + + tmp = sc + .textFile(workingDir.toString() + "/split/enermaps") + .map(item -> OBJECT_MAPPER.readValue(item, Result.class)); + + Assertions.assertEquals(0, tmp.count()); + } } diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/UpdateProjectInfoTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/UpdateProjectInfoTest.java index a7599c09f..4232e1c8c 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/UpdateProjectInfoTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/UpdateProjectInfoTest.java @@ -23,6 +23,7 @@ import org.slf4j.LoggerFactory; import com.fasterxml.jackson.databind.ObjectMapper; +import eu.dnetlib.dhp.schema.dump.oaf.Result; import eu.dnetlib.dhp.schema.dump.oaf.Software; public class UpdateProjectInfoTest { @@ -75,20 +76,19 @@ public class UpdateProjectInfoTest { SparkUpdateProjectInfo.main(new String[] { "-isSparkSessionManaged", Boolean.FALSE.toString(), - "-resultTableName", "eu.dnetlib.dhp.schema.dump.oaf.Software", "-preparedInfoPath", sourcePath + "/preparedInfo", - "-outputPath", workingDir.toString() + "/ext/software", - "-sourcePath", sourcePath + "/software" + "-outputPath", workingDir.toString() + "/result", + "-sourcePath", sourcePath + "/software.json" }); final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); - JavaRDD tmp = sc - .textFile(workingDir.toString() + "/ext/software") - .map(item -> OBJECT_MAPPER.readValue(item, Software.class)); + JavaRDD tmp = sc + .textFile(workingDir.toString() + "/result") + .map(item -> OBJECT_MAPPER.readValue(item, Result.class)); - org.apache.spark.sql.Dataset verificationDataset = spark - .createDataset(tmp.rdd(), Encoders.bean(Software.class)); + org.apache.spark.sql.Dataset verificationDataset = spark + 
.createDataset(tmp.rdd(), Encoders.bean(Result.class)); verificationDataset.show(false); diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/ZenodoUploadTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/ZenodoUploadTest.java index ca3f91c59..e8f2085c9 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/ZenodoUploadTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/ZenodoUploadTest.java @@ -3,14 +3,45 @@ package eu.dnetlib.dhp.oa.graph.dump; import java.io.IOException; +import org.apache.commons.io.IOUtils; +import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; -public class TestZenodoConnection { +public class ZenodoUploadTest { @Test - public void test1() throws IOException { + public void testConnection() throws IOException { APIClient s = new APIClient( - "https://sandbox.zenodo.org/api/deposit/depositions?access_token=5ImUj0VC1ICg4ifK5dc3AGzJhcfAB4osxrFlsr8WxHXxjaYgCE0hY8HZcDoe"); + // "https://sandbox.zenodo.org/api/deposit/depositions?access_token=5ImUj0VC1ICg4ifK5dc3AGzJhcfAB4osxrFlsr8WxHXxjaYgCE0hY8HZcDoe"); + "https://sandbox.zenodo.org/api/deposit/depositions"); + + Assertions.assertEquals(201, s.connect()); + + final String sourcePath = getClass() + .getResource("/eu/dnetlib/dhp/oa/graph/dump/zenodo/ni") + .getPath(); + + s.upload(sourcePath, "Neuroinformatics"); + + } +// +// @Test +// public void testGet() throws IOException { +// APIClient s = new APIClient("https://sandbox.zenodo.org/api/deposit/depositions"); +// +// s.get(); +// } + + @Test + public void testUpload() throws IOException { + + APIClient s = new APIClient("https://sandbox.zenodo.org/api/deposit/depositions?access_token=5ImUj0VC1ICg4ifK5dc3AGzJhcfAB4osxrFlsr8WxHXxjaYgCE0hY8HZcDoe"); + final String sourcePath = getClass() + .getResource("/eu/dnetlib/dhp/oa/graph/dump/zenodo/ni") + .getPath(); + + s.upload(sourcePath, "Neuroinformatics"); + } }
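
Note on the pattern this patch converges on: every dump is now read back as JSON lines into the single eu.dnetlib.dhp.schema.dump.oaf.Result type, and the concrete result kind is checked through the "type" field ('dataset', 'publication', 'software', 'other') instead of through one dump class per result table. A minimal self-contained sketch of that verification step follows; the local SparkSession, the dump location, and the class name ResultDumpVerificationSketch are illustrative assumptions, not part of the patch.

// Illustrative sketch (not part of the patch): the verification pattern
// shared by the updated tests above.
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SparkSession;

import com.fasterxml.jackson.databind.ObjectMapper;

import eu.dnetlib.dhp.schema.dump.oaf.Result;

public class ResultDumpVerificationSketch {

	private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();

	public static void main(String[] args) {
		SparkSession spark = SparkSession
			.builder()
			.appName("ResultDumpVerificationSketch")
			.master("local[*]")
			.getOrCreate();

		String dumpPath = "/tmp/workingDir/result"; // hypothetical dump location

		JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());

		// Each line of the dump is one JSON record; all result kinds
		// deserialize to the common Result type.
		JavaRDD<Result> tmp = sc
			.textFile(dumpPath)
			.map(item -> OBJECT_MAPPER.readValue(item, Result.class));

		org.apache.spark.sql.Dataset<Result> verificationDataset = spark
			.createDataset(tmp.rdd(), Encoders.bean(Result.class));

		// The result kind is carried by the `type` field rather than
		// by the Java class of the deserialized record.
		verificationDataset.groupBy("type").count().show(false);

		spark.stop();
	}
}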