changes in the testing classes

This commit is contained in:
Miriam Baglioni 2020-06-18 11:20:08 +02:00
parent bc8611a95a
commit 5c8533d1a1
4 changed files with 206 additions and 57 deletions

DumpJobTest.java

@@ -151,10 +151,9 @@ public class DumpJobTest {
  SparkDumpCommunityProducts.main(new String[] {
      "-isLookUpUrl", MOCK_IS_LOOK_UP_URL,
      "-isSparkSessionManaged", Boolean.FALSE.toString(),
-     "-outputPath", workingDir.toString() + "/dataset",
+     "-outputPath", workingDir.toString() + "/result",
      "-sourcePath", sourcePath,
-     "-resultTableName", "eu.dnetlib.dhp.schema.oaf.Dataset",
+     "-dumpTableName", "eu.dnetlib.dhp.schema.dump.oaf.Dataset",
      "-communityMap", new Gson().toJson(map)
  });
@@ -162,12 +161,12 @@ public class DumpJobTest {
  final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
- JavaRDD<eu.dnetlib.dhp.schema.dump.oaf.Dataset> tmp = sc
-     .textFile(workingDir.toString() + "/dataset")
-     .map(item -> OBJECT_MAPPER.readValue(item, eu.dnetlib.dhp.schema.dump.oaf.Dataset.class));
+ JavaRDD<eu.dnetlib.dhp.schema.dump.oaf.Result> tmp = sc
+     .textFile(workingDir.toString() + "/result")
+     .map(item -> OBJECT_MAPPER.readValue(item, eu.dnetlib.dhp.schema.dump.oaf.Result.class));
- org.apache.spark.sql.Dataset<eu.dnetlib.dhp.schema.dump.oaf.Dataset> verificationDataset = spark
-     .createDataset(tmp.rdd(), Encoders.bean(eu.dnetlib.dhp.schema.dump.oaf.Dataset.class));
+ org.apache.spark.sql.Dataset<eu.dnetlib.dhp.schema.dump.oaf.Result> verificationDataset = spark
+     .createDataset(tmp.rdd(), Encoders.bean(eu.dnetlib.dhp.schema.dump.oaf.Result.class));
  Assertions.assertEquals(90, verificationDataset.count());
  // verificationDataset.show(false);
@@ -198,7 +197,9 @@ public class DumpJobTest {
  Assertions.assertTrue(verificationDataset.filter("size(context) > 0").count() == 90);
- verificationDataset.select("instance.type").show(false);
+ Assertions.assertTrue(verificationDataset.filter("type = 'dataset'").count() == 90);
+ // verificationDataset.select("instance.type").show(false);
  //TODO verify value and name of the fields for vocab related value (i.e. accessright, bestaccessright)
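Note: the same verification steps recur in each of the tests below; only the output path, the expected count, and the type discriminator differ. Where the tests previously deserialized each dump into its concrete class (Dataset, Publication, Software, OtherResearchProduct), they now read the shared /result output as the common eu.dnetlib.dhp.schema.dump.oaf.Result supertype. A minimal sketch of a helper capturing the pattern, assuming the test class's existing spark, OBJECT_MAPPER, and imports; the helper name is hypothetical and not part of this commit:

// Hypothetical helper, not part of the commit: read a dumped JSON-lines
// output as the common Result supertype and verify the per-type counts.
private void verifyDump(String outputPath, String expectedType, long expected) {
    final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());

    JavaRDD<eu.dnetlib.dhp.schema.dump.oaf.Result> tmp = sc
        .textFile(outputPath)
        .map(item -> OBJECT_MAPPER.readValue(item, eu.dnetlib.dhp.schema.dump.oaf.Result.class));

    org.apache.spark.sql.Dataset<eu.dnetlib.dhp.schema.dump.oaf.Result> ds = spark
        .createDataset(tmp.rdd(), Encoders.bean(eu.dnetlib.dhp.schema.dump.oaf.Result.class));

    Assertions.assertEquals(expected, ds.count());
    // every record carries the type discriminator written by the dump, e.g. 'dataset'
    Assertions.assertEquals(expected, ds.filter("type = '" + expectedType + "'").count());
}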
@@ -214,10 +215,9 @@ public class DumpJobTest {
  SparkDumpCommunityProducts.main(new String[] {
      "-isLookUpUrl", MOCK_IS_LOOK_UP_URL,
      "-isSparkSessionManaged", Boolean.FALSE.toString(),
-     "-outputPath", workingDir.toString() + "/publication",
+     "-outputPath", workingDir.toString() + "/result",
      "-sourcePath", sourcePath,
-     "-resultTableName", "eu.dnetlib.dhp.schema.oaf.Publication",
+     "-dumpTableName", "eu.dnetlib.dhp.schema.dump.oaf.Publication",
      "-communityMap", new Gson().toJson(map)
  });
@@ -225,16 +225,18 @@ public class DumpJobTest {
  final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
- JavaRDD<eu.dnetlib.dhp.schema.dump.oaf.Publication> tmp = sc
-     .textFile(workingDir.toString() + "/publication")
-     .map(item -> OBJECT_MAPPER.readValue(item, eu.dnetlib.dhp.schema.dump.oaf.Publication.class));
+ JavaRDD<eu.dnetlib.dhp.schema.dump.oaf.Result> tmp = sc
+     .textFile(workingDir.toString() + "/result")
+     .map(item -> OBJECT_MAPPER.readValue(item, eu.dnetlib.dhp.schema.dump.oaf.Result.class));
- org.apache.spark.sql.Dataset<eu.dnetlib.dhp.schema.dump.oaf.Publication> verificationDataset = spark
-     .createDataset(tmp.rdd(), Encoders.bean(eu.dnetlib.dhp.schema.dump.oaf.Publication.class));
+ org.apache.spark.sql.Dataset<eu.dnetlib.dhp.schema.dump.oaf.Result> verificationDataset = spark
+     .createDataset(tmp.rdd(), Encoders.bean(eu.dnetlib.dhp.schema.dump.oaf.Result.class));
  Assertions.assertEquals(76, verificationDataset.count());
  verificationDataset.show(false);
+ Assertions.assertEquals(76, verificationDataset.filter("type = 'publication'").count());
  //TODO verify value and name of the fields for vocab related value (i.e. accessright, bestaccessright)
  }
@@ -249,10 +251,9 @@ public class DumpJobTest {
  SparkDumpCommunityProducts.main(new String[] {
      "-isLookUpUrl", MOCK_IS_LOOK_UP_URL,
      "-isSparkSessionManaged", Boolean.FALSE.toString(),
-     "-outputPath", workingDir.toString() + "/software",
+     "-outputPath", workingDir.toString() + "/result",
      "-sourcePath", sourcePath,
-     "-resultTableName", "eu.dnetlib.dhp.schema.oaf.Software",
+     "-dumpTableName", "eu.dnetlib.dhp.schema.dump.oaf.Software",
      "-communityMap", new Gson().toJson(map)
  });
@@ -260,14 +261,16 @@ public class DumpJobTest {
  final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
- JavaRDD<eu.dnetlib.dhp.schema.dump.oaf.Software> tmp = sc
-     .textFile(workingDir.toString() + "/software")
-     .map(item -> OBJECT_MAPPER.readValue(item, eu.dnetlib.dhp.schema.dump.oaf.Software.class));
+ JavaRDD<eu.dnetlib.dhp.schema.dump.oaf.Result> tmp = sc
+     .textFile(workingDir.toString() + "/result")
+     .map(item -> OBJECT_MAPPER.readValue(item, eu.dnetlib.dhp.schema.dump.oaf.Result.class));
- org.apache.spark.sql.Dataset<eu.dnetlib.dhp.schema.dump.oaf.Software> verificationDataset = spark
-     .createDataset(tmp.rdd(), Encoders.bean(eu.dnetlib.dhp.schema.dump.oaf.Software.class));
+ org.apache.spark.sql.Dataset<eu.dnetlib.dhp.schema.dump.oaf.Result> verificationDataset = spark
+     .createDataset(tmp.rdd(), Encoders.bean(eu.dnetlib.dhp.schema.dump.oaf.Result.class));
  Assertions.assertEquals(6, verificationDataset.count());
+ Assertions.assertEquals(6, verificationDataset.filter("type = 'software'").count());
  verificationDataset.show(false);
  //TODO verify value and name of the fields for vocab related value (i.e. accessright, bestaccessright)
@@ -284,10 +287,9 @@ public class DumpJobTest {
  SparkDumpCommunityProducts.main(new String[] {
      "-isLookUpUrl", MOCK_IS_LOOK_UP_URL,
      "-isSparkSessionManaged", Boolean.FALSE.toString(),
-     "-outputPath", workingDir.toString() + "/orp",
+     "-outputPath", workingDir.toString() + "/result",
      "-sourcePath", sourcePath,
-     "-resultTableName", "eu.dnetlib.dhp.schema.oaf.OtherResearchProduct",
+     "-dumpTableName", "eu.dnetlib.dhp.schema.dump.oaf.OtherResearchProduct",
      "-communityMap", new Gson().toJson(map)
  });
@@ -295,14 +297,16 @@ public class DumpJobTest {
  final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
- JavaRDD<eu.dnetlib.dhp.schema.dump.oaf.OtherResearchProduct> tmp = sc
-     .textFile(workingDir.toString() + "/orp")
-     .map(item -> OBJECT_MAPPER.readValue(item, eu.dnetlib.dhp.schema.dump.oaf.OtherResearchProduct.class));
+ JavaRDD<eu.dnetlib.dhp.schema.dump.oaf.Result> tmp = sc
+     .textFile(workingDir.toString() + "/result")
+     .map(item -> OBJECT_MAPPER.readValue(item, eu.dnetlib.dhp.schema.dump.oaf.Result.class));
- org.apache.spark.sql.Dataset<eu.dnetlib.dhp.schema.dump.oaf.OtherResearchProduct> verificationDataset = spark
-     .createDataset(tmp.rdd(), Encoders.bean(eu.dnetlib.dhp.schema.dump.oaf.OtherResearchProduct.class));
+ org.apache.spark.sql.Dataset<eu.dnetlib.dhp.schema.dump.oaf.Result> verificationDataset = spark
+     .createDataset(tmp.rdd(), Encoders.bean(eu.dnetlib.dhp.schema.dump.oaf.Result.class));
  Assertions.assertEquals(3, verificationDataset.count());
+ Assertions.assertEquals(3, verificationDataset.filter("type = 'other'").count());
  verificationDataset.show(false);
  //TODO verify value and name of the fields for vocab related value (i.e. accessright, bestaccessright)

SplitForCommunityTest.java

@@ -24,6 +24,7 @@ import org.slf4j.LoggerFactory;
  import com.fasterxml.jackson.databind.ObjectMapper;
  import com.google.gson.Gson;
+ import eu.dnetlib.dhp.schema.dump.oaf.Result;
  import eu.dnetlib.dhp.schema.dump.oaf.Software;
  public class SplitForCommunityTest {
@@ -62,7 +63,7 @@ public class SplitForCommunityTest {
  map.put("dh-ch", "Digital Humanities and Cultural Heritage");
  map.put("science-innovation-policy", "Science and Innovation Policy Studies");
  map.put("covid-19", "COVID-19");
- map.put("enrmaps", "Energy Research");
+ map.put("enermaps", "Energy Research");
  map.put("epos", "EPOS");
  }
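The corrected key matters: the map keys are the community identifiers the jobs match on, while the values are only display names. The -communityMap argument seen in the tests is simply this map serialized with Gson. A small illustration of what the jobs receive (HashMap entry order is not guaranteed):

// Illustration only: what new Gson().toJson(map) hands to the jobs.
Map<String, String> map = new HashMap<>();
map.put("enermaps", "Energy Research");
map.put("epos", "EPOS");
String communityMap = new Gson().toJson(map);
// communityMap is {"enermaps":"Energy Research","epos":"EPOS"} (entry order may vary)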
@@ -149,28 +150,27 @@ public class SplitForCommunityTest {
  public void test1() throws Exception {
  final String sourcePath = getClass()
-     .getResource("/eu/dnetlib/dhp/oa/graph/dump/splitForCommunity/software")
+     .getResource("/eu/dnetlib/dhp/oa/graph/dump/splitForCommunity")
      .getPath();
- SparkSplitForCommunity.main(new String[] {
+ SparkSplitForCommunity2.main(new String[] {
      "-isLookUpUrl", MOCK_IS_LOOK_UP_URL,
      "-isSparkSessionManaged", Boolean.FALSE.toString(),
      "-outputPath", workingDir.toString() + "/split",
      "-sourcePath", sourcePath,
-     "-resultTableName", "eu.dnetlib.dhp.schema.dump.oaf.Software",
      "-communityMap", new Gson().toJson(map)
  });
  final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
- JavaRDD<Software> tmp = sc
+ JavaRDD<Result> tmp = sc
      .textFile(workingDir.toString() + "/split/dh-ch")
-     .map(item -> OBJECT_MAPPER.readValue(item, Software.class));
+     .map(item -> OBJECT_MAPPER.readValue(item, Result.class));
- org.apache.spark.sql.Dataset<Software> verificationDataset = spark
-     .createDataset(tmp.rdd(), Encoders.bean(Software.class));
+ org.apache.spark.sql.Dataset<Result> verificationDataset = spark
+     .createDataset(tmp.rdd(), Encoders.bean(Result.class));
- Assertions.assertEquals(1, verificationDataset.count());
+ Assertions.assertEquals(19, verificationDataset.count());
  Assertions
      .assertEquals(
@ -178,10 +178,10 @@ public class SplitForCommunityTest {
tmp = sc
.textFile(workingDir.toString() + "/split/egi")
.map(item -> OBJECT_MAPPER.readValue(item, Software.class));
.map(item -> OBJECT_MAPPER.readValue(item, Result.class));
verificationDataset = spark
.createDataset(tmp.rdd(), Encoders.bean(Software.class));
.createDataset(tmp.rdd(), Encoders.bean(Result.class));
Assertions.assertEquals(1, verificationDataset.count());
@@ -191,12 +191,12 @@ public class SplitForCommunityTest {
  tmp = sc
      .textFile(workingDir.toString() + "/split/ni")
-     .map(item -> OBJECT_MAPPER.readValue(item, Software.class));
+     .map(item -> OBJECT_MAPPER.readValue(item, Result.class));
  verificationDataset = spark
-     .createDataset(tmp.rdd(), Encoders.bean(Software.class));
+     .createDataset(tmp.rdd(), Encoders.bean(Result.class));
- Assertions.assertEquals(1, verificationDataset.count());
+ Assertions.assertEquals(5, verificationDataset.count());
  Assertions
      .assertEquals(
@@ -204,12 +204,12 @@ public class SplitForCommunityTest {
  tmp = sc
      .textFile(workingDir.toString() + "/split/science-innovation-policy")
-     .map(item -> OBJECT_MAPPER.readValue(item, Software.class));
+     .map(item -> OBJECT_MAPPER.readValue(item, Result.class));
  verificationDataset = spark
-     .createDataset(tmp.rdd(), Encoders.bean(Software.class));
+     .createDataset(tmp.rdd(), Encoders.bean(Result.class));
- Assertions.assertEquals(4, verificationDataset.count());
+ Assertions.assertEquals(5, verificationDataset.count());
  Assertions
      .assertEquals(
@@ -224,5 +224,119 @@ public class SplitForCommunityTest {
      .assertEquals(
          1, verificationDataset.filter("id = '50|dedup_wf_001::51b88f272ba9c3bb181af64e70255a80'").count());
+ tmp = sc
+     .textFile(workingDir.toString() + "/split/fet-fp7")
+     .map(item -> OBJECT_MAPPER.readValue(item, Result.class));
+ Assertions.assertEquals(0, tmp.count());
+ tmp = sc
+     .textFile(workingDir.toString() + "/split/fet-h2020")
+     .map(item -> OBJECT_MAPPER.readValue(item, Result.class));
+ Assertions.assertEquals(0, tmp.count());
+ tmp = sc
+     .textFile(workingDir.toString() + "/split/clarin")
+     .map(item -> OBJECT_MAPPER.readValue(item, Result.class));
+ Assertions.assertEquals(0, tmp.count());
+ tmp = sc
+     .textFile(workingDir.toString() + "/split/rda")
+     .map(item -> OBJECT_MAPPER.readValue(item, Result.class));
+ Assertions.assertEquals(0, tmp.count());
+ tmp = sc
+     .textFile(workingDir.toString() + "/split/ee")
+     .map(item -> OBJECT_MAPPER.readValue(item, Result.class));
+ Assertions.assertEquals(0, tmp.count());
+ tmp = sc
+     .textFile(workingDir.toString() + "/split/fam")
+     .map(item -> OBJECT_MAPPER.readValue(item, Result.class));
+ Assertions.assertEquals(0, tmp.count());
+ tmp = sc
+     .textFile(workingDir.toString() + "/split/mes")
+     .map(item -> OBJECT_MAPPER.readValue(item, Result.class));
+ Assertions.assertEquals(0, tmp.count());
+ tmp = sc
+     .textFile(workingDir.toString() + "/split/instruct")
+     .map(item -> OBJECT_MAPPER.readValue(item, Result.class));
+ Assertions.assertEquals(0, tmp.count());
+ tmp = sc
+     .textFile(workingDir.toString() + "/split/elixir-gr")
+     .map(item -> OBJECT_MAPPER.readValue(item, Result.class));
+ Assertions.assertEquals(0, tmp.count());
+ tmp = sc
+     .textFile(workingDir.toString() + "/split/aginfra")
+     .map(item -> OBJECT_MAPPER.readValue(item, Result.class));
+ Assertions.assertEquals(0, tmp.count());
+ tmp = sc
+     .textFile(workingDir.toString() + "/split/dariah")
+     .map(item -> OBJECT_MAPPER.readValue(item, Result.class));
+ Assertions.assertEquals(0, tmp.count());
+ tmp = sc
+     .textFile(workingDir.toString() + "/split/risis")
+     .map(item -> OBJECT_MAPPER.readValue(item, Result.class));
+ Assertions.assertEquals(0, tmp.count());
+ tmp = sc
+     .textFile(workingDir.toString() + "/split/epos")
+     .map(item -> OBJECT_MAPPER.readValue(item, Result.class));
+ Assertions.assertEquals(0, tmp.count());
+ tmp = sc
+     .textFile(workingDir.toString() + "/split/beopen")
+     .map(item -> OBJECT_MAPPER.readValue(item, Result.class));
+ Assertions.assertEquals(0, tmp.count());
+ tmp = sc
+     .textFile(workingDir.toString() + "/split/euromarine")
+     .map(item -> OBJECT_MAPPER.readValue(item, Result.class));
+ Assertions.assertEquals(0, tmp.count());
+ tmp = sc
+     .textFile(workingDir.toString() + "/split/ifremer")
+     .map(item -> OBJECT_MAPPER.readValue(item, Result.class));
+ Assertions.assertEquals(0, tmp.count());
+ tmp = sc
+     .textFile(workingDir.toString() + "/split/oa-pg")
+     .map(item -> OBJECT_MAPPER.readValue(item, Result.class));
+ Assertions.assertEquals(0, tmp.count());
+ tmp = sc
+     .textFile(workingDir.toString() + "/split/covid-19")
+     .map(item -> OBJECT_MAPPER.readValue(item, Result.class));
+ Assertions.assertEquals(0, tmp.count());
+ tmp = sc
+     .textFile(workingDir.toString() + "/split/enermaps")
+     .map(item -> OBJECT_MAPPER.readValue(item, Result.class));
+ Assertions.assertEquals(0, tmp.count());
  }
  }
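The nineteen zero-count checks added above are identical up to the community name. A sketch of how they could be driven by a loop, assuming the same sc, workingDir, and OBJECT_MAPPER fixtures this test already uses, plus a java.util.Arrays import:

// Sketch, not part of the commit: check every expected-empty community in a loop.
for (String community : Arrays.asList(
    "fet-fp7", "fet-h2020", "clarin", "rda", "ee", "fam", "mes", "instruct",
    "elixir-gr", "aginfra", "dariah", "risis", "epos", "beopen",
    "euromarine", "ifremer", "oa-pg", "covid-19", "enermaps")) {
    JavaRDD<Result> empty = sc
        .textFile(workingDir.toString() + "/split/" + community)
        .map(item -> OBJECT_MAPPER.readValue(item, Result.class));
    Assertions.assertEquals(0, empty.count(), community + " should have no results");
}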

UpdateProjectInfoTest.java

@@ -23,6 +23,7 @@ import org.slf4j.LoggerFactory;
  import com.fasterxml.jackson.databind.ObjectMapper;
+ import eu.dnetlib.dhp.schema.dump.oaf.Result;
  import eu.dnetlib.dhp.schema.dump.oaf.Software;
  public class UpdateProjectInfoTest {
@@ -75,20 +76,19 @@ public class UpdateProjectInfoTest {
  SparkUpdateProjectInfo.main(new String[] {
      "-isSparkSessionManaged", Boolean.FALSE.toString(),
-     "-resultTableName", "eu.dnetlib.dhp.schema.dump.oaf.Software",
      "-preparedInfoPath", sourcePath + "/preparedInfo",
-     "-outputPath", workingDir.toString() + "/ext/software",
-     "-sourcePath", sourcePath + "/software"
+     "-outputPath", workingDir.toString() + "/result",
+     "-sourcePath", sourcePath + "/software.json"
  });
  final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
- JavaRDD<Software> tmp = sc
-     .textFile(workingDir.toString() + "/ext/software")
-     .map(item -> OBJECT_MAPPER.readValue(item, Software.class));
+ JavaRDD<Result> tmp = sc
+     .textFile(workingDir.toString() + "/result")
+     .map(item -> OBJECT_MAPPER.readValue(item, Result.class));
- org.apache.spark.sql.Dataset<Software> verificationDataset = spark
-     .createDataset(tmp.rdd(), Encoders.bean(Software.class));
+ org.apache.spark.sql.Dataset<Result> verificationDataset = spark
+     .createDataset(tmp.rdd(), Encoders.bean(Result.class));
  verificationDataset.show(false);
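The test currently stops at show(false). If the dump schema exposes the linked projects on Result, one extra assertion could pin the merge down; the projects field name below is an assumption, not something this diff shows:

// Hedged sketch: assert that at least one record picked up project links.
// "projects" is an assumed field name on the dump Result bean.
verificationDataset.createOrReplaceTempView("result");
Assertions.assertTrue(
    spark.sql("SELECT id FROM result WHERE size(projects) > 0").count() > 0);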

ZenodoUploadTest.java

@@ -3,14 +3,45 @@ package eu.dnetlib.dhp.oa.graph.dump;
  import java.io.IOException;
  import org.apache.commons.io.IOUtils;
+ import org.junit.jupiter.api.Assertions;
  import org.junit.jupiter.api.Test;
- public class TestZenodoConnection {
+ public class ZenodoUploadTest {
  @Test
- public void test1() throws IOException {
+ public void testConnection() throws IOException {
  APIClient s = new APIClient(
-     "https://sandbox.zenodo.org/api/deposit/depositions?access_token=5ImUj0VC1ICg4ifK5dc3AGzJhcfAB4osxrFlsr8WxHXxjaYgCE0hY8HZcDoe");
+     // "https://sandbox.zenodo.org/api/deposit/depositions?access_token=5ImUj0VC1ICg4ifK5dc3AGzJhcfAB4osxrFlsr8WxHXxjaYgCE0hY8HZcDoe");
+     "https://sandbox.zenodo.org/api/deposit/depositions");
+ Assertions.assertEquals(201, s.connect());
- final String sourcePath = getClass()
-     .getResource("/eu/dnetlib/dhp/oa/graph/dump/zenodo/ni")
-     .getPath();
- s.upload(sourcePath, "Neuroinformatics");
  }
+ //
+ // @Test
+ // public void testGet() throws IOException {
+ // APIClient s = new APIClient("https://sandbox.zenodo.org/api/deposit/depositions");
+ //
+ // s.get();
+ // }
+ @Test
+ public void testUpload() throws IOException {
+ APIClient s = new APIClient("https://sandbox.zenodo.org/api/deposit/depositions?access_token=5ImUj0VC1ICg4ifK5dc3AGzJhcfAB4osxrFlsr8WxHXxjaYgCE0hY8HZcDoe");
+ final String sourcePath = getClass()
+     .getResource("/eu/dnetlib/dhp/oa/graph/dump/zenodo/ni")
+     .getPath();
+ s.upload(sourcePath, "Neuroinformatics");
+ }
  }
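APIClient is project code that this diff does not show; testConnection expects connect() to return 201, which is the status the Zenodo deposition API answers with when a new deposition is created. A rough sketch of the call connect() presumably wraps, using plain HttpURLConnection and assuming a valid sandbox access token:

import java.io.IOException;
import java.io.OutputStream;
import java.net.HttpURLConnection;
import java.net.URL;
import java.nio.charset.StandardCharsets;

// Sketch only: create an empty deposition on the Zenodo sandbox.
// A successful creation answers HTTP 201 (Created).
static int createDeposition(String token) throws IOException {
    URL url = new URL("https://sandbox.zenodo.org/api/deposit/depositions?access_token=" + token);
    HttpURLConnection conn = (HttpURLConnection) url.openConnection();
    conn.setRequestMethod("POST");
    conn.setRequestProperty("Content-Type", "application/json");
    conn.setDoOutput(true);
    try (OutputStream os = conn.getOutputStream()) {
        os.write("{}".getBytes(StandardCharsets.UTF_8)); // empty deposition body
    }
    return conn.getResponseCode(); // expected: 201
}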