test using communityMapPath instead of isLookUp

This commit is contained in:
Miriam Baglioni 2020-08-10 12:02:55 +02:00
parent fe88904df0
commit b8c26f656c
1 changed file with 271 additions and 271 deletions

View File

@ -28,7 +28,7 @@ import eu.dnetlib.dhp.schema.oaf.OtherResearchProduct;
import eu.dnetlib.dhp.schema.oaf.Publication; import eu.dnetlib.dhp.schema.oaf.Publication;
import eu.dnetlib.dhp.schema.oaf.Software; import eu.dnetlib.dhp.schema.oaf.Software;
@Disabled //@Disabled
public class DumpJobTest { public class DumpJobTest {
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
@ -131,275 +131,275 @@ public class DumpJobTest {
spark.stop(); spark.stop();
} }
// @Test @Test
// public void testMap() { public void testMap() {
// System.out.println(new Gson().toJson(map)); System.out.println(new Gson().toJson(map));
// } }
//
// @Test @Test
// public void testDataset() { public void testDataset() {
//
// final String sourcePath = getClass() final String sourcePath = getClass()
// .getResource("/eu/dnetlib/dhp/oa/graph/dump/resultDump/dataset.json") .getResource("/eu/dnetlib/dhp/oa/graph/dump/resultDump/dataset.json")
// .getPath(); .getPath();
//
// final String communityMapPath = getClass() final String communityMapPath = getClass()
// .getResource("/eu/dnetlib/dhp/oa/graph/dump/communityMapPath/communitymap.json") .getResource("/eu/dnetlib/dhp/oa/graph/dump/communityMapPath/communitymap.json")
// .getPath(); .getPath();
//
// DumpProducts dump = new DumpProducts(); DumpProducts dump = new DumpProducts();
// dump dump
// .run( .run(
// // false, sourcePath, workingDir.toString() + "/result", communityMapPath, Dataset.class, // false, sourcePath, workingDir.toString() + "/result", communityMapPath, Dataset.class,
// false, sourcePath, workingDir.toString() + "/result", map, Dataset.class, false, sourcePath, workingDir.toString() + "/result", communityMapPath, Dataset.class,
// CommunityResult.class, false); CommunityResult.class, false);
//
// final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
//
// JavaRDD<CommunityResult> tmp = sc JavaRDD<CommunityResult> tmp = sc
// .textFile(workingDir.toString() + "/result") .textFile(workingDir.toString() + "/result")
// .map(item -> OBJECT_MAPPER.readValue(item, CommunityResult.class)); .map(item -> OBJECT_MAPPER.readValue(item, CommunityResult.class));
//
// org.apache.spark.sql.Dataset<CommunityResult> verificationDataset = spark org.apache.spark.sql.Dataset<CommunityResult> verificationDataset = spark
// .createDataset(tmp.rdd(), Encoders.bean(CommunityResult.class)); .createDataset(tmp.rdd(), Encoders.bean(CommunityResult.class));
//
// Assertions.assertEquals(90, verificationDataset.count()); Assertions.assertEquals(90, verificationDataset.count());
//
// Assertions Assertions
// .assertTrue( .assertTrue(
// verificationDataset.filter("bestAccessright.code = 'c_abf2'").count() == verificationDataset verificationDataset.filter("bestAccessright.code = 'c_abf2'").count() == verificationDataset
// .filter("bestAccessright.code = 'c_abf2' and bestAccessright.label = 'OPEN'") .filter("bestAccessright.code = 'c_abf2' and bestAccessright.label = 'OPEN'")
// .count()); .count());
//
// Assertions Assertions
// .assertTrue( .assertTrue(
// verificationDataset.filter("bestAccessright.code = 'c_16ec'").count() == verificationDataset verificationDataset.filter("bestAccessright.code = 'c_16ec'").count() == verificationDataset
// .filter("bestAccessright.code = 'c_16ec' and bestAccessright.label = 'RESTRICTED'") .filter("bestAccessright.code = 'c_16ec' and bestAccessright.label = 'RESTRICTED'")
// .count()); .count());
//
// Assertions Assertions
// .assertTrue( .assertTrue(
// verificationDataset.filter("bestAccessright.code = 'c_14cb'").count() == verificationDataset verificationDataset.filter("bestAccessright.code = 'c_14cb'").count() == verificationDataset
// .filter("bestAccessright.code = 'c_14cb' and bestAccessright.label = 'CLOSED'") .filter("bestAccessright.code = 'c_14cb' and bestAccessright.label = 'CLOSED'")
// .count()); .count());
//
// Assertions Assertions
// .assertTrue( .assertTrue(
// verificationDataset.filter("bestAccessright.code = 'c_f1cf'").count() == verificationDataset verificationDataset.filter("bestAccessright.code = 'c_f1cf'").count() == verificationDataset
// .filter("bestAccessright.code = 'c_f1cf' and bestAccessright.label = 'EMBARGO'") .filter("bestAccessright.code = 'c_f1cf' and bestAccessright.label = 'EMBARGO'")
// .count()); .count());
//
// Assertions.assertTrue(verificationDataset.filter("size(context) > 0").count() == 90); Assertions.assertTrue(verificationDataset.filter("size(context) > 0").count() == 90);
//
// Assertions.assertTrue(verificationDataset.filter("type = 'dataset'").count() == 90); Assertions.assertTrue(verificationDataset.filter("type = 'dataset'").count() == 90);
//
////TODO verify value and name of the fields for vocab related value (i.e. accessright, bestaccessright) //TODO verify value and name of the fields for vocab related value (i.e. accessright, bestaccessright)
//
// } }
//
// @Test @Test
// public void testDataset2All() { public void testDataset2All() {
//
// final String sourcePath = getClass() final String sourcePath = getClass()
// .getResource("/eu/dnetlib/dhp/oa/graph/dump/resultDump/dataset_cleaned") .getResource("/eu/dnetlib/dhp/oa/graph/dump/resultDump/dataset_cleaned")
// .getPath(); .getPath();
//
// final String communityMapPath = getClass() final String communityMapPath = getClass()
// .getResource("/eu/dnetlib/dhp/oa/graph/dump/communityMapPath/communitymap.json") .getResource("/eu/dnetlib/dhp/oa/graph/dump/communityMapPath/communitymap.json")
// .getPath(); .getPath();
//
// DumpProducts dump = new DumpProducts(); DumpProducts dump = new DumpProducts();
// dump dump
// .run( .run(
// // false, sourcePath, workingDir.toString() + "/result", communityMapPath, Dataset.class, // false, sourcePath, workingDir.toString() + "/result", communityMapPath, Dataset.class,
// false, sourcePath, workingDir.toString() + "/result", map, Dataset.class, false, sourcePath, workingDir.toString() + "/result", communityMapPath, Dataset.class,
// Result.class, true); Result.class, true);
//
// final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
//
// JavaRDD<eu.dnetlib.dhp.schema.dump.oaf.Result> tmp = sc JavaRDD<eu.dnetlib.dhp.schema.dump.oaf.Result> tmp = sc
// .textFile(workingDir.toString() + "/result") .textFile(workingDir.toString() + "/result")
// .map(item -> OBJECT_MAPPER.readValue(item, eu.dnetlib.dhp.schema.dump.oaf.Result.class)); .map(item -> OBJECT_MAPPER.readValue(item, eu.dnetlib.dhp.schema.dump.oaf.Result.class));
//
// org.apache.spark.sql.Dataset<eu.dnetlib.dhp.schema.dump.oaf.Result> verificationDataset = spark org.apache.spark.sql.Dataset<eu.dnetlib.dhp.schema.dump.oaf.Result> verificationDataset = spark
// .createDataset(tmp.rdd(), Encoders.bean(eu.dnetlib.dhp.schema.dump.oaf.Result.class)); .createDataset(tmp.rdd(), Encoders.bean(eu.dnetlib.dhp.schema.dump.oaf.Result.class));
//
// Assertions.assertEquals(5, verificationDataset.count()); Assertions.assertEquals(5, verificationDataset.count());
//
// verificationDataset.show(false); verificationDataset.show(false);
// } }
//
// @Test @Test
// public void testDataset2Communities() { public void testDataset2Communities() {
//
// final String sourcePath = getClass() final String sourcePath = getClass()
// .getResource("/eu/dnetlib/dhp/oa/graph/dump/resultDump/dataset_cleaned") .getResource("/eu/dnetlib/dhp/oa/graph/dump/resultDump/dataset_cleaned")
// .getPath(); .getPath();
//
// final String communityMapPath = getClass() final String communityMapPath = getClass()
// .getResource("/eu/dnetlib/dhp/oa/graph/dump/communityMapPath/communitymap.json") .getResource("/eu/dnetlib/dhp/oa/graph/dump/communityMapPath/communitymap.json")
// .getPath(); .getPath();
//
// DumpProducts dump = new DumpProducts(); DumpProducts dump = new DumpProducts();
// dump dump
// .run( .run(
// // false, sourcePath, workingDir.toString() + "/result", communityMapPath, Dataset.class, // false, sourcePath, workingDir.toString() + "/result", communityMapPath, Dataset.class,
// false, sourcePath, workingDir.toString() + "/result", map, Dataset.class, false, sourcePath, workingDir.toString() + "/result", communityMapPath, Dataset.class,
// CommunityResult.class, false); CommunityResult.class, false);
//
// final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
//
// JavaRDD<CommunityResult> tmp = sc JavaRDD<CommunityResult> tmp = sc
// .textFile(workingDir.toString() + "/result") .textFile(workingDir.toString() + "/result")
// .map(item -> OBJECT_MAPPER.readValue(item, CommunityResult.class)); .map(item -> OBJECT_MAPPER.readValue(item, CommunityResult.class));
//
// org.apache.spark.sql.Dataset<CommunityResult> verificationDataset = spark org.apache.spark.sql.Dataset<CommunityResult> verificationDataset = spark
// .createDataset(tmp.rdd(), Encoders.bean(CommunityResult.class)); .createDataset(tmp.rdd(), Encoders.bean(CommunityResult.class));
//
// Assertions.assertEquals(0, verificationDataset.count()); Assertions.assertEquals(0, verificationDataset.count());
//
// verificationDataset.show(false); verificationDataset.show(false);
// } }
//
// @Test @Test
// public void testPublication() { public void testPublication() {
//
// final String sourcePath = getClass() final String sourcePath = getClass()
// .getResource("/eu/dnetlib/dhp/oa/graph/dump/resultDump/publication.json") .getResource("/eu/dnetlib/dhp/oa/graph/dump/resultDump/publication.json")
// .getPath(); .getPath();
//
// final String communityMapPath = getClass() final String communityMapPath = getClass()
// .getResource("/eu/dnetlib/dhp/oa/graph/dump/communityMapPath/communitymap.json") .getResource("/eu/dnetlib/dhp/oa/graph/dump/communityMapPath/communitymap.json")
// .getPath(); .getPath();
//
// DumpProducts dump = new DumpProducts(); DumpProducts dump = new DumpProducts();
// dump dump
// .run( .run(
// // false, sourcePath, workingDir.toString() + "/result", communityMapPath, Publication.class, // false, sourcePath, workingDir.toString() + "/result", communityMapPath, Publication.class,
// false, sourcePath, workingDir.toString() + "/result", map, Publication.class, false, sourcePath, workingDir.toString() + "/result", communityMapPath, Publication.class,
// CommunityResult.class, false); CommunityResult.class, false);
//
// final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
//
// JavaRDD<CommunityResult> tmp = sc JavaRDD<CommunityResult> tmp = sc
// .textFile(workingDir.toString() + "/result") .textFile(workingDir.toString() + "/result")
// .map(item -> OBJECT_MAPPER.readValue(item, CommunityResult.class)); .map(item -> OBJECT_MAPPER.readValue(item, CommunityResult.class));
//
// org.apache.spark.sql.Dataset<CommunityResult> verificationDataset = spark org.apache.spark.sql.Dataset<CommunityResult> verificationDataset = spark
// .createDataset(tmp.rdd(), Encoders.bean(CommunityResult.class)); .createDataset(tmp.rdd(), Encoders.bean(CommunityResult.class));
//
// Assertions.assertEquals(74, verificationDataset.count()); Assertions.assertEquals(74, verificationDataset.count());
// verificationDataset.show(false); verificationDataset.show(false);
//
// Assertions.assertEquals(74, verificationDataset.filter("type = 'publication'").count()); Assertions.assertEquals(74, verificationDataset.filter("type = 'publication'").count());
//
////TODO verify value and name of the fields for vocab related value (i.e. accessright, bestaccessright) //TODO verify value and name of the fields for vocab related value (i.e. accessright, bestaccessright)
//
// } }
//
// @Test @Test
// public void testSoftware() { public void testSoftware() {
//
// final String sourcePath = getClass() final String sourcePath = getClass()
// .getResource("/eu/dnetlib/dhp/oa/graph/dump/resultDump/software.json") .getResource("/eu/dnetlib/dhp/oa/graph/dump/resultDump/software.json")
// .getPath(); .getPath();
//
// final String communityMapPath = getClass() final String communityMapPath = getClass()
// .getResource("/eu/dnetlib/dhp/oa/graph/dump/communityMapPath/communitymap.json") .getResource("/eu/dnetlib/dhp/oa/graph/dump/communityMapPath/communitymap.json")
// .getPath(); .getPath();
//
// DumpProducts dump = new DumpProducts(); DumpProducts dump = new DumpProducts();
// dump dump
// .run( .run(
// // false, sourcePath, workingDir.toString() + "/result", communityMapPath, Software.class, // false, sourcePath, workingDir.toString() + "/result", communityMapPath, Software.class,
// false, sourcePath, workingDir.toString() + "/result", map, Software.class, false, sourcePath, workingDir.toString() + "/result", communityMapPath, Software.class,
// CommunityResult.class, false); CommunityResult.class, false);
//
// final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
//
// JavaRDD<CommunityResult> tmp = sc JavaRDD<CommunityResult> tmp = sc
// .textFile(workingDir.toString() + "/result") .textFile(workingDir.toString() + "/result")
// .map(item -> OBJECT_MAPPER.readValue(item, CommunityResult.class)); .map(item -> OBJECT_MAPPER.readValue(item, CommunityResult.class));
//
// org.apache.spark.sql.Dataset<CommunityResult> verificationDataset = spark org.apache.spark.sql.Dataset<CommunityResult> verificationDataset = spark
// .createDataset(tmp.rdd(), Encoders.bean(CommunityResult.class)); .createDataset(tmp.rdd(), Encoders.bean(CommunityResult.class));
//
// Assertions.assertEquals(6, verificationDataset.count()); Assertions.assertEquals(6, verificationDataset.count());
//
// Assertions.assertEquals(6, verificationDataset.filter("type = 'software'").count()); Assertions.assertEquals(6, verificationDataset.filter("type = 'software'").count());
// verificationDataset.show(false); verificationDataset.show(false);
//
////TODO verify value and name of the fields for vocab related value (i.e. accessright, bestaccessright) //TODO verify value and name of the fields for vocab related value (i.e. accessright, bestaccessright)
//
// } }
//
// @Test @Test
// public void testORP() { public void testORP() {
//
// final String sourcePath = getClass() final String sourcePath = getClass()
// .getResource("/eu/dnetlib/dhp/oa/graph/dump/resultDump/orp.json") .getResource("/eu/dnetlib/dhp/oa/graph/dump/resultDump/orp.json")
// .getPath(); .getPath();
//
// final String communityMapPath = getClass() final String communityMapPath = getClass()
// .getResource("/eu/dnetlib/dhp/oa/graph/dump/communityMapPath/communitymap.json") .getResource("/eu/dnetlib/dhp/oa/graph/dump/communityMapPath/communitymap.json")
// .getPath(); .getPath();
//
// DumpProducts dump = new DumpProducts(); DumpProducts dump = new DumpProducts();
// dump dump
// .run( .run(
// // false, sourcePath, workingDir.toString() + "/result", communityMapPath, OtherResearchProduct.class, // false, sourcePath, workingDir.toString() + "/result", communityMapPath, OtherResearchProduct.class,
// false, sourcePath, workingDir.toString() + "/result", map, OtherResearchProduct.class, false, sourcePath, workingDir.toString() + "/result", communityMapPath, OtherResearchProduct.class,
// CommunityResult.class, false); CommunityResult.class, false);
//
// final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
//
// JavaRDD<CommunityResult> tmp = sc JavaRDD<CommunityResult> tmp = sc
// .textFile(workingDir.toString() + "/result") .textFile(workingDir.toString() + "/result")
// .map(item -> OBJECT_MAPPER.readValue(item, CommunityResult.class)); .map(item -> OBJECT_MAPPER.readValue(item, CommunityResult.class));
//
// org.apache.spark.sql.Dataset<CommunityResult> verificationDataset = spark org.apache.spark.sql.Dataset<CommunityResult> verificationDataset = spark
// .createDataset(tmp.rdd(), Encoders.bean(CommunityResult.class)); .createDataset(tmp.rdd(), Encoders.bean(CommunityResult.class));
//
// Assertions.assertEquals(3, verificationDataset.count()); Assertions.assertEquals(3, verificationDataset.count());
//
// Assertions.assertEquals(3, verificationDataset.filter("type = 'other'").count()); Assertions.assertEquals(3, verificationDataset.filter("type = 'other'").count());
// verificationDataset.show(false); verificationDataset.show(false);
//
////TODO verify value and name of the fields for vocab related value (i.e. accessright, bestaccessright) //TODO verify value and name of the fields for vocab related value (i.e. accessright, bestaccessright)
//
// } }
//
// @Test @Test
// public void testRecord() { public void testRecord() {
// final String sourcePath = getClass() final String sourcePath = getClass()
// .getResource("/eu/dnetlib/dhp/oa/graph/dump/resultDump/singelRecord_pub.json") .getResource("/eu/dnetlib/dhp/oa/graph/dump/resultDump/singelRecord_pub.json")
// .getPath(); .getPath();
//
// final String communityMapPath = getClass() final String communityMapPath = getClass()
// .getResource("/eu/dnetlib/dhp/oa/graph/dump/communityMapPath/communitymap.json") .getResource("/eu/dnetlib/dhp/oa/graph/dump/communityMapPath/communitymap.json")
// .getPath(); .getPath();
//
// DumpProducts dump = new DumpProducts(); DumpProducts dump = new DumpProducts();
// dump dump
// .run( .run(
// // false, sourcePath, workingDir.toString() + "/result", communityMapPath, Publication.class, // false, sourcePath, workingDir.toString() + "/result", communityMapPath, Publication.class,
// false, sourcePath, workingDir.toString() + "/result", map, Publication.class, false, sourcePath, workingDir.toString() + "/result", communityMapPath, Publication.class,
// CommunityResult.class, false); CommunityResult.class, false);
//
// final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
//
// JavaRDD<CommunityResult> tmp = sc JavaRDD<CommunityResult> tmp = sc
// .textFile(workingDir.toString() + "/result") .textFile(workingDir.toString() + "/result")
// .map(item -> OBJECT_MAPPER.readValue(item, CommunityResult.class)); .map(item -> OBJECT_MAPPER.readValue(item, CommunityResult.class));
//
// org.apache.spark.sql.Dataset<CommunityResult> verificationDataset = spark org.apache.spark.sql.Dataset<CommunityResult> verificationDataset = spark
// .createDataset(tmp.rdd(), Encoders.bean(CommunityResult.class)); .createDataset(tmp.rdd(), Encoders.bean(CommunityResult.class));
//
// Assertions.assertEquals(2, verificationDataset.count()); Assertions.assertEquals(2, verificationDataset.count());
// verificationDataset.show(false); verificationDataset.show(false);
//
// Assertions.assertEquals(2, verificationDataset.filter("type = 'publication'").count()); Assertions.assertEquals(2, verificationDataset.filter("type = 'publication'").count());
//
// } }
} }