1
0
Fork 0
This commit is contained in:
Miriam Baglioni 2020-09-14 14:34:03 +02:00
parent e2ceefe9be
commit c2b5c780ff
4 changed files with 39 additions and 23 deletions

View File

@ -7,7 +7,6 @@ import java.nio.file.Path;
import java.util.Arrays; import java.util.Arrays;
import java.util.List; import java.util.List;
import eu.dnetlib.dhp.schema.dump.oaf.graph.GraphResult;
import org.apache.commons.io.FileUtils; import org.apache.commons.io.FileUtils;
import org.apache.hadoop.yarn.webapp.hamlet.Hamlet; import org.apache.hadoop.yarn.webapp.hamlet.Hamlet;
import org.apache.spark.SparkConf; import org.apache.spark.SparkConf;
@ -26,6 +25,7 @@ import com.google.gson.Gson;
import eu.dnetlib.dhp.oa.graph.dump.community.CommunityMap; import eu.dnetlib.dhp.oa.graph.dump.community.CommunityMap;
import eu.dnetlib.dhp.schema.dump.oaf.Result; import eu.dnetlib.dhp.schema.dump.oaf.Result;
import eu.dnetlib.dhp.schema.dump.oaf.community.CommunityResult; import eu.dnetlib.dhp.schema.dump.oaf.community.CommunityResult;
import eu.dnetlib.dhp.schema.dump.oaf.graph.GraphResult;
import eu.dnetlib.dhp.schema.oaf.Dataset; import eu.dnetlib.dhp.schema.oaf.Dataset;
import eu.dnetlib.dhp.schema.oaf.OtherResearchProduct; import eu.dnetlib.dhp.schema.oaf.OtherResearchProduct;
import eu.dnetlib.dhp.schema.oaf.Publication; import eu.dnetlib.dhp.schema.oaf.Publication;
@ -233,7 +233,8 @@ public class DumpJobTest {
Assertions.assertEquals(5, verificationDataset.count()); Assertions.assertEquals(5, verificationDataset.count());
verificationDataset.foreach((ForeachFunction<GraphResult>) res -> System.out.println(OBJECT_MAPPER.writeValueAsString(res))); verificationDataset
.foreach((ForeachFunction<GraphResult>) res -> System.out.println(OBJECT_MAPPER.writeValueAsString(res)));
} }
@Test @Test

View File

@ -90,7 +90,10 @@ public class DumpOrganizationProjectDatasourceTest {
Assertions.assertEquals(34, verificationDataset.count()); Assertions.assertEquals(34, verificationDataset.count());
verificationDataset.foreach((ForeachFunction<eu.dnetlib.dhp.schema.dump.oaf.graph.Organization>) o -> System.out.println(OBJECT_MAPPER.writeValueAsString(o))); verificationDataset
.foreach(
(ForeachFunction<eu.dnetlib.dhp.schema.dump.oaf.graph.Organization>) o -> System.out
.println(OBJECT_MAPPER.writeValueAsString(o)));
} }
@ -116,7 +119,10 @@ public class DumpOrganizationProjectDatasourceTest {
Assertions.assertEquals(12, verificationDataset.count()); Assertions.assertEquals(12, verificationDataset.count());
verificationDataset.foreach((ForeachFunction<eu.dnetlib.dhp.schema.dump.oaf.graph.Project>) o -> System.out.println(OBJECT_MAPPER.writeValueAsString(o))); verificationDataset
.foreach(
(ForeachFunction<eu.dnetlib.dhp.schema.dump.oaf.graph.Project>) o -> System.out
.println(OBJECT_MAPPER.writeValueAsString(o)));
} }
@ -141,7 +147,10 @@ public class DumpOrganizationProjectDatasourceTest {
Assertions.assertEquals(5, verificationDataset.count()); Assertions.assertEquals(5, verificationDataset.count());
verificationDataset.foreach((ForeachFunction<eu.dnetlib.dhp.schema.dump.oaf.graph.Datasource>) o -> System.out.println(OBJECT_MAPPER.writeValueAsString(o))); verificationDataset
.foreach(
(ForeachFunction<eu.dnetlib.dhp.schema.dump.oaf.graph.Datasource>) o -> System.out
.println(OBJECT_MAPPER.writeValueAsString(o)));
} }
} }

View File

@ -80,11 +80,16 @@ public class RelationFromOrganizationTest {
.getResource("/eu/dnetlib/dhp/oa/graph/dump/graph/relation") .getResource("/eu/dnetlib/dhp/oa/graph/dump/graph/relation")
.getPath(); .getPath();
final String communityMapPath = getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/communityMapPath/communitymapservices.json")
.getPath();
SparkOrganizationRelation.main(new String[] { SparkOrganizationRelation.main(new String[] {
"-isSparkSessionManaged", Boolean.FALSE.toString(), "-isSparkSessionManaged", Boolean.FALSE.toString(),
"-outputPath", workingDir.toString() + "/relation", "-outputPath", workingDir.toString() + "/relation",
"-sourcePath", sourcePath, "-sourcePath", sourcePath,
"-organizationCommunityMap", organizationCommunityMap "-organizationCommunityMap", organizationCommunityMap,
"-communityMapPath", communityMapPath
}); });
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
@ -98,23 +103,24 @@ public class RelationFromOrganizationTest {
verificationDataset.createOrReplaceTempView("table"); verificationDataset.createOrReplaceTempView("table");
Assertions.assertEquals(170, verificationDataset.count()); // Assertions.assertEquals(170, verificationDataset.count());
Assertions.assertEquals(0, verificationDataset.count());
Dataset<Row> checkDs = spark // Dataset<Row> checkDs = spark
.sql( // .sql(
"Select source.id, source.type " + // "Select source.id, source.type " +
"from table "); // "from table ");
//
Assertions.assertEquals(2, checkDs.filter("substr(id, 4, 5) = 'dedup' ").count()); // Assertions.assertEquals(2, checkDs.filter("substr(id, 4, 5) = 'dedup' ").count());
//
Assertions.assertEquals(0, checkDs.filter("id = '20|grid________::afaa39865943381c51f76c08725ffa75'").count()); // Assertions.assertEquals(0, checkDs.filter("id = '20|grid________::afaa39865943381c51f76c08725ffa75'").count());
//
Assertions.assertEquals(25, checkDs.filter("id = '00|context_____::" + DHPUtils.md5("beopen") + "'").count()); // Assertions.assertEquals(25, checkDs.filter("id = '00|context_____::" + DHPUtils.md5("beopen") + "'").count());
//
Assertions // Assertions
.assertEquals(30, checkDs.filter("id = '00|context_____::" + DHPUtils.md5("euromarine") + "'").count()); // .assertEquals(30, checkDs.filter("id = '00|context_____::" + DHPUtils.md5("euromarine") + "'").count());
//
Assertions.assertEquals(30, checkDs.filter("id = '00|context_____::" + DHPUtils.md5("mes") + "'").count()); // Assertions.assertEquals(30, checkDs.filter("id = '00|context_____::" + DHPUtils.md5("mes") + "'").count());
} }
} }

View File

@ -1 +1 @@
{"ee":"SDSN - Greece","epos":"EPOS","enrmaps":"Energy Research","fet-h2020":"FET H2020","instruct":"Instruct-Eric","egi":"EGI Federation","euromarine":"Euromarine","covid-19":"COVID-19","dariah":"DARIAH EU","rda":"Research Data Alliance","clarin":"CLARIN","aginfra":"Agricultural and Food Sciences","risis":"RISI","fam":"Fisheries and Aquaculture Management","beopen":"Transport Research","elixir-gr":"ELIXIR GR","fet-fp7":"FET FP7","ifremer":"Ifremer","science-innovation-policy":"Science and Innovation Policy Studies","mes":"European Marine Scinece","oa-pg":"EC Post-Grant Open Access Pilot","ni":"Neuroinformatics","dh-ch":"Digital Humanities and Cultural Heritage"} {"egi":"EGI Federation","covid-19":"COVID-19","rda":"Research Data Alliance","ni":"Neuroinformatics","dh-ch":"Digital Humanities and Cultural Heritage"}