This commit is contained in:
Miriam Baglioni 2020-12-14 13:34:12 +01:00
parent 818665451f
commit 5983f6ffc6
5 changed files with 36 additions and 4 deletions

View File

@ -211,13 +211,12 @@
<error to="Kill"/>
</action>
<join name="join_dump" to="End"/>
<join name="join_dump" to="populate_catalogue"/>
<action name="populate_catalogue">
<java>
<main-class>eu.dnetlib.dhp.oa.graph.dump.gcat.SendToCatalogue</main-class>
<arg>--hdfsPath</arg><arg>${workingDir}</arg>
<!-- <arg>&#45;&#45;hdfsPath</arg><arg>${hdfsPath}</arg>-->
<arg>--hdfsNameNode</arg><arg>${nameNode}</arg>
<arg>--gcattoken</arg><arg>${gcattoken}</arg>
<arg>--gcatBaseUrl</arg><arg>${gcatBaseUrl}</arg>

View File

@ -94,4 +94,35 @@ public class DumpJobTest {
}
/**
 * Runs {@code SparkDumpRISISCatalogue} on the {@code dataset_for_dump.json} fixture and checks
 * that the output written under {@code workingDir/result}, parsed line by line into
 * {@code CatalogueEntry} beans, contains exactly two records.
 */
@Test
public void testDataset() throws Exception {
	final String sourcePath = getClass()
		.getResource("/eu/dnetlib/dhp/oa/graph/dump/gcat/dataset_for_dump.json")
		.getPath();
	// Location the job writes to and the verification step reads back from.
	final String outputPath = workingDir.toString() + "/result";

	final String[] jobArgs = {
		"-isSparkSessionManaged", Boolean.FALSE.toString(),
		"-outputPath", outputPath,
		"-sourcePath", sourcePath,
		"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Dataset",
		"-communityName", "science-innovation-policy"
	};
	SparkDumpRISISCatalogue.main(jobArgs);

	// Read the dump back as text and deserialize each line into a CatalogueEntry.
	final JavaSparkContext javaContext = JavaSparkContext.fromSparkContext(spark.sparkContext());
	final JavaRDD<String> dumpedLines = javaContext.textFile(outputPath);
	final JavaRDD<eu.dnetlib.dhp.schema.dump.gcat.CatalogueEntry> entries = dumpedLines
		.map(line -> OBJECT_MAPPER.readValue(line, eu.dnetlib.dhp.schema.dump.gcat.CatalogueEntry.class));

	final org.apache.spark.sql.Dataset<eu.dnetlib.dhp.schema.dump.gcat.CatalogueEntry> catalogueEntries = spark
		.createDataset(entries.rdd(), Encoders.bean(eu.dnetlib.dhp.schema.dump.gcat.CatalogueEntry.class));

	Assertions.assertEquals(2, catalogueEntries.count());

	// Print the full (untruncated) rows for manual inspection.
	catalogueEntries.show(false);
	// Debugging aid: catalogueEntries.select("instance.type").show(false);
}
}

View File

@ -63,7 +63,7 @@ public class GCatAPIClientTest {
// and '_'.
// You can validate your name using the regular expression : ^[a-z0-9_\\-]{2,100}$
String json = IOUtils
.toString(getClass().getResourceAsStream("/eu/dnetlib/dhp/oa/graph/dump/gcat/gcat_test.json"));
.toString(getClass().getResourceAsStream("/eu/dnetlib/dhp/oa/graph/dump/gcat/gcat_dat_prova_20201130.json"));
System.out.println("Creating item...");
Assertions.assertEquals(HttpStatus.SC_CREATED, client.publish(json));
System.out.println("item created, now listing...");
@ -86,7 +86,7 @@ public class GCatAPIClientTest {
@Test
public void bulkPublishDATS() throws IOException, URISyntaxException {
BufferedReader reader = new BufferedReader(new FileReader(getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/gcat/gcat_dat.json")
.getResource("/eu/dnetlib/dhp/oa/graph/dump/gcat/dats_20201126")
.getPath()));
String line;

File diff suppressed because one or more lines are too long