From 231ed85aa119d9e57a3eab4b6046cb055c59177e Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Mon, 8 Jan 2024 11:59:36 +0100 Subject: [PATCH] - --- .../oa/graph/dump/eosc/SelectEoscResultsJobStep1.java | 11 +++++++++++ .../dhp/oa/graph/dump/countryresults/job.properties | 3 +++ 2 files changed, 14 insertions(+) create mode 100644 dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/countryresults/job.properties diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/SelectEoscResultsJobStep1.java b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/SelectEoscResultsJobStep1.java index d7f0ece..be34abd 100644 --- a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/SelectEoscResultsJobStep1.java +++ b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/SelectEoscResultsJobStep1.java @@ -78,6 +78,17 @@ public class SelectEoscResultsJobStep1 implements Serializable { String inputPath, String outputPath, Class inputClazz, String communityMapPath, String eoscDatasourceIdsPath) { +// final StructType structureSchema = new StructType() +// .add("eoscId", DataTypes.StringType) +// .add("graphId", DataTypes.StringType) +// .add("graphName", DataTypes.StringType); +// +// // .fromDDL("`graphId`: STRING, `eoscId`:STRING"); +// org.apache.spark.sql.Dataset df = spark +// .read() +// .schema(structureSchema) +// .json(eoscDatasourceIdsPath); + List df = Utils .readPath(spark, eoscDatasourceIdsPath, MasterDuplicate.class) .collectAsList(); diff --git a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/countryresults/job.properties b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/countryresults/job.properties new file mode 100644 index 0000000..b2eab7a --- /dev/null +++ b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/countryresults/job.properties @@ -0,0 +1,3 @@ +sourcePath=/tmp/prod_provision/graph/20_graph_blacklisted +outputPath=/tmp/miriam/graph_dumps/country_PT +country=PT \ No newline at end of file