diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/QueryInformationSystem.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/QueryInformationSystem.java index 057c248a10..19e9a67612 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/QueryInformationSystem.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/QueryInformationSystem.java @@ -20,15 +20,14 @@ public class QueryInformationSystem { private static final String XQUERY = "for $x in collection('/db/DRIVER/ContextDSResources/ContextDSResourceType') " + " where $x//CONFIGURATION/context[./@type='community' or ./@type='ri'] " + - " and ($x//context/param[./@name = 'status']/text() = 'manager' or $x//context/param[./@name = 'status']/text() = 'all') " + + " and ($x//context/param[./@name = 'status']/text() = 'manager' or $x//context/param[./@name = 'status']/text() = 'all') " + + " return " + " " + "{$x//CONFIGURATION/context/@id}" + "{$x//CONFIGURATION/context/@label}" + ""; - - public CommunityMap getCommunityMap() throws ISLookUpException { return getMap(isLookUp.quickSearchProfile(XQUERY)); diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/SparkSplitForCommunity.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/SparkSplitForCommunity.java index 8efeacdeea..a1f0cdfdf8 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/SparkSplitForCommunity.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/SparkSplitForCommunity.java @@ -99,14 +99,15 @@ public class SparkSplitForCommunity implements Serializable { private static void printResult(String c, Dataset result, String outputPath) { Dataset community_products = result - .filter(r -> containsCommunity(r, c)); + .filter(r -> containsCommunity(r, c)); - if(community_products.count() > 0){ - community_products.repartition(1) - .write() - .option("compression", "gzip") - .mode(SaveMode.Overwrite) - .json(outputPath + "/" + c); + if (community_products.count() > 0) { + community_products + .repartition(1) + .write() + .option("compression", "gzip") + .mode(SaveMode.Overwrite) + .json(outputPath + "/" + c); } } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/oozie_app/workflow.xml index 908770f148..0790b2b44b 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/oozie_app/workflow.xml @@ -362,7 +362,7 @@ --outputPath${outputPath} --isLookUpUrl${isLookUpUrl} - + diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/DumpJobTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/DumpJobTest.java index d52be7056f..504090e89c 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/DumpJobTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/DumpJobTest.java @@ -63,8 +63,6 @@ public class DumpJobTest { } - - List communityMap = Arrays .asList( "", @@ -122,8 +120,6 @@ public class DumpJobTest { .getOrCreate(); } - - @AfterAll public static void afterAll() throws IOException { FileUtils.deleteDirectory(workingDir.toFile()); @@ -146,7 +142,6 @@ public class DumpJobTest { "-communityMap", new Gson().toJson(map) }); - final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); JavaRDD tmp = sc @@ -186,7 +181,6 @@ public class DumpJobTest { Assertions.assertTrue(verificationDataset.filter("type = 'dataset'").count() == 90); - //TODO verify value and name of the fields for vocab related value (i.e. accessright, bestaccessright) } @@ -275,7 +269,6 @@ public class DumpJobTest { "-communityMap", new Gson().toJson(map) }); - final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); JavaRDD tmp = sc @@ -294,7 +287,6 @@ public class DumpJobTest { } - @Test public void testRecord() throws Exception { final String sourcePath = getClass() diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/ZenodoUploadTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/ZenodoUploadTest.java index 57089355c5..4817d04dfa 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/ZenodoUploadTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/ZenodoUploadTest.java @@ -37,7 +37,6 @@ public class ZenodoUploadTest { .toString(); } - @Test public void HDFSConnection() throws IOException { CommunityMap communityMap = new CommunityMap(); @@ -58,7 +57,6 @@ public class ZenodoUploadTest { .getPath()), new Path(workingDir + "/zenodo/dh-ch/dh-ch")); - APIClient client = new APIClient(URL_STRING, ACCESS_TOKEN); client.connect(); @@ -88,7 +86,6 @@ public class ZenodoUploadTest { } - String metadata = "{\"metadata\":{\"access_right\":\"open\",\"communities\":[{\"identifier\":\"openaire-research-graph\"}],\"creators\":[{\"affiliation\":\"CNR - ISTI\",\"name\":\"Manghi, Paolo\",\"orcid\":\"0000-0001-7291-3210\"},{\"affiliation\":\"CNR - ISTI\",\"name\":\"Atzori, Claudio\",\"orcid\":\"0000-0001-9613-6639\"},{\"affiliation\":\"CNR - ISTI\",\"name\":\"Bardi, Alessia\",\"orcid\":\"0000-0002-1112-1292\"},{\"affiliation\":\"ISTI - CNR\",\"name\":\"Baglioni, Miriam\",\"orcid\":\"0000-0002-2273-9004\"},{\"affiliation\":\"University of Bielefeld\",\"name\":\"Shirrwagen, Jochen\"},{\"affiliation\":\"Athena Research and Innovation Centre\",\"name\":\"Dimitropoulos, Harry\"},{\"affiliation\":\"CNR - ISTI\",\"name\":\"La Bruzzo, Sandro\",\"orcid\":\"0000-0003-2855-1245\"},{\"affiliation\":\"Athena Research and Innovation Centre\",\"name\":\"Foufoulas, Ioannis\"},{\"affiliation\":\"University of Bielefeld\",\"name\":\"Löhden, Aenne\"},{\"affiliation\":\"University of Bielefeld\",\"name\":\"Bäcker, Amelie\",\"orcid\":\"0000-0001-6015-2063\"},{\"affiliation\":\"CNR - ISTI\",\"name\":\"Mannocci, Andrea\",\"orcid\":\"0000-0002-5193-7851\"},{\"affiliation\":\"University of Warsaw\",\"name\":\"Horst, Marek\"},{\"affiliation\":\"University of Bielefeld\",\"name\":\"Czerniak, Andreas\",\"orcid\":\"0000-0003-3883-4169\"},{\"affiliation\":\"Athena Research and Innovation Centre\",\"name\":\"Kiatropoulou, Katerina\"},{\"affiliation\":\"Athena Research and Innovation Centre\",\"name\":\"Kokogiannaki, Argiro\",\"orcid\":\"0000-0002-3880-0244\"},{\"affiliation\":\"CNR - ISTI\",\"name\":\"De Bonis, Michele\"},{\"affiliation\":\"CNR - ISTI\",\"name\":\"Artini, Michele\"},{\"affiliation\":\"CNR - ISTI\",\"name\":\"Ottonello, Enrico\"},{\"affiliation\":\"Athena Research and Innovation Centre\",\"name\":\"Lempesis, Antonis\"},{\"affiliation\":\"CERN\",\"name\":\"Ioannidis, Alexandros\"},{\"affiliation\":\"University of Bielefeld\",\"name\":\"Summan, Friedrich\"}],\"description\":\"\\u003cp\\u003eThis dataset contains dumps of the OpenAIRE Research Graph containing metadata records relevant for the research communities and initiatives collaborating with OpenAIRE\\u003c/p\\u003e. \\u003cp\\u003eEach dataset is a zip containing a file with one json per line. Each json is compliant to the schema available at XXXX\\u003c/p\\u003e Note that the file that is offered is not a typical json file: each line contains a separate, self-contained json object. For more information please see http://jsonlines.org\",\"grants\":[{\"id\":\"777541\"},{\"id\":\"824091\"},{\"id\":\"824323\"}],\"keywords\":[\"Open Science\",\"Scholarly Communication\",\"Information Science\"],\"language\":\"eng\",\"license\":\"CC-BY-4.0\",\"title\":\"OpenAIRE Research Graph: Dumps for research communities and initiatives.\",\"upload_type\":\"dataset\",\"version\":\"1.0\"}}"; System.out.println(client.sendMretadata(metadata)); @@ -97,5 +94,4 @@ public class ZenodoUploadTest { } - }