From 5fb58362c58a5d77723f283142a90da924ac5a45 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Fri, 4 Aug 2023 17:18:15 +0200 Subject: [PATCH] moved parameter file. Added 40| as prefix on projects for computing the delta --- .../dhp/oa/graph/dump/complete/QueryInformationSystem.java | 5 ++++- .../graph/dump/projectssubset/ProjectsSubsetSparkJob.java | 6 +++++- .../{common => oa/graph/dump}/input_maketar_parameters.json | 0 3 files changed, 9 insertions(+), 2 deletions(-) rename dump/src/main/resources/eu/dnetlib/dhp/{common => oa/graph/dump}/input_maketar_parameters.json (100%) diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/QueryInformationSystem.java b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/QueryInformationSystem.java index d78370a..262fe1d 100644 --- a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/QueryInformationSystem.java +++ b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/QueryInformationSystem.java @@ -176,7 +176,10 @@ public class QueryInformationSystem { for (Object node : el.selectNodes(".//param")) { Node n = (Node) node; if (n.valueOf("./@name").equals("openaireId")) { - return prefix + "|" + n.getText(); + String id = n.getText(); + if (id.startsWith(prefix + "|")) + return id; + return prefix + "|" + id; } } diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/projectssubset/ProjectsSubsetSparkJob.java b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/projectssubset/ProjectsSubsetSparkJob.java index fb94eba..7f83d3c 100644 --- a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/projectssubset/ProjectsSubsetSparkJob.java +++ b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/projectssubset/ProjectsSubsetSparkJob.java @@ -58,7 +58,11 @@ public class ProjectsSubsetSparkJob implements Serializable { String projectListPath) { Dataset projectList = spark.read().textFile(projectListPath); Dataset projects; - projects = Utils.readPath(spark, inputPath, Project.class); + projects = Utils.readPath(spark, inputPath, Project.class) + .map((MapFunction) p -> { + p.setId("40|" + p.getId()); + return p; + }, Encoders.bean(Project.class)); projects .joinWith(projectList, projects.col("id").equalTo(projectList.col("value")), "left") .map((MapFunction, Project>) t2 -> { diff --git a/dump/src/main/resources/eu/dnetlib/dhp/common/input_maketar_parameters.json b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_maketar_parameters.json similarity index 100% rename from dump/src/main/resources/eu/dnetlib/dhp/common/input_maketar_parameters.json rename to dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_maketar_parameters.json