Merge branch 'master' into clean_context_master

2022-07-13 15:59:08 +02:00 · 2022-07-13 15:59:08 +02:00 · 2f0d3dffe3
parent ca373979eb 446699c59d
commit 2f0d3dffe3
36 changed files with 997 additions and 557 deletions
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/ZenodoAPIClient.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/ZenodoAPIClient.java
@ -255,7 +255,8 @@ public class ZenodoAPIClient implements Serializable {

 	private void setDepositionId(String concept_rec_id, Integer page) throws IOException, MissingConceptDoiException {

-		ZenodoModelList zenodoModelList = new Gson().fromJson(getPrevDepositions(String.valueOf(page)), ZenodoModelList.class);
+		ZenodoModelList zenodoModelList = new Gson()
+			.fromJson(getPrevDepositions(String.valueOf(page)), ZenodoModelList.class);

 		for (ZenodoModel zm : zenodoModelList) {
 			if (zm.getConceptrecid().equals(concept_rec_id)) {
@ -263,8 +264,9 @@ public class ZenodoAPIClient implements Serializable {
 				return;
 			}
 		}
-		if(zenodoModelList.size() == 0)
-			throw new MissingConceptDoiException("The concept record id specified was missing in the list of depositions");
+		if (zenodoModelList.size() == 0)
+			throw new MissingConceptDoiException(
+				"The concept record id specified was missing in the list of depositions");
 		setDepositionId(concept_rec_id, page + 1);

 	}
@ -278,11 +280,11 @@ public class ZenodoAPIClient implements Serializable {
 		String url = urlBuilder.build().toString();

 		Request request = new Request.Builder()
-				.url(url)
-				.addHeader(HttpHeaders.CONTENT_TYPE, ContentType.APPLICATION_JSON.toString()) // add request headers
-				.addHeader(HttpHeaders.AUTHORIZATION, "Bearer " + access_token)
-				.get()
-				.build();
+			.url(url)
+			.addHeader(HttpHeaders.CONTENT_TYPE, ContentType.APPLICATION_JSON.toString()) // add request headers
+			.addHeader(HttpHeaders.AUTHORIZATION, "Bearer " + access_token)
+			.get()
+			.build();

 		try (Response response = httpClient.newCall(request).execute()) {

@ -295,7 +297,6 @@ public class ZenodoAPIClient implements Serializable {

 	}

-
 	private String getBucket(String url) throws IOException {
 		OkHttpClient httpClient = new OkHttpClient.Builder()
 			.connectTimeout(600, TimeUnit.SECONDS)
--- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/transformation/TransformationJobTest.java
+++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/transformation/TransformationJobTest.java
@ -142,7 +142,8 @@ class TransformationJobTest extends AbstractVocabularyTest {

 	@Test
 	@DisplayName("Test TransformSparkJobNode.main with oaiOpenaire_datacite (v4)")
-	void transformTestITGv4OAIdatacite(@TempDir final Path testDir) throws Exception {
+	void transformTestITGv4OAIdatacite(@TempDir
+	final Path testDir) throws Exception {

 		try (SparkSession spark = SparkSession.builder().config(sparkConf).getOrCreate()) {

@ -152,7 +153,9 @@ class TransformationJobTest extends AbstractVocabularyTest {
 				.getFile();
 			final String mdstore_output = testDir.toString() + "/version";

-			mockupTrasformationRule("simpleTRule", "/eu/dnetlib/dhp/transform/scripts/xslt_cleaning_oaiOpenaire_datacite_ExchangeLandingpagePid.xsl");
+			mockupTrasformationRule(
+				"simpleTRule",
+				"/eu/dnetlib/dhp/transform/scripts/xslt_cleaning_oaiOpenaire_datacite_ExchangeLandingpagePid.xsl");

 			final Map<String, String> parameters = Stream.of(new String[][] {
 				{
@ -203,7 +206,8 @@ class TransformationJobTest extends AbstractVocabularyTest {

 	@Test
 	@DisplayName("Test TransformSparkJobNode.main")
-	void transformTest(@TempDir final Path testDir) throws Exception {
+	void transformTest(@TempDir
+	final Path testDir) throws Exception {

 		try (SparkSession spark = SparkSession.builder().config(sparkConf).getOrCreate()) {

--- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/SparkBulkTagJob.java
+++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/SparkBulkTagJob.java
@ -103,7 +103,7 @@ public class SparkBulkTagJob {
 		ResultTagger resultTagger = new ResultTagger();
 		readPath(spark, inputPath, resultClazz)
 			.map(patchResult(), Encoders.bean(resultClazz))
-				.filter(Objects::nonNull)
+			.filter(Objects::nonNull)
 			.map(
 				(MapFunction<R, R>) value -> resultTagger
 					.enrichContextCriteria(
--- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/SparkPrepareResultProject.java
+++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/SparkPrepareResultProject.java
@ -10,6 +10,7 @@ import java.util.stream.Collectors;

 import org.apache.commons.io.IOUtils;
 import org.apache.spark.SparkConf;
+import org.apache.spark.api.java.function.FilterFunction;
 import org.apache.spark.api.java.function.MapFunction;
 import org.apache.spark.api.java.function.MapGroupsFunction;
 import org.apache.spark.sql.Dataset;
@ -81,8 +82,9 @@ public class SparkPrepareResultProject implements Serializable {
 		Dataset<Relation> relation = Utils
 			.readPath(spark, inputPath + "/relation", Relation.class)
 			.filter(
-				"dataInfo.deletedbyinference = false and lower(relClass) = '"
-					+ ModelConstants.IS_PRODUCED_BY.toLowerCase() + "'");
+				(FilterFunction<Relation>) r -> !r.getDataInfo().getDeletedbyinference() &&
+					r.getRelClass().equalsIgnoreCase(ModelConstants.IS_PRODUCED_BY));
+
 		Dataset<eu.dnetlib.dhp.schema.oaf.Project> projects = Utils
 			.readPath(spark, inputPath + "/project", eu.dnetlib.dhp.schema.oaf.Project.class);

--- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/SparkDumpFunderResults.java
+++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/SparkDumpFunderResults.java
@ -7,17 +7,22 @@ import java.io.Serializable;
 import java.util.List;
 import java.util.Objects;
 import java.util.Optional;
+import java.util.stream.Collectors;

 import org.apache.commons.io.IOUtils;
 import org.apache.spark.SparkConf;
+import org.apache.spark.api.java.function.FlatMapFunction;
+import org.apache.spark.api.java.function.ForeachFunction;
 import org.apache.spark.api.java.function.MapFunction;
 import org.apache.spark.sql.*;
+import org.jetbrains.annotations.NotNull;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;

 import eu.dnetlib.dhp.application.ArgumentApplicationParser;
 import eu.dnetlib.dhp.oa.graph.dump.Utils;
 import eu.dnetlib.dhp.schema.dump.oaf.community.CommunityResult;
+import eu.dnetlib.dhp.schema.dump.oaf.community.Funder;
 import eu.dnetlib.dhp.schema.dump.oaf.community.Project;

 /**
@ -33,87 +38,83 @@ public class SparkDumpFunderResults implements Serializable {
 				SparkDumpFunderResults.class
 					.getResourceAsStream(
 						"/eu/dnetlib/dhp/oa/graph/dump/funder_result_parameters.json"));
-
 		final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
 		parser.parseArgument(args);
-
 		Boolean isSparkSessionManaged = Optional
 			.ofNullable(parser.get("isSparkSessionManaged"))
 			.map(Boolean::valueOf)
 			.orElse(Boolean.TRUE);
 		log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
-
 		final String inputPath = parser.get("sourcePath");
 		log.info("inputPath: {}", inputPath);
-
 		final String outputPath = parser.get("outputPath");
 		log.info("outputPath: {}", outputPath);
-
-		final String graphPath = parser.get("graphPath");
-		log.info("relationPath: {}", graphPath);
-
 		SparkConf conf = new SparkConf();
-
 		runWithSparkSession(
 			conf,
 			isSparkSessionManaged,
 			spark -> {
 				Utils.removeOutputDir(spark, outputPath);
-				writeResultProjectList(spark, inputPath, outputPath, graphPath);
+				writeResultProjectList(spark, inputPath, outputPath);
 			});
 	}

-	private static void writeResultProjectList(SparkSession spark, String inputPath, String outputPath,
-		String graphPath) {
-
-		Dataset<eu.dnetlib.dhp.schema.oaf.Project> project = Utils
-			.readPath(spark, graphPath + "/project", eu.dnetlib.dhp.schema.oaf.Project.class);
-
+	private static void writeResultProjectList(SparkSession spark, String inputPath, String outputPath) {
 		Dataset<CommunityResult> result = Utils
 			.readPath(spark, inputPath + "/publication", CommunityResult.class)
 			.union(Utils.readPath(spark, inputPath + "/dataset", CommunityResult.class))
-			.union(Utils.readPath(spark, inputPath + "/orp", CommunityResult.class))
+			.union(Utils.readPath(spark, inputPath + "/otherresearchproduct", CommunityResult.class))
 			.union(Utils.readPath(spark, inputPath + "/software", CommunityResult.class));
-
-		List<String> funderList = project
-			.select("id")
-			.map((MapFunction<Row, String>) value -> value.getString(0).substring(0, 15), Encoders.STRING())
-			.distinct()
-			.collectAsList();
-
+		log.info("Number of result {}", result.count());
+		Dataset<String> tmp = result
+			.flatMap((FlatMapFunction<CommunityResult, String>) cr -> cr.getProjects().stream().map(p -> {
+				return getFunderName(p);
+			}).collect(Collectors.toList()).iterator(), Encoders.STRING())
+			.distinct();
+		List<String> funderList = tmp.collectAsList();
 		funderList.forEach(funder -> {
-			String fundernsp = funder.substring(3);
-			String funderdump;
-			if (fundernsp.startsWith("corda")) {
-				funderdump = "EC_";
-				if (fundernsp.endsWith("h2020")) {
-					funderdump += "H2020";
-				} else {
-					funderdump += "FP7";
-				}
-			} else {
-				funderdump = fundernsp.substring(0, fundernsp.indexOf("_")).toUpperCase();
-			}
-			writeFunderResult(funder, result, outputPath, funderdump);
+			dumpResults(funder, result, outputPath);
 		});
-
 	}

-	private static void dumpResults(String nsp, Dataset<CommunityResult> results, String outputPath,
-		String funderName) {
+	@NotNull
+	private static String getFunderName(Project p) {
+		Optional<Funder> ofunder = Optional.ofNullable(p.getFunder());
+		if (ofunder.isPresent()) {
+			String fName = ofunder.get().getShortName();
+			if (fName.equalsIgnoreCase("ec")) {
+				fName += "_" + ofunder.get().getFundingStream();
+			}
+			return fName;
+		} else {
+			String fName = p.getId().substring(3, p.getId().indexOf("_")).toUpperCase();
+			if (fName.equalsIgnoreCase("ec")) {
+				if (p.getId().contains("h2020")) {
+					fName += "_H2020";
+				} else {
+					fName += "_FP7";
+				}
+			} else if (fName.equalsIgnoreCase("conicytf")) {
+				fName = "CONICYT";
+			} else if (fName.equalsIgnoreCase("dfgf")) {
+				fName = "DFG";
+			} else if (fName.equalsIgnoreCase("tubitakf")) {
+				fName = "TUBITAK";
+			} else if (fName.equalsIgnoreCase("euenvagency")) {
+				fName = "EEA";
+			}
+			return fName;
+		}
+	}

+	private static void dumpResults(String funder, Dataset<CommunityResult> results, String outputPath) {
 		results.map((MapFunction<CommunityResult, CommunityResult>) r -> {
 			if (!Optional.ofNullable(r.getProjects()).isPresent()) {
 				return null;
 			}
 			for (Project p : r.getProjects()) {
-				if (p.getId().startsWith(nsp)) {
-					if (nsp.startsWith("40|irb")) {
-						if (p.getFunder().getShortName().equals(funderName))
-							return r;
-						else
-							return null;
-					}
+				String fName = getFunderName(p);
+				if (fName.equalsIgnoreCase(funder)) {
 					return r;
 				}
 			}
@ -123,18 +124,6 @@ public class SparkDumpFunderResults implements Serializable {
 			.write()
 			.mode(SaveMode.Overwrite)
 			.option("compression", "gzip")
-			.json(outputPath + "/" + funderName);
+			.json(outputPath + "/" + funder);
 	}
-
-	private static void writeFunderResult(String funder, Dataset<CommunityResult> results, String outputPath,
-		String funderDump) {
-
-		if (funder.startsWith("40|irb")) {
-			dumpResults(funder, results, outputPath, "HRZZ");
-			dumpResults(funder, results, outputPath, "MZOS");
-		} else
-			dumpResults(funder, results, outputPath, funderDump);
-
-	}
-
 }
--- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/SparkResultLinkedToProject.java
+++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/SparkResultLinkedToProject.java
@ -5,9 +5,12 @@ import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;

 import java.io.Serializable;
 import java.util.Optional;
+import java.util.stream.Collectors;

 import org.apache.commons.io.IOUtils;
 import org.apache.spark.SparkConf;
+import org.apache.spark.api.java.function.FilterFunction;
+import org.apache.spark.api.java.function.FlatMapFunction;
 import org.apache.spark.api.java.function.MapFunction;
 import org.apache.spark.api.java.function.MapGroupsFunction;
 import org.apache.spark.sql.Dataset;
@ -18,11 +21,18 @@ import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;

 import eu.dnetlib.dhp.application.ArgumentApplicationParser;
+import eu.dnetlib.dhp.oa.graph.dump.Constants;
+import eu.dnetlib.dhp.oa.graph.dump.DumpProducts;
+import eu.dnetlib.dhp.oa.graph.dump.ResultMapper;
 import eu.dnetlib.dhp.oa.graph.dump.Utils;
+import eu.dnetlib.dhp.oa.graph.dump.community.CommunityMap;
+import eu.dnetlib.dhp.oa.graph.dump.community.ResultProject;
 import eu.dnetlib.dhp.schema.common.ModelConstants;
+import eu.dnetlib.dhp.schema.dump.oaf.community.CommunityResult;
 import eu.dnetlib.dhp.schema.oaf.Project;
 import eu.dnetlib.dhp.schema.oaf.Relation;
 import eu.dnetlib.dhp.schema.oaf.Result;
+import scala.Tuple2;

 /**
 * Selects the results linked to projects. Only for these results the dump will be performed.
@ -58,8 +68,10 @@ public class SparkResultLinkedToProject implements Serializable {
 		final String resultClassName = parser.get("resultTableName");
 		log.info("resultTableName: {}", resultClassName);

-		final String graphPath = parser.get("graphPath");
-		log.info("graphPath: {}", graphPath);
+		final String resultProjectsPath = parser.get("graphPath");
+		log.info("graphPath: {}", resultProjectsPath);
+
+		String communityMapPath = parser.get("communityMapPath");

 		@SuppressWarnings("unchecked")
 		Class<? extends Result> inputClazz = (Class<? extends Result>) Class.forName(resultClassName);
@ -70,43 +82,33 @@ public class SparkResultLinkedToProject implements Serializable {
 			isSparkSessionManaged,
 			spark -> {
 				Utils.removeOutputDir(spark, outputPath);
-				writeResultsLinkedToProjects(spark, inputClazz, inputPath, outputPath, graphPath);
+				writeResultsLinkedToProjects(
+					communityMapPath, spark, inputClazz, inputPath, outputPath, resultProjectsPath);
 			});
 	}

-	private static <R extends Result> void writeResultsLinkedToProjects(SparkSession spark, Class<R> inputClazz,
-		String inputPath, String outputPath, String graphPath) {
+	private static <R extends Result> void writeResultsLinkedToProjects(String communityMapPath, SparkSession spark,
+		Class<R> inputClazz,
+		String inputPath, String outputPath, String resultProjectsPath) {

 		Dataset<R> results = Utils
 			.readPath(spark, inputPath, inputClazz)
-			.filter("dataInfo.deletedbyinference = false and datainfo.invisible = false");
-		Dataset<Relation> relations = Utils
-			.readPath(spark, graphPath + "/relation", Relation.class)
 			.filter(
-				"dataInfo.deletedbyinference = false and lower(relClass) = '"
-					+ ModelConstants.IS_PRODUCED_BY.toLowerCase() + "'");
-		Dataset<Project> project = Utils.readPath(spark, graphPath + "/project", Project.class);
-
-		results.createOrReplaceTempView("result");
-		relations.createOrReplaceTempView("relation");
-		project.createOrReplaceTempView("project");
-
-		Dataset<R> tmp = spark
-			.sql(
-				"Select res.* " +
-					"from relation rel " +
-					"join result res " +
-					"on rel.source = res.id " +
-					"join project p " +
-					"on rel.target = p.id " +
-					"")
-			.as(Encoders.bean(inputClazz));
-		tmp
-			.groupByKey(
-				(MapFunction<R, String>) value -> value
-					.getId(),
-				Encoders.STRING())
-			.mapGroups((MapGroupsFunction<String, R, R>) (k, it) -> it.next(), Encoders.bean(inputClazz))
+				(FilterFunction<R>) r -> !r.getDataInfo().getDeletedbyinference() &&
+					!r.getDataInfo().getInvisible());
+		Dataset<ResultProject> resultProjectDataset = Utils
+			.readPath(spark, resultProjectsPath, ResultProject.class);
+		CommunityMap communityMap = Utils.getCommunityMap(spark, communityMapPath);
+		results
+			.joinWith(resultProjectDataset, results.col("id").equalTo(resultProjectDataset.col("resultId")))
+			.map((MapFunction<Tuple2<R, ResultProject>, CommunityResult>) t2 -> {
+				CommunityResult cr = (CommunityResult) ResultMapper
+					.map(
+						t2._1(),
+						communityMap, Constants.DUMPTYPE.FUNDER.getType());
+				cr.setProjects(t2._2().getProjectsList());
+				return cr;
+			}, Encoders.bean(CommunityResult.class))
 			.write()
 			.mode(SaveMode.Overwrite)
 			.option("compression", "gzip")
--- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/projectssubset/ProjectsSubsetSparkJob.java
+++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/projectssubset/ProjectsSubsetSparkJob.java
@ -0,0 +1,82 @@
+
+package eu.dnetlib.dhp.oa.graph.dump.projectssubset;
+
+import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
+
+import java.io.Serializable;
+import java.util.Objects;
+import java.util.Optional;
+
+import org.apache.commons.io.IOUtils;
+import org.apache.spark.SparkConf;
+import org.apache.spark.api.java.function.MapFunction;
+import org.apache.spark.sql.Dataset;
+import org.apache.spark.sql.Encoders;
+import org.apache.spark.sql.SaveMode;
+import org.apache.spark.sql.SparkSession;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import eu.dnetlib.dhp.application.ArgumentApplicationParser;
+import eu.dnetlib.dhp.oa.graph.dump.Utils;
+import eu.dnetlib.dhp.schema.dump.oaf.graph.Project;
+import scala.Tuple2;
+
+/**
+ * Spark job that writes the projects whose id is not yet present in a project list and then appends the ids
+ * of the written projects to that list.
+ */
+public class ProjectsSubsetSparkJob implements Serializable {
+	private static final Logger log = LoggerFactory.getLogger(ProjectsSubsetSparkJob.class);
+
+	/**
+	 * Parses the arguments described in project_subset_parameters.json, clears the output directory and
+	 * runs the selection of the new projects.
+	 *
+	 * @param args the command line arguments of the job
+	 * @throws Exception if reading the parameter description, parsing the arguments or the Spark job fails
+	 */
+	public static void main(String[] args) throws Exception {
+		final ArgumentApplicationParser parser = new ArgumentApplicationParser(
+			IOUtils
+				.toString(
+					ProjectsSubsetSparkJob.class
+						.getResourceAsStream(
+							"/eu/dnetlib/dhp/oa/graph/dump/project_subset_parameters.json")));
+		parser.parseArgument(args);
+
+		// the session is considered managed unless the argument says otherwise
+		final String managedArg = parser.get("isSparkSessionManaged");
+		final Boolean isSparkSessionManaged = managedArg == null ? Boolean.TRUE : Boolean.valueOf(managedArg);
+		log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
+
+		final String inputPath = parser.get("sourcePath");
+		log.info("inputPath: {}", inputPath);
+
+		final String outputPath = parser.get("outputPath");
+		log.info("outputPath: {}", outputPath);
+
+		final String projectListPath = parser.get("projectListPath");
+		log.info("projectListPath: {}", projectListPath);
+
+		runWithSparkSession(
+			new SparkConf(),
+			isSparkSessionManaged,
+			spark -> {
+				Utils.removeOutputDir(spark, outputPath);
+				getNewProjectList(spark, inputPath, outputPath, projectListPath);
+			});
+	}
+
+	/**
+	 * Writes to outputPath the projects read from inputPath whose id has no match in the list stored at
+	 * projectListPath, then appends the ids of the written projects to that list.
+	 *
+	 * @param spark the Spark session
+	 * @param inputPath the path the projects are read from
+	 * @param outputPath the path the unmatched projects are written to as gzip-compressed JSON
+	 * @param projectListPath the path of the text list of project ids, extended with the new ids
+	 */
+	private static void getNewProjectList(SparkSession spark, String inputPath, String outputPath,
+		String projectListPath) {
+		final Dataset<String> listedIds = spark.read().textFile(projectListPath);
+		final Dataset<Project> allProjects = Utils.readPath(spark, inputPath, Project.class);
+
+		// left join: a project without a matching id in the list has a null right side and is kept
+		allProjects
+			.joinWith(listedIds, allProjects.col("id").equalTo(listedIds.col("value")), "left")
+			.map(
+				(MapFunction<Tuple2<Project, String>, Project>) pair -> pair._2() == null ? pair._1() : null,
+				Encoders.bean(Project.class))
+			.filter(Objects::nonNull)
+			.write()
+			.mode(SaveMode.Overwrite)
+			.option("compression", "gzip")
+			.json(outputPath);
+
+		// the ids are taken from what was just written, so only the new projects extend the list
+		final Dataset<String> newIds = Utils
+			.readPath(spark, outputPath, Project.class)
+			.map((MapFunction<Project, String>) Project::getId, Encoders.STRING());
+		newIds
+			.write()
+			.mode(SaveMode.Append)
+			.option("compression", "gzip")
+			.text(projectListPath);
+	}
+}
--- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/funder_result_parameters.json
+++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/funder_result_parameters.json
@ -17,10 +17,10 @@
 		"paramDescription": "true if the spark session is managed, false otherwise",
 		"paramRequired": false
 	},
-{
-	"paramName": "gp",
-	"paramLongName": "graphPath",
-	"paramDescription": "the relationPath",
-	"paramRequired": true
-}
+	{
+		"paramName": "gp",
+		"paramLongName": "graphPath",
+		"paramDescription": "the relationPath",
+		"paramRequired": false
+	}
 ]
--- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_parameter_select_relation.json
+++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_parameter_select_relation.json
@ -0,0 +1,20 @@
+[
+  {
+    "paramName":"s",
+    "paramLongName":"sourcePath",
+    "paramDescription": "the path of the sequential file to read",
+    "paramRequired": true
+  },
+  {
+    "paramName": "out",
+    "paramLongName": "outputPath",
+    "paramDescription": "the path used to store temporary output files",
+    "paramRequired": true
+  },
+  {
+    "paramName": "ssm",
+    "paramLongName": "isSparkSessionManaged",
+    "paramDescription": "true if the spark session is managed, false otherwise",
+    "paramRequired": false
+  }
+]
--- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_parameters_link_prj.json
+++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_parameters_link_prj.json
@ -28,6 +28,12 @@
 		"paramLongName":"graphPath",
 		"paramDescription": "the path to the relations",
 		"paramRequired": true
+	},
+	{
+		"paramName":"cmp",
+		"paramLongName":"communityMapPath",
+		"paramDescription": "the path to the community map",
+		"paramRequired": true
 	}
 ]

--- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/project_subset_parameters.json
+++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/project_subset_parameters.json
@ -0,0 +1,27 @@
+[
+  {
+    "paramName":"s",
+    "paramLongName":"sourcePath",
+    "paramDescription": "the path of the sequential file to read",
+    "paramRequired": true
+  },
+  {
+    "paramName": "out",
+    "paramLongName": "outputPath",
+    "paramDescription": "the path used to store temporary output files",
+    "paramRequired": true
+  },
+  {
+    "paramName": "ssm",
+    "paramLongName": "isSparkSessionManaged",
+    "paramDescription": "true if the spark session is managed, false otherwise",
+    "paramRequired": false
+  },
+  {
+    "paramName": "pl",
+    "paramLongName": "projectListPath",
+    "paramDescription": "the path of the association result projectlist",
+    "paramRequired": true
+  }
+]
+
--- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/commoncommunityfunder/oozie_app/config-default.xml
+++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/commoncommunityfunder/oozie_app/config-default.xml
@ -27,4 +27,4 @@
        <name>oozie.launcher.mapreduce.user.classpath.first</name>
        <value>true</value>
    </property>
-</configuration>
+</configuration>
--- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/projectsubset/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/projectsubset/oozie_app/workflow.xml
@ -0,0 +1,171 @@
+<workflow-app name="dump_graph" xmlns="uri:oozie:workflow:0.5">
+    <parameters>
+        <property>
+            <name>sourcePath</name>
+            <description>the source path</description>
+        </property>
+        <property>
+            <name>projectListPath</name>
+            <description>the path to the project list</description>
+        </property>
+        <property>
+            <name>outputPath</name>
+            <description>the output path</description>
+        </property>
+        <property>
+            <name>accessToken</name>
+            <description>the access token used for the deposition in Zenodo</description>
+        </property>
+        <property>
+            <name>connectionUrl</name>
+            <description>the connection url for Zenodo</description>
+        </property>
+        <property>
+            <name>metadata</name>
+            <description>the metadata associated with the deposition</description>
+        </property>
+        <property>
+            <name>depositionType</name>
+            <description>the type of deposition we want to perform. "new" for brand new deposition, "version" for a new version of a published deposition (in this case the concept record id must be provided), "upload" to upload content to an open deposition for which we already have the deposition id (in this case the deposition id should be provided)</description>
+        </property>
+        <property>
+            <name>conceptRecordId</name>
+            <description>for new version, the id of the record for the old deposition</description>
+        </property>
+        <property>
+            <name>depositionId</name>
+            <description>the depositionId of an open deposition to which content has to be added</description>
+        </property>
+        <property>
+            <name>sparkDriverMemory</name>
+            <description>memory for driver process</description>
+        </property>
+        <property>
+            <name>sparkExecutorMemory</name>
+            <description>memory for individual executor</description>
+        </property>
+        <property>
+            <name>sparkExecutorCores</name>
+            <description>number of cores used by single executor</description>
+        </property>
+        <property>
+            <name>oozieActionShareLibForSpark2</name>
+            <description>oozie action sharelib for spark 2.*</description>
+        </property>
+        <property>
+            <name>spark2ExtraListeners</name>
+            <value>com.cloudera.spark.lineage.NavigatorAppListener</value>
+            <description>spark 2.* extra listeners classname</description>
+        </property>
+        <property>
+            <name>spark2SqlQueryExecutionListeners</name>
+            <value>com.cloudera.spark.lineage.NavigatorQueryListener</value>
+            <description>spark 2.* sql query execution listeners classname</description>
+        </property>
+        <property>
+            <name>spark2YarnHistoryServerAddress</name>
+            <description>spark 2.* yarn history server address</description>
+        </property>
+        <property>
+            <name>spark2EventLogDir</name>
+            <description>spark 2.* event log dir location</description>
+        </property>
+    </parameters>
+    <global>
+        <job-tracker>${jobTracker}</job-tracker>
+        <name-node>${nameNode}</name-node>
+        <configuration>
+            <property>
+                <name>mapreduce.job.queuename</name>
+                <value>${queueName}</value>
+            </property>
+            <property>
+                <name>oozie.launcher.mapred.job.queue.name</name>
+                <value>${oozieLauncherQueueName}</value>
+            </property>
+            <property>
+                <name>oozie.action.sharelib.for.spark</name>
+                <value>${oozieActionShareLibForSpark2}</value>
+            </property>
+        </configuration>
+    </global>
+    <start to="dump_project"/>
+    <kill name="Kill">
+        <message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
+    </kill>
+    <action name="dump_project">
+        <spark xmlns="uri:oozie:spark-action:0.2">
+            <master>yarn</master>
+            <mode>cluster</mode>
+            <name>Dump table project </name>
+            <class>eu.dnetlib.dhp.oa.graph.dump.complete.SparkDumpEntitiesJob</class>
+            <jar>dhp-graph-mapper-${projectVersion}.jar</jar>
+            <spark-opts>
+                --executor-memory=${sparkExecutorMemory}
+                --executor-cores=${sparkExecutorCores}
+                --driver-memory=${sparkDriverMemory}
+                --conf spark.extraListeners=${spark2ExtraListeners}
+                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
+                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
+                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
+                --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
+            </spark-opts>
+            <arg>--sourcePath</arg><arg>${sourcePath}/project</arg>
+            <arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Project</arg>
+            <arg>--outputPath</arg><arg>${workingDir}/project</arg>
+            <arg>--communityMapPath</arg><arg>noneed</arg>
+        </spark>
+        <ok to="get_new_projects"/>
+        <error to="Kill"/>
+    </action>
+    <action name="get_new_projects">
+        <spark xmlns="uri:oozie:spark-action:0.2">
+            <master>yarn</master>
+            <mode>cluster</mode>
+            <name>Dump table project </name>
+            <class>eu.dnetlib.dhp.oa.graph.dump.projectssubset.ProjectsSubsetSparkJob</class>
+            <jar>dhp-graph-mapper-${projectVersion}.jar</jar>
+            <spark-opts>
+                --executor-memory=${sparkExecutorMemory}
+                --executor-cores=${sparkExecutorCores}
+                --driver-memory=${sparkDriverMemory}
+                --conf spark.extraListeners=${spark2ExtraListeners}
+                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
+                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
+                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
+                --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
+            </spark-opts>
+            <arg>--sourcePath</arg><arg>${workingDir}/project</arg>
+            <arg>--outputPath</arg><arg>${workingDir}/tar/project</arg>
+            <arg>--projectListPath</arg><arg>${projectListPath}</arg>
+        </spark>
+        <ok to="make_archive"/>
+        <error to="Kill"/>
+    </action>
+    <action name="make_archive">
+        <java>
+            <main-class>eu.dnetlib.dhp.oa.graph.dump.MakeTar</main-class>
+            <arg>--hdfsPath</arg><arg>${outputPath}</arg>
+            <arg>--nameNode</arg><arg>${nameNode}</arg>
+            <arg>--sourcePath</arg><arg>${workingDir}/tar</arg>
+        </java>
+        <ok to="send_zenodo"/>
+        <error to="Kill"/>
+    </action>
+    <action name="send_zenodo">
+        <java>
+            <main-class>eu.dnetlib.dhp.oa.graph.dump.SendToZenodoHDFS</main-class>
+            <arg>--hdfsPath</arg><arg>${outputPath}</arg>
+            <arg>--nameNode</arg><arg>${nameNode}</arg>
+            <arg>--accessToken</arg><arg>${accessToken}</arg>
+            <arg>--connectionUrl</arg><arg>${connectionUrl}</arg>
+            <arg>--metadata</arg><arg>${metadata}</arg>
+            <arg>--conceptRecordId</arg><arg>${conceptRecordId}</arg>
+            <arg>--depositionType</arg><arg>${depositionType}</arg>
+            <arg>--depositionId</arg><arg>${depositionId}</arg>
+        </java>
+        <ok to="End"/>
+        <error to="Kill"/>
+    </action>
+    <end name="End"/>
+</workflow-app>
--- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/commoncommunityfunder/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/commoncommunityfunder/oozie_app/workflow.xml
@ -1,347 +0,0 @@
-<workflow-app name="sub_dump_community_funder_results" xmlns="uri:oozie:workflow:0.5">
-    <parameters>
-
-        <property>
-            <name>sourcePath</name>
-            <description>the source path</description>
-        </property>
-        <property>
-            <name>outputPath</name>
-            <description>the output path</description>
-        </property>
-        <property>
-            <name>communityMapPath</name>
-            <description>the path to the community map</description>
-        </property>
-        <property>
-            <name>selectedResults</name>
-            <description>the path the the possible subset ot results to be dumped</description>
-        </property>
-        <property>
-            <name>hiveDbName</name>
-            <description>the target hive database name</description>
-        </property>
-        <property>
-            <name>hiveJdbcUrl</name>
-            <description>hive server jdbc url</description>
-        </property>
-        <property>
-            <name>hiveMetastoreUris</name>
-            <description>hive server metastore URIs</description>
-        </property>
-        <property>
-            <name>sparkDriverMemory</name>
-            <description>memory for driver process</description>
-        </property>
-        <property>
-            <name>sparkExecutorMemory</name>
-            <description>memory for individual executor</description>
-        </property>
-        <property>
-            <name>sparkExecutorCores</name>
-            <description>number of cores used by single executor</description>
-        </property>
-        <property>
-            <name>oozieActionShareLibForSpark2</name>
-            <description>oozie action sharelib for spark 2.*</description>
-        </property>
-        <property>
-            <name>spark2ExtraListeners</name>
-            <value>com.cloudera.spark.lineage.NavigatorAppListener</value>
-            <description>spark 2.* extra listeners classname</description>
-        </property>
-        <property>
-            <name>spark2SqlQueryExecutionListeners</name>
-            <value>com.cloudera.spark.lineage.NavigatorQueryListener</value>
-            <description>spark 2.* sql query execution listeners classname</description>
-        </property>
-        <property>
-            <name>spark2YarnHistoryServerAddress</name>
-            <description>spark 2.* yarn history server address</description>
-        </property>
-        <property>
-            <name>spark2EventLogDir</name>
-            <description>spark 2.* event log dir location</description>
-        </property>
-    </parameters>
-
-    <global>
-        <job-tracker>${jobTracker}</job-tracker>
-        <name-node>${nameNode}</name-node>
-        <configuration>
-            <property>
-                <name>mapreduce.job.queuename</name>
-                <value>${queueName}</value>
-            </property>
-            <property>
-                <name>oozie.launcher.mapred.job.queue.name</name>
-                <value>${oozieLauncherQueueName}</value>
-            </property>
-            <property>
-                <name>oozie.action.sharelib.for.spark</name>
-                <value>${oozieActionShareLibForSpark2}</value>
-            </property>
-
-        </configuration>
-    </global>
-
-    <start to="fork_dump"/>
-
-    <kill name="Kill">
-        <message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
-    </kill>
-
-
-
-
-    <fork name="fork_dump">
-        <path start="dump_publication"/>
-        <path start="dump_dataset"/>
-        <path start="dump_orp"/>
-        <path start="dump_software"/>
-    </fork>
-
-    <action name="dump_publication">
-        <spark xmlns="uri:oozie:spark-action:0.2">
-            <master>yarn</master>
-            <mode>cluster</mode>
-            <name>Dump table publication for community/funder related products</name>
-            <class>eu.dnetlib.dhp.oa.graph.dump.community.SparkDumpCommunityProducts</class>
-            <jar>dhp-graph-mapper-${projectVersion}.jar</jar>
-            <spark-opts>
-                --executor-memory=${sparkExecutorMemory}
-                --executor-cores=${sparkExecutorCores}
-                --driver-memory=${sparkDriverMemory}
-                --conf spark.extraListeners=${spark2ExtraListeners}
-                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
-                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
-                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
-                --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
-            </spark-opts>
-            <arg>--sourcePath</arg><arg>${selectedResults}/publication</arg>
-            <arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Publication</arg>
-            <arg>--outputPath</arg><arg>${workingDir}/dump/publication</arg>
-            <arg>--communityMapPath</arg><arg>${communityMapPath}</arg>
-            <arg>--dumpType</arg><arg>${dumpType}</arg>
-        </spark>
-        <ok to="join_dump"/>
-        <error to="Kill"/>
-    </action>
-
-    <action name="dump_dataset">
-        <spark xmlns="uri:oozie:spark-action:0.2">
-            <master>yarn</master>
-            <mode>cluster</mode>
-            <name>Dump table dataset for community/funder related products</name>
-            <class>eu.dnetlib.dhp.oa.graph.dump.community.SparkDumpCommunityProducts</class>
-            <jar>dhp-graph-mapper-${projectVersion}.jar</jar>
-            <spark-opts>
-                --executor-memory=${sparkExecutorMemory}
-                --executor-cores=${sparkExecutorCores}
-                --driver-memory=${sparkDriverMemory}
-                --conf spark.extraListeners=${spark2ExtraListeners}
-                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
-                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
-                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
-                --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
-            </spark-opts>
-            <arg>--sourcePath</arg><arg>${selectedResults}/dataset</arg>
-            <arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Dataset</arg>
-            <arg>--outputPath</arg><arg>${workingDir}/dump/dataset</arg>
-            <arg>--communityMapPath</arg><arg>${communityMapPath}</arg>
-        </spark>
-        <ok to="join_dump"/>
-        <error to="Kill"/>
-    </action>
-
-    <action name="dump_orp">
-        <spark xmlns="uri:oozie:spark-action:0.2">
-            <master>yarn</master>
-            <mode>cluster</mode>
-            <name>Dump table ORP for community related products</name>
-            <class>eu.dnetlib.dhp.oa.graph.dump.community.SparkDumpCommunityProducts</class>
-            <jar>dhp-graph-mapper-${projectVersion}.jar</jar>
-            <spark-opts>
-                --executor-memory=${sparkExecutorMemory}
-                --executor-cores=${sparkExecutorCores}
-                --driver-memory=${sparkDriverMemory}
-                --conf spark.extraListeners=${spark2ExtraListeners}
-                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
-                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
-                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
-                --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
-            </spark-opts>
-            <arg>--sourcePath</arg><arg>${selectedResults}/otherresearchproduct</arg>
-            <arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.OtherResearchProduct</arg>
-            <arg>--outputPath</arg><arg>${workingDir}/dump/otherresearchproduct</arg>
-            <arg>--communityMapPath</arg><arg>${communityMapPath}</arg>
-        </spark>
-        <ok to="join_dump"/>
-        <error to="Kill"/>
-    </action>
-
-    <action name="dump_software">
-        <spark xmlns="uri:oozie:spark-action:0.2">
-            <master>yarn</master>
-            <mode>cluster</mode>
-            <name>Dump table software for community related products</name>
-            <class>eu.dnetlib.dhp.oa.graph.dump.community.SparkDumpCommunityProducts</class>
-            <jar>dhp-graph-mapper-${projectVersion}.jar</jar>
-            <spark-opts>
-                --executor-memory=${sparkExecutorMemory}
-                --executor-cores=${sparkExecutorCores}
-                --driver-memory=${sparkDriverMemory}
-                --conf spark.extraListeners=${spark2ExtraListeners}
-                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
-                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
-                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
-                --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
-            </spark-opts>
-            <arg>--sourcePath</arg><arg>${selectedResults}/software</arg>
-            <arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Software</arg>
-            <arg>--outputPath</arg><arg>${workingDir}/dump/software</arg>
-            <arg>--communityMapPath</arg><arg>${communityMapPath}</arg>
-        </spark>
-        <ok to="join_dump"/>
-        <error to="Kill"/>
-    </action>
-
-    <join name="join_dump" to="prepareResultProject"/>
-
-    <action name="prepareResultProject">
-        <spark xmlns="uri:oozie:spark-action:0.2">
-            <master>yarn</master>
-            <mode>cluster</mode>
-            <name>Prepare association result subset of project info</name>
-            <class>eu.dnetlib.dhp.oa.graph.dump.community.SparkPrepareResultProject</class>
-            <jar>dhp-graph-mapper-${projectVersion}.jar</jar>
-            <spark-opts>
-                --executor-memory=${sparkExecutorMemory}
-                --executor-cores=${sparkExecutorCores}
-                --driver-memory=${sparkDriverMemory}
-                --conf spark.extraListeners=${spark2ExtraListeners}
-                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
-                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
-                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
-                --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
-            </spark-opts>
-            <arg>--sourcePath</arg><arg>${sourcePath}</arg>
-            <arg>--outputPath</arg><arg>${workingDir}/preparedInfo</arg>
-        </spark>
-        <ok to="fork_extendWithProject"/>
-        <error to="Kill"/>
-    </action>
-
-    <fork name="fork_extendWithProject">
-        <path start="extend_publication"/>
-        <path start="extend_dataset"/>
-        <path start="extend_orp"/>
-        <path start="extend_software"/>
-    </fork>
-
-    <action name="extend_publication">
-        <spark xmlns="uri:oozie:spark-action:0.2">
-            <master>yarn</master>
-            <mode>cluster</mode>
-            <name>Extend dumped publications with information about project</name>
-            <class>eu.dnetlib.dhp.oa.graph.dump.community.SparkUpdateProjectInfo</class>
-            <jar>dhp-graph-mapper-${projectVersion}.jar</jar>
-            <spark-opts>
-                --executor-memory=${sparkExecutorMemory}
-                --executor-cores=${sparkExecutorCores}
-                --driver-memory=${sparkDriverMemory}
-                --conf spark.extraListeners=${spark2ExtraListeners}
-                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
-                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
-                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
-                --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
-            </spark-opts>
-            <arg>--sourcePath</arg><arg>${workingDir}/dump/publication</arg>
-            <arg>--outputPath</arg><arg>${outputPath}/ext/publication</arg>
-            <arg>--preparedInfoPath</arg><arg>${workingDir}/preparedInfo</arg>
-        </spark>
-        <ok to="join_extend"/>
-        <error to="Kill"/>
-    </action>
-
-    <action name="extend_dataset">
-        <spark xmlns="uri:oozie:spark-action:0.2">
-            <master>yarn</master>
-            <mode>cluster</mode>
-            <name>Extend dumped dataset with information about project</name>
-            <class>eu.dnetlib.dhp.oa.graph.dump.community.SparkUpdateProjectInfo</class>
-            <jar>dhp-graph-mapper-${projectVersion}.jar</jar>
-            <spark-opts>
-                --executor-memory=${sparkExecutorMemory}
-                --executor-cores=${sparkExecutorCores}
-                --driver-memory=${sparkDriverMemory}
-                --conf spark.extraListeners=${spark2ExtraListeners}
-                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
-                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
-                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
-                --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
-            </spark-opts>
-            <arg>--sourcePath</arg><arg>${workingDir}/dump/dataset</arg>
-            <arg>--outputPath</arg><arg>${outputPath}/ext/dataset</arg>
-            <arg>--preparedInfoPath</arg><arg>${workingDir}/preparedInfo</arg>
-        </spark>
-        <ok to="join_extend"/>
-        <error to="Kill"/>
-    </action>
-
-    <action name="extend_orp">
-        <spark xmlns="uri:oozie:spark-action:0.2">
-            <master>yarn</master>
-            <mode>cluster</mode>
-            <name>Extend dumped ORP with information about project</name>
-            <class>eu.dnetlib.dhp.oa.graph.dump.community.SparkUpdateProjectInfo</class>
-            <jar>dhp-graph-mapper-${projectVersion}.jar</jar>
-            <spark-opts>
-                --executor-memory=${sparkExecutorMemory}
-                --executor-cores=${sparkExecutorCores}
-                --driver-memory=${sparkDriverMemory}
-                --conf spark.extraListeners=${spark2ExtraListeners}
-                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
-                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
-                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
-                --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
-            </spark-opts>
-            <arg>--sourcePath</arg><arg>${workingDir}/dump/otherresearchproduct</arg>
-            <arg>--outputPath</arg><arg>${outputPath}/ext/orp</arg>
-            <arg>--preparedInfoPath</arg><arg>${workingDir}/preparedInfo</arg>
-        </spark>
-        <ok to="join_extend"/>
-        <error to="Kill"/>
-    </action>
-
-    <action name="extend_software">
-        <spark xmlns="uri:oozie:spark-action:0.2">
-            <master>yarn</master>
-            <mode>cluster</mode>
-            <name>Extend dumped software with information about project</name>
-            <class>eu.dnetlib.dhp.oa.graph.dump.community.SparkUpdateProjectInfo</class>
-            <jar>dhp-graph-mapper-${projectVersion}.jar</jar>
-            <spark-opts>
-                --executor-memory=${sparkExecutorMemory}
-                --executor-cores=${sparkExecutorCores}
-                --driver-memory=${sparkDriverMemory}
-                --conf spark.extraListeners=${spark2ExtraListeners}
-                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
-                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
-                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
-                --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
-            </spark-opts>
-            <arg>--sourcePath</arg><arg>${workingDir}/dump/software</arg>
-            <arg>--outputPath</arg><arg>${outputPath}/ext/software</arg>
-            <arg>--preparedInfoPath</arg><arg>${workingDir}/preparedInfo</arg>
-        </spark>
-        <ok to="join_extend"/>
-        <error to="Kill"/>
-    </action>
-    <join name="join_extend" to="End"/>
-
-    <end name="End"/>
-
-</workflow-app>
-
-
--- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/community/oozie_app/import.txt
+++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/community/oozie_app/import.txt
@ -1,2 +0,0 @@
-## This is a classpath-based import file (this header is required)
-dump_common classpath eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/commoncommunityfunder/oozie_app
--- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/community/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/community/oozie_app/workflow.xml
@ -77,42 +77,259 @@
        </configuration>
    </global>

-    <start to="common_action_community_funder"/>
+    <start to="fork_dump"/>

    <kill name="Kill">
        <message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
    </kill>

-    <action name="common_action_community_funder">
-        <sub-workflow>
-            <app-path>${wf:appPath()}/dump_common
-            </app-path>
-            <propagate-configuration/>
-            <configuration>
-                <property>
-                    <name>sourcePath</name>
-                    <value>${sourcePath}</value>
-                </property>
-                <property>
-                    <name>selectedResults</name>
-                    <value>${sourcePath}</value>
-                </property>
-                <property>
-                    <name>communityMapPath</name>
-                    <value>${workingDir}/communityMap</value>
-                </property>
-                <property>
-                    <name>outputPath</name>
-                    <value>${workingDir}</value>
-                </property>
-            </configuration>
-        </sub-workflow>
-        <ok to="splitForCommunities" />
-        <error to="Kill" />
+    <fork name="fork_dump">
+        <path start="dump_publication"/>
+        <path start="dump_dataset"/>
+        <path start="dump_orp"/>
+        <path start="dump_software"/>
+    </fork>
+
+    <action name="dump_publication">
+        <spark xmlns="uri:oozie:spark-action:0.2">
+            <master>yarn</master>
+            <mode>cluster</mode>
+            <name>Dump table publication for community/funder related products</name>
+            <class>eu.dnetlib.dhp.oa.graph.dump.community.SparkDumpCommunityProducts</class>
+            <jar>dhp-graph-mapper-${projectVersion}.jar</jar>
+            <spark-opts>
+                --executor-memory=${sparkExecutorMemory}
+                --executor-cores=${sparkExecutorCores}
+                --driver-memory=${sparkDriverMemory}
+                --conf spark.extraListeners=${spark2ExtraListeners}
+                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
+                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
+                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
+                --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
+            </spark-opts>
+            <arg>--sourcePath</arg><arg>${sourcePath}/publication</arg>
+            <arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Publication</arg>
+            <arg>--outputPath</arg><arg>${workingDir}/dump/publication</arg>
+            <arg>--communityMapPath</arg><arg>${communityMapPath}</arg>
+            <arg>--dumpType</arg><arg>${dumpType}</arg>
+        </spark>
+        <ok to="join_dump"/>
+        <error to="Kill"/>
+    </action>
+
+    <action name="dump_dataset">
+        <spark xmlns="uri:oozie:spark-action:0.2">
+            <master>yarn</master>
+            <mode>cluster</mode>
+            <name>Dump table dataset for community/funder related products</name>
+            <class>eu.dnetlib.dhp.oa.graph.dump.community.SparkDumpCommunityProducts</class>
+            <jar>dhp-graph-mapper-${projectVersion}.jar</jar>
+            <spark-opts>
+                --executor-memory=${sparkExecutorMemory}
+                --executor-cores=${sparkExecutorCores}
+                --driver-memory=${sparkDriverMemory}
+                --conf spark.extraListeners=${spark2ExtraListeners}
+                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
+                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
+                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
+                --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
+            </spark-opts>
+            <arg>--sourcePath</arg><arg>${sourcePath}/dataset</arg>
+            <arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Dataset</arg>
+            <arg>--outputPath</arg><arg>${workingDir}/dump/dataset</arg>
+            <arg>--communityMapPath</arg><arg>${communityMapPath}</arg>
+        </spark>
+        <ok to="join_dump"/>
+        <error to="Kill"/>
+    </action>
+
+    <action name="dump_orp">
+        <spark xmlns="uri:oozie:spark-action:0.2">
+            <master>yarn</master>
+            <mode>cluster</mode>
+            <name>Dump table ORP for community related products</name>
+            <class>eu.dnetlib.dhp.oa.graph.dump.community.SparkDumpCommunityProducts</class>
+            <jar>dhp-graph-mapper-${projectVersion}.jar</jar>
+            <spark-opts>
+                --executor-memory=${sparkExecutorMemory}
+                --executor-cores=${sparkExecutorCores}
+                --driver-memory=${sparkDriverMemory}
+                --conf spark.extraListeners=${spark2ExtraListeners}
+                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
+                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
+                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
+                --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
+            </spark-opts>
+            <arg>--sourcePath</arg><arg>${sourcePath}/otherresearchproduct</arg>
+            <arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.OtherResearchProduct</arg>
+            <arg>--outputPath</arg><arg>${workingDir}/dump/otherresearchproduct</arg>
+            <arg>--communityMapPath</arg><arg>${communityMapPath}</arg>
+        </spark>
+        <ok to="join_dump"/>
+        <error to="Kill"/>
+    </action>
+
+    <action name="dump_software">
+        <spark xmlns="uri:oozie:spark-action:0.2">
+            <master>yarn</master>
+            <mode>cluster</mode>
+            <name>Dump table software for community related products</name>
+            <class>eu.dnetlib.dhp.oa.graph.dump.community.SparkDumpCommunityProducts</class>
+            <jar>dhp-graph-mapper-${projectVersion}.jar</jar>
+            <spark-opts>
+                --executor-memory=${sparkExecutorMemory}
+                --executor-cores=${sparkExecutorCores}
+                --driver-memory=${sparkDriverMemory}
+                --conf spark.extraListeners=${spark2ExtraListeners}
+                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
+                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
+                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
+                --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
+            </spark-opts>
+            <arg>--sourcePath</arg><arg>${sourcePath}/software</arg>
+            <arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Software</arg>
+            <arg>--outputPath</arg><arg>${workingDir}/dump/software</arg>
+            <arg>--communityMapPath</arg><arg>${communityMapPath}</arg>
+        </spark>
+        <ok to="join_dump"/>
+        <error to="Kill"/>
+    </action>
+
+    <join name="join_dump" to="prepareResultProject"/>
+
+    <action name="prepareResultProject">
+        <spark xmlns="uri:oozie:spark-action:0.2">
+            <master>yarn</master>
+            <mode>cluster</mode>
+            <name>Prepare association result subset of project info</name>
+            <class>eu.dnetlib.dhp.oa.graph.dump.community.SparkPrepareResultProject</class>
+            <jar>dhp-graph-mapper-${projectVersion}.jar</jar>
+            <spark-opts>
+                --executor-memory=${sparkExecutorMemory}
+                --executor-cores=${sparkExecutorCores}
+                --driver-memory=${sparkDriverMemory}
+                --conf spark.extraListeners=${spark2ExtraListeners}
+                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
+                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
+                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
+                --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
+            </spark-opts>
+            <arg>--sourcePath</arg><arg>${sourcePath}</arg>
+            <arg>--outputPath</arg><arg>${workingDir}/preparedInfo</arg>
+        </spark>
+        <ok to="fork_extendWithProject"/>
+        <error to="Kill"/>
+    </action>
+
+    <fork name="fork_extendWithProject">
+        <path start="extend_publication"/>
+        <path start="extend_dataset"/>
+        <path start="extend_orp"/>
+        <path start="extend_software"/>
+    </fork>
+
+    <action name="extend_publication">
+        <spark xmlns="uri:oozie:spark-action:0.2">
+            <master>yarn</master>
+            <mode>cluster</mode>
+            <name>Extend dumped publications with information about project</name>
+            <class>eu.dnetlib.dhp.oa.graph.dump.community.SparkUpdateProjectInfo</class>
+            <jar>dhp-graph-mapper-${projectVersion}.jar</jar>
+            <spark-opts>
+                --executor-memory=${sparkExecutorMemory}
+                --executor-cores=${sparkExecutorCores}
+                --driver-memory=${sparkDriverMemory}
+                --conf spark.extraListeners=${spark2ExtraListeners}
+                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
+                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
+                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
+                --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
+            </spark-opts>
+            <arg>--sourcePath</arg><arg>${workingDir}/dump/publication</arg>
+            <arg>--outputPath</arg><arg>${workingDir}/ext/publication</arg>
+            <arg>--preparedInfoPath</arg><arg>${workingDir}/preparedInfo</arg>
+        </spark>
+        <ok to="join_extend"/>
+        <error to="Kill"/>
+    </action>
+
+    <action name="extend_dataset">
+        <spark xmlns="uri:oozie:spark-action:0.2">
+            <master>yarn</master>
+            <mode>cluster</mode>
+            <name>Extend dumped dataset with information about project</name>
+            <class>eu.dnetlib.dhp.oa.graph.dump.community.SparkUpdateProjectInfo</class>
+            <jar>dhp-graph-mapper-${projectVersion}.jar</jar>
+            <spark-opts>
+                --executor-memory=${sparkExecutorMemory}
+                --executor-cores=${sparkExecutorCores}
+                --driver-memory=${sparkDriverMemory}
+                --conf spark.extraListeners=${spark2ExtraListeners}
+                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
+                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
+                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
+                --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
+            </spark-opts>
+            <arg>--sourcePath</arg><arg>${workingDir}/dump/dataset</arg>
+            <arg>--outputPath</arg><arg>${workingDir}/ext/dataset</arg>
+            <arg>--preparedInfoPath</arg><arg>${workingDir}/preparedInfo</arg>
+        </spark>
+        <ok to="join_extend"/>
+        <error to="Kill"/>
+    </action>
+
+    <action name="extend_orp">
+        <spark xmlns="uri:oozie:spark-action:0.2">
+            <master>yarn</master>
+            <mode>cluster</mode>
+            <name>Extend dumped ORP with information about project</name>
+            <class>eu.dnetlib.dhp.oa.graph.dump.community.SparkUpdateProjectInfo</class>
+            <jar>dhp-graph-mapper-${projectVersion}.jar</jar>
+            <spark-opts>
+                --executor-memory=${sparkExecutorMemory}
+                --executor-cores=${sparkExecutorCores}
+                --driver-memory=${sparkDriverMemory}
+                --conf spark.extraListeners=${spark2ExtraListeners}
+                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
+                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
+                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
+                --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
+            </spark-opts>
+            <arg>--sourcePath</arg><arg>${workingDir}/dump/otherresearchproduct</arg>
+            <arg>--outputPath</arg><arg>${workingDir}/ext/orp</arg>
+            <arg>--preparedInfoPath</arg><arg>${workingDir}/preparedInfo</arg>
+        </spark>
+        <ok to="join_extend"/>
+        <error to="Kill"/>
+    </action>
+
+    <action name="extend_software">
+        <spark xmlns="uri:oozie:spark-action:0.2">
+            <master>yarn</master>
+            <mode>cluster</mode>
+            <name>Extend dumped software with information about project</name>
+            <class>eu.dnetlib.dhp.oa.graph.dump.community.SparkUpdateProjectInfo</class>
+            <jar>dhp-graph-mapper-${projectVersion}.jar</jar>
+            <spark-opts>
+                --executor-memory=${sparkExecutorMemory}
+                --executor-cores=${sparkExecutorCores}
+                --driver-memory=${sparkDriverMemory}
+                --conf spark.extraListeners=${spark2ExtraListeners}
+                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
+                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
+                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
+                --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
+            </spark-opts>
+            <arg>--sourcePath</arg><arg>${workingDir}/dump/software</arg>
+            <arg>--outputPath</arg><arg>${workingDir}/ext/software</arg>
+            <arg>--preparedInfoPath</arg><arg>${workingDir}/preparedInfo</arg>
+        </spark>
+        <ok to="join_extend"/>
+        <error to="Kill"/>
    </action>


-
+    <join name="join_extend" to="splitForCommunities"/>
    <action name="splitForCommunities">
        <spark xmlns="uri:oozie:spark-action:0.2">
            <master>yarn</master>
--- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/complete/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/complete/oozie_app/workflow.xml
@ -298,6 +298,7 @@
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
                --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
+                --conf spark.sql.shuffle.partitions=3840
            </spark-opts>
            <arg>--sourcePath</arg><arg>${sourcePath}</arg>
            <arg>--outputPath</arg><arg>${workingDir}/validrelation</arg>
--- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/funder/oozie_app/import.txt
+++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/funder/oozie_app/import.txt
@ -1,2 +0,0 @@
-## This is a classpath-based import file (this header is required)
-dump_common classpath eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/commoncommunityfunder/oozie_app
--- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/funder/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/funder/oozie_app/workflow.xml
@ -77,12 +77,36 @@
        </configuration>
    </global>

-    <start to="fork_result_linked_to_projects"/>
+    <start to="prepareResultProject"/>

    <kill name="Kill">
        <message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
    </kill>

+    <!-- Entry action of this workflow (target of <start>): runs SparkPrepareResultProject on
+         ${sourcePath} and writes its output to ${workingDir}/preparedInfo. That directory is then
+         passed as graphPath to the result-selection actions forked after this step.
+         On success control moves to fork_result_linked_to_projects; any failure goes to Kill. -->
+    <action name="prepareResultProject">
+        <spark xmlns="uri:oozie:spark-action:0.2">
+            <master>yarn</master>
+            <mode>cluster</mode>
+            <name>Prepare association result subset of project info</name>
+            <class>eu.dnetlib.dhp.oa.graph.dump.community.SparkPrepareResultProject</class>
+            <jar>dhp-graph-mapper-${projectVersion}.jar</jar>
+            <spark-opts>
+                --executor-memory=${sparkExecutorMemory}
+                --executor-cores=${sparkExecutorCores}
+                --driver-memory=${sparkDriverMemory}
+                --conf spark.extraListeners=${spark2ExtraListeners}
+                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
+                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
+                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
+                --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
+            </spark-opts>
+            <arg>--sourcePath</arg><arg>${sourcePath}</arg>
+            <arg>--outputPath</arg><arg>${workingDir}/preparedInfo</arg>
+        </spark>
+        <ok to="fork_result_linked_to_projects"/>
+        <error to="Kill"/>
+    </action>
+

    <fork name="fork_result_linked_to_projects">
        <path start="select_publication_linked_to_projects"/>
@ -111,7 +135,8 @@
            <arg>--sourcePath</arg><arg>${sourcePath}/publication</arg>
            <arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Publication</arg>
            <arg>--outputPath</arg><arg>${workingDir}/result/publication</arg>
-            <arg>--graphPath</arg><arg>${sourcePath}</arg>
+            <arg>--graphPath</arg><arg>${workingDir}/preparedInfo</arg>
+            <arg>--communityMapPath</arg><arg>${communityMapPath}</arg>
        </spark>
        <ok to="join_link"/>
        <error to="Kill"/>
@ -137,7 +162,8 @@
            <arg>--sourcePath</arg><arg>${sourcePath}/dataset</arg>
            <arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Dataset</arg>
            <arg>--outputPath</arg><arg>${workingDir}/result/dataset</arg>
-            <arg>--graphPath</arg><arg>${sourcePath}</arg>
+            <arg>--graphPath</arg><arg>${workingDir}/preparedInfo</arg>
+            <arg>--communityMapPath</arg><arg>${communityMapPath}</arg>
        </spark>
        <ok to="join_link"/>
        <error to="Kill"/>
@ -163,7 +189,8 @@
            <arg>--sourcePath</arg><arg>${sourcePath}/otherresearchproduct</arg>
            <arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.OtherResearchProduct</arg>
            <arg>--outputPath</arg><arg>${workingDir}/result/otherresearchproduct</arg>
-            <arg>--graphPath</arg><arg>${sourcePath}</arg>
+            <arg>--graphPath</arg><arg>${workingDir}/preparedInfo</arg>
+            <arg>--communityMapPath</arg><arg>${communityMapPath}</arg>
        </spark>
        <ok to="join_link"/>
        <error to="Kill"/>
@ -189,41 +216,14 @@
            <arg>--sourcePath</arg><arg>${sourcePath}/software</arg>
            <arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Software</arg>
            <arg>--outputPath</arg><arg>${workingDir}/result/software</arg>
-            <arg>--graphPath</arg><arg>${sourcePath}</arg>
+            <arg>--graphPath</arg><arg>${workingDir}/preparedInfo</arg>
+            <arg>--communityMapPath</arg><arg>${communityMapPath}</arg>
        </spark>
        <ok to="join_link"/>
        <error to="Kill"/>
    </action>

-    <join name="join_link" to="common_action_community_funder"/>
-
-    <action name="common_action_community_funder">
-        <sub-workflow>
-            <app-path>${wf:appPath()}/dump_common
-            </app-path>
-            <propagate-configuration/>
-            <configuration>
-                <property>
-                    <name>sourcePath</name>
-                    <value>${sourcePath}</value>
-                </property>
-                <property>
-                    <name>selectedResults</name>
-                    <value>${workingDir}/result</value>
-                </property>
-                <property>
-                    <name>communityMapPath</name>
-                    <value>${workingDir}/communityMap</value>
-                </property>
-                <property>
-                    <name>outputPath</name>
-                    <value>${workingDir}</value>
-                </property>
-            </configuration>
-        </sub-workflow>
-        <ok to="dump_funder_results" />
-        <error to="Kill" />
-    </action>
+    <join name="join_link" to="dump_funder_results"/>

    <action name="dump_funder_results">
        <spark xmlns="uri:oozie:spark-action:0.2">
@ -242,9 +242,8 @@
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
                --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
            </spark-opts>
-            <arg>--sourcePath</arg><arg>${workingDir}/ext</arg>
+            <arg>--sourcePath</arg><arg>${workingDir}/result</arg>
            <arg>--outputPath</arg><arg>${outputPath}</arg>
-            <arg>--graphPath</arg><arg>${sourcePath}</arg>
        </spark>
        <ok to="End"/>
        <error to="Kill"/>
--- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/PrepareResultProjectJobTest.java
+++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/PrepareResultProjectJobTest.java
@ -321,4 +321,27 @@ public class PrepareResultProjectJobTest {
 			3, resultExplodedProvenance.filter("provenance = 'sysimport:crosswalk:entityregistry'").count());

 	}
+
+	/**
+	 * Runs {@code SparkPrepareResultProject} on the {@code funderresource/match} fixture and prints
+	 * every {@code ResultProject} record written under {@code <workingDir>/preparedInfo}.
+	 *
+	 * NOTE(review): the test contains no assertion, so it only fails when the job or the JSON
+	 * (de)serialization throws. Consider pinning the expected records once they are agreed on.
+	 */
+	@Test
+	void testMatchx() throws Exception {
+
+		final String matchFixture = getClass()
+			.getResource("/eu/dnetlib/dhp/oa/graph/dump/funderresource/match")
+			.getPath();
+		final String preparedInfoDir = workingDir.toString() + "/preparedInfo";
+
+		final String[] jobArgs = {
+			"-isSparkSessionManaged", Boolean.FALSE.toString(),
+			"-outputPath", preparedInfoDir,
+			"-sourcePath", matchFixture
+		};
+		SparkPrepareResultProject.main(jobArgs);
+
+		final JavaSparkContext javaContext = JavaSparkContext.fromSparkContext(spark.sparkContext());
+
+		// read back what the job wrote, one JSON document per line
+		final JavaRDD<ResultProject> prepared = javaContext
+			.textFile(preparedInfoDir)
+			.map(line -> OBJECT_MAPPER.readValue(line, ResultProject.class));
+
+		// dump each record to stdout for manual inspection
+		prepared.foreach(record -> System.out.println(OBJECT_MAPPER.writeValueAsString(record)));
+	}
+
 }
--- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/funderresult/ResultLinkedToProjectTest.java
+++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/funderresult/ResultLinkedToProjectTest.java
@ -22,6 +22,7 @@ import org.slf4j.LoggerFactory;
 import com.fasterxml.jackson.databind.ObjectMapper;

 import eu.dnetlib.dhp.oa.graph.dump.funderresults.SparkResultLinkedToProject;
+import eu.dnetlib.dhp.schema.dump.oaf.community.CommunityResult;
 import eu.dnetlib.dhp.schema.oaf.Publication;
 import eu.dnetlib.dhp.schema.oaf.Result;

@ -76,7 +77,11 @@ public class ResultLinkedToProjectTest {
 			.getPath();

 		final String graphPath = getClass()
-			.getResource("/eu/dnetlib/dhp/oa/graph/dump/funderresource/nomatch")
+			.getResource("/eu/dnetlib/dhp/oa/graph/dump/funderresource/preparedInfo")
+			.getPath();
+
+		final String communityMapPath = getClass()
+			.getResource("/eu/dnetlib/dhp/oa/graph/dump/funderresource/communityMapPath")
 			.getPath();

 		SparkResultLinkedToProject.main(new String[] {
@ -84,20 +89,18 @@ public class ResultLinkedToProjectTest {
 			"-outputPath", workingDir.toString() + "/preparedInfo",
 			"-sourcePath", sourcePath,
 			"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Publication",
-			"-graphPath", graphPath
+			"-graphPath", graphPath,
+			"-communityMapPath", communityMapPath

 		});

 		final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());

-		JavaRDD<Result> tmp = sc
+		JavaRDD<CommunityResult> tmp = sc
 			.textFile(workingDir.toString() + "/preparedInfo")
-			.map(item -> OBJECT_MAPPER.readValue(item, Result.class));
+			.map(item -> OBJECT_MAPPER.readValue(item, CommunityResult.class));

-		org.apache.spark.sql.Dataset<Result> verificationDataset = spark
-			.createDataset(tmp.rdd(), Encoders.bean(Result.class));
-
-		Assertions.assertEquals(0, verificationDataset.count());
+		Assertions.assertEquals(0, tmp.count());

 	}

@ -108,8 +111,12 @@ public class ResultLinkedToProjectTest {
 			.getResource("/eu/dnetlib/dhp/oa/graph/dump/funderresource/match/papers.json")
 			.getPath();

-		final String relationPath = getClass()
-			.getResource("/eu/dnetlib/dhp/oa/graph/dump/funderresource/match")
+		final String graphPath = getClass()
+			.getResource("/eu/dnetlib/dhp/oa/graph/dump/funderresource/preparedInfo")
+			.getPath();
+
+		final String communityMapPath = getClass()
+			.getResource("/eu/dnetlib/dhp/oa/graph/dump/funderresource/communityMapPath")
 			.getPath();

 		SparkResultLinkedToProject.main(new String[] {
@ -117,20 +124,18 @@ public class ResultLinkedToProjectTest {
 			"-outputPath", workingDir.toString() + "/preparedInfo",
 			"-sourcePath", sourcePath,
 			"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Publication",
-			"-graphPath", relationPath
+			"-graphPath", graphPath,
+			"-communityMapPath", communityMapPath

 		});

 		final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());

-		JavaRDD<Publication> tmp = sc
+		JavaRDD<CommunityResult> tmp = sc
 			.textFile(workingDir.toString() + "/preparedInfo")
-			.map(item -> OBJECT_MAPPER.readValue(item, Publication.class));
+			.map(item -> OBJECT_MAPPER.readValue(item, CommunityResult.class));

-		org.apache.spark.sql.Dataset<Publication> verificationDataset = spark
-			.createDataset(tmp.rdd(), Encoders.bean(Publication.class));
-
-		Assertions.assertEquals(1, verificationDataset.count());
+		Assertions.assertEquals(1, tmp.count());

 	}

--- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/funderresult/SplitPerFunderTest.java
+++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/funderresult/SplitPerFunderTest.java
@ -5,10 +5,14 @@ import java.io.IOException;
 import java.nio.file.Files;
 import java.nio.file.Path;

+// import eu.dnetlib.dhp.oa.graph.dump.funderresults.SparkDumpFunderResults2;
+// import eu.dnetlib.dhp.oa.graph.dump.funderresults.SparkGetFunderList;
 import org.apache.commons.io.FileUtils;
 import org.apache.spark.SparkConf;
 import org.apache.spark.api.java.JavaRDD;
 import org.apache.spark.api.java.JavaSparkContext;
+import org.apache.spark.api.java.function.ForeachFunction;
+import org.apache.spark.sql.Dataset;
 import org.apache.spark.sql.Encoders;
 import org.apache.spark.sql.SparkSession;
 import org.junit.jupiter.api.AfterAll;
@ -68,20 +72,19 @@ public class SplitPerFunderTest {
 	void test1() throws Exception {

 		final String sourcePath = getClass()
-			.getResource("/eu/dnetlib/dhp/oa/graph/dump/funderresource/extendeddump")
+			.getResource("/eu/dnetlib/dhp/oa/graph/dump/funderresource/ext")
 			.getPath();

 		SparkDumpFunderResults.main(new String[] {
 			"-isSparkSessionManaged", Boolean.FALSE.toString(),
 			"-outputPath", workingDir.toString() + "/split",
-			"-sourcePath", sourcePath,
-			"-graphPath", sourcePath
+			"-sourcePath", sourcePath

 		});

 		final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());

-		// FP7 3
+		// FP7 3 and H2020 3
 		JavaRDD<CommunityResult> tmp = sc
 			.textFile(workingDir.toString() + "/split/EC_FP7")
 			.map(item -> OBJECT_MAPPER.readValue(item, CommunityResult.class));
@ -143,11 +146,6 @@ public class SplitPerFunderTest {
 			.map(item -> OBJECT_MAPPER.readValue(item, CommunityResult.class));
 		Assertions.assertEquals(1, tmp.count());

-		// CONICYT 0
-		tmp = sc
-			.textFile(workingDir.toString() + "/split/CONICYTF")
-			.map(item -> OBJECT_MAPPER.readValue(item, CommunityResult.class));
-		Assertions.assertEquals(0, tmp.count());
-
 	}
+
 }
--- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/projectssubset/ProjectSubsetTest.java
+++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/projectssubset/ProjectSubsetTest.java
@ -0,0 +1,125 @@
+
+package eu.dnetlib.dhp.oa.graph.dump.projectssubset;
+
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.util.HashMap;
+
+import org.apache.commons.io.FileUtils;
+import org.apache.spark.SparkConf;
+import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.api.java.JavaSparkContext;
+import org.apache.spark.sql.SaveMode;
+import org.apache.spark.sql.SparkSession;
+import org.junit.jupiter.api.AfterAll;
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.BeforeAll;
+import org.junit.jupiter.api.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.fasterxml.jackson.databind.ObjectMapper;
+
+import eu.dnetlib.dhp.schema.dump.oaf.graph.Project;
+
+/**
+ * Tests for {@link ProjectsSubsetSparkJob}.
+ *
+ * Each test copies the {@code projectId} fixture into a scenario-specific folder of the shared
+ * temporary working directory, runs the job against the scenario's {@code projects} fixture and
+ * then checks how many projects were written and how many lines the project-id list contains
+ * afterwards. Using one folder per scenario keeps the tests independent of their execution order
+ * and of whether the job overwrites an already existing output path.
+ */
+public class ProjectSubsetTest {
+	private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
+	private static final Logger log = LoggerFactory.getLogger(ProjectSubsetTest.class);
+
+	/** Classpath folder holding the fixtures used by this class. */
+	private static final String RESOURCE_ROOT = "/eu/dnetlib/dhp/oa/graph/dump/projectsubset/";
+
+	private static SparkSession spark;
+	private static Path workingDir;
+
+	/** Creates the temporary working directory and a local SparkSession shared by all tests. */
+	@BeforeAll
+	public static void beforeAll() throws IOException {
+		final String testName = ProjectSubsetTest.class.getSimpleName();
+		workingDir = Files.createTempDirectory(testName);
+		log.info("using work dir {}", workingDir);
+		SparkConf conf = new SparkConf();
+		conf.setAppName(testName);
+		conf.setMaster("local[*]");
+		conf.set("spark.driver.host", "localhost");
+		conf.set("hive.metastore.local", "true");
+		conf.set("spark.ui.enabled", "false");
+		conf.set("spark.sql.warehouse.dir", workingDir.toString());
+		conf.set("hive.metastore.warehouse.dir", workingDir.resolve("warehouse").toString());
+		spark = SparkSession
+			.builder()
+			.appName(testName)
+			.config(conf)
+			.getOrCreate();
+	}
+
+	/** Removes the working directory and always stops the SparkSession. */
+	@AfterAll
+	public static void afterAll() throws IOException {
+		try {
+			FileUtils.deleteDirectory(workingDir.toFile());
+		} finally {
+			// stop the session even when the clean-up throws, so it is never left running
+			spark.stop();
+		}
+	}
+
+	/**
+	 * Seeds {@code <workingDir>/<scenario>/projectIds} from the {@code projectId} fixture and runs
+	 * the job on the {@code <scenario>/projects} fixture, writing to
+	 * {@code <workingDir>/<scenario>/projects}.
+	 *
+	 * @param scenario name of the fixture sub-folder ({@code allnew}, {@code matchOne})
+	 * @return the scenario's working folder
+	 * @throws Exception whatever the job raises
+	 */
+	private static String runSubsetJob(final String scenario) throws Exception {
+		final String projectListPath = ProjectSubsetTest.class
+			.getResource(RESOURCE_ROOT + "projectId")
+			.getPath();
+		final String sourcePath = ProjectSubsetTest.class
+			.getResource(RESOURCE_ROOT + scenario + "/projects")
+			.getPath();
+		final String scenarioDir = workingDir.toString() + "/" + scenario;
+
+		// work on a copy of the id list: the test later asserts on its size after the job has run
+		spark
+			.read()
+			.textFile(projectListPath)
+			.write()
+			.mode(SaveMode.Overwrite)
+			.text(scenarioDir + "/projectIds");
+
+		ProjectsSubsetSparkJob.main(new String[] {
+			"-isSparkSessionManaged", Boolean.FALSE.toString(),
+			"-outputPath", scenarioDir + "/projects",
+			"-sourcePath", sourcePath,
+			"-projectListPath", scenarioDir + "/projectIds"
+		});
+		return scenarioDir;
+	}
+
+	/** Reads the projects the job wrote under {@code scenarioDir}, one JSON document per line. */
+	private static JavaRDD<Project> readProjects(final JavaSparkContext sc, final String scenarioDir) {
+		return sc
+			.textFile(scenarioDir + "/projects")
+			.map(item -> OBJECT_MAPPER.readValue(item, Project.class));
+	}
+
+	/**
+	 * Counts the projects whose id carries the given 12-character namespace prefix, i.e. the
+	 * characters right after the leading {@code 40|} (for example {@code aka_________}).
+	 */
+	private static long countByNamespace(final JavaRDD<Project> projects, final String nsPrefix) {
+		return projects.filter(p -> p.getId().substring(3, 15).equals(nsPrefix)).count();
+	}
+
+	@Test
+	void testAllNew() throws Exception {
+		final String scenarioDir = runSubsetJob("allnew");
+		final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
+		final JavaRDD<Project> tmp = readProjects(sc, scenarioDir);
+
+		Assertions.assertEquals(12, tmp.count());
+		Assertions.assertEquals(2, countByNamespace(tmp, "aka_________"));
+		Assertions.assertEquals(2, countByNamespace(tmp, "anr_________"));
+		Assertions.assertEquals(4, countByNamespace(tmp, "arc_________"));
+		Assertions.assertEquals(3, countByNamespace(tmp, "conicytf____"));
+		Assertions.assertEquals(1, countByNamespace(tmp, "corda_______"));
+		Assertions.assertEquals(40, sc.textFile(scenarioDir + "/projectIds").count());
+	}
+
+	@Test
+	void testMatchOne() throws Exception {
+		final String scenarioDir = runSubsetJob("matchOne");
+		final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
+		final JavaRDD<Project> tmp = readProjects(sc, scenarioDir);
+
+		Assertions.assertEquals(11, tmp.count());
+		Assertions.assertEquals(2, countByNamespace(tmp, "aka_________"));
+		Assertions.assertEquals(2, countByNamespace(tmp, "anr_________"));
+		Assertions.assertEquals(4, countByNamespace(tmp, "arc_________"));
+		Assertions.assertEquals(3, countByNamespace(tmp, "conicytf____"));
+		Assertions.assertEquals(0, countByNamespace(tmp, "corda__h2020"));
+		Assertions.assertEquals(39, sc.textFile(scenarioDir + "/projectIds").count());
+	}
+}
--- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/funderresource/communityMapPath/communityMap
+++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/funderresource/communityMapPath/communityMap
@ -0,0 +1 @@
+{"ee":"SDSN - Greece","epos":"EPOS","enrmaps":"Energy Research","fet-h2020":"FET H2020","instruct":"Instruct-Eric","egi":"EGI Federation","euromarine":"Euromarine","covid-19":"COVID-19","dariah":"DARIAH EU","rda":"Research Data Alliance","clarin":"CLARIN","aginfra":"Agricultural and Food Sciences","risis":"RISI","fam":"Fisheries and Aquaculture Management","beopen":"Transport Research","elixir-gr":"ELIXIR GR","fet-fp7":"FET FP7","ifremer":"Ifremer","science-innovation-policy":"Science and Innovation Policy Studies","mes":"European Marine Scinece","oa-pg":"EC Post-Grant Open Access Pilot","ni":"Neuroinformatics","dh-ch":"Digital Humanities and Cultural Heritage"}
--- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/funderresource/extendeddump/orp
+++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/funderresource/extendeddump/orp
--- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/funderresource/ext/otherresearchproduct
+++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/funderresource/ext/otherresearchproduct
--- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/funderresource/ext/project
+++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/funderresource/ext/project
--- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/funderresource/ext/publication
+++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/funderresource/ext/publication
--- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/funderresource/ext/software
+++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/funderresource/ext/software
--- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/funderresource/extendeddump/otherresearchproduct
+++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/funderresource/extendeddump/otherresearchproduct
--- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/funderresource/fundersList/list
+++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/funderresource/fundersList/list
@ -0,0 +1,8 @@
+NSF
+CIHR
+NWO
+NHMRC
+NIH
+MZOS
+SNSF
+EC
--- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/funderresource/nomatch/papers.json
+++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/funderresource/nomatch/papers.json
--- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/funderresource/preparedInfo/resultProject
+++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/funderresource/preparedInfo/resultProject
@ -0,0 +1 @@
+{"resultId":"50|a89337edbe55::43e8b61e5e8d682545cb867be8118585","projectsList":[{"id":"40|aka_________::01bb7b48e29d732a1c7bc5150b9195c4","code":"135027","acronym":null,"title":"Dynamic 3D resolution-enhanced low-coherence interferometric imaging / Consortium: Hi-Lo","funder":{"shortName":"AKA","name":"Academy of Finland","jurisdiction":"FI","fundingStream":null},"provenance":{"provenance":"Harvested","trust":"0.900000000000000022"},"validated":null},{"id":"40|aka_________::9d1af21dbd0f5bc719f71553d19a6b3a","code":"316061","acronym":null,"title":"Finnish Imaging of Degenerative Shoulder Study (FIMAGE): A study on the prevalence of degenerative imaging changes of the shoulder and their relevance to clinical symptoms in the general population.","funder":{"shortName":"AKA","name":"Academy of Finland","jurisdiction":"FI","fundingStream":null},"provenance":{"provenance":"Harvested","trust":"0.900000000000000022"},"validated":null}]}
--- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/projectsubset/allnew/projects
+++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/projectsubset/allnew/projects
@ -0,0 +1,12 @@
+{"id":"40|aka_________::01bb7b48e29d732a1c7bc5150b9195c4","websiteurl":null,"code":"135027","acronym":null,"title":"Dynamic 3D resolution-enhanced low-coherence interferometric imaging / Consortium: Hi-Lo","startdate":null,"enddate":null,"callidentifier":"Fotoniikka ja modernit kuvantamismenetelmät LT","keywords":null,"openaccessmandateforpublications":false,"openaccessmandatefordataset":false,"subject":[],"funding":[{"shortName":"AKA","name":"Academy of Finland","jurisdiction":"FI","funding_stream":null}],"summary":null,"granted":null,"h2020programme":[]}
+{"id":"40|aka_________::9d1af21dbd0f5bc719f71553d19a6b3a","websiteurl":null,"code":"316061","acronym":null,"title":"Finnish Imaging of Degenerative Shoulder Study (FIMAGE): A study on the prevalence of degenerative imaging changes of the shoulder and their relevance to clinical symptoms in the general population.","startdate":null,"enddate":null,"callidentifier":"Academy Project Funding TT","keywords":null,"openaccessmandateforpublications":false,"openaccessmandatefordataset":false,"subject":[],"funding":[{"shortName":"AKA","name":"Academy of Finland","jurisdiction":"FI","funding_stream":null}],"summary":null,"granted":null,"h2020programme":[]}
+{"id":"40|anr_________::1f21edc5c902be305ee47148955c6e50","websiteurl":null,"code":"ANR-17-CE05-0033","acronym":"MOISE","title":"METAL OXIDES AS LOW LOADED NANO-IRIDIUM SUPPORT FOR COMPETITIVE WATER ELECTROLYSIS","startdate":null,"enddate":null,"callidentifier":null,"keywords":null,"openaccessmandateforpublications":false,"openaccessmandatefordataset":false,"subject":[],"funding":[{"shortName":"ANR","name":"French National Research Agency (ANR)","jurisdiction":"FR","funding_stream":null}],"summary":null,"granted":null,"h2020programme":[]}
+{"id":"40|anr_________::547e78ffdcb7d72a1ef31058dede3a33","websiteurl":null,"code":"ANR-09-SEGI-0005","acronym":"GALAXY","title":"DEVELOPPEMENT COLLABORATIF DE SYSTEMES COMPLEXES SELON UNE APPROCHE GUIDEE  PAR LES MODELES","startdate":null,"enddate":null,"callidentifier":null,"keywords":null,"openaccessmandateforpublications":false,"openaccessmandatefordataset":false,"subject":[],"funding":[{"shortName":"ANR","name":"French National Research Agency (ANR)","jurisdiction":"FR","funding_stream":null}],"summary":null,"granted":null,"h2020programme":[]}
+{"id":"40|arc_________::838e781a8d479e27a11101421fd8b296","websiteurl":"http://purl.org/au-research/grants/arc/LE0347462","code":"LE0347462","acronym":null,"title":"Femtosecond laser micromachining facility","startdate":"2003-01-01","enddate":"2003-12-31","callidentifier":null,"keywords":"biomedical nanostructures,femtosecond laser machining,laser manufacturing,laser micromachining,microphotonics,photonic bandgap structures","openaccessmandateforpublications":false,"openaccessmandatefordataset":false,"subject":[],"funding":[{"shortName":"ARC","name":"Australian Research Council (ARC)","jurisdiction":"AU","funding_stream":{"id":"ARC::Linkage Infrastructure, Equipment and Facilities","description":"Linkage Infrastructure, Equipment and Facilities"}}],"summary":null,"granted":null,"h2020programme":[]}
+{"id":"40|arc_________::a461f180f7b6700c0499d4d3d53e58c7","websiteurl":"http://purl.org/au-research/grants/arc/LP140100567","code":"LP140100567","acronym":null,"title":"Linkage Projects - Grant ID: LP140100567","startdate":"2014-01-01","enddate":"2017-12-31","callidentifier":null,"keywords":"EDUCATIONAL MEASUREMENT; EDUCATIONAL MEASUREMENT; HIGH-STAKES TESTING; HIGH-STAKES TESTING; PERFORMANCE ASSESSMENT; PERFORMANCE ASSESSMENT; PERFORMANCE ASSESSMENT","openaccessmandateforpublications":false,"openaccessmandatefordataset":false,"subject":[],"funding":[{"shortName":"ARC","name":"Australian Research Council (ARC)","jurisdiction":"AU","funding_stream":{"id":"ARC::Linkage Projects","description":"Linkage Projects"}}],"summary":null,"granted":null,"h2020programme":[]}
+{"id":"40|arc_________::b46b9e07d4cea67ccf497520a75ad0c8","websiteurl":"http://purl.org/au-research/grants/arc/DP180101235","code":"DP180101235","acronym":null,"title":"Discovery Projects - Grant ID: DP180101235","startdate":"2018-01-01","enddate":"2023-12-31","callidentifier":null,"keywords":null,"openaccessmandateforpublications":false,"openaccessmandatefordataset":false,"subject":[],"funding":[{"shortName":"ARC","name":"Australian Research Council (ARC)","jurisdiction":"AU","funding_stream":{"id":"ARC::Discovery Projects","description":"Discovery Projects"}}],"summary":null,"granted":null,"h2020programme":[]}
+{"id":"40|arc_________::c5f86314ce288f91a7f31c219b128fab","websiteurl":"http://purl.org/au-research/grants/arc/LE0989831","code":"LE0989831","acronym":null,"title":"The Australian Music Navigator: research infrastructure for discovering, accessing and analysing Australia's musical landscape","startdate":"2009-01-01","enddate":"2009-12-31","callidentifier":null,"keywords":"database metadata,digital sound,electroacoustic music,film music,music,music information retrieval","openaccessmandateforpublications":false,"openaccessmandatefordataset":false,"subject":[],"funding":[{"shortName":"ARC","name":"Australian Research Council (ARC)","jurisdiction":"AU","funding_stream":{"id":"ARC::Linkage Infrastructure, Equipment and Facilities","description":"Linkage Infrastructure, Equipment and Facilities"}}],"summary":null,"granted":null,"h2020programme":[]}
+{"id":"40|conicytf____::05539f3427ad605d7c1de0168f3e337f","websiteurl":"http://repositorio.conicyt.cl/handle/10533/183109","code":"3120023","acronym":null,"title":"SYNTHESIS AND STRUCTURE-ACTIVITY RELATIONSHIPS OF HETEROARYLISOQUINOLINE- AND PHENANTHRIDINEQUINONES AS ANTITUMOR AGENTS","startdate":"2011-01-01","enddate":"2014-01-28","callidentifier":null,"keywords":null,"openaccessmandateforpublications":false,"openaccessmandatefordataset":false,"subject":[],"funding":[{"shortName":"CONICYT","name":"Comisión Nacional de Investigación Científica y Tecnológica","jurisdiction":"CL","funding_stream":{"id":"CONICYT::FONDECYT::POSTDOCTORADO","description":"Fondecyt fundings - Fondecyt stream, POSTDOCTORADO"}}],"summary":null,"granted":null,"h2020programme":[]}
+{"id":"40|conicytf____::96b47b91a6c061e31f626612b1650c03","websiteurl":"http://repositorio.conicyt.cl/handle/10533/163340","code":"1040240","acronym":null,"title":"ESTUDIO TEORICO-EXPERIMENTAL DE LA PERMEACION DE FLUIDOS SUPERCRITICOS Y LA SEPARACION DE MEZCLAS A ALTA PRESION A TRAVES DE MEMBRANAS MICROPOROSAS.","startdate":"2004-01-15","enddate":"2007-01-15","callidentifier":null,"keywords":null,"openaccessmandateforpublications":false,"openaccessmandatefordataset":false,"subject":[],"funding":[{"shortName":"CONICYT","name":"Comisión Nacional de Investigación Científica y Tecnológica","jurisdiction":"CL","funding_stream":{"id":"CONICYT::FONDECYT::REGULAR","description":"Fondecyt fundings - Fondecyt stream, REGULAR"}}],"summary":null,"granted":null,"h2020programme":[]}
+{"id":"40|conicytf____::b122147e0a13f34cdb6311a9d714f9a5","websiteurl":"http://repositorio.conicyt.cl/handle/10533/162452","code":"1020683","acronym":null,"title":"SINTESIS Y CARACTERIZACION DE SALES CUATERNARIAS CON EL ANION CALCOFOSFATO [P2Qy]4- (Q=S,Se;y=6,7) PROPIEDADES FISICAS Y REACCIONES DE INCLUSION.","startdate":"2002-01-15","enddate":"2006-01-15","callidentifier":null,"keywords":null,"openaccessmandateforpublications":false,"openaccessmandatefordataset":false,"subject":[],"funding":[{"shortName":"CONICYT","name":"Comisión Nacional de Investigación Científica y Tecnológica","jurisdiction":"CL","funding_stream":{"id":"CONICYT::FONDECYT::REGULAR","description":"Fondecyt fundings - Fondecyt stream, REGULAR"}}],"summary":null,"granted":null,"h2020programme":[]}
+{"id":"40|corda_______::132bac68f17bb81c451d9071be6e4d6d","websiteurl":null,"code":"628405","acronym":"ANIM","title":"Precisely Defined, Surface-Engineered Nanostructures via Crystallization-Driven Self-Assembly of Linear-Dendritic Block Copolymers","startdate":"2014-05-01","enddate":"2016-04-30","callidentifier":"FP7-PEOPLE-2013-IIF","keywords":null,"openaccessmandateforpublications":false,"openaccessmandatefordataset":false,"subject":[],"funding":[{"shortName":"EC","name":"European Commission","jurisdiction":"EU","funding_stream":{"id":"EC::FP7::SP3::PEOPLE","description":"SEVENTH FRAMEWORK PROGRAMME - SP3-People - Marie-Curie Actions"}}],"summary":null,"granted":null,"h2020programme":[]}
--- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/projectsubset/matchOne/projects
+++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/projectsubset/matchOne/projects
@ -0,0 +1,12 @@
+{"id":"40|aka_________::01bb7b48e29d732a1c7bc5150b9195c4","websiteurl":null,"code":"135027","acronym":null,"title":"Dynamic 3D resolution-enhanced low-coherence interferometric imaging / Consortium: Hi-Lo","startdate":null,"enddate":null,"callidentifier":"Fotoniikka ja modernit kuvantamismenetelmät LT","keywords":null,"openaccessmandateforpublications":false,"openaccessmandatefordataset":false,"subject":[],"funding":[{"shortName":"AKA","name":"Academy of Finland","jurisdiction":"FI","funding_stream":null}],"summary":null,"granted":null,"h2020programme":[]}
+{"id":"40|aka_________::9d1af21dbd0f5bc719f71553d19a6b3a","websiteurl":null,"code":"316061","acronym":null,"title":"Finnish Imaging of Degenerative Shoulder Study (FIMAGE): A study on the prevalence of degenerative imaging changes of the shoulder and their relevance to clinical symptoms in the general population.","startdate":null,"enddate":null,"callidentifier":"Academy Project Funding TT","keywords":null,"openaccessmandateforpublications":false,"openaccessmandatefordataset":false,"subject":[],"funding":[{"shortName":"AKA","name":"Academy of Finland","jurisdiction":"FI","funding_stream":null}],"summary":null,"granted":null,"h2020programme":[]}
+{"id":"40|anr_________::1f21edc5c902be305ee47148955c6e50","websiteurl":null,"code":"ANR-17-CE05-0033","acronym":"MOISE","title":"METAL OXIDES AS LOW LOADED NANO-IRIDIUM SUPPORT FOR COMPETITIVE WATER ELECTROLYSIS","startdate":null,"enddate":null,"callidentifier":null,"keywords":null,"openaccessmandateforpublications":false,"openaccessmandatefordataset":false,"subject":[],"funding":[{"shortName":"ANR","name":"French National Research Agency (ANR)","jurisdiction":"FR","funding_stream":null}],"summary":null,"granted":null,"h2020programme":[]}
+{"id":"40|anr_________::547e78ffdcb7d72a1ef31058dede3a33","websiteurl":null,"code":"ANR-09-SEGI-0005","acronym":"GALAXY","title":"DEVELOPPEMENT COLLABORATIF DE SYSTEMES COMPLEXES SELON UNE APPROCHE GUIDEE  PAR LES MODELES","startdate":null,"enddate":null,"callidentifier":null,"keywords":null,"openaccessmandateforpublications":false,"openaccessmandatefordataset":false,"subject":[],"funding":[{"shortName":"ANR","name":"French National Research Agency (ANR)","jurisdiction":"FR","funding_stream":null}],"summary":null,"granted":null,"h2020programme":[]}
+{"id":"40|arc_________::838e781a8d479e27a11101421fd8b296","websiteurl":"http://purl.org/au-research/grants/arc/LE0347462","code":"LE0347462","acronym":null,"title":"Femtosecond laser micromachining facility","startdate":"2003-01-01","enddate":"2003-12-31","callidentifier":null,"keywords":"biomedical nanostructures,femtosecond laser machining,laser manufacturing,laser micromachining,microphotonics,photonic bandgap structures","openaccessmandateforpublications":false,"openaccessmandatefordataset":false,"subject":[],"funding":[{"shortName":"ARC","name":"Australian Research Council (ARC)","jurisdiction":"AU","funding_stream":{"id":"ARC::Linkage Infrastructure, Equipment and Facilities","description":"Linkage Infrastructure, Equipment and Facilities"}}],"summary":null,"granted":null,"h2020programme":[]}
+{"id":"40|arc_________::a461f180f7b6700c0499d4d3d53e58c7","websiteurl":"http://purl.org/au-research/grants/arc/LP140100567","code":"LP140100567","acronym":null,"title":"Linkage Projects - Grant ID: LP140100567","startdate":"2014-01-01","enddate":"2017-12-31","callidentifier":null,"keywords":"EDUCATIONAL MEASUREMENT; EDUCATIONAL MEASUREMENT; HIGH-STAKES TESTING; HIGH-STAKES TESTING; PERFORMANCE ASSESSMENT; PERFORMANCE ASSESSMENT; PERFORMANCE ASSESSMENT","openaccessmandateforpublications":false,"openaccessmandatefordataset":false,"subject":[],"funding":[{"shortName":"ARC","name":"Australian Research Council (ARC)","jurisdiction":"AU","funding_stream":{"id":"ARC::Linkage Projects","description":"Linkage Projects"}}],"summary":null,"granted":null,"h2020programme":[]}
+{"id":"40|arc_________::b46b9e07d4cea67ccf497520a75ad0c8","websiteurl":"http://purl.org/au-research/grants/arc/DP180101235","code":"DP180101235","acronym":null,"title":"Discovery Projects - Grant ID: DP180101235","startdate":"2018-01-01","enddate":"2023-12-31","callidentifier":null,"keywords":null,"openaccessmandateforpublications":false,"openaccessmandatefordataset":false,"subject":[],"funding":[{"shortName":"ARC","name":"Australian Research Council (ARC)","jurisdiction":"AU","funding_stream":{"id":"ARC::Discovery Projects","description":"Discovery Projects"}}],"summary":null,"granted":null,"h2020programme":[]}
+{"id":"40|arc_________::c5f86314ce288f91a7f31c219b128fab","websiteurl":"http://purl.org/au-research/grants/arc/LE0989831","code":"LE0989831","acronym":null,"title":"The Australian Music Navigator: research infrastructure for discovering, accessing and analysing Australia's musical landscape","startdate":"2009-01-01","enddate":"2009-12-31","callidentifier":null,"keywords":"database metadata,digital sound,electroacoustic music,film music,music,music information retrieval","openaccessmandateforpublications":false,"openaccessmandatefordataset":false,"subject":[],"funding":[{"shortName":"ARC","name":"Australian Research Council (ARC)","jurisdiction":"AU","funding_stream":{"id":"ARC::Linkage Infrastructure, Equipment and Facilities","description":"Linkage Infrastructure, Equipment and Facilities"}}],"summary":null,"granted":null,"h2020programme":[]}
+{"id":"40|conicytf____::05539f3427ad605d7c1de0168f3e337f","websiteurl":"http://repositorio.conicyt.cl/handle/10533/183109","code":"3120023","acronym":null,"title":"SYNTHESIS AND STRUCTURE-ACTIVITY RELATIONSHIPS OF HETEROARYLISOQUINOLINE- AND PHENANTHRIDINEQUINONES AS ANTITUMOR AGENTS","startdate":"2011-01-01","enddate":"2014-01-28","callidentifier":null,"keywords":null,"openaccessmandateforpublications":false,"openaccessmandatefordataset":false,"subject":[],"funding":[{"shortName":"CONICYT","name":"Comisión Nacional de Investigación Científica y Tecnológica","jurisdiction":"CL","funding_stream":{"id":"CONICYT::FONDECYT::POSTDOCTORADO","description":"Fondecyt fundings - Fondecyt stream, POSTDOCTORADO"}}],"summary":null,"granted":null,"h2020programme":[]}
+{"id":"40|conicytf____::96b47b91a6c061e31f626612b1650c03","websiteurl":"http://repositorio.conicyt.cl/handle/10533/163340","code":"1040240","acronym":null,"title":"ESTUDIO TEORICO-EXPERIMENTAL DE LA PERMEACION DE FLUIDOS SUPERCRITICOS Y LA SEPARACION DE MEZCLAS A ALTA PRESION A TRAVES DE MEMBRANAS MICROPOROSAS.","startdate":"2004-01-15","enddate":"2007-01-15","callidentifier":null,"keywords":null,"openaccessmandateforpublications":false,"openaccessmandatefordataset":false,"subject":[],"funding":[{"shortName":"CONICYT","name":"Comisión Nacional de Investigación Científica y Tecnológica","jurisdiction":"CL","funding_stream":{"id":"CONICYT::FONDECYT::REGULAR","description":"Fondecyt fundings - Fondecyt stream, REGULAR"}}],"summary":null,"granted":null,"h2020programme":[]}
+{"id":"40|conicytf____::b122147e0a13f34cdb6311a9d714f9a5","websiteurl":"http://repositorio.conicyt.cl/handle/10533/162452","code":"1020683","acronym":null,"title":"SINTESIS Y CARACTERIZACION DE SALES CUATERNARIAS CON EL ANION CALCOFOSFATO [P2Qy]4- (Q=S,Se;y=6,7) PROPIEDADES FISICAS Y REACCIONES DE INCLUSION.","startdate":"2002-01-15","enddate":"2006-01-15","callidentifier":null,"keywords":null,"openaccessmandateforpublications":false,"openaccessmandatefordataset":false,"subject":[],"funding":[{"shortName":"CONICYT","name":"Comisión Nacional de Investigación Científica y Tecnológica","jurisdiction":"CL","funding_stream":{"id":"CONICYT::FONDECYT::REGULAR","description":"Fondecyt fundings - Fondecyt stream, REGULAR"}}],"summary":null,"granted":null,"h2020programme":[]}
+{"id":"40|corda__h2020::bf5d35ec8d24ae4abfb4a1c6a0af3856","websiteurl":null,"code":"628405","acronym":"ANIM","title":"Precisely Defined, Surface-Engineered Nanostructures via Crystallization-Driven Self-Assembly of Linear-Dendritic Block Copolymers","startdate":"2014-05-01","enddate":"2016-04-30","callidentifier":"FP7-PEOPLE-2013-IIF","keywords":null,"openaccessmandateforpublications":false,"openaccessmandatefordataset":false,"subject":[],"funding":[{"shortName":"EC","name":"European Commission","jurisdiction":"EU","funding_stream":{"id":"EC::FP7::SP3::PEOPLE","description":"SEVENTH FRAMEWORK PROGRAMME - SP3-People - Marie-Curie Actions"}}],"summary":null,"granted":null,"h2020programme":[]}
--- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/projectsubset/projectId
+++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/projectsubset/projectId
@ -0,0 +1,28 @@
+40|nih_________::4c32cdbc4c9949853f02219fc4780a30
+40|nih_________::b485512ef116af73bee79d50c8f9ca01
+40|nih_________::b44d9bc8e99d9a0477ac06897e3e9c19
+40|nih_________::7d2d2b7d1644a722a6bbcb031d82fec6
+40|nsf_________::6b2674b0341e07b818a56c6f0daa2633
+40|nih_________::96bb39aecc8f7b9f3b02ed36ef09538b
+40|nsf_________::88d92bdf20ec2fac3ed9740f962b4fad
+40|nih_________::4bb8c14729a0082378bb04db8321ce14
+40|nih_________::08a8eed6c17c6d8e427afcfd29f87c7b
+40|nsf_________::c314f3d35af1990121bf5b803937e112
+40|nih_________::3ad6a2e6ebd561206f0da69468337f50
+40|nih_________::d02c60c65a59629e69a30abcf2ceaed1
+40|nih_________::d5a241cc94253feb72181cde15f51e96
+40|nih_________::b5df718bbca69af50d4b7213e26af3f0
+40|nih_________::bc90893c1be80503578e48f6ef6b7061
+40|rcuk________::2c39b38c26c260b14a9816b88c91c132
+40|nih_________::ab103ad117cd0579df66f7592a7d4adf
+40|nih_________::147aa6ad8bd201e2a02c7b6cc3f68348
+40|corda__h2020::bf5d35ec8d24ae4abfb4a1c6a0af3856
+40|nih_________::b8083208156f2764d07c736ba9b49dd2
+40|nih_________::f4d1e0aece0e6a9eff8d054c28e082db
+40|nsf_________::56297da8b472a4be8ac3f09af813c9f6
+40|nsf_________::6b6dc3398eeebb3de1ab66e6eb8c5cb3
+40|nih_________::93289a36ebffb0bee3d6b01c6fc0a3d6
+40|nih_________::6c3b00dd4ae9d43d6630ff18f189ebae
+40|nih_________::1d983a87768f13bc8377b1b7d17290a2
+40|nih_________::c3b56e91859b114644c1403e892eb80f
+40|rcuk________::c1e15330fc7956063652f9c06e584548
				`@ -0,0 +1 @@`
				{"ee":"SDSN - Greece","epos":"EPOS","enrmaps":"Energy Research","fet-h2020":"FET H2020","instruct":"Instruct-Eric","egi":"EGI Federation","euromarine":"Euromarine","covid-19":"COVID-19","dariah":"DARIAH EU","rda":"Research Data Alliance","clarin":"CLARIN","aginfra":"Agricultural and Food Sciences","risis":"RISI","fam":"Fisheries and Aquaculture Management","beopen":"Transport Research","elixir-gr":"ELIXIR GR","fet-fp7":"FET FP7","ifremer":"Ifremer","science-innovation-policy":"Science and Innovation Policy Studies","mes":"European Marine Scinece","oa-pg":"EC Post-Grant Open Access Pilot","ni":"Neuroinformatics","dh-ch":"Digital Humanities and Cultural Heritage"}
				`@ -0,0 +1 @@`
				{"resultId":"50\|a89337edbe55::43e8b61e5e8d682545cb867be8118585","projectsList":[{"id":"40\|aka_________::01bb7b48e29d732a1c7bc5150b9195c4","code":"135027","acronym":null,"title":"Dynamic 3D resolution-enhanced low-coherence interferometric imaging / Consortium: Hi-Lo","funder":{"shortName":"AKA","name":"Academy of Finland","jurisdiction":"FI","fundingStream":null},"provenance":{"provenance":"Harvested","trust":"0.900000000000000022"},"validated":null},{"id":"40\|aka_________::9d1af21dbd0f5bc719f71553d19a6b3a","code":"316061","acronym":null,"title":"Finnish Imaging of Degenerative Shoulder Study (FIMAGE): A study on the prevalence of degenerative imaging changes of the shoulder and their relevance to clinical symptoms in the general population.","funder":{"shortName":"AKA","name":"Academy of Finland","jurisdiction":"FI","fundingStream":null},"provenance":{"provenance":"Harvested","trust":"0.900000000000000022"},"validated":null}]}