Miriam Baglioni 2023-05-02 15:47:06 +02:00
parent 6ba43a1b67
commit 011b7737ad
7 changed files with 856 additions and 210 deletions

View File

@@ -8,6 +8,7 @@ import java.util.*;
import java.util.stream.Collectors;
import org.apache.commons.io.IOUtils;
+import org.apache.commons.lang.StringUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.function.FilterFunction;
import org.apache.spark.api.java.function.MapFunction;
@@ -65,21 +66,21 @@ public class SparkDumpCommunityProducts implements Serializable {
		final String resultClassName = parser.get("resultTableName");
		log.info("resultTableName: {}", resultClassName);
-		String communityMapPath = parser.get("communityMapPath");
+		String communityMapPath = Optional
+			.ofNullable(parser.get("communityMapPath"))
+			.orElse(null);
		Class<? extends Result> inputClazz = (Class<? extends Result>) Class.forName(resultClassName);
		SparkConf conf = new SparkConf();
-		String finalCommunityMapPath = communityMapPath;
		runWithSparkSession(
			conf,
			isSparkSessionManaged,
			spark -> {
				Utils.removeOutputDir(spark, outputPath);
				resultDump(
-					spark, inputPath, outputPath, finalCommunityMapPath, inputClazz);
+					spark, inputPath, outputPath, communityMapPath, inputClazz);
			});
	}
@@ -91,12 +92,15 @@ public class SparkDumpCommunityProducts implements Serializable {
		String communityMapPath,
		Class<I> inputClazz) {
-		CommunityMap communityMap = Utils.getCommunityMap(spark, communityMapPath);
+		CommunityMap communityMap = null;
+		if (!StringUtils.isEmpty(communityMapPath))
+			communityMap = Utils.getCommunityMap(spark, communityMapPath);
+		CommunityMap finalCommunityMap = communityMap;
		Utils
			.readPath(spark, inputPath, inputClazz)
			.map(
-				(MapFunction<I, CommunityResult>) value -> execMap(value, communityMap),
+				(MapFunction<I, CommunityResult>) value -> execMap(value, finalCommunityMap),
				Encoders.bean(CommunityResult.class))
			.filter((FilterFunction<CommunityResult>) value -> value != null)
			.map(
@@ -119,26 +123,27 @@ public class SparkDumpCommunityProducts implements Serializable {
			|| Boolean.TRUE.equals(odInfo.get().getInvisible())) {
			return null;
		}
-		Set<String> communities = communityMap.keySet();
-		Optional<List<Context>> inputContext = Optional
-			.ofNullable(((eu.dnetlib.dhp.schema.oaf.Result) value).getContext());
-		if (!inputContext.isPresent()) {
-			return null;
-		}
-		List<String> toDumpFor = inputContext.get().stream().map(c -> {
-			if (communities.contains(c.getId())) {
-				return c.getId();
-			}
-			if (c.getId().contains("::") && communities.contains(c.getId().substring(0, c.getId().indexOf("::")))) {
-				return c.getId().substring(0, c.getId().indexOf("::"));
-			}
-			return null;
-		}).filter(Objects::nonNull).collect(Collectors.toList());
-		if (toDumpFor.isEmpty()) {
-			return null;
-		}
+		if (communityMap != null) {
+			Set<String> communities = communityMap.keySet();
+			Optional<List<Context>> inputContext = Optional
+				.ofNullable(((eu.dnetlib.dhp.schema.oaf.Result) value).getContext());
+			if (!inputContext.isPresent()) {
+				return null;
+			}
+			List<String> toDumpFor = inputContext.get().stream().map(c -> {
+				if (communities.contains(c.getId())) {
+					return c.getId();
+				}
+				if (c.getId().contains("::") && communities.contains(c.getId().substring(0, c.getId().indexOf("::")))) {
+					return c.getId().substring(0, c.getId().indexOf("::"));
+				}
+				return null;
+			}).filter(Objects::nonNull).collect(Collectors.toList());
+			if (toDumpFor.isEmpty()) {
+				return null;
+			}
+		}
		return (O) ResultMapper.map(value, communityMap, Constants.DUMPTYPE.COMMUNITY.getType());
	}

View File

@@ -1,9 +1,29 @@
package eu.dnetlib.dhp.oa.graph.dump.country;
+import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
+import java.io.Serializable;
+import java.util.*;
+import javax.rmi.CORBA.Util;
+import org.apache.commons.io.IOUtils;
+import org.apache.spark.SparkConf;
+import org.apache.spark.api.java.function.FilterFunction;
+import org.apache.spark.api.java.function.MapFunction;
+import org.apache.spark.sql.Dataset;
+import org.apache.spark.sql.Encoders;
+import org.apache.spark.sql.SaveMode;
+import org.apache.spark.sql.SparkSession;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.google.gson.Gson;
+import com.jayway.jsonpath.DocumentContext;
+import com.jayway.jsonpath.JsonPath;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.oa.graph.dump.Constants;
import eu.dnetlib.dhp.oa.graph.dump.ResultMapper;
@@ -17,119 +37,100 @@ import eu.dnetlib.dhp.oa.graph.dump.subset.selectionconstraints.SelectionConstraints;
import eu.dnetlib.dhp.oa.model.graph.GraphResult;
import eu.dnetlib.dhp.schema.oaf.DataInfo;
import eu.dnetlib.dhp.schema.oaf.KeyValue;
-import org.apache.commons.io.IOUtils;
-import org.apache.spark.SparkConf;
-import org.apache.spark.api.java.function.MapFunction;
-import org.apache.spark.sql.Dataset;
-import org.apache.spark.sql.Encoders;
-import org.apache.spark.sql.SaveMode;
-import org.apache.spark.sql.SparkSession;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
import scala.Tuple2;
-import javax.rmi.CORBA.Util;
-import java.io.Serializable;
-import java.util.*;
-import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
/**
 * @author miriam.baglioni
 * @Date 27/04/23
 * Selects the results that have the given country in the country list
 */
public class SparkFindResultWithCountry implements Serializable {
	private static final Logger log = LoggerFactory.getLogger(SparkFindResultWithCountry.class);
	public static final String COMPRESSION = "compression";
	public static final String GZIP = "gzip";
	public static void main(String[] args) throws Exception {
		String jsonConfiguration = IOUtils
			.toString(
				SparkFindResultWithCountry.class
					.getResourceAsStream(
-						"/eu/dnetlib/dhp/oa/graph/dump/input_parameters.json"));
+						"/eu/dnetlib/dhp/oa/graph/dump/result_country_parameters.json"));
		final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
		parser.parseArgument(args);
		Boolean isSparkSessionManaged = Optional
			.ofNullable(parser.get("isSparkSessionManaged"))
			.map(Boolean::valueOf)
			.orElse(Boolean.TRUE);
		log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
		final String inputPath = parser.get("sourcePath");
		log.info("inputPath: {}", inputPath);
		final String outputPath = parser.get("outputPath");
		log.info("outputPath: {}", outputPath);
		final String resultType = parser.get("resultType");
		log.info("resultType: {}", resultType);
		final String resultClassName = parser.get("resultTableName");
		log.info("resultTableName: {}", resultClassName);
		final String preparedInfoPath = parser.get("resultWithCountry");
		Class<? extends eu.dnetlib.dhp.schema.oaf.Result> inputClazz = (Class<? extends eu.dnetlib.dhp.schema.oaf.Result>) Class
			.forName(resultClassName);
		run(
			isSparkSessionManaged, inputPath, outputPath, inputClazz,
			resultType, preparedInfoPath);
	}
	private static void run(Boolean isSparkSessionManaged, String inputPath, String outputPath,
		Class<? extends eu.dnetlib.dhp.schema.oaf.Result> inputClazz, String resultType, String preparedInfoPath) {
		SparkConf conf = new SparkConf();
		runWithSparkSession(
			conf,
			isSparkSessionManaged,
			spark -> {
				Utils.removeOutputDir(spark, outputPath + "/original/" + resultType);
				resultDump(
					spark, inputPath, outputPath, inputClazz, resultType, preparedInfoPath);
			});
	}
	public static <I extends eu.dnetlib.dhp.schema.oaf.Result> void resultDump(
		SparkSession spark,
		String inputPath,
		String outputPath,
		Class<I> inputClazz,
		String resultType,
		String preparedInfoPath) {
-		Dataset<String> resultsWithCountry = Utils.readPath(spark, preparedInfoPath, String.class).distinct();
-		Dataset<I> result = Utils
-			.readPath(spark, inputPath, inputClazz);
-		resultsWithCountry.joinWith(result, resultsWithCountry.col("value").equalTo(result.col("id")))
-			.map((MapFunction<Tuple2<String,I>, I>) t2 -> t2._2(), Encoders.bean(inputClazz) )
+		Dataset<String> resultsWithCountry = spark.read().textFile(preparedInfoPath).distinct();
+		Dataset<I> result = Utils
+			.readPath(spark, inputPath, inputClazz)
+			.filter(
+				(FilterFunction<I>) r -> !r.getDataInfo().getInvisible() && !r.getDataInfo().getDeletedbyinference());
+		resultsWithCountry
+			.joinWith(result, resultsWithCountry.col("value").equalTo(result.col("id")))
+			.map((MapFunction<Tuple2<String, I>, I>) t2 -> t2._2(), Encoders.bean(inputClazz))
			.write()
			.mode(SaveMode.Overwrite)
			.option("compression", "gzip")
			.json(outputPath + "/original/" + resultType);
	}
}

View File

@@ -1,9 +1,12 @@
package eu.dnetlib.dhp.oa.graph.dump.country;
-import eu.dnetlib.dhp.application.ArgumentApplicationParser;
-import eu.dnetlib.dhp.oa.graph.dump.Utils;
-import eu.dnetlib.dhp.schema.common.ModelConstants;
-import eu.dnetlib.dhp.schema.oaf.*;
+import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
+import java.io.Serializable;
+import java.io.StringReader;
+import java.util.*;
import org.apache.commons.io.IOUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.function.FilterFunction;
@@ -19,14 +22,13 @@ import org.dom4j.io.SAXReader;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.xml.sax.SAXException;
+import eu.dnetlib.dhp.application.ArgumentApplicationParser;
+import eu.dnetlib.dhp.oa.graph.dump.Utils;
+import eu.dnetlib.dhp.schema.common.ModelConstants;
+import eu.dnetlib.dhp.schema.oaf.*;
import scala.Tuple2;
-import java.io.Serializable;
-import java.io.StringReader;
-import java.util.*;
-import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
/**
 * @author miriam.baglioni
 * @Date 27/04/23
@@ -34,124 +36,136 @@ import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
 * or that have that country in the country list
 */
public class SparkFindResultsRelatedToCountry implements Serializable {
	private static final Logger log = LoggerFactory.getLogger(SparkFindResultsRelatedToCountry.class);
	public static final String COMPRESSION = "compression";
	public static final String GZIP = "gzip";
	public static void main(String[] args) throws Exception {
		String jsonConfiguration = IOUtils
			.toString(
				SparkFindResultsRelatedToCountry.class
					.getResourceAsStream(
-						"/eu/dnetlib/dhp/oa/graph/dump/input_parameters.json"));
+						"/eu/dnetlib/dhp/oa/graph/dump/result_related_country_parameters.json"));
		final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
		parser.parseArgument(args);
		Boolean isSparkSessionManaged = Optional
			.ofNullable(parser.get("isSparkSessionManaged"))
			.map(Boolean::valueOf)
			.orElse(Boolean.TRUE);
		log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
		final String inputPath = parser.get("sourcePath");
		log.info("inputPath: {}", inputPath);
		final String outputPath = parser.get("outputPath");
		log.info("outputPath: {}", outputPath);
		final String country = parser.get("country");
		run(
			isSparkSessionManaged, inputPath, outputPath, country);
	}
	private static void run(Boolean isSparkSessionManaged, String inputPath, String outputPath,
		String country) {
		SparkConf conf = new SparkConf();
		runWithSparkSession(
			conf,
			isSparkSessionManaged,
			spark -> {
				findRelatedEntities(
					spark, inputPath, outputPath, country);
			});
	}
	public static <I extends eu.dnetlib.dhp.schema.oaf.Result> void findRelatedEntities(
		SparkSession spark,
		String inputPath,
		String outputPath,
		String country) {
-		Dataset<Project> projectsInCountry = Utils.readPath(spark, inputPath + "/project", Project.class)
-			.filter((FilterFunction<Project>) p -> isCountryInFunderJurisdiction(p.getFundingtree(), country));
+		Dataset<Project> projectsInCountry = Utils
+			.readPath(spark, inputPath + "/project", Project.class)
+			.filter((FilterFunction<Project>) p -> isCountryInFunderJurisdiction(p.getFundingtree(), country));
-		Dataset<Relation> relsProjectResults = Utils.readPath(spark, inputPath + "/relation", Relation.class)
-			.filter((FilterFunction<Relation>) r -> !r.getDataInfo().getDeletedbyinference() &&
-				r.getRelClass().equals(ModelConstants.PRODUCES));
+		Dataset<Relation> relsProjectResults = Utils
+			.readPath(spark, inputPath + "/relation", Relation.class)
+			.filter(
+				(FilterFunction<Relation>) r -> !r.getDataInfo().getDeletedbyinference() &&
+					r.getRelClass().equals(ModelConstants.PRODUCES));
-		projectsInCountry.joinWith(relsProjectResults, projectsInCountry.col("id").equalTo(relsProjectResults.col("source")))
-			.map((MapFunction<Tuple2<Project, Relation>, String>) t2 -> t2._2().getTarget(), Encoders.STRING())
-			.write()
-			.option(COMPRESSION, GZIP)
-			.mode(SaveMode.Overwrite)
-			.json(outputPath );
+		projectsInCountry
+			.joinWith(relsProjectResults, projectsInCountry.col("id").equalTo(relsProjectResults.col("source")))
+			.map((MapFunction<Tuple2<Project, Relation>, String>) t2 -> t2._2().getTarget(), Encoders.STRING())
+			.write()
+			.option(COMPRESSION, GZIP)
+			.mode(SaveMode.Overwrite)
+			.text(outputPath);
-		Dataset<Organization> organizationsInCountry = Utils.readPath(spark, inputPath + "/organization", Organization.class)
-			.filter((FilterFunction<Organization>) o -> o.getCountry().getClassid().equals(country));
+		Dataset<Organization> organizationsInCountry = Utils
+			.readPath(spark, inputPath + "/organization", Organization.class)
+			.filter((FilterFunction<Organization>) o -> o.getCountry().getClassid().equals(country));
-		Dataset<Relation> relsOrganizationResults = Utils.readPath(spark, inputPath + "/relation", Relation.class)
-			.filter((FilterFunction<Relation>) r -> !r.getDataInfo().getDeletedbyinference() &&
-				r.getRelClass().equals(ModelConstants.IS_AUTHOR_INSTITUTION_OF));
+		Dataset<Relation> relsOrganizationResults = Utils
+			.readPath(spark, inputPath + "/relation", Relation.class)
+			.filter(
+				(FilterFunction<Relation>) r -> !r.getDataInfo().getDeletedbyinference() &&
+					r.getRelClass().equals(ModelConstants.HAS_AUTHOR_INSTITUTION));
-		organizationsInCountry.joinWith(relsOrganizationResults, organizationsInCountry.col("id").equalTo(relsOrganizationResults.col("source")))
-			.map((MapFunction<Tuple2<Organization, Relation>, String>) t2 -> t2._2().getTarget(), Encoders.STRING())
-			.write()
-			.option(COMPRESSION, GZIP)
-			.mode(SaveMode.Append)
-			.json(outputPath );
+		organizationsInCountry
+			.joinWith(
+				relsOrganizationResults,
+				organizationsInCountry.col("id").equalTo(relsOrganizationResults.col("source")))
+			.map((MapFunction<Tuple2<Organization, Relation>, String>) t2 -> t2._2().getTarget(), Encoders.STRING())
+			.write()
+			.option(COMPRESSION, GZIP)
+			.mode(SaveMode.Append)
+			.text(outputPath);
-		selectResultWithCountry(spark, inputPath, outputPath, country, "publication", Publication.class);
-		selectResultWithCountry(spark, inputPath, outputPath, country, "dataset", eu.dnetlib.dhp.schema.oaf.Dataset.class);
-		selectResultWithCountry(spark, inputPath, outputPath, country, "software", Software.class);
-		selectResultWithCountry(spark, inputPath, outputPath, country, "otherresearchproduct", OtherResearchProduct.class);
+		selectResultWithCountry(spark, inputPath, outputPath, country, "publication", Publication.class);
+		selectResultWithCountry(
+			spark, inputPath, outputPath, country, "dataset", eu.dnetlib.dhp.schema.oaf.Dataset.class);
+		selectResultWithCountry(spark, inputPath, outputPath, country, "software", Software.class);
+		selectResultWithCountry(
+			spark, inputPath, outputPath, country, "otherresearchproduct", OtherResearchProduct.class);
	}
-	private static <R extends Result> void selectResultWithCountry(SparkSession spark, String inputPath, String outputPath, String country, String type, Class<R> inputClazz) {
-		Utils.readPath(spark, inputPath + "/" + type, inputClazz)
-			.filter((FilterFunction<R>) p -> !p.getDataInfo().getDeletedbyinference() && !p.getDataInfo().getInvisible() &&
-				p.getCountry().stream().anyMatch(c -> c.getClassid().equals(country)))
-			.map((MapFunction<R, String>) p -> p.getId(), Encoders.STRING() )
-			.write()
-			.option(COMPRESSION, GZIP)
-			.mode(SaveMode.Append)
-			.json(outputPath);
-	}
+	private static <R extends Result> void selectResultWithCountry(SparkSession spark, String inputPath,
+		String outputPath, String country, String type, Class<R> inputClazz) {
+		Utils
+			.readPath(spark, inputPath + "/" + type, inputClazz)
+			.filter(
+				(FilterFunction<R>) p -> !p.getDataInfo().getDeletedbyinference() && !p.getDataInfo().getInvisible() &&
+					p.getCountry() != null &&
+					p.getCountry().stream().anyMatch(c -> c.getClassid().equals(country)))
+			.map((MapFunction<R, String>) p -> p.getId(), Encoders.STRING())
+			.write()
+			.option(COMPRESSION, GZIP)
+			.mode(SaveMode.Append)
+			.text(outputPath);
+	}
-	private static boolean isCountryInFunderJurisdiction(List<Field<String>> fundingtrees, String country) {
-		try {
-			final SAXReader reader = new SAXReader();
-			reader.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true);
-			for (Field<String> fundingtree: fundingtrees){
-				final Document doc = reader.read(new StringReader(fundingtree.getValue()));
-				if(((Node) (doc.selectNodes("//funder/jurisdiction").get(0))).getText().equals(country)){
-					return true;
-				}
-			}
-			return false;
-		} catch (DocumentException | SAXException e) {
-			throw new IllegalArgumentException(e);
-		}
-	}
+	private static boolean isCountryInFunderJurisdiction(List<Field<String>> fundingtrees, String country) {
+		try {
+			final SAXReader reader = new SAXReader();
+			reader.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true);
+			for (Field<String> fundingtree : fundingtrees) {
+				final Document doc = reader.read(new StringReader(fundingtree.getValue()));
+				if (((Node) (doc.selectNodes("//funder/jurisdiction").get(0))).getText().equals(country)) {
+					return true;
+				}
+			}
+			return false;
+		} catch (DocumentException | SAXException e) {
+			throw new IllegalArgumentException(e);
+		}
+	}
}

View File

@@ -0,0 +1,30 @@
<configuration>
<property>
<name>jobTracker</name>
<value>yarnRM</value>
</property>
<property>
<name>nameNode</name>
<value>hdfs://nameservice1</value>
</property>
<property>
<name>oozie.use.system.libpath</name>
<value>true</value>
</property>
<property>
<name>hiveMetastoreUris</name>
<value>thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083</value>
</property>
<property>
<name>hiveJdbcUrl</name>
<value>jdbc:hive2://iis-cdh5-test-m3.ocean.icm.edu.pl:10000</value>
</property>
<property>
<name>hiveDbName</name>
<value>openaire</value>
</property>
<property>
<name>oozie.launcher.mapreduce.user.classpath.first</name>
<value>true</value>
</property>
</configuration>

View File

@@ -0,0 +1,525 @@
<workflow-app name="sub-dump_subset" xmlns="uri:oozie:workflow:0.5">
<parameters>
<property>
<name>sourcePath</name>
<description>the source path</description>
</property>
<property>
<name>outputPath</name>
<description>the output path</description>
</property>
<property>
<name>country</name>
<description>the country for which to produce the dump</description>
</property>
<property>
<name>hiveDbName</name>
<description>the target hive database name</description>
</property>
<property>
<name>hiveJdbcUrl</name>
<description>hive server jdbc url</description>
</property>
<property>
<name>hiveMetastoreUris</name>
<description>hive server metastore URIs</description>
</property>
<property>
<name>sparkDriverMemory</name>
<description>memory for driver process</description>
</property>
<property>
<name>sparkExecutorMemory</name>
<description>memory for individual executor</description>
</property>
<property>
<name>sparkExecutorCores</name>
<description>number of cores used by single executor</description>
</property>
<property>
<name>oozieActionShareLibForSpark2</name>
<description>oozie action sharelib for spark 2.*</description>
</property>
<property>
<name>spark2ExtraListeners</name>
<value>com.cloudera.spark.lineage.NavigatorAppListener</value>
<description>spark 2.* extra listeners classname</description>
</property>
<property>
<name>spark2SqlQueryExecutionListeners</name>
<value>com.cloudera.spark.lineage.NavigatorQueryListener</value>
<description>spark 2.* sql query execution listeners classname</description>
</property>
<property>
<name>spark2YarnHistoryServerAddress</name>
<description>spark 2.* yarn history server address</description>
</property>
<property>
<name>spark2EventLogDir</name>
<description>spark 2.* event log dir location</description>
</property>
</parameters>
<global>
<job-tracker>${jobTracker}</job-tracker>
<name-node>${nameNode}</name-node>
<configuration>
<property>
<name>mapreduce.job.queuename</name>
<value>${queueName}</value>
</property>
<property>
<name>oozie.launcher.mapred.job.queue.name</name>
<value>${oozieLauncherQueueName}</value>
</property>
<property>
<name>oozie.action.sharelib.for.spark</name>
<value>${oozieActionShareLibForSpark2}</value>
</property>
</configuration>
</global>
<start to="save_community_map" />
<action name="save_community_map">
<java>
<main-class>eu.dnetlib.dhp.oa.graph.dump.SaveCommunityMap</main-class>
<arg>--outputPath</arg><arg>${workingDir}/communityMap</arg>
<arg>--nameNode</arg><arg>${nameNode}</arg>
<arg>--isLookUpUrl</arg><arg>${isLookUpUrl}</arg>
</java>
<ok to="find_results_for_country"/>
<error to="Kill"/>
</action>
<action name="find_results_for_country">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master>
<mode>cluster</mode>
<name>Dump table publication </name>
<class>eu.dnetlib.dhp.oa.graph.dump.country.SparkFindResultsRelatedToCountry</class>
<jar>dump-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
</spark-opts>
<arg>--sourcePath</arg><arg>${sourcePath}</arg>
<arg>--outputPath</arg><arg>${workingDir}/resultsInCountry</arg>
<arg>--country</arg><arg>${country}</arg>
</spark>
<ok to="fork_select"/>
<error to="Kill"/>
</action>
<fork name="fork_select">
<path start="select_publication"/>
<path start="select_dataset"/>
<path start="select_orp"/>
<path start="select_software"/>
</fork>
<action name="select_publication">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master>
<mode>cluster</mode>
<name>Dump table publication </name>
<class>eu.dnetlib.dhp.oa.graph.dump.country.SparkFindResultWithCountry</class>
<jar>dump-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory=7G
--executor-cores=2
--driver-memory=7G
--conf spark.sql.shuffle.partitions=3840
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
</spark-opts>
<arg>--sourcePath</arg><arg>${sourcePath}/publication</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Publication</arg>
<arg>--outputPath</arg><arg>${outputPath}</arg>
<arg>--resultWithCountry</arg><arg>${workingDir}/resultsInCountry</arg>
<arg>--resultType</arg><arg>publication</arg>
</spark>
<ok to="join_dump"/>
<error to="Kill"/>
</action>
<action name="select_dataset">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master>
<mode>cluster</mode>
<name>Dump table dataset </name>
<class>eu.dnetlib.dhp.oa.graph.dump.country.SparkFindResultWithCountry</class>
<jar>dump-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
</spark-opts>
<arg>--sourcePath</arg><arg>${sourcePath}/dataset</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Dataset</arg>
<arg>--outputPath</arg><arg>${outputPath}</arg>
<arg>--resultType</arg><arg>dataset</arg>
<arg>--resultWithCountry</arg><arg>${workingDir}/resultsInCountry</arg>
</spark>
<ok to="join_dump"/>
<error to="Kill"/>
</action>
<action name="select_orp">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master>
<mode>cluster</mode>
<name>Dump table ORP </name>
<class>eu.dnetlib.dhp.oa.graph.dump.country.SparkFindResultWithCountry</class>
<jar>dump-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
</spark-opts>
<arg>--sourcePath</arg><arg>${sourcePath}/otherresearchproduct</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.OtherResearchProduct</arg>
<arg>--outputPath</arg><arg>${outputPath}</arg>
<arg>--resultType</arg><arg>otherresearchproduct</arg>
<arg>--resultWithCountry</arg><arg>${workingDir}/resultsInCountry</arg>
</spark>
<ok to="join_dump"/>
<error to="Kill"/>
</action>
<action name="select_software">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master>
<mode>cluster</mode>
<name>Dump table software </name>
<class>eu.dnetlib.dhp.oa.graph.dump.country.SparkFindResultWithCountry</class>
<jar>dump-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
</spark-opts>
<arg>--sourcePath</arg><arg>${sourcePath}/software</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Software</arg>
<arg>--outputPath</arg><arg>${outputPath}</arg>
<arg>--resultType</arg><arg>software</arg>
<arg>--resultWithCountry</arg><arg>${workingDir}/resultsInCountry</arg>
</spark>
<ok to="join_dump"/>
<error to="Kill"/>
</action>
<join name="join_dump" to="select_subset"/>
<action name="select_subset">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master>
<mode>cluster</mode>
<name>Select valid table relation </name>
<class>eu.dnetlib.dhp.oa.graph.dump.subset.SparkSelectSubset</class>
<jar>dump-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
--conf spark.sql.shuffle.partitions=3840
</spark-opts>
<arg>--sourcePath</arg><arg>${sourcePath}</arg>
<arg>--outputPath</arg><arg>${outputPath}</arg>
<arg>--removeSet</arg><arg>${removeSet}</arg>
</spark>
<ok to="fork_dump_community"/>
<error to="Kill"/>
</action>
<fork name="fork_dump_community">
<path start="dump_publication"/>
<path start="dump_dataset"/>
<path start="dump_orp"/>
<path start="dump_software"/>
</fork>
<action name="dump_publication">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master>
<mode>cluster</mode>
<name>Dump table publication for community/funder related products</name>
<class>eu.dnetlib.dhp.oa.graph.dump.community.SparkDumpCommunityProducts</class>
<jar>dump-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
</spark-opts>
<arg>--sourcePath</arg><arg>${outputPath}/original/publication</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Publication</arg>
<arg>--outputPath</arg><arg>${workingDir}/dump/publication</arg>
<arg>--communityMapPath</arg><arg>${workingDir}/communityMap</arg>
</spark>
<ok to="join_dump_comm"/>
<error to="Kill"/>
</action>
<action name="dump_dataset">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master>
<mode>cluster</mode>
<name>Dump table dataset for community/funder related products</name>
<class>eu.dnetlib.dhp.oa.graph.dump.community.SparkDumpCommunityProducts</class>
<jar>dump-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
</spark-opts>
<arg>--sourcePath</arg><arg>${outputPath}/original/dataset</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Dataset</arg>
<arg>--outputPath</arg><arg>${workingDir}/dump/dataset</arg>
<arg>--communityMapPath</arg><arg>${workingDir}/communityMap</arg>
</spark>
<ok to="join_dump_comm"/>
<error to="Kill"/>
</action>
<action name="dump_orp">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master>
<mode>cluster</mode>
<name>Dump table ORP for community related products</name>
<class>eu.dnetlib.dhp.oa.graph.dump.community.SparkDumpCommunityProducts</class>
<jar>dump-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
</spark-opts>
<arg>--sourcePath</arg><arg>${outputPath}/original/otherresearchproduct</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.OtherResearchProduct</arg>
<arg>--outputPath</arg><arg>${workingDir}/dump/otherresearchproduct</arg>
<arg>--communityMapPath</arg><arg>${workingDir}/communityMap</arg>
</spark>
<ok to="join_dump_comm"/>
<error to="Kill"/>
</action>
<action name="dump_software">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master>
<mode>cluster</mode>
<name>Dump table software for community related products</name>
<class>eu.dnetlib.dhp.oa.graph.dump.community.SparkDumpCommunityProducts</class>
<jar>dump-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
</spark-opts>
<arg>--sourcePath</arg><arg>${outputPath}/original/software</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Software</arg>
<arg>--outputPath</arg><arg>${workingDir}/dump/software</arg>
<arg>--communityMapPath</arg><arg>${workingDir}/communityMap</arg>
</spark>
<ok to="join_dump_comm"/>
<error to="Kill"/>
</action>
<join name="join_dump_comm" to="prepareResultProject"/>
<action name="prepareResultProject">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master>
<mode>cluster</mode>
<name>Prepare association result subset of project info</name>
<class>eu.dnetlib.dhp.oa.graph.dump.community.SparkPrepareResultProject</class>
<jar>dump-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
</spark-opts>
<arg>--sourcePath</arg><arg>${outputPath}/original</arg>
<arg>--outputPath</arg><arg>${workingDir}/preparedInfo</arg>
</spark>
<ok to="fork_extendWithProject"/>
<error to="Kill"/>
</action>
<fork name="fork_extendWithProject">
<path start="extend_publication"/>
<path start="extend_dataset"/>
<path start="extend_orp"/>
<path start="extend_software"/>
</fork>
<action name="extend_publication">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master>
<mode>cluster</mode>
<name>Extend dumped publications with information about project</name>
<class>eu.dnetlib.dhp.oa.graph.dump.community.SparkUpdateProjectInfo</class>
<jar>dump-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
</spark-opts>
<arg>--sourcePath</arg><arg>${workingDir}/dump/publication</arg>
<arg>--outputPath</arg><arg>${outputPath}/dump/publication</arg>
<arg>--preparedInfoPath</arg><arg>${workingDir}/preparedInfo</arg>
</spark>
<ok to="join_extend"/>
<error to="Kill"/>
</action>
<action name="extend_dataset">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master>
<mode>cluster</mode>
<name>Extend dumped dataset with information about project</name>
<class>eu.dnetlib.dhp.oa.graph.dump.community.SparkUpdateProjectInfo</class>
<jar>dump-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
</spark-opts>
<arg>--sourcePath</arg><arg>${workingDir}/dump/dataset</arg>
<arg>--outputPath</arg><arg>${outputPath}/dump/dataset</arg>
<arg>--preparedInfoPath</arg><arg>${workingDir}/preparedInfo</arg>
</spark>
<ok to="join_extend"/>
<error to="Kill"/>
</action>
<action name="extend_orp">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master>
<mode>cluster</mode>
<name>Extend dumped ORP with information about project</name>
<class>eu.dnetlib.dhp.oa.graph.dump.community.SparkUpdateProjectInfo</class>
<jar>dump-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
</spark-opts>
<arg>--sourcePath</arg><arg>${workingDir}/dump/otherresearchproduct</arg>
<arg>--outputPath</arg><arg>${outputPath}/dump/otherresearchproduct</arg>
<arg>--preparedInfoPath</arg><arg>${workingDir}/preparedInfo</arg>
</spark>
<ok to="join_extend"/>
<error to="Kill"/>
</action>
<action name="extend_software">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master>
<mode>cluster</mode>
<name>Extend dumped software with information about project</name>
<class>eu.dnetlib.dhp.oa.graph.dump.community.SparkUpdateProjectInfo</class>
<jar>dump-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
</spark-opts>
<arg>--sourcePath</arg><arg>${workingDir}/dump/software</arg>
<arg>--outputPath</arg><arg>${outputPath}/dump/software</arg>
<arg>--preparedInfoPath</arg><arg>${workingDir}/preparedInfo</arg>
</spark>
<ok to="join_extend"/>
<error to="Kill"/>
</action>
<join name="join_extend" to="End"/>
<kill name="Kill">
<message>Sub-workflow dump complete failed with error message ${wf:errorMessage()}
</message>
</kill>
<end name="End" />
</workflow-app>

View File

@@ -0,0 +1,42 @@
[
{
"paramName":"s",
"paramLongName":"sourcePath",
"paramDescription": "the path of the sequencial file to read",
"paramRequired": true
},
{
"paramName": "out",
"paramLongName": "outputPath",
"paramDescription": "the path used to store temporary output files",
"paramRequired": true
},
{
"paramName": "ssm",
"paramLongName": "isSparkSessionManaged",
"paramDescription": "true if the spark session is managed, false otherwise",
"paramRequired": false
},
{
"paramName": "c",
"paramLongName": "resultWithCountry",
"paramDescription": "the path of the id of results associated to a given country",
"paramRequired": true
},
{
"paramName":"tn",
"paramLongName":"resultTableName",
"paramDescription": "the name of the result table we are currently working on",
"paramRequired": true
},
{
"paramName":"rt",
"paramLongName":"resultType",
"paramDescription": "",
"paramRequired": true
}
]

View File

@@ -0,0 +1,29 @@
[
{
"paramName":"s",
"paramLongName":"sourcePath",
"paramDescription": "the path of the sequencial file to read",
"paramRequired": true
},
{
"paramName": "out",
"paramLongName": "outputPath",
"paramDescription": "the path used to store temporary output files",
"paramRequired": true
},
{
"paramName": "ssm",
"paramLongName": "isSparkSessionManaged",
"paramDescription": "true if the spark session is managed, false otherwise",
"paramRequired": false
},
{
"paramName": "c",
"paramLongName": "country",
"paramDescription": "the path of the association result projectlist",
"paramRequired": true
}
]