forked from D-Net/dnet-hadoop

added shaded libs module

parent: e1848b7603
commit: 90c768dde6

@@ -0,0 +1,52 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <parent>
        <artifactId>dhp-build</artifactId>
        <groupId>eu.dnetlib.dhp</groupId>
        <version>1.1.7-SNAPSHOT</version>
    </parent>
    <modelVersion>4.0.0</modelVersion>

    <artifactId>dhp-shaded-libs</artifactId>
    <dependencies>
        <dependency>
            <groupId>com.google.guava</groupId>
            <artifactId>guava</artifactId>
            <version>23.3-jre</version>
        </dependency>
    </dependencies>

    <build>
        <plugins>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-shade-plugin</artifactId>
                <version>3.2.3</version>
                <executions>
                    <execution>
                        <phase>package</phase>
                        <goals>
                            <goal>shade</goal>
                        </goals>
                        <configuration>
                            <relocations>
                                <relocation>
                                    <pattern>com.google.guava</pattern>
                                    <shadedPattern>shaded.com.google.guava</shadedPattern>
                                </relocation>
                            </relocations>
                            <artifactSet>
                                <includes>
                                    <include>*:*</include>
                                </includes>
                            </artifactSet>
                        </configuration>
                    </execution>
                </executions>
            </plugin>
        </plugins>
    </build>

</project>
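
A note on the relocation above: maven-shade-plugin rewrites bytecode references whose package prefix matches <pattern>, so only class names under com.google.guava are moved to shaded.com.google.guava. Guava's own classes actually live under the com.google.common prefix, so the pattern must match the real package names for the relocation to take effect. A minimal, hypothetical sketch (illustrative only, not part of this commit) of how a consumer could check which Guava copy it resolves at runtime:

    public class RelocationCheck {
        public static void main(String[] args) throws Exception {
            // Resolves whichever Joiner is visible on the classpath; inside a
            // shaded jar the relocated copy would appear under the configured
            // shadedPattern prefix instead of the original package name.
            Class<?> joiner = Class.forName("com.google.common.base.Joiner");
            System.out.println(joiner.getName() + " loaded from "
                    + joiner.getProtectionDomain().getCodeSource());
        }
    }
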
@@ -1,7 +1,5 @@
package eu.dnetlib.dhp;

import java.io.File;

import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.gson.Gson;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
@@ -10,21 +8,23 @@ import eu.dnetlib.dhp.community.ProtoMap;
import eu.dnetlib.dhp.community.QueryInformationSystem;
import eu.dnetlib.dhp.community.ResultTagger;
import eu.dnetlib.dhp.schema.oaf.*;
import java.io.File;
import org.apache.commons.io.IOUtils;
import org.apache.hadoop.io.Text;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.SparkSession;


public class SparkBulkTagJob {

    public static void main(String[] args) throws Exception {

        final ArgumentApplicationParser parser = new ArgumentApplicationParser(IOUtils.toString(SparkBulkTagJob.class.getResourceAsStream("/eu/dnetlib/dhp/input_bulktag_parameters.json")));
        final ArgumentApplicationParser parser =
                new ArgumentApplicationParser(
                        IOUtils.toString(
                                SparkBulkTagJob.class.getResourceAsStream(
                                        "/eu/dnetlib/dhp/input_bulktag_parameters.json")));
        parser.parseArgument(args);
        final SparkSession spark = SparkSession
                .builder()
        final SparkSession spark =
                SparkSession.builder()
                        .appName(SparkBulkTagJob.class.getSimpleName())
                        .master(parser.get("master"))
                        .enableHiveSupport()

@@ -35,7 +35,9 @@ public class SparkBulkTagJob {
        final String outputPath = "/tmp/provision/bulktagging";

        final ResultTagger resultTagger = new ResultTagger();
        ProtoMap protoMappingParams = new Gson().fromJson(parser.get("mappingProto"),ProtoMap.class);;
        ProtoMap protoMappingParams =
                new Gson().fromJson(parser.get("mappingProto"), ProtoMap.class);
        ;

        File directory = new File(outputPath);

@@ -43,31 +45,28 @@ public class SparkBulkTagJob {
            directory.mkdirs();
        }

        CommunityConfiguration cc = QueryInformationSystem.getCommunityConfiguration(parser.get("isLookupUrl"));
        CommunityConfiguration cc =
                QueryInformationSystem.getCommunityConfiguration(parser.get("isLookupUrl"));


        sc.sequenceFile(inputPath + "/publication", Text.class, Text.class)
                .map(item -> new ObjectMapper().readValue(item._2().toString(), Publication.class))
        sc.textFile(inputPath + "/publication")
                .map(item -> new ObjectMapper().readValue(item, Publication.class))
                .map(p -> resultTagger.enrichContextCriteria(p, cc, protoMappingParams))
                .map(p -> new ObjectMapper().writeValueAsString(p))
                .saveAsTextFile(outputPath+"/publication");
        sc.sequenceFile(inputPath + "/dataset", Text.class, Text.class)
                .map(item -> new ObjectMapper().readValue(item._2().toString(), Dataset.class))
                .saveAsTextFile(outputPath + "/publication");
        sc.textFile(inputPath + "/dataset")
                .map(item -> new ObjectMapper().readValue(item, Dataset.class))
                .map(p -> resultTagger.enrichContextCriteria(p, cc, protoMappingParams))
                .map(p -> new ObjectMapper().writeValueAsString(p))
                .saveAsTextFile(outputPath+"/dataset");
        sc.sequenceFile(inputPath + "/software", Text.class, Text.class)
                .map(item -> new ObjectMapper().readValue(item._2().toString(), Software.class))
                .saveAsTextFile(outputPath + "/dataset");
        sc.textFile(inputPath + "/software")
                .map(item -> new ObjectMapper().readValue(item, Software.class))
                .map(p -> resultTagger.enrichContextCriteria(p, cc, protoMappingParams))
                .map(p -> new ObjectMapper().writeValueAsString(p))
                .saveAsTextFile(outputPath+"/software");
        sc.sequenceFile(inputPath + "/otherresearchproduct", Text.class, Text.class)
                .map(item -> new ObjectMapper().readValue(item._2().toString(), OtherResearchProduct.class))
                .saveAsTextFile(outputPath + "/software");
        sc.textFile(inputPath + "/otherresearchproduct")
                .map(item -> new ObjectMapper().readValue(item, OtherResearchProduct.class))
                .map(p -> resultTagger.enrichContextCriteria(p, cc, protoMappingParams))
                .map(p -> new ObjectMapper().writeValueAsString(p))
                .saveAsTextFile(outputPath+"/otherresearchproduct");



                .saveAsTextFile(outputPath + "/otherresearchproduct");
    }
}

@@ -0,0 +1,161 @@
package eu.dnetlib.dhp;

import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;

import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.gson.Gson;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.community.*;
import eu.dnetlib.dhp.schema.oaf.*;
import java.util.Optional;
import org.apache.commons.io.IOUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SaveMode;
import org.apache.spark.sql.SparkSession;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class SparkBulkTagJob2 {

    private static final Logger log = LoggerFactory.getLogger(SparkBulkTagJob2.class);

    private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();

    public static void main(String[] args) throws Exception {
        String jsonConfiguration =
                IOUtils.toString(
                        SparkBulkTagJob2.class.getResourceAsStream(
                                "/eu/dnetlib/dhp/input_bulktag_parameters.json"));

        final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);

        parser.parseArgument(args);

        Boolean isSparkSessionManaged =
                Optional.ofNullable(parser.get("isSparkSessionManaged"))
                        .map(Boolean::valueOf)
                        .orElse(Boolean.TRUE);
        log.info("isSparkSessionManaged: {}", isSparkSessionManaged);

        Boolean isTest =
                Optional.ofNullable(parser.get("isTest"))
                        .map(Boolean::valueOf)
                        .orElse(Boolean.FALSE);
        log.info("isTest: {} ", isTest);

        final String inputPath = parser.get("sourcePath");
        log.info("inputPath: {}", inputPath);

        final String outputPath = parser.get("outputPath");
        log.info("outputPath: {}", outputPath);

        ProtoMap protoMappingParams = new Gson().fromJson(parser.get("protoMap"), ProtoMap.class);
        ;
        log.info("protoMap: {}", new Gson().toJson(protoMappingParams));

        final String resultClassName = parser.get("resultTableName");
        log.info("resultTableName: {}", resultClassName);

        final Boolean saveGraph =
                Optional.ofNullable(parser.get("saveGraph"))
                        .map(Boolean::valueOf)
                        .orElse(Boolean.TRUE);
        log.info("saveGraph: {}", saveGraph);

        Class<? extends Result> resultClazz =
                (Class<? extends Result>) Class.forName(resultClassName);

        SparkConf conf = new SparkConf();
        CommunityConfiguration cc;

        String taggingConf = parser.get("taggingConf");

        if (isTest) {
            cc = CommunityConfigurationFactory.fromJson(taggingConf);
        } else {
            cc = QueryInformationSystem.getCommunityConfiguration(parser.get("isLookupUrl"));
        }

        runWithSparkSession(
                conf,
                isSparkSessionManaged,
                spark -> {
                    execBulkTag(spark, inputPath, outputPath, protoMappingParams, resultClazz, cc);
                });

        //        runWithSparkSession(conf, isSparkSessionManaged,
        //                spark -> {
        //                    if(isTest(parser)) {
        //                        removeOutputDir(spark, outputPath);
        //                    }
        //                    if(saveGraph)
        //                        execPropagation(spark, possibleUpdates, inputPath, outputPath,
        //                            resultClazz);
        //                });
        //
        //
        //
        //
        //
        //
        //        sc.textFile(inputPath + "/publication")
        //                .map(item -> new ObjectMapper().readValue(item, Publication.class))
        //                .map(p -> resultTagger.enrichContextCriteria(p, cc, protoMappingParams))
        //                .map(p -> new ObjectMapper().writeValueAsString(p))
        //                .saveAsTextFile(outputPath+"/publication");
        //        sc.textFile(inputPath + "/dataset")
        //                .map(item -> new ObjectMapper().readValue(item, Dataset.class))
        //                .map(p -> resultTagger.enrichContextCriteria(p, cc, protoMappingParams))
        //                .map(p -> new ObjectMapper().writeValueAsString(p))
        //                .saveAsTextFile(outputPath+"/dataset");
        //        sc.textFile(inputPath + "/software")
        //                .map(item -> new ObjectMapper().readValue(item, Software.class))
        //                .map(p -> resultTagger.enrichContextCriteria(p, cc, protoMappingParams))
        //                .map(p -> new ObjectMapper().writeValueAsString(p))
        //                .saveAsTextFile(outputPath+"/software");
        //        sc.textFile(inputPath + "/otherresearchproduct")
        //                .map(item -> new ObjectMapper().readValue(item,
        //                        OtherResearchProduct.class))
        //                .map(p -> resultTagger.enrichContextCriteria(p, cc, protoMappingParams))
        //                .map(p -> new ObjectMapper().writeValueAsString(p))
        //                .saveAsTextFile(outputPath+"/otherresearchproduct");
        //

    }

    private static <R extends Result> void execBulkTag(
            SparkSession spark,
            String inputPath,
            String outputPath,
            ProtoMap protoMappingParams,
            Class<R> resultClazz,
            CommunityConfiguration communityConfiguration) {

        ResultTagger resultTagger = new ResultTagger();
        Dataset<R> result = readPathEntity(spark, inputPath, resultClazz);
        result.map(
                        value ->
                                resultTagger.enrichContextCriteria(
                                        value, communityConfiguration, protoMappingParams),
                        Encoders.bean(resultClazz))
                .toJSON()
                .write()
                .mode(SaveMode.Overwrite)
                .option("compression", "gzip")
                .text(outputPath);
    }

    private static <R extends Result> org.apache.spark.sql.Dataset<R> readPathEntity(
            SparkSession spark, String inputEntityPath, Class<R> resultClazz) {

        return spark.read()
                .textFile(inputEntityPath)
                .map(
                        (MapFunction<String, R>)
                                value -> OBJECT_MAPPER.readValue(value, resultClazz),
                        Encoders.bean(resultClazz));
    }
}
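
For context, runWithSparkSession (imported above from the dhp-common module) wraps the job body so that the SparkSession is created and stopped only when the job itself manages the session lifecycle. A hedged sketch of the shape such a helper typically has; the actual implementation in dnet-hadoop's common module may differ (for instance, it may accept a throwing consumer):

    import java.util.function.Consumer;
    import org.apache.spark.SparkConf;
    import org.apache.spark.sql.SparkSession;

    public class SparkSessionSupportSketch {
        // Builds (or reuses) a session, runs the body, and stops the session
        // only when the caller asked the helper to manage its lifecycle.
        public static void runWithSparkSession(
                SparkConf conf, Boolean isSparkSessionManaged, Consumer<SparkSession> fn) {
            SparkSession spark = SparkSession.builder().config(conf).getOrCreate();
            try {
                fn.accept(spark);
            } finally {
                if (Boolean.TRUE.equals(isSparkSessionManaged)) {
                    spark.stop();
                }
            }
        }
    }
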
@@ -1,16 +1,14 @@
package eu.dnetlib.dhp.community;

import com.google.gson.Gson;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.List;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

import java.util.ArrayList;
import java.util.List;

/**
 * Created by miriam on 01/08/2018.
 */
public class Community {
/** Created by miriam on 01/08/2018. */
public class Community implements Serializable {

    private static final Log log = LogFactory.getLog(Community.class);

@@ -19,14 +17,15 @@ public class Community {
    private List<Datasource> datasources = new ArrayList<>();
    private List<ZenodoCommunity> zenodoCommunities = new ArrayList<>();

    public String toJson() {
        final Gson g = new Gson();
        return g.toJson(this);
    }

    public boolean isValid() {
        return !getSubjects().isEmpty() || !getDatasources().isEmpty() || !getZenodoCommunities().isEmpty();
        return !getSubjects().isEmpty()
                || !getDatasources().isEmpty()
                || !getZenodoCommunities().isEmpty();
    }

    public String getId() {

@@ -60,5 +59,4 @@ public class Community {
    public void setZenodoCommunities(List<ZenodoCommunity> zenodoCommunities) {
        this.zenodoCommunities = zenodoCommunities;
    }

}

@@ -3,38 +3,58 @@ package eu.dnetlib.dhp.community;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.gson.Gson;

import com.google.gson.GsonBuilder;

import eu.dnetlib.dhp.selectioncriteria.InterfaceAdapter;
import eu.dnetlib.dhp.selectioncriteria.Selection;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;


import java.io.Serializable;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

/**
 * Created by miriam on 02/08/2018.
 */
public class CommunityConfiguration {
/** Created by miriam on 02/08/2018. */
public class CommunityConfiguration implements Serializable {

    private static final Log log = LogFactory.getLog(CommunityConfiguration.class);

    private Map<String, Community> communities;

    private Map<String,Community> communities;
    // map subject -> communityid
    private Map<String, List<Pair<String, SelectionConstraints>>> subjectMap = new HashMap<>();
    // map datasourceid -> communityid
    private Map<String, List<Pair<String, SelectionConstraints>>> datasourceMap = new HashMap<>();
    // map zenodocommunityid -> communityid
    private Map<String, List<Pair<String, SelectionConstraints>>> zenodocommunityMap =
            new HashMap<>();

    public Map<String, List<Pair<String, SelectionConstraints>>> getSubjectMap() {
        return subjectMap;
    }

    //map subject -> communityid
    private transient Map<String,List<Pair<String,SelectionConstraints>>> subjectMap = new HashMap<>();
    //map datasourceid -> communityid
    private transient Map<String,List<Pair<String,SelectionConstraints>>> datasourceMap = new HashMap<>();
    //map zenodocommunityid -> communityid
    private transient Map<String,List<Pair<String,SelectionConstraints>>> zenodocommunityMap = new HashMap<>();
    public void setSubjectMap(Map<String, List<Pair<String, SelectionConstraints>>> subjectMap) {
        this.subjectMap = subjectMap;
    }

    public Map<String, List<Pair<String, SelectionConstraints>>> getDatasourceMap() {
        return datasourceMap;
    }

    public void setDatasourceMap(
            Map<String, List<Pair<String, SelectionConstraints>>> datasourceMap) {
        this.datasourceMap = datasourceMap;
    }

    public Map<String, List<Pair<String, SelectionConstraints>>> getZenodocommunityMap() {
        return zenodocommunityMap;
    }

    public void setZenodocommunityMap(
            Map<String, List<Pair<String, SelectionConstraints>>> zenodocommunityMap) {
        this.zenodocommunityMap = zenodocommunityMap;
    }

    CommunityConfiguration(final Map<String, Community> communities) {
        this.communities = communities;

@@ -53,65 +73,67 @@ public class CommunityConfiguration {
        zenodocommunityMap = Maps.newHashMap();
    }


        for(Community c : getCommunities().values()) {
            //get subjects
        for (Community c : getCommunities().values()) {
            // get subjects
            final String id = c.getId();
            for(String sbj : c.getSubjects()){
                Pair<String,SelectionConstraints> p = new Pair<>(id,new SelectionConstraints());
                add(sbj.toLowerCase().trim() , p, subjectMap);
            for (String sbj : c.getSubjects()) {
                Pair<String, SelectionConstraints> p = new Pair<>(id, new SelectionConstraints());
                add(sbj.toLowerCase().trim(), p, subjectMap);
            }
            //get datasources
            for(Datasource d: c.getDatasources()){
            // get datasources
            for (Datasource d : c.getDatasources()) {

                add(d.getOpenaireId(),new Pair<>(id,d.getSelectionConstraints()),datasourceMap);
                add(d.getOpenaireId(), new Pair<>(id, d.getSelectionConstraints()), datasourceMap);
            }
            //get zenodo communities
            for(ZenodoCommunity zc : c.getZenodoCommunities()){
                add(zc.getZenodoCommunityId(),new Pair<>(id,zc.getSelCriteria()),zenodocommunityMap);
            // get zenodo communities
            for (ZenodoCommunity zc : c.getZenodoCommunities()) {
                add(
                        zc.getZenodoCommunityId(),
                        new Pair<>(id, zc.getSelCriteria()),
                        zenodocommunityMap);
            }

        }
    }

    private void add(String key, Pair<String,SelectionConstraints> value, Map<String,List<Pair<String,SelectionConstraints>>> map){
        List<Pair<String,SelectionConstraints>> values = map.get(key);
    private void add(
            String key,
            Pair<String, SelectionConstraints> value,
            Map<String, List<Pair<String, SelectionConstraints>>> map) {
        List<Pair<String, SelectionConstraints>> values = map.get(key);

        if (values == null){
        if (values == null) {
            values = new ArrayList<>();
            map.put(key,values);
            map.put(key, values);
        }
        values.add(value);
    }

    public List<Pair<String,SelectionConstraints>> getCommunityForSubject(String sbj){
    public List<Pair<String, SelectionConstraints>> getCommunityForSubject(String sbj) {
        return subjectMap.get(sbj);
    }

    public List<Pair<String,SelectionConstraints>> getCommunityForDatasource(String dts){
    public List<Pair<String, SelectionConstraints>> getCommunityForDatasource(String dts) {
        return datasourceMap.get(dts);
    }

    public List<String> getCommunityForDatasource(
            final String dts, final Map<String, List<String>> param) {
        List<Pair<String, SelectionConstraints>> lp = datasourceMap.get(dts);
        if (lp == null) return Lists.newArrayList();

    public List<String> getCommunityForDatasource(final String dts, final Map<String, List<String>> param) {
        List<Pair<String,SelectionConstraints>> lp = datasourceMap.get(dts);
        if (lp==null)
            return Lists.newArrayList();

        return lp.stream().map(p -> {
            if (p.getSnd() == null)
                return p.getFst();
        return lp.stream()
                .map(
                        p -> {
                            if (p.getSnd() == null) return p.getFst();
                            if (((SelectionConstraints) p.getSnd()).verifyCriteria(param))
                                return p.getFst();
                            else
                                return null;
        }).filter(st->(st!=null)).collect(Collectors.toList());


                            else return null;
                        })
                .filter(st -> (st != null))
                .collect(Collectors.toList());
    }

    public List<Pair<String,SelectionConstraints>> getCommunityForZenodoCommunity(String zc){
    public List<Pair<String, SelectionConstraints>> getCommunityForZenodoCommunity(String zc) {
        return zenodocommunityMap.get(zc);
    }

@@ -125,7 +147,7 @@ public class CommunityConfiguration {
        return getContextIds(datasourceMap.get(value.toLowerCase()));
    }

    public List<String> getCommunityForZenodoCommunityValue(String value){
    public List<String> getCommunityForZenodoCommunityValue(String value) {

        return getContextIds(zenodocommunityMap.get(value.toLowerCase()));
    }

@@ -137,7 +159,6 @@ public class CommunityConfiguration {
        return Lists.newArrayList();
    }

    public Map<String, Community> getCommunities() {
        return communities;
    }

@@ -158,7 +179,7 @@ public class CommunityConfiguration {
        return communities.keySet().size();
    }

    public Community getCommunityById(String id){
    public Community getCommunityById(String id) {
        return communities.get(id);
    }
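
The new GsonBuilder/InterfaceAdapter imports suggest the configuration is (de)serialized with a custom type adapter, since SelectionConstraints holds values typed by the Selection interface that plain Gson cannot instantiate. A hedged sketch of how such an adapter is typically registered — the exact call site is not shown in this hunk, and the sketch assumes InterfaceAdapter has a no-arg constructor:

    import com.google.gson.Gson;
    import com.google.gson.GsonBuilder;
    import eu.dnetlib.dhp.selectioncriteria.InterfaceAdapter;
    import eu.dnetlib.dhp.selectioncriteria.Selection;

    public class CommunityConfigurationJsonSketch {
        // Returns a Gson able to round-trip fields declared as Selection.
        public static Gson selectionAwareGson() {
            return new GsonBuilder()
                    .registerTypeAdapter(Selection.class, new InterfaceAdapter())
                    .create();
        }
    }
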
@@ -1,24 +1,20 @@
package eu.dnetlib.dhp.community;


import com.google.gson.Gson;

import eu.dnetlib.dhp.selectioncriteria.VerbResolver;
import java.io.Serializable;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.dom4j.Node;

/**
 * Created by miriam on 01/08/2018.
 */
public class Datasource {
/** Created by miriam on 01/08/2018. */
public class Datasource implements Serializable {
    private static final Log log = LogFactory.getLog(Datasource.class);

    private String openaireId;

    private SelectionConstraints selectionConstraints;


    public SelectionConstraints getSelCriteria() {
        return selectionConstraints;
    }

@@ -43,23 +39,19 @@ public class Datasource {
        this.openaireId = openaireId;
    }

    private void setSelCriteria(String json, VerbResolver resolver){
    private void setSelCriteria(String json, VerbResolver resolver) {
        log.info("Selection constraints for datasource = " + json);
        selectionConstraints = new Gson().fromJson(json, SelectionConstraints.class);

        selectionConstraints.setSelection(resolver);
    }

    public void setSelCriteria(Node n, VerbResolver resolver){
        try{
            setSelCriteria(n.getText(),resolver);
        }catch(Exception e) {
    public void setSelCriteria(Node n, VerbResolver resolver) {
        try {
            setSelCriteria(n.getText(), resolver);
        } catch (Exception e) {
            log.info("not set selection criteria... ");
            selectionConstraints =null;
            selectionConstraints = null;
        }

    }


}

@@ -1,11 +1,10 @@
package eu.dnetlib.dhp.community;

import com.google.gson.Gson;
import java.io.Serializable;

/**
 * Created by miriam on 03/08/2018.
 */
public class Pair<A,B> {
/** Created by miriam on 03/08/2018. */
public class Pair<A, B> implements Serializable {
    private A fst;
    private B snd;

@@ -27,12 +26,12 @@ public class Pair<A,B> {
        return this;
    }

    public Pair(A a, B b){
    public Pair(A a, B b) {
        fst = a;
        snd = b;
    }

    public String toJson(){
    public String toJson() {
        return new Gson().toJson(this);
    }
}

@@ -1,10 +1,11 @@
package eu.dnetlib.dhp.community;

import java.io.Serializable;
import java.util.HashMap;

public class ProtoMap extends HashMap<String,String> {
public class ProtoMap extends HashMap<String, String> implements Serializable {

    public ProtoMap(){
    public ProtoMap() {
        super();
    }
}
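
ProtoMap is simply a String-to-String map from a logical field name to the JSONPath expression used to extract that field from the result's JSON serialization. A short sketch; the two paths below mirror the protoMap passed by the test further down in this change set:

    import eu.dnetlib.dhp.community.ProtoMap;

    public class ProtoMapExample {
        public static void main(String[] args) {
            ProtoMap protoMap = new ProtoMap();
            // field name -> JSONPath evaluated against the serialized result
            protoMap.put("author", "$['author'][*]['fullname']");
            protoMap.put("title", "$['title'][*]['value']");
            System.out.println(protoMap);
        }
    }
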
@@ -1,42 +1,40 @@
package eu.dnetlib.dhp.community;

import static eu.dnetlib.dhp.community.TagginConstants.*;

import com.google.gson.Gson;
import com.jayway.jsonpath.DocumentContext;
import com.jayway.jsonpath.JsonPath;
import eu.dnetlib.dhp.schema.oaf.*;
import org.apache.commons.lang3.StringUtils;

import java.io.Serializable;
import java.util.*;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.apache.commons.lang3.StringUtils;

import static eu.dnetlib.dhp.community.TagginConstants.*;


/**
 * Created by miriam on 02/08/2018.
 */
public class ResultTagger {

/** Created by miriam on 02/08/2018. */
public class ResultTagger implements Serializable {

    private String trust = "0.8";


    private boolean clearContext(Result result){
    private boolean clearContext(Result result) {
        int tmp = result.getContext().size();
        List<Context> clist = result.getContext().stream()
                .filter(c -> (!c.getId().contains(ZENODO_COMMUNITY_INDICATOR))).collect(Collectors.toList());
        List<Context> clist =
                result.getContext().stream()
                        .filter(c -> (!c.getId().contains(ZENODO_COMMUNITY_INDICATOR)))
                        .collect(Collectors.toList());
        result.setContext(clist);
        return (tmp != clist.size());
    }

    private Map<String,List<String>> getParamMap(final Result result, Map<String,String> params) {
        Map<String,List<String>> param = new HashMap<>();
        String json = new Gson().toJson(result,Result.class);
    private Map<String, List<String>> getParamMap(final Result result, Map<String, String> params) {
        Map<String, List<String>> param = new HashMap<>();
        String json = new Gson().toJson(result, Result.class);
        DocumentContext jsonContext = JsonPath.parse(json);
        if (params == null){
        if (params == null) {
            params = new HashMap<>();
        }
        for(String key : params.keySet()) {
        for (String key : params.keySet()) {
            try {
                param.put(key, jsonContext.read(params.get(key)));
            } catch (com.jayway.jsonpath.PathNotFoundException e) {

@@ -45,23 +43,26 @@ public class ResultTagger {
            }
        }
        return param;

    }

    public <R extends Result> R enrichContextCriteria(
            final R result, final CommunityConfiguration conf, final Map<String, String> criteria) {

    public Result enrichContextCriteria(final Result result, final CommunityConfiguration conf, final Map<String,String> criteria) {
        //    }
        //    public Result enrichContextCriteria(final Result result, final CommunityConfiguration
        // conf, final Map<String,String> criteria) {
        final Map<String, List<String>> param = getParamMap(result, criteria);

        //Verify if the entity is deletedbyinference. In case verify if to clean the context list from all the zenodo communities
        if(result.getDataInfo().getDeletedbyinference()){
            return result;
        // Verify if the entity is deletedbyinference. In case verify if to clean the context list
        // from all the zenodo communities
        if (result.getDataInfo().getDeletedbyinference()) {
            if (clearContext(result)) return result;
        }

        //communities contains all the communities to be added as context for the result
        // communities contains all the communities to be added as context for the result
        final Set<String> communities = new HashSet<>();

        //tagging for Subject
        // tagging for Subject
        final Set<String> subjects = new HashSet<>();
        result.getSubject().stream()
                .map(subject -> subject.getValue())

@@ -73,79 +74,104 @@ public class ResultTagger {

        communities.addAll(subjects);

        //Tagging for datasource
        // Tagging for datasource
        final Set<String> datasources = new HashSet<>();
        final Set<String> tmp = new HashSet<>();

        for(Instance i : result.getInstance()){
            tmp.add(StringUtils.substringAfter(i.getCollectedfrom().getKey(),"|"));
            tmp.add(StringUtils.substringAfter(i.getHostedby().getKey(),"|"));
        for (Instance i : result.getInstance()) {
            tmp.add(StringUtils.substringAfter(i.getCollectedfrom().getKey(), "|"));
            tmp.add(StringUtils.substringAfter(i.getHostedby().getKey(), "|"));
        }

        result.getInstance()
                .stream()
        result.getInstance().stream()
                .map(i -> new Pair<>(i.getCollectedfrom().getKey(), i.getHostedby().getKey()))
                .flatMap(p -> Stream.of(p.getFst(), p.getSnd()))
                .map(s -> StringUtils.substringAfter(s, "|"))
                .collect(Collectors.toCollection(HashSet::new))
                .forEach(dsId -> datasources.addAll(conf.getCommunityForDatasource(dsId,param)));
                .forEach(dsId -> datasources.addAll(conf.getCommunityForDatasource(dsId, param)));

        communities.addAll(datasources);

        /*Tagging for Zenodo Communities*/
        final Set<String> czenodo = new HashSet<>();
        //final ResultProtos.Result.Metadata.Builder mBuilder = builder.getEntityBuilder().getResultBuilder().getMetadataBuilder();
        result.getContext()
                .stream()
        result.getContext().stream()
                .filter(c -> c.getId().contains(ZENODO_COMMUNITY_INDICATOR))
                .collect(Collectors.toList())
                .forEach(c->czenodo.addAll(conf.getCommunityForZenodoCommunityValue(c.getId().substring(c.getId().lastIndexOf("/")+1).trim())));
                .forEach(
                        c ->
                                czenodo.addAll(
                                        conf.getCommunityForZenodoCommunityValue(
                                                c.getId()
                                                        .substring(c.getId().lastIndexOf("/") + 1)
                                                        .trim())));

        communities.addAll(czenodo);

        clearContext(result);

        /*Verify if there is something to bulktag*/
        if(communities.isEmpty()){
        if (communities.isEmpty()) {
            return result;

        }

        result.getContext()
                .stream()
                .map(c -> {
                    if(communities.contains(c.getId())){
        result.getContext().stream()
                .map(
                        c -> {
                            if (communities.contains(c.getId())) {
                                List<DataInfo> dataInfoList = c.getDataInfo();
                                if (subjects.contains(c.getId()))
                                    dataInfoList.add(getDataInfo(BULKTAG_DATA_INFO_TYPE, CLASS_ID_SUBJECT, CLASS_NAME_BULKTAG_SUBJECT));
                                    dataInfoList.add(
                                            getDataInfo(
                                                    BULKTAG_DATA_INFO_TYPE,
                                                    CLASS_ID_SUBJECT,
                                                    CLASS_NAME_BULKTAG_SUBJECT));
                                if (datasources.contains(c.getId()))
                                    dataInfoList.add(getDataInfo(BULKTAG_DATA_INFO_TYPE, CLASS_ID_DATASOURCE, CLASS_NAME_BULKTAG_DATASOURCE));
                                    dataInfoList.add(
                                            getDataInfo(
                                                    BULKTAG_DATA_INFO_TYPE,
                                                    CLASS_ID_DATASOURCE,
                                                    CLASS_NAME_BULKTAG_DATASOURCE));
                                if (czenodo.contains(c.getId()))
                                    dataInfoList.add(getDataInfo(BULKTAG_DATA_INFO_TYPE, CLASS_ID_CZENODO, CLASS_NAME_BULKTAG_ZENODO));
                                    dataInfoList.add(
                                            getDataInfo(
                                                    BULKTAG_DATA_INFO_TYPE,
                                                    CLASS_ID_CZENODO,
                                                    CLASS_NAME_BULKTAG_ZENODO));
                            }
                            return c;
                        })
                .collect(Collectors.toList());

        communities.removeAll(
                result.getContext().stream().map(c -> c.getId()).collect(Collectors.toSet()));

        communities.removeAll(result.getContext().stream().map(c -> c.getId()).collect(Collectors.toSet()));
        if (communities.isEmpty()) return result;

        if(communities.isEmpty())
            return result;

        List<Context> toaddcontext = communities
                .stream()
                .map(c -> {
        List<Context> toaddcontext =
                communities.stream()
                        .map(
                                c -> {
                                    Context context = new Context();
                                    context.setId(c);
                                    List<DataInfo> dataInfoList = Arrays.asList();
                                    if (subjects.contains(c))
                                        dataInfoList.add(getDataInfo(BULKTAG_DATA_INFO_TYPE, CLASS_ID_SUBJECT, CLASS_NAME_BULKTAG_SUBJECT));
                                        dataInfoList.add(
                                                getDataInfo(
                                                        BULKTAG_DATA_INFO_TYPE,
                                                        CLASS_ID_SUBJECT,
                                                        CLASS_NAME_BULKTAG_SUBJECT));
                                    if (datasources.contains(c))
                                        dataInfoList.add(getDataInfo(BULKTAG_DATA_INFO_TYPE, CLASS_ID_DATASOURCE, CLASS_NAME_BULKTAG_DATASOURCE));
                                        dataInfoList.add(
                                                getDataInfo(
                                                        BULKTAG_DATA_INFO_TYPE,
                                                        CLASS_ID_DATASOURCE,
                                                        CLASS_NAME_BULKTAG_DATASOURCE));
                                    if (czenodo.contains(c))
                                        dataInfoList.add(getDataInfo(BULKTAG_DATA_INFO_TYPE, CLASS_ID_CZENODO, CLASS_NAME_BULKTAG_ZENODO));
                                        dataInfoList.add(
                                                getDataInfo(
                                                        BULKTAG_DATA_INFO_TYPE,
                                                        CLASS_ID_CZENODO,
                                                        CLASS_NAME_BULKTAG_ZENODO));
                                    context.setDataInfo(dataInfoList);
                                    return context;
                                })

@@ -155,7 +181,8 @@ public class ResultTagger {
        return result;
    }

    public static DataInfo getDataInfo(String inference_provenance, String inference_class_id, String inference_class_name){
    public static DataInfo getDataInfo(
            String inference_provenance, String inference_class_id, String inference_class_name) {
        DataInfo di = new DataInfo();
        di.setInferred(true);
        di.setInferenceprovenance(inference_provenance);

@@ -171,5 +198,4 @@ public class ResultTagger {
        pa.setSchemename(DNET_SCHEMA_NAME);
        return pa;
    }

}
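
Putting the pieces together: the tagger extracts the protoMap fields from the result, matches them against the community configuration, and adds one Context entry per matched community. A hedged usage sketch built only from methods appearing in this change set (the tagging configuration JSON is assumed to be loaded elsewhere, e.g. from tagging_conf.json as in the test below):

    import eu.dnetlib.dhp.community.CommunityConfiguration;
    import eu.dnetlib.dhp.community.CommunityConfigurationFactory;
    import eu.dnetlib.dhp.community.ProtoMap;
    import eu.dnetlib.dhp.community.ResultTagger;
    import eu.dnetlib.dhp.schema.oaf.Publication;

    public class ResultTaggerUsageSketch {
        public static Publication tag(
                Publication p, String taggingConfJson, ProtoMap protoMap) {
            // Parse the community configuration from its JSON form and let the
            // tagger enrich the result's context list in place.
            CommunityConfiguration cc =
                    CommunityConfigurationFactory.fromJson(taggingConfJson);
            return new ResultTagger().enrichContextCriteria(p, cc, protoMap);
        }
    }
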
@@ -1,27 +1,23 @@
package eu.dnetlib.dhp.community;


public class TagginConstants {

    public static final String BULKTAG_DATA_INFO_TYPE = "bulktagging";

    public final static String BULKTAG_DATA_INFO_TYPE = "bulktagging";
    public static final String DNET_SCHEMA_NAME = "dnet:provenanceActions";
    public static final String DNET_SCHEMA_ID = "dnet:provenanceActions";

    public final static String DNET_SCHEMA_NAME = "dnet:provenanceActions";
    public final static String DNET_SCHEMA_ID = "dnet:provenanceActions";

    public final static String CLASS_ID_SUBJECT = "bulktagging:community:subject";
    public final static String CLASS_ID_DATASOURCE = "bulktagging:community:datasource";
    public final static String CLASS_ID_CZENODO = "bulktagging:community:zenodocommunity";

    public final static String SCHEMA_ID = "dnet:provenanceActions";
    public final static String COUNTER_GROUP = "Bulk Tagging";

    public final static String ZENODO_COMMUNITY_INDICATOR = "zenodo.org/communities/";

    public final static String CLASS_NAME_BULKTAG_SUBJECT = "Bulktagging for Community - Subject";
    public final static String CLASS_NAME_BULKTAG_DATASOURCE = "Bulktagging for Community - Datasource";
    public final static String CLASS_NAME_BULKTAG_ZENODO = "Bulktagging for Community - Zenodo";
    public static final String CLASS_ID_SUBJECT = "community:subject";
    public static final String CLASS_ID_DATASOURCE = "community:datasource";
    public static final String CLASS_ID_CZENODO = "community:zenodocommunity";

    public static final String SCHEMA_ID = "dnet:provenanceActions";
    public static final String COUNTER_GROUP = "Bulk Tagging";

    public static final String ZENODO_COMMUNITY_INDICATOR = "zenodo.org/communities/";

    public static final String CLASS_NAME_BULKTAG_SUBJECT = "Bulktagging for Community - Subject";
    public static final String CLASS_NAME_BULKTAG_DATASOURCE =
            "Bulktagging for Community - Datasource";
    public static final String CLASS_NAME_BULKTAG_ZENODO = "Bulktagging for Community - Zenodo";
}

@@ -1,13 +1,11 @@
package eu.dnetlib.dhp.community;

import com.google.gson.Gson;
import java.io.Serializable;
import org.dom4j.Node;


/**
 * Created by miriam on 01/08/2018.
 */
public class ZenodoCommunity {
/** Created by miriam on 01/08/2018. */
public class ZenodoCommunity implements Serializable {

    private String zenodoCommunityId;

@@ -29,18 +27,16 @@ public class ZenodoCommunity {
        this.selCriteria = selCriteria;
    }

    private void setSelCriteria(String json){
        //Type collectionType = new TypeToken<Collection<Constraints>>(){}.getType();
    private void setSelCriteria(String json) {
        // Type collectionType = new TypeToken<Collection<Constraints>>(){}.getType();
        selCriteria = new Gson().fromJson(json, SelectionConstraints.class);

    }

    public void setSelCriteria(Node n){
        if (n==null){
    public void setSelCriteria(Node n) {
        if (n == null) {
            selCriteria = null;
        }else{
        } else {
            setSelCriteria(n.getText());
        }
    }

}

@@ -7,7 +7,7 @@ import java.lang.annotation.Target;

@Retention(RetentionPolicy.RUNTIME)
@Target(ElementType.TYPE)
public @interface VerbClass {
@interface VerbClass {

    public String value();
    String value();
}

@@ -5,12 +5,6 @@
    "paramDescription": "URL of the isLookUp Service",
    "paramRequired": true
  },
  {
    "paramName":"mt",
    "paramLongName":"master",
    "paramDescription": "should be local or yarn",
    "paramRequired": true
  },
  {
    "paramName":"s",
    "paramLongName":"sourcePath",

@@ -22,6 +16,36 @@
    "paramLongName":"protoMap",
    "paramDescription": "the json path associated to each selection field",
    "paramRequired": true
  },
  {
    "paramName":"tn",
    "paramLongName":"resultTableName",
    "paramDescription": "the name of the result table we are currently working on",
    "paramRequired": true
  },
  {
    "paramName": "out",
    "paramLongName": "outputPath",
    "paramDescription": "the path used to store temporary output files",
    "paramRequired": true
  },
  {
    "paramName": "ssm",
    "paramLongName": "isSparkSessionManaged",
    "paramDescription": "true if the spark session is managed, false otherwise",
    "paramRequired": false
  },
  {
    "paramName": "test",
    "paramLongName": "isTest",
    "paramDescription": "true if it is a test run, false otherwise",
    "paramRequired": false
  },
  {
    "paramName": "tg",
    "paramLongName": "taggingConf",
    "paramDescription": "the community configuration to use when the job runs in test mode",
    "paramRequired": false
  }

]

@@ -19,4 +19,28 @@
        <name>hive_metastore_uris</name>
        <value>thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083</value>
    </property>
    <property>
        <name>spark2YarnHistoryServerAddress</name>
        <value>http://iis-cdh5-test-gw.ocean.icm.edu.pl:18089</value>
    </property>
    <property>
        <name>spark2EventLogDir</name>
        <value>/user/spark/spark2ApplicationHistory</value>
    </property>
    <property>
        <name>sparkExecutorNumber</name>
        <value>1</value>
    </property>
    <property>
        <name>sparkDriverMemory</name>
        <value>15G</value>
    </property>
    <property>
        <name>sparkExecutorMemory</name>
        <value>6G</value>
    </property>
    <property>
        <name>sparkExecutorCores</name>
        <value>1</value>
    </property>
</configuration>

@@ -1,13 +1,9 @@
<workflow-app name="result_to_community_from_semrel_propagation" xmlns="uri:oozie:workflow:0.5">
<workflow-app name="bulk_tagging" xmlns="uri:oozie:workflow:0.5">
    <parameters>
        <property>
            <name>sourcePath</name>
            <description>the source path</description>
        </property>
        <property>
            <name>allowedsemrels</name>
            <description>the semantic relationships allowed for propagation</description>
        </property>
        <property>
            <name>sparkDriverMemory</name>
            <description>memory for driver process</description>

@@ -24,38 +20,163 @@
            <name>isLookupUrl</name>
            <description>the isLookup service endpoint</description>
        </property>
        <property>
            <name>protoMap</name>
            <description>the json path associated to each selection field</description>
        </property>
    </parameters>

    <start to="ResultToCommunityFromSemRelPropagation"/>
    <start to="reset-outputpath"/>

    <kill name="Kill">
        <message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
    </kill>

    <action name="ResultToCommunityFromSemRelPropagation">
    <action name="reset-outputpath">
        <fs>
            <delete path='${workingDir}/relation'/>
            <delete path='${workingDir}/publication'/>
            <delete path='${workingDir}/dataset'/>
            <delete path='${workingDir}/otherresearchproduct'/>
            <delete path='${workingDir}/software'/>
        </fs>
        <ok to="copy_relation"/>
        <error to="Kill"/>
    </action>

    <action name="copy_relation">
        <distcp xmlns="uri:oozie:distcp-action:0.2">
            <job-tracker>${jobTracker}</job-tracker>
            <name-node>${nameNode}</name-node>
            <arg>${nameNode}/${sourcePath}/relation</arg>
            <arg>${nameNode}/${workingDir}/relation</arg>
        </distcp>
        <ok to="fork_exec_bulktag"/>
        <error to="Kill"/>
    </action>

    <action name="fork_exec_bulktag">
        <path start="join_bulktag_publication"/>
        <path start="join_bulktag_dataset"/>
        <path start="join_bulktag_otherresearchproduct"/>
        <path start="join_bulktag_software"/>
    </action>
    <action name="join_bulktag_publication">
        <spark xmlns="uri:oozie:spark-action:0.2">
            <job-tracker>${jobTracker}</job-tracker>
            <name-node>${nameNode}</name-node>
            <master>yarn-cluster</master>
            <mode>cluster</mode>
            <name>ResultToCommunitySemRelPropagation</name>
            <class>eu.dnetlib.dhp.resulttoorganizationfrominstrepo.SparkResultToOrganizationFromIstRepoJob</class>
            <jar>dhp-propagation-${projectVersion}.jar</jar>
            <spark-opts>--executor-memory ${sparkExecutorMemory}
                --executor-cores ${sparkExecutorCores}
            <name>bulkTagging</name>
            <class>eu.dnetlib.dhp.SparkBulkTagJob</class>
            <jar>dhp-bulktag-${projectVersion}.jar</jar>
            <spark-opts>
                --num-executors=${sparkExecutorNumber}
                --executor-memory=${sparkExecutorMemory}
                --executor-cores=${sparkExecutorCores}
                --driver-memory=${sparkDriverMemory}
                --conf spark.extraListeners="com.cloudera.spark.lineage.NavigatorAppListener"
                --conf spark.sql.queryExecutionListeners="com.cloudera.spark.lineage.NavigatorQueryListener"
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
            </spark-opts>
            <arg>-mt</arg> <arg>yarn-cluster</arg>
            <arg>--sourcePath</arg><arg>${sourcePath}</arg>
            <!-- <arg>-allowedsemrels</arg><arg>${allowedsemrels}</arg>-->
            <arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
            <!-- <arg>-isLookupUrl</arg><arg>${isLookupUrl}</arg>-->
            <arg>--sourcePath</arg><arg>${sourcePath}/publication</arg>
            <!-- <arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>-->
            <arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Publication</arg>
            <arg>--outputPath</arg><arg>${workingDir}/publication</arg>
            <arg>--proto_map</arg><arg>${protoMap}</arg>
            <arg>--isLookupUrl</arg><arg>${isLookupUrl}</arg>
        </spark>
        <ok to="End"/>
        <ok to="wait"/>
        <error to="Kill"/>
    </action>

    </action>
    <action name="join_bulktag_dataset">
        <spark xmlns="uri:oozie:spark-action:0.2">
            <job-tracker>${jobTracker}</job-tracker>
            <name-node>${nameNode}</name-node>
            <master>yarn-cluster</master>
            <mode>cluster</mode>
            <name>bulkTagging</name>
            <class>eu.dnetlib.dhp.SparkBulkTagJob</class>
            <jar>dhp-bulktag-${projectVersion}.jar</jar>
            <spark-opts>
                --num-executors=${sparkExecutorNumber}
                --executor-memory=${sparkExecutorMemory}
                --executor-cores=${sparkExecutorCores}
                --driver-memory=${sparkDriverMemory}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
            </spark-opts>
            <arg>--sourcePath</arg><arg>${sourcePath}/dataset</arg>
            <!-- <arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>-->
            <arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Dataset</arg>
            <arg>--outputPath</arg><arg>${workingDir}/dataset</arg>
            <arg>--proto_map</arg><arg>${protoMap}</arg>
            <arg>--isLookupUrl</arg><arg>${isLookupUrl}</arg>
        </spark>
        <ok to="wait"/>
        <error to="Kill"/>
    </action>
    <action name="join_bulktag_otherresearchproduct">
        <spark xmlns="uri:oozie:spark-action:0.2">
            <job-tracker>${jobTracker}</job-tracker>
            <name-node>${nameNode}</name-node>
            <master>yarn-cluster</master>
            <mode>cluster</mode>
            <name>bulkTagging</name>
            <class>eu.dnetlib.dhp.SparkBulkTagJob</class>
            <jar>dhp-bulktag-${projectVersion}.jar</jar>
            <spark-opts>
                --num-executors=${sparkExecutorNumber}
                --executor-memory=${sparkExecutorMemory}
                --executor-cores=${sparkExecutorCores}
                --driver-memory=${sparkDriverMemory}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
            </spark-opts>
            <arg>--sourcePath</arg><arg>${sourcePath}/otherresearchproduct</arg>
            <!-- <arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>-->
            <arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.OtherResearchProduct</arg>
            <arg>--outputPath</arg><arg>${workingDir}/otherresearchproduct</arg>
            <arg>--proto_map</arg><arg>${protoMap}</arg>
            <arg>--isLookupUrl</arg><arg>${isLookupUrl}</arg>
        </spark>
        <ok to="wait"/>
        <error to="Kill"/>
    </action>
    <action name="join_bulktag_software">
        <spark xmlns="uri:oozie:spark-action:0.2">
            <job-tracker>${jobTracker}</job-tracker>
            <name-node>${nameNode}</name-node>
            <master>yarn-cluster</master>
            <mode>cluster</mode>
            <name>bulkTagging</name>
            <class>eu.dnetlib.dhp.SparkBulkTagJob</class>
            <jar>dhp-bulktag-${projectVersion}.jar</jar>
            <spark-opts>
                --num-executors=${sparkExecutorNumber}
                --executor-memory=${sparkExecutorMemory}
                --executor-cores=${sparkExecutorCores}
                --driver-memory=${sparkDriverMemory}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
            </spark-opts>
            <arg>--sourcePath</arg><arg>${sourcePath}/software</arg>
            <!-- <arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>-->
            <arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Software</arg>
            <arg>--outputPath</arg><arg>${workingDir}/software</arg>
            <arg>--proto_map</arg><arg>${protoMap}</arg>
            <arg>--isLookupUrl</arg><arg>${isLookupUrl}</arg>
        </spark>
        <ok to="wait"/>
        <error to="Kill"/>
    </action>
    <join name="wait" to="End"/>
    <end name="End"/>
</workflow-app>

@ -0,0 +1,233 @@
|
|||
package eu.dnetlib.dhp;
|
||||
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import java.io.IOException;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import org.apache.commons.io.FileUtils;
|
||||
import org.apache.spark.SparkConf;
|
||||
import org.apache.spark.sql.SparkSession;
|
||||
import org.junit.jupiter.api.AfterAll;
|
||||
import org.junit.jupiter.api.BeforeAll;
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.mortbay.util.IO;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
public class BulkTagJobTest {
|
||||
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
|
||||
|
||||
private static final ClassLoader cl = eu.dnetlib.dhp.BulkTagJobTest.class.getClassLoader();
|
||||
|
||||
private static SparkSession spark;
|
||||
|
||||
private static Path workingDir;
|
||||
private static final Logger log = LoggerFactory.getLogger(eu.dnetlib.dhp.BulkTagJobTest.class);
|
||||
|
||||
private static String taggingConf = "";
|
||||
|
||||
static {
|
||||
try {
|
||||
taggingConf =
|
||||
IO.toString(
|
||||
BulkTagJobTest.class.getResourceAsStream(
|
||||
"/eu/dnetlib/dhp/communityconfiguration/tagging_conf.json"));
|
||||
} catch (IOException e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
}
|
||||
|
||||
@BeforeAll
|
||||
public static void beforeAll() throws IOException {
|
||||
workingDir = Files.createTempDirectory(eu.dnetlib.dhp.BulkTagJobTest.class.getSimpleName());
|
||||
log.info("using work dir {}", workingDir);
|
||||
|
||||
SparkConf conf = new SparkConf();
|
||||
conf.setAppName(eu.dnetlib.dhp.BulkTagJobTest.class.getSimpleName());
|
||||
|
||||
conf.setMaster("local[*]");
|
||||
conf.set("spark.driver.host", "localhost");
|
||||
conf.set("hive.metastore.local", "true");
|
||||
conf.set("spark.ui.enabled", "false");
|
||||
conf.set("spark.sql.warehouse.dir", workingDir.toString());
|
||||
conf.set("hive.metastore.warehouse.dir", workingDir.resolve("warehouse").toString());
|
||||
|
||||
spark =
|
||||
SparkSession.builder()
|
||||
.appName(BulkTagJobTest.class.getSimpleName())
|
||||
.config(conf)
|
||||
.getOrCreate();
|
||||
}
|
||||
|
||||
@AfterAll
|
||||
public static void afterAll() throws IOException {
|
||||
FileUtils.deleteDirectory(workingDir.toFile());
|
||||
spark.stop();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void test1() throws Exception {
|
||||
SparkBulkTagJob2.main(
|
||||
new String[] {
|
||||
"-isTest",
|
||||
Boolean.TRUE.toString(),
|
||||
"-isSparkSessionManaged",
|
||||
Boolean.FALSE.toString(),
|
||||
"-sourcePath",
|
||||
getClass().getResource("/eu/dnetlib/dhp/sample/dataset").getPath(),
|
||||
"-taggingConf",
|
||||
taggingConf,
|
||||
"-resultTableName",
|
||||
"eu.dnetlib.dhp.schema.oaf.Dataset",
|
||||
"-outputPath",
|
||||
workingDir.toString() + "/dataset",
|
||||
"-isLookupUrl",
|
||||
"http://beta.services.openaire.eu:8280/is/services/isLookUp",
|
||||
"-protoMap",
|
||||
"{ \"author\" : \"$['author'][*]['fullname']\","
|
||||
+ " \"title\" : \"$['title'][*]['value']\","
|
||||
+ " \"orcid\" : \"$['author'][*]['pid'][*][?(@['key']=='ORCID')]['value']\","
|
||||
+ " \"contributor\" : \"$['contributor'][*]['value']\","
|
||||
+ " \"description\" : \"$['description'][*]['value']\"}"
|
||||
// "-preparedInfoPath",
|
||||
// getClass().getResource("/eu/dnetlib/dhp/resulttocommunityfromsemrel/preparedInfo").getPath()
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
|
||||
|
||||
import eu.dnetlib.dhp.orcidtoresultfromsemrel.OrcidPropagationJobTest;
|
||||
import eu.dnetlib.dhp.resulttocommunityfromorganization.SparkResultToCommunityFromOrganizationJob2;
|
||||
import eu.dnetlib.dhp.schema.oaf.Dataset;
|
||||
import org.apache.commons.io.FileUtils;
|
||||
import org.apache.spark.SparkConf;
|
||||
import org.apache.spark.api.java.JavaRDD;
|
||||
import org.apache.spark.api.java.JavaSparkContext;
|
||||
import org.apache.spark.sql.Encoders;
|
||||
import org.apache.spark.sql.Row;
|
||||
import org.apache.spark.sql.SparkSession;
|
||||
import org.junit.jupiter.api.AfterAll;
|
||||
import org.junit.jupiter.api.Assertions;
|
||||
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Test;

import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.List;

import static org.apache.spark.sql.functions.desc;

    @Test
    public void test1() throws Exception {
        SparkResultToCommunityThroughSemRelJob4.main(new String[]{
                "-isTest", Boolean.TRUE.toString(),
                "-isSparkSessionManaged", Boolean.FALSE.toString(),
                "-sourcePath", getClass().getResource("/eu/dnetlib/dhp/resulttocommunityfromsemrel/sample").getPath(),
                "-hive_metastore_uris", "",
                "-resultTableName", "eu.dnetlib.dhp.schema.oaf.Dataset",
                "-outputPath", workingDir.toString() + "/dataset",
                "-preparedInfoPath", getClass().getResource("/eu/dnetlib/dhp/resulttocommunityfromsemrel/preparedInfo").getPath()
        });

        final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext());

        JavaRDD<Dataset> tmp = sc.textFile(workingDir.toString() + "/dataset")
                .map(item -> OBJECT_MAPPER.readValue(item, Dataset.class));

        Assertions.assertEquals(10, tmp.count());
        org.apache.spark.sql.Dataset<Dataset> verificationDataset = spark.createDataset(tmp.rdd(), Encoders.bean(Dataset.class));

        verificationDataset.createOrReplaceTempView("dataset");

        String query = "select id, MyT.id community " +
                "from dataset " +
                "lateral view explode(context) c as MyT " +
                "lateral view explode(MyT.datainfo) d as MyD " +
                "where MyD.inferenceprovenance = 'propagation'";

        org.apache.spark.sql.Dataset<Row> resultExplodedProvenance = spark.sql(query);
        Assertions.assertEquals(5, resultExplodedProvenance.count());

        Assertions.assertEquals(0, resultExplodedProvenance.filter("id = '50|dedup_wf_001::2305908abeca9da37eaf3bddcaf81b7b'").count());

        Assertions.assertEquals(1, resultExplodedProvenance.filter("id = '50|dedup_wf_001::0489ae524201eedaa775da282dce35e7'").count());
        Assertions.assertEquals("dh-ch", resultExplodedProvenance.select("community")
                .where(resultExplodedProvenance.col("id").equalTo("50|dedup_wf_001::0489ae524201eedaa775da282dce35e7"))
                .collectAsList().get(0).getString(0));

        Assertions.assertEquals(3, resultExplodedProvenance.filter("id = '50|dedup_wf_001::0a60e33b4f0986ebd9819451f2d87a28'").count());
        List<Row> rowList = resultExplodedProvenance.select("community")
                .where(resultExplodedProvenance.col("id")
                        .equalTo("50|dedup_wf_001::0a60e33b4f0986ebd9819451f2d87a28"))
                .sort(desc("community")).collectAsList();
        Assertions.assertEquals("mes", rowList.get(0).getString(0));
        Assertions.assertEquals("fam", rowList.get(1).getString(0));
        Assertions.assertEquals("ee", rowList.get(2).getString(0));

        Assertions.assertEquals(1, resultExplodedProvenance.filter("id = '50|dedup_wf_001::0ae02edb5598a5545d10b107fcf48dcc'").count());
        Assertions.assertEquals("aginfra", resultExplodedProvenance.select("community")
                .where(resultExplodedProvenance.col("id")
                        .equalTo("50|dedup_wf_001::0ae02edb5598a5545d10b107fcf48dcc"))
                .collectAsList().get(0).getString(0));

        query = "select id, MyT.id community " +
                "from dataset " +
                "lateral view explode(context) c as MyT " +
                "lateral view explode(MyT.datainfo) d as MyD ";

        org.apache.spark.sql.Dataset<Row> resultCommunityId = spark.sql(query);

        Assertions.assertEquals(10, resultCommunityId.count());

        Assertions.assertEquals(2, resultCommunityId.filter("id = '50|dedup_wf_001::0489ae524201eedaa775da282dce35e7'").count());
        rowList = resultCommunityId.select("community")
                .where(resultCommunityId.col("id").equalTo("50|dedup_wf_001::0489ae524201eedaa775da282dce35e7"))
                .sort(desc("community"))
                .collectAsList();
        Assertions.assertEquals("dh-ch", rowList.get(0).getString(0));
        Assertions.assertEquals("beopen", rowList.get(1).getString(0));

        Assertions.assertEquals(3, resultCommunityId.filter("id = '50|dedup_wf_001::0a60e33b4f0986ebd9819451f2d87a28'").count());
        rowList = resultCommunityId.select("community")
                .where(resultCommunityId.col("id").equalTo("50|dedup_wf_001::0a60e33b4f0986ebd9819451f2d87a28"))
                .sort(desc("community"))
                .collectAsList();
        Assertions.assertEquals("mes", rowList.get(0).getString(0));
        Assertions.assertEquals("fam", rowList.get(1).getString(0));
        Assertions.assertEquals("ee", rowList.get(2).getString(0));

        Assertions.assertEquals(2, resultCommunityId.filter("id = '50|dedup_wf_001::0ae02edb5598a5545d10b107fcf48dcc'").count());
        rowList = resultCommunityId.select("community")
                .where(resultCommunityId.col("id").equalTo("50|dedup_wf_001::0ae02edb5598a5545d10b107fcf48dcc"))
                .sort(desc("community"))
                .collectAsList();
        Assertions.assertEquals("beopen", rowList.get(0).getString(0));
        Assertions.assertEquals("aginfra", rowList.get(1).getString(0));

        Assertions.assertEquals(2, resultCommunityId.filter("id = '50|dedup_wf_001::2305908abeca9da37eaf3bddcaf81b7b'").count());
        rowList = resultCommunityId.select("community")
                .where(resultCommunityId.col("id").equalTo("50|dedup_wf_001::2305908abeca9da37eaf3bddcaf81b7b"))
                .sort(desc("community"))
                .collectAsList();
        Assertions.assertEquals("ni", rowList.get(0).getString(0));
        Assertions.assertEquals("euromarine", rowList.get(1).getString(0));

        Assertions.assertEquals(1, resultCommunityId.filter("id = '50|doajarticles::8d817039a63710fcf97e30f14662c6c8'").count());
        Assertions.assertEquals("euromarine", resultCommunityId.select("community")
                .where(resultCommunityId.col("id")
                        .equalTo("50|doajarticles::8d817039a63710fcf97e30f14662c6c8"))
                .collectAsList().get(0).getString(0));
    }
*/
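Note: the commented-out assertions above all lean on Spark SQL's LATERAL VIEW EXPLODE to flatten the nested context/datainfo arrays of a result. A minimal, self-contained sketch of that pattern follows; the input path and the local master are illustrative assumptions, only the query is taken verbatim from the test.

import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;

public class ExplodeContextSketch {
    public static void main(String[] args) {
        SparkSession spark = SparkSession.builder().appName("sketch").master("local[*]").getOrCreate();
        // assumption: one JSON-serialized result per line, each carrying a context array
        Dataset<Row> ds = spark.read().json("/tmp/sample/dataset");
        ds.createOrReplaceTempView("dataset");
        // one output row per (result id, community id) pair whose datainfo was added by propagation
        spark.sql("select id, MyT.id community "
                + "from dataset "
                + "lateral view explode(context) c as MyT "
                + "lateral view explode(MyT.datainfo) d as MyD "
                + "where MyD.inferenceprovenance = 'propagation'").show(false);
        spark.stop();
    }
}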
@ -0,0 +1,147 @@
package eu.dnetlib.dhp;

import com.google.gson.Gson;
import eu.dnetlib.dhp.community.CommunityConfiguration;
import eu.dnetlib.dhp.community.CommunityConfigurationFactory;
import eu.dnetlib.dhp.community.Constraint;
import eu.dnetlib.dhp.community.SelectionConstraints;
import eu.dnetlib.dhp.selectioncriteria.VerbResolver;
import java.io.IOException;
import java.lang.reflect.InvocationTargetException;
import java.util.*;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils;
import org.dom4j.DocumentException;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Test;

/** Created by miriam on 03/08/2018. */
public class CommunityConfigurationFactoryTest {

    private static String xml;
    private static String xml1;

    private final VerbResolver resolver = new VerbResolver();

    // The XML configuration parses into five communities, each with a non-blank id.
    @Test
    public void parseTest() throws DocumentException, IOException {
        String xml =
                IOUtils.toString(
                        getClass()
                                .getResourceAsStream(
                                        "/eu/dnetlib/dhp/communityconfiguration/community_configuration.xml"));
        final CommunityConfiguration cc = CommunityConfigurationFactory.newInstance(xml);
        Assertions.assertEquals(5, cc.size());
        cc.getCommunityList()
                .forEach(c -> Assertions.assertTrue(StringUtils.isNoneBlank(c.getId())));
    }

    // A not_contains constraint must fail when the value is actually present in the metadata.
    @Test
    public void applyVerb()
            throws InvocationTargetException, IllegalAccessException, NoSuchMethodException,
                    InstantiationException {
        Constraint sc = new Constraint();
        sc.setVerb("not_contains");
        sc.setField("contributor");
        sc.setValue("DARIAH");
        sc.setSelection(resolver.getSelectionCriteria(sc.getVerb(), sc.getValue()));
        String metadata = "This work has been partially supported by DARIAH-EU infrastructure";
        Assertions.assertFalse(sc.verifyCriteria(metadata));
    }

    // A record whose contributor matches the DARIAH selection criteria is associated
    // to the dariah community only.
    @Test
    public void loadSelCriteriaTest() throws DocumentException, IOException {
        String xml =
                IOUtils.toString(
                        getClass()
                                .getResourceAsStream(
                                        "/eu/dnetlib/dhp/communityconfiguration/community_configuration_selcrit.xml"));
        final CommunityConfiguration cc = CommunityConfigurationFactory.newInstance(xml);
        Map<String, List<String>> param = new HashMap<>();
        param.put("author", new ArrayList<>(Collections.singletonList("Pippo Pippi")));
        param.put(
                "description",
                new ArrayList<>(
                        Collections.singletonList(
                                "This work has been partially supported by DARIAH-EU infrastructure")));
        param.put(
                "contributor",
                new ArrayList<>(
                        Collections.singletonList(
                                "Pallino helped write the paper. Pallino works for DARIAH")));
        List<String> comm =
                cc.getCommunityForDatasource(
                        "openaire____::1cfdb2e14977f31a98e0118283401f32", param);
        Assertions.assertEquals(1, comm.size());
        Assertions.assertEquals("dariah", comm.get(0));
    }

    // Smoke test: the JSON configuration with selection criteria deserializes without errors.
    @Test
    public void test4() throws DocumentException, IOException {
        final CommunityConfiguration cc =
                CommunityConfigurationFactory.fromJson(
                        IOUtils.toString(
                                getClass()
                                        .getResourceAsStream(
                                                "/eu/dnetlib/dhp/communityconfiguration/community_configuration_selcrit.json")));
        cc.toString();
    }

    // Smoke test: the plain JSON configuration deserializes and serializes back to JSON.
    @Test
    public void test5() throws IOException, DocumentException {

        // final CommunityConfiguration cc =
        // CommunityConfigurationFactory.newInstance(IOUtils.toString(getClass().getResourceAsStream("test.xml")));
        final CommunityConfiguration cc =
                CommunityConfigurationFactory.fromJson(
                        IOUtils.toString(
                                getClass()
                                        .getResourceAsStream(
                                                "/eu/dnetlib/dhp/communityconfiguration/community_configuration.json")));

        System.out.println(cc.toJson());
    }

    // Gson deserialization of the selection-constraint JSON, from a single constraint
    // up to the full SelectionConstraints wrapper (intermediate steps kept for reference).
    @Test
    public void test6() {
        String json =
                "{\"criteria\":[{\"constraint\":[{\"verb\":\"contains\",\"field\":\"contributor\",\"value\":\"DARIAH\"}]}]}";

        String step1 = "{\"verb\":\"contains\",\"field\":\"contributor\",\"value\":\"DARIAH\"}";

        Constraint c = new Gson().fromJson(step1, Constraint.class);
        //
        // String step2 =
        // "{\"constraint\":[{\"verb\":\"contains\",\"field\":\"contributor\",\"value\":\"DARIAH\"}]}";
        //
        // ConstraintEncapsulator ce = new
        // Gson().fromJson(step2,ConstraintEncapsulator.class);
        //
        //
        // String step3 =
        // "{\"ce\":{\"constraint\":[{\"verb\":\"contains\",\"field\":\"contributor\",\"value\":\"DARIAH\"}]}}";
        //
        // Constraints cons = new Gson().fromJson(step3,Constraints.class);
        //
        // String step4 =
        // "{\"criteria\":[{\"ce\":{\"constraint\":[{\"verb\":\"contains\",\"field\":\"contributor\",\"value\":\"DARIAH\"}]}}]}";
        //
        // ConstraintsList cl = new Gson().fromJson(step4,ConstraintsList.class);
        //
        // String step5 =
        // "{\"cl\":{\"criteria\":[{\"ce\":{\"constraint\":[{\"verb\":\"contains\",\"field\":\"contributor\",\"value\":\"DARIAH\"}]}}]}}";
        SelectionConstraints sl = new Gson().fromJson(json, SelectionConstraints.class);
    }

    // Round-trips the full tagging configuration shipped with the module.
    @Test
    public void test7() throws IOException {
        final CommunityConfiguration cc =
                CommunityConfigurationFactory.fromJson(
                        IOUtils.toString(
                                getClass()
                                        .getResourceAsStream(
                                                "/eu/dnetlib/dhp/communityconfiguration/tagging_conf.json")));

        System.out.println(cc.toJson());
    }
}
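For orientation, the verb resolution exercised by applyVerb() above boils down to mapping a verb name to a string predicate (the reflective exceptions declared by applyVerb suggest VerbResolver does this lookup reflectively). A standalone sketch of the idea, with an illustrative registry that is not the project's API:

import java.util.HashMap;
import java.util.Map;
import java.util.function.BiPredicate;

public class VerbSketch {
    // illustrative verb registry; the real project resolves verbs through VerbResolver
    static final Map<String, BiPredicate<String, String>> VERBS = new HashMap<>();
    static {
        VERBS.put("contains", (metadata, value) -> metadata.contains(value));
        VERBS.put("not_contains", (metadata, value) -> !metadata.contains(value));
    }

    public static void main(String[] args) {
        String metadata = "This work has been partially supported by DARIAH-EU infrastructure";
        System.out.println(VERBS.get("not_contains").test(metadata, "DARIAH")); // false, as applyVerb asserts
        System.out.println(VERBS.get("contains").test(metadata, "DARIAH"));     // true
    }
}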
@ -0,0 +1,694 @@
{"communities": {
  "clarin": {
    "id": "clarin",
    "subjects": [],
    "datasources": [{"openaireId": "re3data_____::a507cdacc5bbcc08761c92185dee5cab"}],
    "zenodoCommunities": []
  },
  "ee": {
    "id": "ee",
    "subjects": [
      "SDG13 - Climate action", "SDG8 - Decent work and economic growth", "SDG15 - Life on land",
      "SDG2 - Zero hunger", "SDG17 - Partnerships for the goals", "SDG10 - Reduced inequalities",
      "SDG5 - Gender equality", "SDG12 - Responsible consumption and production", "SDG14 - Life below water",
      "SDG6 - Clean water and sanitation", "SDG11 - Sustainable cities and communities", "SDG1 - No poverty",
      "SDG3 - Good health and well being", "SDG7 - Affordable and clean energy", "SDG4 - Quality education",
      "SDG9 - Industry innovation and infrastructure", "SDG16 - Peace justice and strong institutions"
    ],
    "datasources": [],
    "zenodoCommunities": []
  },
  "aginfra": {
    "id": "aginfra",
    "subjects": [
      "animal production and health", "fisheries and aquaculture", "food safety and human nutrition",
      "information management", "food technology", "agri-food education and extension",
      "natural resources and environment", "food system", "engineering technology and Research",
      "agriculture", "food safety risk assessment", "food security", "farming practices and systems",
      "plant production and protection", "agri-food economics and policy", "food distribution", "forestry"
    ],
    "datasources": [
      {"openaireId": "opendoar____::1a551829d50f1400b0dab21fdd969c04"},
      {"openaireId": "opendoar____::49af6c4e558a7569d80eee2e035e2bd7"},
      {"openaireId": "opendoar____::0266e33d3f546cb5436a10798e657d97"},
      {"openaireId": "opendoar____::fd4c2dc64ccb8496e6f1f94c85f30d06"},
      {"openaireId": "opendoar____::41bfd20a38bb1b0bec75acf0845530a7"},
      {"openaireId": "opendoar____::87ae6fb631f7c8a627e8e28785d9992d"}
    ],
    "zenodoCommunities": [
      {"zenodoCommunityId": "edenis"}, {"zenodoCommunityId": "efsa-pilot"}, {"zenodoCommunityId": "egene3"},
      {"zenodoCommunityId": "efsa-kj"}, {"zenodoCommunityId": "euromixproject"}, {"zenodoCommunityId": "discardless"},
      {"zenodoCommunityId": "sedinstcjfst"}, {"zenodoCommunityId": "afinet-kc"}, {"zenodoCommunityId": "2231-4784"},
      {"zenodoCommunityId": "2231-0606"}, {"zenodoCommunityId": "solace"}, {"zenodoCommunityId": "pa17"},
      {"zenodoCommunityId": "smartakis"}, {"zenodoCommunityId": "sedinstcjae"}, {"zenodoCommunityId": "phenology_camera"},
      {"zenodoCommunityId": "aginfra"}, {"zenodoCommunityId": "erosa"}, {"zenodoCommunityId": "bigdatagrapes"}
    ]
  },
  "fam": {
    "id": "fam",
    "subjects": [
      "Stock Assessment", "pelagic", "Fish farming", "EMFF", "Fisheries", "Fishermen",
      "maximum sustainable yield", "trawler", "Fishing vessel", "Fisherman", "Fishing gear", "RFMO",
      "Fish Aggregating Device", "Bycatch", "Fishery", "common fisheries policy", "Fishing fleet", "Aquaculture"
    ],
    "datasources": [
      {"openaireId": "doajarticles::8cec81178926caaca531afbd8eb5d64c"},
      {"openaireId": "doajarticles::0f7a7f30b5400615cae1829f3e743982"},
      {"openaireId": "doajarticles::9740f7f5af3e506d2ad2c215cdccd51a"},
      {"openaireId": "doajarticles::9f3fbaae044fa33cb7069b72935a3254"},
      {"openaireId": "doajarticles::cb67f33eb9819f5c624ce0313957f6b3"},
      {"openaireId": "doajarticles::e21c97cbb7a209afc75703681c462906"},
      {"openaireId": "doajarticles::554cde3be9e5c4588b4c4f9f503120cb"},
      {"openaireId": "tubitakulakb::11e22f49e65b9fd11d5b144b93861a1b"},
      {"openaireId": "doajarticles::57c5d3837da943e93b28ec4db82ec7a5"},
      {"openaireId": "doajarticles::a186f5ddb8e8c7ecc992ef51cf3315b1"},
      {"openaireId": "doajarticles::dca64612dfe0963fffc119098a319957"},
      {"openaireId": "doajarticles::dd70e44479f0ade25aa106aef3e87a0a"}
    ],
    "zenodoCommunities": [
      {"zenodoCommunityId": "discardless"}, {"zenodoCommunityId": "farfish2020"}, {"zenodoCommunityId": "facts"},
      {"zenodoCommunityId": "climefish"}, {"zenodoCommunityId": "proeel"}, {"zenodoCommunityId": "primefish"},
      {"zenodoCommunityId": "h2020_vicinaqua"}, {"zenodoCommunityId": "meece"}, {"zenodoCommunityId": "rlsadb"}
    ]
  },
  "instruct": {
    "id": "instruct",
    "subjects": [],
    "datasources": [],
    "zenodoCommunities": [
      {"zenodoCommunityId": "instruct"}, {"zenodoCommunityId": "west-life"}
    ]
  },
  "mes": {
    "id": "mes",
    "subjects": ["marine", "ocean", "fish", "aqua", "sea"],
    "datasources": [],
    "zenodoCommunities": [
      {"zenodoCommunityId": "adriplan"}, {"zenodoCommunityId": "devotes-project"}, {"zenodoCommunityId": "euro-basin"},
      {"zenodoCommunityId": "naclim"}, {"zenodoCommunityId": "discardless"}, {"zenodoCommunityId": "assisibf"},
      {"zenodoCommunityId": "meece"}, {"zenodoCommunityId": "facts"}, {"zenodoCommunityId": "proeel"},
      {"zenodoCommunityId": "aquatrace"}, {"zenodoCommunityId": "myfish"}, {"zenodoCommunityId": "atlas"},
      {"zenodoCommunityId": "blue-actionh2020"}, {"zenodoCommunityId": "sponges"}, {"zenodoCommunityId": "merces_project"},
      {"zenodoCommunityId": "bigdataocean"}, {"zenodoCommunityId": "columbus"}, {"zenodoCommunityId": "h2020-aquainvad-ed"},
      {"zenodoCommunityId": "aquarius"}, {"zenodoCommunityId": "southern-ocean-observing-system"}, {"zenodoCommunityId": "eawag"},
      {"zenodoCommunityId": "mossco"}, {"zenodoCommunityId": "onc"}, {"zenodoCommunityId": "oceanbiogeochemistry"},
      {"zenodoCommunityId": "oceanliteracy"}, {"zenodoCommunityId": "openearth"}, {"zenodoCommunityId": "ocean"},
      {"zenodoCommunityId": "calcifierraman"}, {"zenodoCommunityId": "bermudabream"}, {"zenodoCommunityId": "brcorp1"},
      {"zenodoCommunityId": "mce"}, {"zenodoCommunityId": "biogeochem"}, {"zenodoCommunityId": "ecc2014"},
      {"zenodoCommunityId": "fisheries"}, {"zenodoCommunityId": "sedinstcjfas"}, {"zenodoCommunityId": "narmada"},
      {"zenodoCommunityId": "umr-entropie"}, {"zenodoCommunityId": "farfish2020"}, {"zenodoCommunityId": "primefish"},
      {"zenodoCommunityId": "zf-ilcs"}, {"zenodoCommunityId": "climefish"}, {"zenodoCommunityId": "afrimed_eu"},
      {"zenodoCommunityId": "spi-ace"}, {"zenodoCommunityId": "cice-consortium"}, {"zenodoCommunityId": "nemo-ocean"},
      {"zenodoCommunityId": "mesopp-h2020"}, {"zenodoCommunityId": "marxiv"}
    ]
  },
  "ni": {
    "id": "ni",
    "subjects": [
      "brain mapping", "brain imaging", "electroencephalography", "arterial spin labelling",
      "brain fingerprinting", "brain", "neuroimaging", "Multimodal Brain Image Analysis", "fMRI",
      "neuroinformatics", "fetal brain", "brain ultrasonic imaging", "topographic brain mapping",
      "diffusion tensor imaging", "computerized knowledge assessment", "connectome mapping",
      "brain magnetic resonance imaging", "brain abnormalities"
    ],
    "datasources": [
      {"openaireId": "re3data_____::5b9bf9171d92df854cf3c520692e9122"},
      {"openaireId": "doajarticles::c7d3de67dc77af72f6747157441252ec"},
      {"openaireId": "re3data_____::8515794670370f49c1d176c399c714f5"},
      {"openaireId": "doajarticles::d640648c84b10d425f96f11c3de468f3"},
      {"openaireId": "doajarticles::0c0e74daa5d95504eade9c81ebbd5b8a"},
      {"openaireId": "rest________::fb1a3d4523c95e63496e3bc7ba36244b"}
    ],
    "zenodoCommunities": [
      {"zenodoCommunityId": "neuroinformatics"}, {"zenodoCommunityId": "hbp"},
      {"zenodoCommunityId": "from_neuroscience_to_machine_learning"}, {"zenodoCommunityId": "ci2c"},
      {"zenodoCommunityId": "opensourcebrain"}, {"zenodoCommunityId": "brainspeak"},
      {"zenodoCommunityId": "braincom"}, {"zenodoCommunityId": "nextgenvis"},
      {"zenodoCommunityId": "meso-brain"}, {"zenodoCommunityId": "neuroplasticity-workshop"},
      {"zenodoCommunityId": "bionics"}, {"zenodoCommunityId": "brainmattrain-676408"},
      {"zenodoCommunityId": "repronim"}, {"zenodoCommunityId": "affectiveneuro"},
      {"zenodoCommunityId": "con"}, {"zenodoCommunityId": "lab_neurol_sperim_irfmn_irccs_milano_it"}
    ]
  },
  "dariah": {
    "id": "dariah",
    "subjects": [],
    "datasources": [
      {
        "openaireId": "opendoar____::7e7757b1e12abcb736ab9a754ffb617a",
        "sc": {"cl": {"criteria": [{"ce": {"constraint": [{"verb": "contains", "field": "contributor", "value": "DARIAH"}]}}]}}
      }
    ],
    "zenodoCommunities": [{"zenodoCommunityId": "dimpo"}]
  },
  "rda": {
    "id": "rda",
    "subjects": [],
    "datasources": [],
    "zenodoCommunities": [{"zenodoCommunityId": "rda"}]
  },
  "dh-ch": {
    "id": "dh-ch",
    "subjects": [
      "modern art", "metadata", "monuments", "sites", "field walking", "frescoes", "excavation",
      "ontologies", "mapping", "cities", "temples", "lithics", "roads", "digital cultural heritage",
      "interoperability", "archaeological reports", "churches", "standards", "archaeological stratigraphy",
      "buidings", "digital humanities", "survey", "archaeological sites", "CIDOC CRM", "decorations",
      "classic art", "stratigraphy", "digital archaeology", "walls", "data science", "chapels", "paintings",
      "archaeology", "fair data", "mosaics", "data visualization", "burials", "medieval art", "castles",
      "statues", "natural language processing", "inscriptions", "vaults", "open data", "contemporary art",
      "3D", "pottery", "site", "metadata schema", "architectural", "vessels"
    ],
    "datasources": [
      {"openaireId": "re3data_____::9ebe127e5f3a0bf401875690f3bb6b81"},
      {"openaireId": "doajarticles::c6cd4b532e12868c1d760a8d7cda6815"},
      {"openaireId": "doajarticles::a6de4499bb87bf3c01add0a9e2c9ed0b"},
      {"openaireId": "doajarticles::6eb31d13b12bc06bbac06aef63cf33c9"},
      {"openaireId": "doajarticles::0da84e9dfdc8419576169e027baa8028"},
      {"openaireId": "re3data_____::84e123776089ce3c7a33db98d9cd15a8"},
      {"openaireId": "openaire____::c5502a43e76feab55dd00cf50f519125"},
      {"openaireId": "re3data_____::a48f09c562b247a9919acfe195549b47"},
      {"openaireId": "opendoar____::97275a23ca44226c9964043c8462be96"}
    ],
    "zenodoCommunities": [
      {"zenodoCommunityId": "storm"}, {"zenodoCommunityId": "crosscult"}, {"zenodoCommunityId": "wholodance_eu"},
      {"zenodoCommunityId": "digcur2013"}, {"zenodoCommunityId": "gravitate"}, {"zenodoCommunityId": "dipp2014"},
      {"zenodoCommunityId": "digitalhumanities"}, {"zenodoCommunityId": "dimpo"}, {"zenodoCommunityId": "adho"},
      {"zenodoCommunityId": "chc"}, {"zenodoCommunityId": "wahr"}, {"zenodoCommunityId": "ibe"},
      {"zenodoCommunityId": "ariadne"}, {"zenodoCommunityId": "parthenos-hub"}, {"zenodoCommunityId": "parthenos-training"},
      {"zenodoCommunityId": "gandhara"}, {"zenodoCommunityId": "cmsouthasia"}, {"zenodoCommunityId": "nilgirihills"},
      {"zenodoCommunityId": "shamsa_mustecio"}, {"zenodoCommunityId": "bodhgaya"}
    ]
  }
}
}
@ -0,0 +1,176 @@
<communities>
    <community id="fet-fp7">
        <oacommunity/>
        <subjects/>
        <datasources/>
        <zenodocommunities/>
    </community>
    <community id="fet-h2020">
        <subjects/>
        <datasources/>
        <zenodocommunities/>
    </community>
    <community id="oa-pg">
        <subjects/>
        <datasources/>
        <zenodocommunities/>
    </community>
    <community id="ee">
        <subjects>
            <subject>SDG13 - Climate action</subject>
            <subject>SDG8 - Decent work and economic growth</subject>
            <subject>SDG15 - Life on land</subject>
            <subject>SDG2 - Zero hunger</subject>
            <subject>SDG17 - Partnerships for the goals</subject>
            <subject>SDG10 - Reduced inequalities</subject>
            <subject>SDG5 - Gender equality</subject>
            <subject>SDG12 - Responsible consumption and production</subject>
            <subject>SDG14 - Life below water</subject>
            <subject>SDG6 - Clean water and sanitation</subject>
            <subject>SDG11 - Sustainable cities and communities</subject>
            <subject>SDG1 - No poverty</subject>
            <subject>SDG3 - Good health and well being</subject>
            <subject>SDG7 - Affordable and clean energy</subject>
            <subject>SDG4 - Quality education</subject>
            <subject>SDG9 - Industry innovation and infrastructure</subject>
            <subject>SDG16 - Peace justice and strong institutions</subject>
        </subjects>
        <datasources/>
        <zenodocommunities>
            <zenodocommunity><zenodoid>123</zenodoid><selcriteria/></zenodocommunity>
        </zenodocommunities>
    </community>
    <community id="dh-ch">
        <subjects/>
        <datasources/>
        <zenodocommunities/>
    </community>
    <community id="fam">
        <subjects/>
        <datasources/>
        <zenodocommunities/>
    </community>
    <community id="ni">
        <subjects>
            <subject>brain mapping</subject>
            <subject>brain imaging</subject>
            <subject>electroencephalography</subject>
            <subject>arterial spin labelling</subject>
            <subject>brain fingerprinting</subject>
            <subject>brain</subject>
            <subject>neuroimaging</subject>
            <subject>Multimodal Brain Image Analysis</subject>
            <subject>fMRI</subject>
            <subject>neuroinformatics</subject>
            <subject>fetal brain</subject>
            <subject>brain ultrasonic imaging</subject>
            <subject>topographic brain mapping</subject>
            <subject>diffusion tensor imaging</subject>
            <subject>computerized knowledge assessment</subject>
            <subject>connectome mapping</subject>
            <subject>brain magnetic resonance imaging</subject>
            <subject>brain abnormalities</subject>
        </subjects>
        <datasources>
            <datasource><openaireId>re3data_____::5b9bf9171d92df854cf3c520692e9122</openaireId><selcriteria/></datasource>
            <datasource><openaireId>doajarticles::c7d3de67dc77af72f6747157441252ec</openaireId><selcriteria/></datasource>
            <datasource><openaireId>re3data_____::8515794670370f49c1d176c399c714f5</openaireId><selcriteria/></datasource>
            <datasource><openaireId>doajarticles::d640648c84b10d425f96f11c3de468f3</openaireId><selcriteria/></datasource>
            <datasource><openaireId>doajarticles::0c0e74daa5d95504eade9c81ebbd5b8a</openaireId><selcriteria/></datasource>
        </datasources>
        <zenodocommunities/>
    </community>
    <community id="mes">
        <subjects>
            <subject>marine</subject>
            <subject>ocean</subject>
            <subject>fish</subject>
            <subject>aqua</subject>
            <subject>sea</subject>
        </subjects>
        <datasources>
            <datasource><openaireId>re3data_____::9633d1e8c4309c833c2c442abeb0cfeb</openaireId><selcriteria/></datasource>
        </datasources>
        <zenodocommunities/>
    </community>
    <community id="aginfra">
        <subjects>
            <subject>animal production and health</subject>
            <subject>fisheries and aquaculture</subject>
            <subject>food safety and human nutrition</subject>
            <subject>information management</subject>
            <subject>food technology</subject>
            <subject>agri-food education and extension</subject>
            <subject>natural resources and environment</subject>
            <subject>food system</subject>
            <subject>engineering technology and Research</subject>
            <subject>agriculture</subject>
            <subject>food safety risk assessment</subject>
            <subject>food security</subject>
            <subject>farming practices and systems</subject>
            <subject>plant production and protection</subject>
            <subject>agri-food economics and policy</subject>
            <subject>food distribution</subject>
            <subject>forestry</subject>
        </subjects>
        <datasources>
            <datasource><openaireId>opendoar____::1a551829d50f1400b0dab21fdd969c04</openaireId><selcriteria/></datasource>
            <datasource><openaireId>opendoar____::49af6c4e558a7569d80eee2e035e2bd7</openaireId><selcriteria/></datasource>
            <datasource><openaireId>opendoar____::0266e33d3f546cb5436a10798e657d97</openaireId><selcriteria/></datasource>
            <datasource><openaireId>opendoar____::fd4c2dc64ccb8496e6f1f94c85f30d06</openaireId><selcriteria/></datasource>
            <datasource><openaireId>opendoar____::41bfd20a38bb1b0bec75acf0845530a7</openaireId><selcriteria/></datasource>
            <datasource><openaireId>opendoar____::87ae6fb631f7c8a627e8e28785d9992d</openaireId><selcriteria/></datasource>
        </datasources>
        <zenodocommunities/>
    </community>
    <community id="clarin">
        <oacommunity>oac_clarin</oacommunity>
        <subjects/>
        <datasources>
            <datasource><openaireId>re3data_____::a507cdacc5bbcc08761c92185dee5cab</openaireId><selcriteria/></datasource>
        </datasources>
        <zenodocommunities/>
    </community>
</communities>
@ -0,0 +1,37 @@
{
  "communities": {
    "dariah": {
      "id": "dariah",
      "subjects": [],
      "datasources": [
        {
          "openaireId": "opendoar____::7e7757b1e12abcb736ab9a754ffb617a",
          "sc": {
            "cl": {
              "criteria": [
                {
                  "ce": {
                    "constraint": [
                      {
                        "verb": "contains",
                        "field": "contributor",
                        "value": "DARIAH"
                      }
                    ]
                  }
                }
              ]
            }
          }
        }
      ],
      "zenodoCommunities": [
        {"zenodoCommunityId": "dimpo"}
      ]
    }
  }
}
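The sc → cl → criteria → ce → constraint nesting above is the Gson-serialized form of the selection constraints; test6 in CommunityConfigurationFactoryTest walks the same shapes step by step. A standalone sketch with illustrative POJOs (not the project's classes) onto which Gson can map the inner criteria document:

import com.google.gson.Gson;
import java.util.List;

public class ConstraintJsonSketch {
    // illustrative POJOs mirroring the verb/field/value triples used throughout these configs
    static class Constraint { String verb; String field; String value; }
    static class ConstraintBlock { List<Constraint> constraint; }
    static class Criteria { List<ConstraintBlock> criteria; }

    public static void main(String[] args) {
        String json = "{\"criteria\":[{\"constraint\":[{\"verb\":\"contains\","
                + "\"field\":\"contributor\",\"value\":\"DARIAH\"}]}]}";
        Criteria c = new Gson().fromJson(json, Criteria.class);
        System.out.println(c.criteria.get(0).constraint.get(0).value); // DARIAH
    }
}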
@ -0,0 +1,193 @@
<communities>
    <community id="fet-fp7">
        <oacommunity/>
        <subjects/>
        <datasources/>
        <zenodocommunities/>
    </community>
    <community id="fet-h2020">
        <subjects/>
        <datasources/>
        <zenodocommunities/>
    </community>
    <community id="oa-pg">
        <subjects/>
        <datasources/>
        <zenodocommunities/>
    </community>
    <community id="ee">
        <subjects>
            <subject>SDG13 - Climate action</subject>
            <subject>SDG8 - Decent work and economic growth</subject>
            <subject>SDG15 - Life on land</subject>
            <subject>SDG2 - Zero hunger</subject>
            <subject>SDG17 - Partnerships for the goals</subject>
            <subject>SDG10 - Reduced inequalities</subject>
            <subject>SDG5 - Gender equality</subject>
            <subject>SDG12 - Responsible consumption and production</subject>
            <subject>SDG14 - Life below water</subject>
            <subject>SDG6 - Clean water and sanitation</subject>
            <subject>SDG11 - Sustainable cities and communities</subject>
            <subject>SDG1 - No poverty</subject>
            <subject>SDG3 - Good health and well being</subject>
            <subject>SDG7 - Affordable and clean energy</subject>
            <subject>SDG4 - Quality education</subject>
            <subject>SDG9 - Industry innovation and infrastructure</subject>
            <subject>SDG16 - Peace justice and strong institutions</subject>
        </subjects>
        <datasources/>
        <zenodocommunities>
            <zenodocommunity><zenodoid>123</zenodoid><selcriteria/></zenodocommunity>
        </zenodocommunities>
    </community>
    <community id="dh-ch">
        <subjects/>
        <datasources/>
        <zenodocommunities/>
    </community>
    <community id="fam">
        <subjects/>
        <datasources/>
        <zenodocommunities/>
    </community>
    <community id="ni">
        <subjects>
            <subject>brain mapping</subject>
            <subject>brain imaging</subject>
            <subject>electroencephalography</subject>
            <subject>arterial spin labelling</subject>
            <subject>brain fingerprinting</subject>
            <subject>brain</subject>
            <subject>neuroimaging</subject>
            <subject>Multimodal Brain Image Analysis</subject>
            <subject>fMRI</subject>
            <subject>neuroinformatics</subject>
            <subject>fetal brain</subject>
            <subject>brain ultrasonic imaging</subject>
            <subject>topographic brain mapping</subject>
            <subject>diffusion tensor imaging</subject>
            <subject>computerized knowledge assessment</subject>
            <subject>connectome mapping</subject>
            <subject>brain magnetic resonance imaging</subject>
            <subject>brain abnormalities</subject>
        </subjects>
        <datasources>
            <datasource><openaireId>re3data_____::5b9bf9171d92df854cf3c520692e9122</openaireId><selcriteria/></datasource>
            <datasource><openaireId>doajarticles::c7d3de67dc77af72f6747157441252ec</openaireId><selcriteria/></datasource>
            <datasource><openaireId>re3data_____::8515794670370f49c1d176c399c714f5</openaireId><selcriteria/></datasource>
            <datasource><openaireId>doajarticles::d640648c84b10d425f96f11c3de468f3</openaireId><selcriteria/></datasource>
            <datasource><openaireId>doajarticles::0c0e74daa5d95504eade9c81ebbd5b8a</openaireId><selcriteria/></datasource>
        </datasources>
        <zenodocommunities/>
    </community>
    <community id="mes">
        <subjects>
            <subject>marine</subject>
            <subject>ocean</subject>
            <subject>fish</subject>
            <subject>aqua</subject>
            <subject>sea</subject>
        </subjects>
        <datasources>
            <datasource><openaireId>re3data_____::9633d1e8c4309c833c2c442abeb0cfeb</openaireId><selcriteria/></datasource>
        </datasources>
        <zenodocommunities/>
    </community>
    <community id="aginfra">
        <subjects>
            <subject>animal production and health</subject>
            <subject>fisheries and aquaculture</subject>
            <subject>food safety and human nutrition</subject>
            <subject>information management</subject>
            <subject>food technology</subject>
            <subject>agri-food education and extension</subject>
            <subject>natural resources and environment</subject>
            <subject>food system</subject>
            <subject>engineering technology and Research</subject>
            <subject>agriculture</subject>
            <subject>food safety risk assessment</subject>
            <subject>food security</subject>
            <subject>farming practices and systems</subject>
            <subject>plant production and protection</subject>
            <subject>agri-food economics and policy</subject>
            <subject>food distribution</subject>
            <subject>forestry</subject>
        </subjects>
        <datasources>
            <datasource><openaireId>opendoar____::1a551829d50f1400b0dab21fdd969c04</openaireId><selcriteria/></datasource>
            <datasource><openaireId>opendoar____::49af6c4e558a7569d80eee2e035e2bd7</openaireId><selcriteria/></datasource>
            <datasource><openaireId>opendoar____::0266e33d3f546cb5436a10798e657d97</openaireId><selcriteria/></datasource>
            <datasource><openaireId>opendoar____::fd4c2dc64ccb8496e6f1f94c85f30d06</openaireId><selcriteria/></datasource>
            <datasource><openaireId>opendoar____::41bfd20a38bb1b0bec75acf0845530a7</openaireId><selcriteria/></datasource>
            <datasource><openaireId>opendoar____::87ae6fb631f7c8a627e8e28785d9992d</openaireId><selcriteria/></datasource>
        </datasources>
        <zenodocommunities/>
    </community>
    <community id="clarin">
        <oacommunity>oac_clarin</oacommunity>
        <subjects/>
        <datasources>
            <datasource><openaireId>re3data_____::a507cdacc5bbcc08761c92185dee5cab</openaireId><selcriteria/></datasource>
        </datasources>
        <zenodocommunities/>
    </community>
    <community id="dariah">
        <oacommunity>oaa_dariah</oacommunity>
        <subjects/>
        <datasources>
            <datasource>
                <openaireId>openaire____::1cfdb2e14977f31a98e0118283401f32</openaireId>
                <selcriteria>{"criteria":[{"constraint":[{"verb":"contains","field":"contributor","value":"DARIAH"}]}]}</selcriteria>
            </datasource>
        </datasources>
        <zenodocommunities>
            <zenodocommunity><zenodoid>dimpo</zenodoid><selcriteria/></zenodocommunity>
        </zenodocommunities>
    </community>
</communities>
File diff suppressed because one or more lines are too long
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@ -0,0 +1,473 @@
package eu.dnetlib.dhp.resulttocommunityfromsemrel;

import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.QueryInformationSystem;
import eu.dnetlib.dhp.TypedRow;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.schema.oaf.*;
import org.apache.commons.io.IOUtils;
import org.apache.hadoop.fs.FileSystem;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;
import scala.Tuple2;

import java.util.*;
import java.util.stream.Collectors;

import static eu.dnetlib.dhp.PropagationConstant.*;

public class SparkResultToCommunityThroughSemRelJob3 {
    public static void main(String[] args) throws Exception {

        final ArgumentApplicationParser parser = new ArgumentApplicationParser(IOUtils
                .toString(SparkResultToCommunityThroughSemRelJob3.class
                        .getResourceAsStream("/eu/dnetlib/dhp/resulttocommunityfromsemrel/input_propagationresultcommunityfromsemrel_parameters.json")));
        parser.parseArgument(args);

        SparkConf conf = new SparkConf();
        conf.set("hive.metastore.uris", parser.get("hive_metastore_uris"));
        final SparkSession spark = SparkSession
                .builder()
                .appName(SparkResultToCommunityThroughSemRelJob3.class.getSimpleName())
                .master(parser.get("master"))
                .config(conf)
                .enableHiveSupport()
                .getOrCreate();

        final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext());
        final String inputPath = parser.get("sourcePath");
        final String outputPath = "/tmp/provision/propagation/resulttocommunityfromsemrel";

        final List<String> allowedsemrel = Arrays.asList(parser.get("allowedsemrels").split(";"));

        final List<String> communityIdList = QueryInformationSystem.getCommunityList(parser.get("isLookupUrl"));

        createOutputDirs(outputPath, FileSystem.get(spark.sparkContext().hadoopConfiguration()));

        JavaRDD<Publication> publication_rdd = sc.textFile(inputPath + "/publication")
                .map(item -> new ObjectMapper().readValue(item, Publication.class));

        JavaRDD<Dataset> dataset_rdd = sc.textFile(inputPath + "/dataset")
                .map(item -> new ObjectMapper().readValue(item, Dataset.class));

        JavaRDD<OtherResearchProduct> orp_rdd = sc.textFile(inputPath + "/otherresearchproduct")
                .map(item -> new ObjectMapper().readValue(item, OtherResearchProduct.class));

        JavaRDD<Software> software_rdd = sc.textFile(inputPath + "/software")
                .map(item -> new ObjectMapper().readValue(item, Software.class));

        JavaRDD<Relation> relation_rdd = sc.textFile(inputPath + "/relation")
                .map(item -> new ObjectMapper().readValue(item, Relation.class));

        org.apache.spark.sql.Dataset<Publication> publication = spark.createDataset(publication_rdd.rdd(),
                Encoders.bean(Publication.class));

        org.apache.spark.sql.Dataset<Relation> relation = spark.createDataset(relation_rdd.rdd(),
                Encoders.bean(Relation.class));

        org.apache.spark.sql.Dataset<Dataset> dataset = spark.createDataset(dataset_rdd.rdd(),
                Encoders.bean(Dataset.class));

        org.apache.spark.sql.Dataset<OtherResearchProduct> other = spark.createDataset(orp_rdd.rdd(),
                Encoders.bean(OtherResearchProduct.class));

        org.apache.spark.sql.Dataset<Software> software = spark.createDataset(software_rdd.rdd(),
                Encoders.bean(Software.class));

        publication.createOrReplaceTempView("publication");
        relation.createOrReplaceTempView("relation");
        dataset.createOrReplaceTempView("dataset");
        software.createOrReplaceTempView("software");
        other.createOrReplaceTempView("other");

        String communitylist = getConstraintList(" co.id = '", communityIdList);

        String semrellist = getConstraintList(" relClass = '", allowedsemrel);

        // pair each publication already tagged with at least one configured community
        // with the results it reaches through an allowed semantic relation
        String query = "Select source, community_context, target " +
                "from (select id, collect_set(co.id) community_context " +
                "from publication " +
                "lateral view explode (context) c as co " +
                "where datainfo.deletedbyinference = false " + communitylist +
                " group by id) p " +
                "JOIN " +
                "(select * " +
                "from relation " +
                "where datainfo.deletedbyinference = false " + semrellist + ") r " +
                "ON p.id = r.source";

        org.apache.spark.sql.Dataset<Row> publication_context = spark.sql(query);
        publication_context.createOrReplaceTempView("publication_context");

        // (source, (mes, dh-ch, ni), target)
        query = "select target, collect_set(co) " +
                "from (select target, community_context " +
                "from publication_context pc join publication p on " +
                "p.id = pc.source) tmp " +
                "lateral view explode (community_context) c as co " +
                "group by target";

        org.apache.spark.sql.Dataset<Row> toupdatepublicationreresult = spark.sql(query);
        org.apache.spark.sql.Dataset<Row> toupdatesoftwareresult = getUpdateCommunitiesForTable(spark, "software");
        org.apache.spark.sql.Dataset<Row> toupdatedatasetresult = getUpdateCommunitiesForTable(spark, "dataset");
        org.apache.spark.sql.Dataset<Row> toupdateotherresult = getUpdateCommunitiesForTable(spark, "other");

        createUpdateForResultDatasetWrite(toupdatesoftwareresult.toJavaRDD(), outputPath, "software_update",
                PROPAGATION_RESULT_COMMUNITY_SEMREL_CLASS_ID, PROPAGATION_RESULT_COMMUNITY_SEMREL_CLASS_NAME, communityIdList);

        createUpdateForResultDatasetWrite(toupdatedatasetresult.toJavaRDD(), outputPath, "dataset_update",
                PROPAGATION_RESULT_COMMUNITY_SEMREL_CLASS_ID, PROPAGATION_RESULT_COMMUNITY_SEMREL_CLASS_NAME, communityIdList);

        createUpdateForResultDatasetWrite(toupdatepublicationreresult.toJavaRDD(), outputPath, "publication_update",
                PROPAGATION_RESULT_COMMUNITY_SEMREL_CLASS_ID, PROPAGATION_RESULT_COMMUNITY_SEMREL_CLASS_NAME, communityIdList);

        createUpdateForResultDatasetWrite(toupdateotherresult.toJavaRDD(), outputPath, "other_update",
                PROPAGATION_RESULT_COMMUNITY_SEMREL_CLASS_ID, PROPAGATION_RESULT_COMMUNITY_SEMREL_CLASS_NAME, communityIdList);

        updateForDatasetDataset(toupdatedatasetresult.toJavaRDD(), dataset.toJavaRDD(), outputPath, "dataset",
                PROPAGATION_RESULT_COMMUNITY_SEMREL_CLASS_ID, PROPAGATION_RESULT_COMMUNITY_SEMREL_CLASS_NAME, communityIdList);

        updateForOtherDataset(toupdateotherresult.toJavaRDD(), other.toJavaRDD(), outputPath, "otherresearchproduct",
                PROPAGATION_RESULT_COMMUNITY_SEMREL_CLASS_ID, PROPAGATION_RESULT_COMMUNITY_SEMREL_CLASS_NAME, communityIdList);

        updateForSoftwareDataset(toupdatesoftwareresult.toJavaRDD(), software.toJavaRDD(), outputPath, "software",
                PROPAGATION_RESULT_COMMUNITY_SEMREL_CLASS_ID, PROPAGATION_RESULT_COMMUNITY_SEMREL_CLASS_NAME, communityIdList);

        updateForPublicationDataset(toupdatepublicationreresult.toJavaRDD(), publication.toJavaRDD(), outputPath, "publication",
                PROPAGATION_RESULT_COMMUNITY_SEMREL_CLASS_ID, PROPAGATION_RESULT_COMMUNITY_SEMREL_CLASS_NAME, communityIdList);

        /*
        JavaPairRDD<String, TypedRow> resultLinkedToCommunities = publication
                .map(p -> getTypedRow(communityIdList, p.getContext(), p.getId(), "publication"))
                .filter(p -> !(p == null))
                .mapToPair(toPair())
                .union(datasets
                        .map(p -> getTypedRow(communityIdList, p.getContext(), p.getId(), "dataset"))
                        .filter(p -> !(p == null))
                        .mapToPair(toPair())
                )
                .union(software
                        .map(p -> getTypedRow(communityIdList, p.getContext(), p.getId(), "software"))
                        .filter(p -> !(p == null))
                        .mapToPair(toPair())
                )
                .union(other
                        .map(p -> getTypedRow(communityIdList, p.getContext(), p.getId(), "otherresearchproduct"))
                        .filter(p -> !(p == null))
                        .mapToPair(toPair())
                );

        JavaPairRDD<String, TypedRow> to_add_result_communities = resultLinkedToCommunities.join(result_result)
                .map(r -> r._2()._1().setSourceId(r._2()._2().getTargetId()))
                .mapToPair(toPair());

        JavaPairRDD<String, Result> pubs = publications.mapToPair(p -> new Tuple2<>(p.getId(), p));
        JavaPairRDD<String, Result> dss = datasets.mapToPair(p -> new Tuple2<>(p.getId(), p));
        JavaPairRDD<String, Result> sfw = software.mapToPair(p -> new Tuple2<>(p.getId(), p));
        JavaPairRDD<String, Result> orp = other.mapToPair(p -> new Tuple2<>(p.getId(), p));

        updateResultForCommunity(pubs, to_add_result_communities, outputPath, "publication", PROPAGATION_RESULT_COMMUNITY_SEMREL_CLASS_ID, PROPAGATION_RESULT_COMMUNITY_SEMREL_CLASS_NAME);
        updateResultForCommunity(dss, to_add_result_communities, outputPath, "dataset", PROPAGATION_RESULT_COMMUNITY_SEMREL_CLASS_ID, PROPAGATION_RESULT_COMMUNITY_SEMREL_CLASS_NAME);
        updateResultForCommunity(sfw, to_add_result_communities, outputPath, "software", PROPAGATION_RESULT_COMMUNITY_SEMREL_CLASS_ID, PROPAGATION_RESULT_COMMUNITY_SEMREL_CLASS_NAME);
        updateResultForCommunity(orp, to_add_result_communities, outputPath, "otherresearchproduct", PROPAGATION_RESULT_COMMUNITY_SEMREL_CLASS_ID, PROPAGATION_RESULT_COMMUNITY_SEMREL_CLASS_NAME);
        // left outer join result with to_add_result_communities (result = JavaPairRDD of results)
        // [left outer join because we want all results, including those that were not updated]
        // for the matching ones, check whether the contexts to add are already present in the result;
        // add the missing ones, otherwise leave the result untouched
        */
    }

    private static org.apache.spark.sql.Dataset<Row> getUpdateCommunitiesForTable(SparkSession spark, String table){
        String query = "SELECT target_id, collect_set(co.id) context_id " +
                " FROM (SELECT t.id target_id, s.context source_context " +
                "         FROM context_software s " +
                "         JOIN " + table + " t " +
                "           ON s.target = t.id " +
                "       UNION ALL " +
                "       SELECT t.id target_id, d.context source_context " +
                "         FROM dataset_context d " +
                "         JOIN " + table + " t " +
                "           ON d.target = t.id " +   // fixed alias: the dataset branch joins on d.target, not s.target
                "       UNION ALL " +
                "       SELECT t.id target_id, p.context source_context " +
                "         FROM publication_context p " +
                "         JOIN " + table + " t " +
                "           ON p.target = t.id " +
                "       UNION ALL " +
                "       SELECT t.id target_id, o.context source_context " +
                "         FROM other_context o " +
                "         JOIN " + table + " t " +
                "           ON o.target = t.id) TMP " +
                " LATERAL VIEW EXPLODE(source_context) MyT as co " +
                " GROUP BY target_id";

        return spark.sql(query);
    }

    private static void createUpdateForResultDatasetWrite(JavaRDD<Row> toupdateresult, String outputPath, String type, String class_id, String class_name, List<String> communityIdList){
        toupdateresult.map(r -> {
            List<Context> contextList = new ArrayList<>();
            List<String> toAddContext = r.getList(1);
            for (String cId : toAddContext) {
                if (communityIdList.contains(cId)) {
                    Context newContext = new Context();
                    newContext.setId(cId);
                    newContext.setDataInfo(Arrays.asList(getDataInfo(PROPAGATION_DATA_INFO_TYPE, class_id, class_name)));
                    contextList.add(newContext);
                }
            }

            if (contextList.size() > 0) {
                Result ret = new Result();
                ret.setId(r.getString(0));
                ret.setContext(contextList);
                return ret;
            }
            return null;
        }).filter(r -> r != null)
                // persist the updates: without a write action the lazy RDD above would never be evaluated
                .map(r -> new ObjectMapper().writeValueAsString(r))
                .saveAsTextFile(outputPath + "/" + type);
    }

    private static void updateForSoftwareDataset(JavaRDD<Row> toupdateresult, JavaRDD<Software> result, String outputPath, String type, String class_id, String class_name, List<String> communityIdList){
        JavaPairRDD<String, Result> tmp = result.mapToPair(r -> new Tuple2<>(r.getId(), r));
        getUpdateForResultDataset(toupdateresult, tmp, outputPath, type, class_id, class_name, communityIdList)
                .map(r -> (Software) r)
                .map(s -> new ObjectMapper().writeValueAsString(s))
                .saveAsTextFile(outputPath + "/" + type);
    }

    private static void updateForDatasetDataset(JavaRDD<Row> toupdateresult, JavaRDD<Dataset> result, String outputPath, String type, String class_id, String class_name, List<String> communityIdList){
        JavaPairRDD<String, Result> tmp = result.mapToPair(r -> new Tuple2<>(r.getId(), r));
        getUpdateForResultDataset(toupdateresult, tmp, outputPath, type, class_id, class_name, communityIdList)
                .map(r -> (Dataset) r)
                .map(d -> new ObjectMapper().writeValueAsString(d))
                .saveAsTextFile(outputPath + "/" + type);
    }

    private static void updateForPublicationDataset(JavaRDD<Row> toupdateresult, JavaRDD<Publication> result, String outputPath, String type, String class_id, String class_name, List<String> communityIdList){
        JavaPairRDD<String, Result> tmp = result.mapToPair(r -> new Tuple2<>(r.getId(), r));
        getUpdateForResultDataset(toupdateresult, tmp, outputPath, type, class_id, class_name, communityIdList)
                .map(r -> (Publication) r)
                .map(p -> new ObjectMapper().writeValueAsString(p))
                .saveAsTextFile(outputPath + "/" + type);
    }

    private static void updateForOtherDataset(JavaRDD<Row> toupdateresult, JavaRDD<OtherResearchProduct> result, String outputPath, String type, String class_id, String class_name, List<String> communityIdList){
        JavaPairRDD<String, Result> tmp = result.mapToPair(r -> new Tuple2<>(r.getId(), r));
        getUpdateForResultDataset(toupdateresult, tmp, outputPath, type, class_id, class_name, communityIdList)
                .map(r -> (OtherResearchProduct) r)
                .map(o -> new ObjectMapper().writeValueAsString(o))
                .saveAsTextFile(outputPath + "/" + type);
    }

    private static JavaRDD<Result> getUpdateForResultDataset(JavaRDD<Row> toupdateresult, JavaPairRDD<String, Result> result, String outputPath, String type, String class_id, String class_name, List<String> communityIdList){
        return result.leftOuterJoin(toupdateresult.mapToPair(r -> new Tuple2<>(r.getString(0), r.getList(1))))
                .map(c -> {
                    if (!c._2()._2().isPresent()) {
                        return c._2()._1();
                    }

                    List<Object> toAddContext = c._2()._2().get();
                    Set<String> context_set = new HashSet<>();
                    for (Object cId : toAddContext) {
                        String id = (String) cId;
                        if (communityIdList.contains(id)) {
                            context_set.add(id);
                        }
                    }
                    // drop the community ids the result already carries; compare on the context id,
                    // since context_set holds Strings, not Context objects
                    for (Context context : c._2()._1().getContext()) {
                        context_set.remove(context.getId());
                    }

                    List<Context> contextList = context_set.stream().map(co -> {
                        Context newContext = new Context();
                        newContext.setId(co);
                        newContext.setDataInfo(Arrays.asList(getDataInfo(PROPAGATION_DATA_INFO_TYPE, class_id, class_name)));
                        return newContext;
                    }).collect(Collectors.toList());

                    if (contextList.size() > 0) {
                        Result r = new Result();
                        r.setId(c._1());
                        r.setContext(contextList);
                        return r;
                    }
                    return null;
                }).filter(r -> r != null);

        // earlier inner-join variant, kept for reference:
        // return toupdateresult.mapToPair(r -> new Tuple2<>(r.getString(0), r.getList(1)))
        //         .join(result)
        //         .map(c -> {
        //             List<Object> toAddContext = c._2()._1();
        //             Set<String> context_set = new HashSet<>();
        //             for(Object cId: toAddContext){
        //                 String id = (String)cId;
        //                 if (communityIdList.contains(id)){
        //                     context_set.add(id);
        //                 }
        //             }
        //             for (Context context: c._2()._2().getContext()){
        //                 if(context_set.contains(context)){
        //                     context_set.remove(context);
        //                 }
        //             }
        //
        //             List<Context> contextList = context_set.stream().map(co -> {
        //                 Context newContext = new Context();
        //                 newContext.setId(co);
        //                 newContext.setDataInfo(Arrays.asList(getDataInfo(PROPAGATION_DATA_INFO_TYPE, class_id, class_name)));
        //                 return newContext;
        //
        //             }).collect(Collectors.toList());
        //
        //             if(contextList.size() > 0 ){
        //                 Result r = new Result();
        //                 r.setId(c._1());
        //                 r.setContext(contextList);
        //                 return r;
        //             }
        //             return null;
        //         })
        //         .filter(r -> r != null);
    }

    private static JavaRDD<Software> createUpdateForSoftwareDataset(JavaRDD<Row> toupdateresult, List<String> communityList,
                                                                    JavaRDD<Software> result, String class_id, String class_name) {
        return result
                .mapToPair(s -> new Tuple2<>(s.getId(), s)).leftOuterJoin(getStringResultJavaPairRDD(toupdateresult, communityList))
                .map(c -> {
                    Software oaf = c._2()._1();
                    if (c._2()._2().isPresent()) {

                        HashSet<String> contexts = new HashSet<>(c._2()._2().get());

                        for (Context context : oaf.getContext()) {
                            if (contexts.contains(context.getId())) {
                                if (!context.getDataInfo().stream().map(di -> di.getInferenceprovenance())
                                        .collect(Collectors.toSet()).contains(PROPAGATION_DATA_INFO_TYPE)) {
                                    context.getDataInfo().add(getDataInfo(PROPAGATION_DATA_INFO_TYPE, class_id, class_name));
                                    // community id already in the context of the result. Remove it from the set that has to be added
                                    contexts.remove(context.getId());
                                }
                            }
                        }
                        List<Context> cc = oaf.getContext();
                        for (String cId : contexts) {
                            Context context = new Context();
                            context.setId(cId);
                            context.setDataInfo(Arrays.asList(getDataInfo(PROPAGATION_DATA_INFO_TYPE, class_id, class_name)));
                            cc.add(context);
                        }
                        oaf.setContext(cc);

                    }
                    return oaf;
                });
    }

    private static JavaPairRDD<String, List<String>> getStringResultJavaPairRDD(JavaRDD<Row> toupdateresult, List<String> communityList) {
        return toupdateresult.mapToPair(c -> {

            List<String> contextList = new ArrayList<>();
            List<String> contexts = c.getList(1);
            for (String context : contexts) {
                if (communityList.contains(context)) {
                    contextList.add(context);
                }
            }

            return new Tuple2<>(c.getString(0), contextList);
        });
    }

private static org.apache.spark.sql.Dataset<Row> getContext(SparkSession spark, String table){
|
||||
String query = "SELECT relation.source, " + table +".context , relation.target " +
|
||||
"FROM " + table +
|
||||
" JOIN relation " +
|
||||
"ON id = source" ;
|
||||
|
||||
return spark.sql(query);
|
||||
}
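
    // For table = "software" the generated statement reads:
    //   SELECT relation.source, software.context, relation.target
    //   FROM software JOIN relation ON id = source
    // The unqualified "id" and "source" resolve against software and relation
    // respectively only while those column names stay unambiguous between the tables.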

    private static Boolean relatedToCommunities(Result r, List<String> communityIdList) {
        return r.getContext().stream()
                .map(Context::getId)
                .anyMatch(communityIdList::contains);
    }

    private static void updateResult(
            JavaPairRDD<String, Result> results,
            JavaPairRDD<String, TypedRow> toupdateresult,
            String outputPath,
            String type) {
        results.leftOuterJoin(toupdateresult)
                .map(p -> {
                    Result r = p._2()._1();
                    if (p._2()._2().isPresent()) {
                        Set<String> communityList = p._2()._2().get().getAccumulator();
                        for (Context c : r.getContext()) {
                            if (communityList.contains(c.getId())) {
                                // verify whether the dataInfo for this context already carries the propagation provenance
                                if (!c.getDataInfo().stream()
                                        .map(di -> di.getInferenceprovenance())
                                        .collect(Collectors.toSet())
                                        .contains(PROPAGATION_DATA_INFO_TYPE)) {
                                    c.getDataInfo().add(getDataInfo(PROPAGATION_DATA_INFO_TYPE,
                                            PROPAGATION_RESULT_COMMUNITY_SEMREL_CLASS_ID,
                                            PROPAGATION_RESULT_COMMUNITY_SEMREL_CLASS_NAME));
                                }
                                // community id already in the context of the result:
                                // remove it from the set that has to be added
                                communityList.remove(c.getId());
                            }
                        }
                        List<Context> cc = r.getContext();
                        for (String cId : communityList) {
                            Context context = new Context();
                            context.setId(cId);
                            context.setDataInfo(Arrays.asList(getDataInfo(PROPAGATION_DATA_INFO_TYPE,
                                    PROPAGATION_RESULT_COMMUNITY_SEMREL_CLASS_ID,
                                    PROPAGATION_RESULT_COMMUNITY_SEMREL_CLASS_NAME)));
                            cc.add(context);
                        }
                        r.setContext(cc);
                    }
                    return r;
                })
                .map(p -> new ObjectMapper().writeValueAsString(p))
                .saveAsTextFile(outputPath + "/" + type);
}
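
    // Note: the serialisation step above instantiates a new ObjectMapper per record;
    // ObjectMapper is thread-safe once configured, so a single shared instance could
    // likely be reused to avoid the repeated allocation.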

    private static TypedRow getTypedRow(List<String> communityIdList, List<Context> context, String id, String type) {
        Set<String> result_communities = context.stream()
                .map(c -> c.getId())
                .collect(Collectors.toSet());
        TypedRow tp = new TypedRow();
        tp.setSourceId(id);
        tp.setType(type);
        for (String communityId : result_communities) {
            if (communityIdList.contains(communityId)) {
                tp.add(communityId);
            }
        }
        if (tp.getAccumulator() != null) {
            return tp;
        }
        return null;
}
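
    // Returns null when none of the result's communities appears in communityIdList
    // (the accumulator is seemingly only initialised by add), so callers are expected
    // to filter out the nulls.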
}
@ -0,0 +1,40 @@
[
    {
        "paramName": "s",
        "paramLongName": "sourcePath",
        "paramDescription": "the path of the sequential file to read",
        "paramRequired": true
    },
    {
        "paramName": "h",
        "paramLongName": "hive_metastore_uris",
        "paramDescription": "the hive metastore uris",
        "paramRequired": true
    },
    {
        "paramName": "ssm",
        "paramLongName": "isSparkSessionManaged",
        "paramDescription": "true if the spark session is managed, false otherwise",
        "paramRequired": false
    },
    {
        "paramName": "out",
        "paramLongName": "outputPath",
        "paramDescription": "the path used to store temporary output files",
        "paramRequired": true
    },
    {
        "paramName": "tn",
        "paramLongName": "resultTableName",
        "paramDescription": "the name of the result table we are currently working on",
        "paramRequired": true
    },
    {
        "paramName": "p",
        "paramLongName": "preparedInfoPath",
        "paramDescription": "the path where the prepared info has been stored",
        "paramRequired": true
    }
]
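
For illustration only, a job reading this parameter file might be launched with
arguments along these lines (every path and value below is hypothetical):

  --sourcePath /tmp/graph/dataset \
  --hive_metastore_uris thrift://hive-metastore:9083 \
  --resultTableName eu.dnetlib.dhp.schema.oaf.Dataset \
  --preparedInfoPath /tmp/provision/preparedInfo \
  --outputPath /tmp/provision/output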
@ -0,0 +1,50 @@
[
    {
        "paramName": "is",
        "paramLongName": "isLookupUrl",
        "paramDescription": "URL of the isLookUp Service",
        "paramRequired": true
    },
    {
        "paramName": "s",
        "paramLongName": "sourcePath",
        "paramDescription": "the path of the sequential file to read",
        "paramRequired": true
    },
    {
        "paramName": "as",
        "paramLongName": "allowedsemrels",
        "paramDescription": "the allowed semantic relations for propagation",
        "paramRequired": true
    },
    {
        "paramName": "h",
        "paramLongName": "hive_metastore_uris",
        "paramDescription": "the hive metastore uris",
        "paramRequired": true
    },
    {
        "paramName": "sg",
        "paramLongName": "saveGraph",
        "paramDescription": "true if the new version of the graph must be saved",
        "paramRequired": false
    },
    {
        "paramName": "ssm",
        "paramLongName": "isSparkSessionManaged",
        "paramDescription": "true if the spark session is managed, false otherwise",
        "paramRequired": false
    },
    {
        "paramName": "out",
        "paramLongName": "outputPath",
        "paramDescription": "the path used to store temporary output files",
        "paramRequired": true
    },
    {
        "paramName": "tn",
        "paramLongName": "resultTableName",
        "paramDescription": "the name of the result table we are currently working on",
        "paramRequired": true
    }
]
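
For illustration only, allowedsemrels takes the relation names the propagation is
allowed to follow; plausible values would be isSupplementedBy or isSupplementTo,
passed in whatever list form the job's argument parser expects.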
@ -28,6 +28,18 @@
    "paramLongName": "hive_metastore_uris",
    "paramDescription": "the hive metastore uris",
    "paramRequired": true
  },
  {
    "paramName": "wu",
    "paramLongName": "writeUpdate",
    "paramDescription": "true if the update must be written; no double check whether the information is already present",
    "paramRequired": true
  },
  {
    "paramName": "sg",
    "paramLongName": "saveGraph",
    "paramDescription": "true if the new version of the graph must be saved",
    "paramRequired": true
  }
]
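
As the two descriptions suggest, writeUpdate governs dumping the computed updates
as text output while saveGraph governs writing the updated graph itself; the flags
can presumably be enabled independently.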
@ -0,0 +1,4 @@
package eu.dnetlib.dhp.resulttocommunityfromorganization;

public class ResultToCommunityJobTest {
}
@ -0,0 +1,4 @@
package eu.dnetlib.dhp.resulttocommunityfromsemrel;

public class ResultToCommunityJobTest {
}