forked from D-Net/dnet-hadoop

Merge remote-tracking branch 'upstream/master'
commit ab08a37024

Binary file not shown. (image; size: 689 KiB)

@@ -30,6 +30,12 @@
   <groupId>com.fasterxml.jackson.core</groupId>
   <artifactId>jackson-databind</artifactId>
  </dependency>
 
+ <dependency>
+  <groupId>junit</groupId>
+  <artifactId>junit</artifactId>
+  <version>${junit.version}</version>
+ </dependency>
+
 <dependency>
  <groupId>eu.dnetlib.dhp</groupId>

@@ -1,66 +1,83 @@
 package eu.dnetlib.dhp.schema.oaf;
 
+import java.util.ArrayList;
 import java.util.List;
+import java.util.stream.Collectors;
+import java.util.stream.Stream;
+
+import org.junit.Assert;
 
 public class Relation extends Oaf {
 
     private String relType;
 
     private String subRelType;
 
     private String relClass;
 
     private String source;
 
     private String target;
 
-    private List<KeyValue> collectedFrom;
+    private List<KeyValue> collectedFrom = new ArrayList<>();
 
     public String getRelType() {
         return relType;
     }
 
-    public void setRelType(String relType) {
+    public void setRelType(final String relType) {
         this.relType = relType;
     }
 
     public String getSubRelType() {
         return subRelType;
     }
 
-    public void setSubRelType(String subRelType) {
+    public void setSubRelType(final String subRelType) {
         this.subRelType = subRelType;
     }
 
     public String getRelClass() {
         return relClass;
     }
 
-    public void setRelClass(String relClass) {
+    public void setRelClass(final String relClass) {
         this.relClass = relClass;
     }
 
     public String getSource() {
         return source;
     }
 
-    public void setSource(String source) {
+    public void setSource(final String source) {
         this.source = source;
     }
 
     public String getTarget() {
         return target;
     }
 
-    public void setTarget(String target) {
+    public void setTarget(final String target) {
         this.target = target;
     }
 
     public List<KeyValue> getCollectedFrom() {
         return collectedFrom;
     }
 
+    public void setCollectedFrom(final List<KeyValue> collectedFrom) {
+        this.collectedFrom = collectedFrom;
+    }
+
+    public void mergeFrom(final Relation r) {
+        Assert.assertEquals("source ids must be equal", getSource(), r.getSource());
+        Assert.assertEquals("target ids must be equal", getTarget(), r.getTarget());
+        Assert.assertEquals("relType(s) must be equal", getRelType(), r.getRelType());
+        Assert.assertEquals("subRelType(s) must be equal", getSubRelType(), r.getSubRelType());
+        Assert.assertEquals("relClass(es) must be equal", getRelClass(), r.getRelClass());
+        setCollectedFrom(Stream.concat(getCollectedFrom().stream(), r.getCollectedFrom().stream())
+                .distinct() // relies on KeyValue.equals
+                .collect(Collectors.toList()));
+    }
+
-    public void setCollectedFrom(List<KeyValue> collectedFrom) {
-        this.collectedFrom = collectedFrom;
-    }
-
 }

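Note (not part of the diff): a minimal usage sketch of the new Relation.mergeFrom, assuming KeyValue exposes setKey/setValue (as used by ProtoConverter.mapKV below) and that .distinct() relies on a value-based KeyValue.equals; the relation values are illustrative only.

    // both relations carry the same source/target and semantics (illustrative values)
    Relation a = new Relation();
    a.setSource("s1"); a.setTarget("t1");
    a.setRelType("resultResult"); a.setSubRelType("dedup"); a.setRelClass("isSimilarTo");

    Relation b = new Relation();
    b.setSource("s1"); b.setTarget("t1");
    b.setRelType("resultResult"); b.setSubRelType("dedup"); b.setRelClass("isSimilarTo");

    KeyValue kv = new KeyValue();        // same provenance on both sides
    kv.setKey("ds-id"); kv.setValue("ds-name");
    a.getCollectedFrom().add(kv);
    b.getCollectedFrom().add(kv);

    a.mergeFrom(b);                      // asserts ids/semantics match; afterwards
                                         // a.getCollectedFrom() holds a single deduplicated KeyValue
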
@@ -4,7 +4,7 @@ import java.io.Serializable;
 import java.util.Comparator;
 import java.util.List;
 
-public abstract class Result extends OafEntity implements Serializable {
+public class Result extends OafEntity implements Serializable {
 
     private List<Author> author;
 

@@ -24,6 +24,12 @@
  <groupId>eu.dnetlib.dhp</groupId>
  <artifactId>dhp-common</artifactId>
  <version>${project.version}</version>
+ <exclusions>
+  <exclusion>
+   <groupId>com.sun.xml.bind</groupId>
+   <artifactId>jaxb-core</artifactId>
+  </exclusion>
+ </exclusions>
 </dependency>
 
 <dependency>

@@ -32,6 +38,49 @@
  <version>${project.version}</version>
 </dependency>
 
+<dependency>
+ <groupId>eu.dnetlib</groupId>
+ <artifactId>dnet-actionmanager-common</artifactId>
+ <exclusions>
+  <exclusion>
+   <groupId>eu.dnetlib</groupId>
+   <artifactId>dnet-openaireplus-mapping-utils</artifactId>
+  </exclusion>
+  <exclusion>
+   <groupId>saxonica</groupId>
+   <artifactId>saxon</artifactId>
+  </exclusion>
+  <exclusion>
+   <groupId>saxonica</groupId>
+   <artifactId>saxon-dom</artifactId>
+  </exclusion>
+  <exclusion>
+   <groupId>jgrapht</groupId>
+   <artifactId>jgrapht</artifactId>
+  </exclusion>
+  <exclusion>
+   <groupId>net.sf.ehcache</groupId>
+   <artifactId>ehcache</artifactId>
+  </exclusion>
+  <exclusion>
+   <groupId>org.springframework</groupId>
+   <artifactId>spring-test</artifactId>
+  </exclusion>
+  <exclusion>
+   <groupId>org.apache.*</groupId>
+   <artifactId>*</artifactId>
+  </exclusion>
+  <exclusion>
+   <groupId>apache</groupId>
+   <artifactId>*</artifactId>
+  </exclusion>
+ </exclusions>
+</dependency>
+<dependency>
+ <groupId>eu.dnetlib</groupId>
+ <artifactId>dnet-openaire-data-protos</artifactId>
+</dependency>
+
 <dependency>
  <groupId>net.sf.saxon</groupId>
  <artifactId>Saxon-HE</artifactId>

@@ -55,6 +104,11 @@
  <groupId>org.mongodb</groupId>
  <artifactId>mongo-java-driver</artifactId>
 </dependency>
 
+<dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-distcp</artifactId>
+</dependency>
+
 <dependency>
  <groupId>org.postgresql</groupId>

@@ -1,56 +0,0 @@
(deleted file ExtractEntitiesFromHDFSJob.java)

package eu.dnetlib.dhp.migration;

import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.io.Text;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.SparkSession;
import scala.Tuple2;

import java.util.Arrays;
import java.util.List;

public class ExtractEntitiesFromHDFSJob {

    private static List<String> folderNames = Arrays.asList("db_entities", "oaf_entities", "odf_entities");

    public static void main(String[] args) throws Exception {
        final ArgumentApplicationParser parser = new ArgumentApplicationParser(
                IOUtils.toString(MigrateMongoMdstoresApplication.class.getResourceAsStream("/eu/dnetlib/dhp/migration/extract_entities_from_hdfs_parameters.json")));
        parser.parseArgument(args);

        final SparkSession spark = SparkSession
                .builder()
                .appName(ExtractEntitiesFromHDFSJob.class.getSimpleName())
                .master(parser.get("master"))
                .getOrCreate();

        final String sourcePath = parser.get("sourcePath");
        final String targetPath = parser.get("graphRawPath");
        final String entity = parser.get("entity");

        final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext());

        JavaRDD<String> inputRdd = sc.emptyRDD();

        folderNames.forEach(p -> inputRdd.union(
                sc.sequenceFile(sourcePath + "/" + p, Text.class, Text.class)
                        .map(k -> new Tuple2<>(k._1().toString(), k._2().toString()))
                        .filter(k -> isEntityType(k._1(), entity))
                        .map(Tuple2::_2))
        );

        inputRdd.saveAsTextFile(targetPath + "/" + entity);
    }

    private static boolean isEntityType(final String item, final String entity) {
        return StringUtils.substringAfter(item, ":").equalsIgnoreCase(entity);
    }
}

@@ -1,45 +0,0 @@
(deleted file MigrateMongoMdstoresApplication.java)

package eu.dnetlib.dhp.migration;

import org.apache.commons.io.IOUtils;

import eu.dnetlib.dhp.application.ArgumentApplicationParser;

public class MigrateMongoMdstoresApplication {

    public static void main(final String[] args) throws Exception {
        final ArgumentApplicationParser parser = new ArgumentApplicationParser(
                IOUtils.toString(MigrateMongoMdstoresApplication.class.getResourceAsStream("/eu/dnetlib/dhp/migration/migrate_mongo_mstores_parameters.json")));
        parser.parseArgument(args);

        final String mongoBaseUrl = parser.get("mongoBaseUrl");
        final String mongoDb = parser.get("mongoDb");

        final String mdFormat = parser.get("mdFormat");
        final String mdLayout = parser.get("mdLayout");
        final String mdInterpretation = parser.get("mdInterpretation");

        final String hdfsPath = parser.get("hdfsPath");
        final String hdfsNameNode = parser.get("namenode");
        final String hdfsUser = parser.get("hdfsUser");

        final String dbUrl = parser.get("postgresUrl");
        final String dbUser = parser.get("postgresUser");
        final String dbPassword = parser.get("postgresPassword");

        if (mdFormat.equalsIgnoreCase("oaf")) {
            try (final OafMigrationExecutor mig =
                    new OafMigrationExecutor(hdfsPath, hdfsNameNode, hdfsUser, mongoBaseUrl, mongoDb, dbUrl, dbUser, dbPassword)) {
                mig.processMdRecords(mdFormat, mdLayout, mdInterpretation);
            }
        } else if (mdFormat.equalsIgnoreCase("odf")) {
            try (final OdfMigrationExecutor mig =
                    new OdfMigrationExecutor(hdfsPath, hdfsNameNode, hdfsUser, mongoBaseUrl, mongoDb, dbUrl, dbUser, dbPassword)) {
                mig.processMdRecords(mdFormat, mdLayout, mdInterpretation);
            }
        } else {
            throw new RuntimeException("Format not supported: " + mdFormat);
        }
    }
}

@@ -0,0 +1,49 @@
(new file LicenseComparator.java)

package eu.dnetlib.dhp.migration.actions;

import eu.dnetlib.data.proto.FieldTypeProtos.Qualifier;

import java.util.Comparator;

public class LicenseComparator implements Comparator<Qualifier> {

    @Override
    public int compare(Qualifier left, Qualifier right) {

        if (left == null && right == null) return 0;
        if (left == null) return 1;
        if (right == null) return -1;

        String lClass = left.getClassid();
        String rClass = right.getClassid();

        if (lClass.equals(rClass)) return 0;

        if (lClass.equals("OPEN SOURCE")) return -1;
        if (rClass.equals("OPEN SOURCE")) return 1;

        if (lClass.equals("OPEN")) return -1;
        if (rClass.equals("OPEN")) return 1;

        if (lClass.equals("6MONTHS")) return -1;
        if (rClass.equals("6MONTHS")) return 1;

        if (lClass.equals("12MONTHS")) return -1;
        if (rClass.equals("12MONTHS")) return 1;

        if (lClass.equals("EMBARGO")) return -1;
        if (rClass.equals("EMBARGO")) return 1;

        if (lClass.equals("RESTRICTED")) return -1;
        if (rClass.equals("RESTRICTED")) return 1;

        if (lClass.equals("CLOSED")) return -1;
        if (rClass.equals("CLOSED")) return 1;

        if (lClass.equals("UNKNOWN")) return -1;
        if (rClass.equals("UNKNOWN")) return 1;

        // Else (but unlikely), lexicographical ordering will do.
        return lClass.compareTo(rClass);
    }

}

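Note (not part of the diff): a small sketch of how the comparator is meant to be used — ProtoConverter.getBestAccessRights below applies it through Stream.min so that the "smallest" qualifier is the most open one. It assumes the standard builder API that protobuf generates for FieldTypeProtos.Qualifier.

    // assumes: import java.util.stream.Stream; import eu.dnetlib.data.proto.FieldTypeProtos.Qualifier;
    Qualifier open   = Qualifier.newBuilder().setClassid("OPEN").build();
    Qualifier closed = Qualifier.newBuilder().setClassid("CLOSED").build();

    // picks the most open access right according to LicenseComparator's ordering
    Qualifier best = Stream.of(closed, open).min(new LicenseComparator()).get();  // classid == "OPEN"
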
@@ -0,0 +1,170 @@
(new file MigrateActionSet.java)

package eu.dnetlib.dhp.migration.actions;

import com.google.common.base.Splitter;
import com.google.common.collect.Lists;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.utils.ISLookupClientFactory;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.tools.DistCp;
import org.apache.hadoop.tools.DistCpOptions;
import org.apache.hadoop.util.ToolRunner;

import java.io.File;
import java.io.FileOutputStream;
import java.io.OutputStream;
import java.util.*;
import java.util.stream.Collectors;

public class MigrateActionSet {

    private static final Log log = LogFactory.getLog(MigrateActionSet.class);

    private static final String SEPARATOR = "/";
    private static final String TARGET_PATHS = "target_paths";
    private static final String RAWSET_PREFIX = "rawset_";

    private static Boolean DEFAULT_TRANSFORM_ONLY = false;

    public static void main(String[] args) throws Exception {
        final ArgumentApplicationParser parser = new ArgumentApplicationParser(
                IOUtils.toString(MigrateActionSet.class.getResourceAsStream(
                        "/eu/dnetlib/dhp/migration/migrate_actionsets_parameters.json")));
        parser.parseArgument(args);

        new MigrateActionSet().run(parser);
    }

    private void run(ArgumentApplicationParser parser) throws Exception {

        final String isLookupUrl = parser.get("isLookupUrl");
        final String sourceNN = parser.get("sourceNameNode");
        final String targetNN = parser.get("targetNameNode");
        final String workDir = parser.get("workingDirectory");
        final Integer distcp_num_maps = Integer.parseInt(parser.get("distcp_num_maps"));

        final String distcp_memory_mb = parser.get("distcp_memory_mb");
        final String distcp_task_timeout = parser.get("distcp_task_timeout");

        final String transform_only_s = parser.get("transform_only");

        log.info("transform only param: " + transform_only_s);

        final Boolean transformOnly = Boolean.valueOf(parser.get("transform_only"));

        log.info("transform only: " + transformOnly);

        ISLookUpService isLookUp = ISLookupClientFactory.getLookUpService(isLookupUrl);

        Configuration conf = getConfiguration(distcp_task_timeout, distcp_memory_mb, distcp_num_maps);
        FileSystem targetFS = FileSystem.get(conf);

        Configuration sourceConf = getConfiguration(distcp_task_timeout, distcp_memory_mb, distcp_num_maps);
        sourceConf.set(FileSystem.FS_DEFAULT_NAME_KEY, sourceNN);
        FileSystem sourceFS = FileSystem.get(sourceConf);

        Properties props = new Properties();

        List<Path> targetPaths = new ArrayList<>();

        final List<Path> sourcePaths = getSourcePaths(sourceNN, isLookUp);
        log.info(String.format("paths to process:\n%s", sourcePaths.stream().map(p -> p.toString()).collect(Collectors.joining("\n"))));
        for (Path source : sourcePaths) {

            if (!sourceFS.exists(source)) {
                log.warn(String.format("skipping unexisting path: %s", source));
            } else {

                LinkedList<String> pathQ = Lists.newLinkedList(Splitter.on(SEPARATOR).split(source.toUri().getPath()));

                final String rawSet = pathQ.pollLast();
                log.info(String.format("got RAWSET: %s", rawSet));

                if (StringUtils.isNotBlank(rawSet) && rawSet.startsWith(RAWSET_PREFIX)) {

                    final String actionSetDirectory = pathQ.pollLast();

                    final Path targetPath = new Path(targetNN + workDir + SEPARATOR + actionSetDirectory + SEPARATOR + rawSet);

                    log.info(String.format("using TARGET PATH: %s", targetPath));

                    if (!transformOnly) {
                        if (targetFS.exists(targetPath)) {
                            targetFS.delete(targetPath, true);
                        }
                        runDistcp(distcp_num_maps, distcp_memory_mb, distcp_task_timeout, conf, source, targetPath);
                    }

                    targetPaths.add(targetPath);
                }
            }
        }

        props.setProperty(TARGET_PATHS, targetPaths
                .stream()
                .map(p -> p.toString())
                .collect(Collectors.joining(",")));
        File file = new File(System.getProperty("oozie.action.output.properties"));

        try (OutputStream os = new FileOutputStream(file)) {
            props.store(os, "");
        }
        System.out.println(file.getAbsolutePath());
    }

    private void runDistcp(Integer distcp_num_maps, String distcp_memory_mb, String distcp_task_timeout, Configuration conf, Path source, Path targetPath) throws Exception {

        final DistCpOptions op = new DistCpOptions(source, targetPath);
        op.setMaxMaps(distcp_num_maps);
        op.preserve(DistCpOptions.FileAttribute.BLOCKSIZE);
        op.preserve(DistCpOptions.FileAttribute.REPLICATION);
        op.preserve(DistCpOptions.FileAttribute.CHECKSUMTYPE);

        int res = ToolRunner.run(new DistCp(conf, op), new String[]{
                "-Dmapred.task.timeout=" + distcp_task_timeout,
                "-Dmapreduce.map.memory.mb=" + distcp_memory_mb,
                "-pb",
                "-m " + distcp_num_maps,
                source.toString(),
                targetPath.toString()});

        if (res != 0) {
            throw new RuntimeException(String.format("distcp exited with code %s", res));
        }
    }

    private Configuration getConfiguration(String distcp_task_timeout, String distcp_memory_mb, Integer distcp_num_maps) {
        final Configuration conf = new Configuration();
        conf.set("dfs.webhdfs.socket.connect-timeout", distcp_task_timeout);
        conf.set("dfs.webhdfs.socket.read-timeout", distcp_task_timeout);
        conf.set("dfs.http.client.retry.policy.enabled", "true");
        conf.set("mapred.task.timeout", distcp_task_timeout);
        conf.set("mapreduce.map.memory.mb", distcp_memory_mb);
        conf.set("mapred.map.tasks", String.valueOf(distcp_num_maps));
        return conf;
    }

    private List<Path> getSourcePaths(String sourceNN, ISLookUpService isLookUp) throws ISLookUpException {
        String XQUERY = "distinct-values(\n" +
                "let $basePath := collection('/db/DRIVER/ServiceResources/ActionManagerServiceResourceType')//SERVICE_PROPERTIES/PROPERTY[@key = 'basePath']/@value/string()\n" +
                "for $x in collection('/db/DRIVER/ActionManagerSetDSResources/ActionManagerSetDSResourceType') \n" +
                "let $setDir := $x//SET/@directory/string()\n" +
                "let $rawSet := $x//RAW_SETS/LATEST/@id/string()\n" +
                "return concat($basePath, '/', $setDir, '/', $rawSet))";

        log.info(String.format("running xquery:\n%s", XQUERY));
        return isLookUp.quickSearchProfile(XQUERY)
                .stream()
                .map(p -> sourceNN + p)
                .map(Path::new)
                .collect(Collectors.toList());
    }

}

@@ -0,0 +1,580 @@
(new file ProtoConverter.java)

package eu.dnetlib.dhp.migration.actions;

import com.google.common.collect.Lists;
import com.googlecode.protobuf.format.JsonFormat;
import eu.dnetlib.data.proto.*;
import eu.dnetlib.dhp.schema.oaf.*;
import org.apache.commons.lang3.StringUtils;

import java.io.Serializable;
import java.util.List;
import java.util.Optional;
import java.util.stream.Collectors;

public class ProtoConverter implements Serializable {

    public static final String UNKNOWN = "UNKNOWN";
    public static final String NOT_AVAILABLE = "not available";
    public static final String DNET_ACCESS_MODES = "dnet:access_modes";

    public static Oaf convert(OafProtos.Oaf oaf) {
        try {
            switch (oaf.getKind()) {
                case entity:
                    return convertEntity(oaf);
                case relation:
                    return convertRelation(oaf);
                default:
                    throw new IllegalArgumentException("invalid kind " + oaf.getKind());
            }
        } catch (Throwable e) {
            throw new RuntimeException("error on getting " + JsonFormat.printToString(oaf), e);
        }
    }

    private static Relation convertRelation(OafProtos.Oaf oaf) {
        final OafProtos.OafRel r = oaf.getRel();
        final Relation rel = new Relation();
        rel.setDataInfo(mapDataInfo(oaf.getDataInfo()));
        rel.setLastupdatetimestamp(oaf.getLastupdatetimestamp());
        rel.setSource(r.getSource());
        rel.setTarget(r.getTarget());
        rel.setRelType(r.getRelType().toString());
        rel.setSubRelType(r.getSubRelType().toString());
        rel.setRelClass(r.getRelClass());
        rel.setCollectedFrom(r.getCollectedfromCount() > 0 ?
                r.getCollectedfromList().stream()
                        .map(kv -> mapKV(kv))
                        .collect(Collectors.toList()) : null);
        return rel;
    }

    private static OafEntity convertEntity(OafProtos.Oaf oaf) {
        switch (oaf.getEntity().getType()) {
            case result:
                final Result r = convertResult(oaf);
                r.setInstance(convertInstances(oaf));
                return r;
            case project:
                return convertProject(oaf);
            case datasource:
                return convertDataSource(oaf);
            case organization:
                return convertOrganization(oaf);
            default:
                throw new RuntimeException("received unknown type");
        }
    }

    private static List<Instance> convertInstances(OafProtos.Oaf oaf) {
        final ResultProtos.Result r = oaf.getEntity().getResult();
        if (r.getInstanceCount() > 0) {
            return r.getInstanceList()
                    .stream()
                    .map(i -> convertInstance(i))
                    .collect(Collectors.toList());
        }
        return Lists.newArrayList();
    }

    private static Instance convertInstance(ResultProtos.Result.Instance ri) {
        final Instance i = new Instance();
        i.setAccessright(mapQualifier(ri.getAccessright()));
        i.setCollectedfrom(mapKV(ri.getCollectedfrom()));
        i.setDateofacceptance(mapStringField(ri.getDateofacceptance()));
        i.setDistributionlocation(ri.getDistributionlocation());
        i.setHostedby(mapKV(ri.getHostedby()));
        i.setInstancetype(mapQualifier(ri.getInstancetype()));
        i.setLicense(mapStringField(ri.getLicense()));
        i.setUrl(ri.getUrlList());
        i.setRefereed(mapStringField(ri.getRefereed()));
        i.setProcessingchargeamount(mapStringField(ri.getProcessingchargeamount()));
        i.setProcessingchargecurrency(mapStringField(ri.getProcessingchargecurrency()));
        return i;
    }

    private static Organization convertOrganization(OafProtos.Oaf oaf) {
        final OrganizationProtos.Organization.Metadata m = oaf.getEntity().getOrganization().getMetadata();
        final Organization org = setOaf(new Organization(), oaf);
        setEntity(org, oaf);
        org.setLegalshortname(mapStringField(m.getLegalshortname()));
        org.setLegalname(mapStringField(m.getLegalname()));
        org.setAlternativeNames(m.getAlternativeNamesList()
                .stream()
                .map(ProtoConverter::mapStringField)
                .collect(Collectors.toList()));
        org.setWebsiteurl(mapStringField(m.getWebsiteurl()));
        org.setLogourl(mapStringField(m.getLogourl()));
        org.setEclegalbody(mapStringField(m.getEclegalbody()));
        org.setEclegalperson(mapStringField(m.getEclegalperson()));
        org.setEcnonprofit(mapStringField(m.getEcnonprofit()));
        org.setEcresearchorganization(mapStringField(m.getEcresearchorganization()));
        org.setEchighereducation(mapStringField(m.getEchighereducation()));
        org.setEcinternationalorganizationeurinterests(mapStringField(m.getEcinternationalorganizationeurinterests()));
        org.setEcinternationalorganization(mapStringField(m.getEcinternationalorganization()));
        org.setEcenterprise(mapStringField(m.getEcenterprise()));
        org.setEcsmevalidated(mapStringField(m.getEcsmevalidated()));
        org.setEcnutscode(mapStringField(m.getEcnutscode()));
        org.setCountry(mapQualifier(m.getCountry()));

        return org;
    }

    private static Datasource convertDataSource(OafProtos.Oaf oaf) {
        final DatasourceProtos.Datasource.Metadata m = oaf.getEntity().getDatasource().getMetadata();
        final Datasource datasource = setOaf(new Datasource(), oaf);
        setEntity(datasource, oaf);
        datasource.setAccessinfopackage(m.getAccessinfopackageList()
                .stream()
                .map(ProtoConverter::mapStringField)
                .collect(Collectors.toList()));
        datasource.setCertificates(mapStringField(m.getCertificates()));
        datasource.setCitationguidelineurl(mapStringField(m.getCitationguidelineurl()));
        datasource.setContactemail(mapStringField(m.getContactemail()));
        datasource.setDatabaseaccessrestriction(mapStringField(m.getDatabaseaccessrestriction()));
        datasource.setDatabaseaccesstype(mapStringField(m.getDatabaseaccesstype()));
        datasource.setDataprovider(mapBoolField(m.getDataprovider()));
        datasource.setDatasourcetype(mapQualifier(m.getDatasourcetype()));
        datasource.setDatauploadrestriction(mapStringField(m.getDatauploadrestriction()));
        datasource.setCitationguidelineurl(mapStringField(m.getCitationguidelineurl()));
        datasource.setDatauploadtype(mapStringField(m.getDatauploadtype()));
        datasource.setDateofvalidation(mapStringField(m.getDateofvalidation()));
        datasource.setDescription(mapStringField(m.getDescription()));
        datasource.setEnglishname(mapStringField(m.getEnglishname()));
        datasource.setLatitude(mapStringField(m.getLatitude()));
        datasource.setLongitude(mapStringField(m.getLongitude()));
        datasource.setLogourl(mapStringField(m.getLogourl()));
        datasource.setMissionstatementurl(mapStringField(m.getMissionstatementurl()));
        datasource.setNamespaceprefix(mapStringField(m.getNamespaceprefix()));
        datasource.setOdcontenttypes(m.getOdcontenttypesList()
                .stream()
                .map(ProtoConverter::mapStringField)
                .collect(Collectors.toList()));
        datasource.setOdlanguages(m.getOdlanguagesList()
                .stream()
                .map(ProtoConverter::mapStringField)
                .collect(Collectors.toList()));
        datasource.setOdnumberofitems(mapStringField(m.getOdnumberofitems()));
        datasource.setOdnumberofitemsdate(mapStringField(m.getOdnumberofitemsdate()));
        datasource.setOdpolicies(mapStringField(m.getOdpolicies()));
        datasource.setOfficialname(mapStringField(m.getOfficialname()));
        datasource.setOpenairecompatibility(mapQualifier(m.getOpenairecompatibility()));
        datasource.setPidsystems(mapStringField(m.getPidsystems()));
        datasource.setPolicies(m.getPoliciesList()
                .stream()
                .map(ProtoConverter::mapKV)
                .collect(Collectors.toList()));
        datasource.setQualitymanagementkind(mapStringField(m.getQualitymanagementkind()));
        datasource.setReleaseenddate(mapStringField(m.getReleaseenddate()));
        datasource.setServiceprovider(mapBoolField(m.getServiceprovider()));
        datasource.setReleasestartdate(mapStringField(m.getReleasestartdate()));
        datasource.setSubjects(m.getSubjectsList()
                .stream()
                .map(ProtoConverter::mapStructuredProperty)
                .collect(Collectors.toList()));
        datasource.setVersioning(mapBoolField(m.getVersioning()));
        datasource.setWebsiteurl(mapStringField(m.getWebsiteurl()));
        datasource.setJournal(mapJournal(m.getJournal()));

        return datasource;
    }

    private static Project convertProject(OafProtos.Oaf oaf) {
        final ProjectProtos.Project.Metadata m = oaf.getEntity().getProject().getMetadata();
        final Project project = setOaf(new Project(), oaf);
        setEntity(project, oaf);
        project.setAcronym(mapStringField(m.getAcronym()));
        project.setCallidentifier(mapStringField(m.getCallidentifier()));
        project.setCode(mapStringField(m.getCode()));
        project.setContactemail(mapStringField(m.getContactemail()));
        project.setContactfax(mapStringField(m.getContactfax()));
        project.setContactfullname(mapStringField(m.getContactfullname()));
        project.setContactphone(mapStringField(m.getContactphone()));
        project.setContracttype(mapQualifier(m.getContracttype()));
        project.setCurrency(mapStringField(m.getCurrency()));
        project.setDuration(mapStringField(m.getDuration()));
        project.setEcarticle29_3(mapStringField(m.getEcarticle293()));
        project.setEcsc39(mapStringField(m.getEcsc39()));
        project.setOamandatepublications(mapStringField(m.getOamandatepublications()));
        project.setStartdate(mapStringField(m.getStartdate()));
        project.setEnddate(mapStringField(m.getEnddate()));
        project.setFundedamount(m.getFundedamount());
        project.setTotalcost(m.getTotalcost());
        project.setKeywords(mapStringField(m.getKeywords()));
        project.setSubjects(m.getSubjectsList().stream()
                .map(sp -> mapStructuredProperty(sp))
                .collect(Collectors.toList()));
        project.setTitle(mapStringField(m.getTitle()));
        project.setWebsiteurl(mapStringField(m.getWebsiteurl()));
        project.setFundingtree(m.getFundingtreeList().stream()
                .map(f -> mapStringField(f))
                .collect(Collectors.toList()));
        project.setJsonextrainfo(mapStringField(m.getJsonextrainfo()));
        project.setSummary(mapStringField(m.getSummary()));
        project.setOptional1(mapStringField(m.getOptional1()));
        project.setOptional2(mapStringField(m.getOptional2()));
        return project;
    }

    private static Result convertResult(OafProtos.Oaf oaf) {
        switch (oaf.getEntity().getResult().getMetadata().getResulttype().getClassid()) {
            case "dataset":
                return createDataset(oaf);
            case "publication":
                return createPublication(oaf);
            case "software":
                return createSoftware(oaf);
            case "other":
                return createORP(oaf);
            default:
                Result result = setOaf(new Result(), oaf);
                setEntity(result, oaf);
                return setResult(result, oaf);
        }
    }

    private static Software createSoftware(OafProtos.Oaf oaf) {
        ResultProtos.Result.Metadata m = oaf.getEntity().getResult().getMetadata();
        Software software = setOaf(new Software(), oaf);
        setEntity(software, oaf);
        setResult(software, oaf);

        software.setDocumentationUrl(m.getDocumentationUrlList()
                .stream()
                .map(ProtoConverter::mapStringField)
                .collect(Collectors.toList()));
        software.setLicense(m.getLicenseList()
                .stream()
                .map(ProtoConverter::mapStructuredProperty)
                .collect(Collectors.toList()));
        software.setCodeRepositoryUrl(mapStringField(m.getCodeRepositoryUrl()));
        software.setProgrammingLanguage(mapQualifier(m.getProgrammingLanguage()));
        return software;
    }

    private static OtherResearchProduct createORP(OafProtos.Oaf oaf) {
        ResultProtos.Result.Metadata m = oaf.getEntity().getResult().getMetadata();
        OtherResearchProduct otherResearchProducts = setOaf(new OtherResearchProduct(), oaf);
        setEntity(otherResearchProducts, oaf);
        setResult(otherResearchProducts, oaf);
        otherResearchProducts.setContactperson(m.getContactpersonList()
                .stream()
                .map(ProtoConverter::mapStringField)
                .collect(Collectors.toList()));
        otherResearchProducts.setContactgroup(m.getContactgroupList()
                .stream()
                .map(ProtoConverter::mapStringField)
                .collect(Collectors.toList()));
        otherResearchProducts.setTool(m.getToolList()
                .stream()
                .map(ProtoConverter::mapStringField)
                .collect(Collectors.toList()));

        return otherResearchProducts;
    }

    private static Publication createPublication(OafProtos.Oaf oaf) {
        ResultProtos.Result.Metadata m = oaf.getEntity().getResult().getMetadata();
        Publication publication = setOaf(new Publication(), oaf);
        setEntity(publication, oaf);
        setResult(publication, oaf);
        publication.setJournal(mapJournal(m.getJournal()));
        return publication;
    }

    private static Dataset createDataset(OafProtos.Oaf oaf) {
        ResultProtos.Result.Metadata m = oaf.getEntity().getResult().getMetadata();
        Dataset dataset = setOaf(new Dataset(), oaf);
        setEntity(dataset, oaf);
        setResult(dataset, oaf);
        dataset.setStoragedate(mapStringField(m.getStoragedate()));
        dataset.setDevice(mapStringField(m.getDevice()));
        dataset.setSize(mapStringField(m.getSize()));
        dataset.setVersion(mapStringField(m.getVersion()));
        dataset.setLastmetadataupdate(mapStringField(m.getLastmetadataupdate()));
        dataset.setMetadataversionnumber(mapStringField(m.getMetadataversionnumber()));
        dataset.setGeolocation(m.getGeolocationList()
                .stream()
                .map(ProtoConverter::mapGeolocation)
                .collect(Collectors.toList()));
        return dataset;
    }

    public static <T extends Oaf> T setOaf(T oaf, OafProtos.Oaf o) {
        oaf.setDataInfo(mapDataInfo(o.getDataInfo()));
        oaf.setLastupdatetimestamp(o.getLastupdatetimestamp());
        return oaf;
    }

    public static <T extends OafEntity> T setEntity(T entity, OafProtos.Oaf oaf) {
        //setting Entity fields
        final OafProtos.OafEntity e = oaf.getEntity();
        entity.setId(e.getId());
        entity.setOriginalId(e.getOriginalIdList());
        entity.setCollectedfrom(e.getCollectedfromList()
                .stream()
                .map(ProtoConverter::mapKV)
                .collect(Collectors.toList()));
        entity.setPid(e.getPidList().stream()
                .map(ProtoConverter::mapStructuredProperty)
                .collect(Collectors.toList()));
        entity.setDateofcollection(e.getDateofcollection());
        entity.setDateoftransformation(e.getDateoftransformation());
        entity.setExtraInfo(e.getExtraInfoList()
                .stream()
                .map(ProtoConverter::mapExtraInfo)
                .collect(Collectors.toList()));
        return entity;
    }

    public static <T extends Result> T setResult(T entity, OafProtos.Oaf oaf) {
        //setting Entity fields
        final ResultProtos.Result.Metadata m = oaf.getEntity().getResult().getMetadata();
        entity.setAuthor(m.getAuthorList()
                .stream()
                .map(ProtoConverter::mapAuthor)
                .collect(Collectors.toList()));
        entity.setResulttype(mapQualifier(m.getResulttype()));
        entity.setLanguage(mapQualifier(m.getLanguage()));
        entity.setCountry(m.getCountryList()
                .stream()
                .map(ProtoConverter::mapQualifierAsCountry)
                .collect(Collectors.toList()));
        entity.setSubject(m.getSubjectList()
                .stream()
                .map(ProtoConverter::mapStructuredProperty)
                .collect(Collectors.toList()));
        entity.setTitle(m.getTitleList()
                .stream()
                .map(ProtoConverter::mapStructuredProperty)
                .collect(Collectors.toList()));
        entity.setRelevantdate(m.getRelevantdateList()
                .stream()
                .map(ProtoConverter::mapStructuredProperty)
                .collect(Collectors.toList()));
        entity.setDescription(m.getDescriptionList()
                .stream()
                .map(ProtoConverter::mapStringField)
                .collect(Collectors.toList()));
        entity.setDateofacceptance(mapStringField(m.getDateofacceptance()));
        entity.setPublisher(mapStringField(m.getPublisher()));
        entity.setEmbargoenddate(mapStringField(m.getEmbargoenddate()));
        entity.setSource(m.getSourceList()
                .stream()
                .map(ProtoConverter::mapStringField)
                .collect(Collectors.toList()));
        entity.setFulltext(m.getFulltextList()
                .stream()
                .map(ProtoConverter::mapStringField)
                .collect(Collectors.toList()));
        entity.setFormat(m.getFormatList()
                .stream()
                .map(ProtoConverter::mapStringField)
                .collect(Collectors.toList()));
        entity.setContributor(m.getContributorList()
                .stream()
                .map(ProtoConverter::mapStringField)
                .collect(Collectors.toList()));
        entity.setResourcetype(mapQualifier(m.getResourcetype()));
        entity.setCoverage(m.getCoverageList()
                .stream()
                .map(ProtoConverter::mapStringField)
                .collect(Collectors.toList()));
        entity.setContext(m.getContextList()
                .stream()
                .map(ProtoConverter::mapContext)
                .collect(Collectors.toList()));

        entity.setBestaccessright(getBestAccessRights(oaf.getEntity().getResult().getInstanceList()));

        return entity;
    }

    private static Qualifier getBestAccessRights(List<ResultProtos.Result.Instance> instanceList) {
        if (instanceList != null) {
            final Optional<FieldTypeProtos.Qualifier> min = instanceList.stream()
                    .map(i -> i.getAccessright()).min(new LicenseComparator());

            final Qualifier rights = min.isPresent() ? mapQualifier(min.get()) : new Qualifier();

            if (StringUtils.isBlank(rights.getClassid())) {
                rights.setClassid(UNKNOWN);
            }
            if (StringUtils.isBlank(rights.getClassname()) || UNKNOWN.equalsIgnoreCase(rights.getClassname())) {
                rights.setClassname(NOT_AVAILABLE);
            }
            if (StringUtils.isBlank(rights.getSchemeid())) {
                rights.setSchemeid(DNET_ACCESS_MODES);
            }
            if (StringUtils.isBlank(rights.getSchemename())) {
                rights.setSchemename(DNET_ACCESS_MODES);
            }

            return rights;
        }
        return null;
    }

    private static Context mapContext(ResultProtos.Result.Context context) {
        final Context entity = new Context();
        entity.setId(context.getId());
        entity.setDataInfo(context.getDataInfoList()
                .stream()
                .map(ProtoConverter::mapDataInfo)
                .collect(Collectors.toList()));
        return entity;
    }

    public static KeyValue mapKV(FieldTypeProtos.KeyValue kv) {
        final KeyValue keyValue = new KeyValue();
        keyValue.setKey(kv.getKey());
        keyValue.setValue(kv.getValue());
        keyValue.setDataInfo(mapDataInfo(kv.getDataInfo()));
        return keyValue;
    }

    public static DataInfo mapDataInfo(FieldTypeProtos.DataInfo d) {
        final DataInfo dataInfo = new DataInfo();
        dataInfo.setDeletedbyinference(d.getDeletedbyinference());
        dataInfo.setInferenceprovenance(d.getInferenceprovenance());
        dataInfo.setInferred(d.getInferred());
        dataInfo.setInvisible(d.getInvisible());
        dataInfo.setProvenanceaction(mapQualifier(d.getProvenanceaction()));
        dataInfo.setTrust(d.getTrust());
        return dataInfo;
    }

    public static Qualifier mapQualifier(FieldTypeProtos.Qualifier q) {
        final Qualifier qualifier = new Qualifier();
        qualifier.setClassid(q.getClassid());
        qualifier.setClassname(q.getClassname());
        qualifier.setSchemeid(q.getSchemeid());
        qualifier.setSchemename(q.getSchemename());
        return qualifier;
    }

    public static Country mapQualifierAsCountry(FieldTypeProtos.Qualifier q) {
        final Country c = new Country();
        c.setClassid(q.getClassid());
        c.setClassname(q.getClassname());
        c.setSchemeid(q.getSchemeid());
        c.setSchemename(q.getSchemename());
        c.setDataInfo(mapDataInfo(q.getDataInfo()));
        return c;
    }

    public static StructuredProperty mapStructuredProperty(FieldTypeProtos.StructuredProperty sp) {
        final StructuredProperty structuredProperty = new StructuredProperty();
        structuredProperty.setValue(sp.getValue());
        structuredProperty.setQualifier(mapQualifier(sp.getQualifier()));
        structuredProperty.setDataInfo(mapDataInfo(sp.getDataInfo()));
        return structuredProperty;
    }

    public static ExtraInfo mapExtraInfo(FieldTypeProtos.ExtraInfo extraInfo) {
        final ExtraInfo entity = new ExtraInfo();
        entity.setName(extraInfo.getName());
        entity.setTypology(extraInfo.getTypology());
        entity.setProvenance(extraInfo.getProvenance());
        entity.setTrust(extraInfo.getTrust());
        entity.setValue(extraInfo.getValue());
        return entity;
    }

    public static OAIProvenance mapOAIProvenance(FieldTypeProtos.OAIProvenance oaiProvenance) {
        final OAIProvenance entity = new OAIProvenance();
        entity.setOriginDescription(mapOriginalDescription(oaiProvenance.getOriginDescription()));
        return entity;
    }

    public static OriginDescription mapOriginalDescription(FieldTypeProtos.OAIProvenance.OriginDescription originDescription) {
        final OriginDescription originDescriptionResult = new OriginDescription();
        originDescriptionResult.setHarvestDate(originDescription.getHarvestDate());
        originDescriptionResult.setAltered(originDescription.getAltered());
        originDescriptionResult.setBaseURL(originDescription.getBaseURL());
        originDescriptionResult.setIdentifier(originDescription.getIdentifier());
        originDescriptionResult.setDatestamp(originDescription.getDatestamp());
        originDescriptionResult.setMetadataNamespace(originDescription.getMetadataNamespace());
        return originDescriptionResult;
    }

    public static Field<String> mapStringField(FieldTypeProtos.StringField s) {
        final Field<String> stringField = new Field<>();
        stringField.setValue(s.getValue());
        stringField.setDataInfo(mapDataInfo(s.getDataInfo()));
        return stringField;
    }

    public static Field<Boolean> mapBoolField(FieldTypeProtos.BoolField b) {
        final Field<Boolean> booleanField = new Field<>();
        booleanField.setValue(b.getValue());
        booleanField.setDataInfo(mapDataInfo(b.getDataInfo()));
        return booleanField;
    }

    public static Field<Integer> mapIntField(FieldTypeProtos.IntField b) {
        final Field<Integer> entity = new Field<>();
        entity.setValue(b.getValue());
        entity.setDataInfo(mapDataInfo(b.getDataInfo()));
        return entity;
    }

    public static Journal mapJournal(FieldTypeProtos.Journal j) {
        final Journal journal = new Journal();
        journal.setConferencedate(j.getConferencedate());
        journal.setConferenceplace(j.getConferenceplace());
        journal.setEdition(j.getEdition());
        journal.setEp(j.getEp());
        journal.setIss(j.getIss());
        journal.setIssnLinking(j.getIssnLinking());
        journal.setIssnOnline(j.getIssnOnline());
        journal.setIssnPrinted(j.getIssnPrinted());
        journal.setName(j.getName());
        journal.setSp(j.getSp());
        journal.setVol(j.getVol());
        journal.setDataInfo(mapDataInfo(j.getDataInfo()));
        return journal;
    }

    public static Author mapAuthor(FieldTypeProtos.Author author) {
        final Author entity = new Author();
        entity.setFullname(author.getFullname());
        entity.setName(author.getName());
        entity.setSurname(author.getSurname());
        entity.setRank(author.getRank());
        entity.setPid(author.getPidList()
                .stream()
                .map(kv -> {
                    final StructuredProperty sp = new StructuredProperty();
                    sp.setValue(kv.getValue());
                    final Qualifier q = new Qualifier();
                    q.setClassid(kv.getKey());
                    q.setClassname(kv.getKey());
                    sp.setQualifier(q);
                    return sp;
                })
                .collect(Collectors.toList()));
        entity.setAffiliation(author.getAffiliationList()
                .stream()
                .map(ProtoConverter::mapStringField)
                .collect(Collectors.toList()));
        return entity;
    }

    public static GeoLocation mapGeolocation(ResultProtos.Result.GeoLocation geoLocation) {
        final GeoLocation entity = new GeoLocation();
        entity.setPoint(geoLocation.getPoint());
        entity.setBox(geoLocation.getBox());
        entity.setPlace(geoLocation.getPlace());
        return entity;
    }
}

@ -0,0 +1,159 @@
|
||||||
|
package eu.dnetlib.dhp.migration.actions;
|
||||||
|
|
||||||
|
import com.fasterxml.jackson.core.JsonProcessingException;
|
||||||
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
|
import com.google.common.base.Splitter;
|
||||||
|
import com.google.common.collect.Lists;
|
||||||
|
import com.google.protobuf.InvalidProtocolBufferException;
|
||||||
|
import eu.dnetlib.actionmanager.actions.AtomicAction;
|
||||||
|
import eu.dnetlib.data.proto.OafProtos;
|
||||||
|
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.DataInfo;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.Oaf;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.Qualifier;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.Relation;
|
||||||
|
import eu.dnetlib.dhp.utils.ISLookupClientFactory;
|
||||||
|
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
|
||||||
|
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
|
||||||
|
import org.apache.commons.codec.binary.Base64;
|
||||||
|
import org.apache.commons.io.IOUtils;
|
||||||
|
import org.apache.commons.lang3.StringUtils;
|
||||||
|
import org.apache.commons.logging.Log;
|
||||||
|
import org.apache.commons.logging.LogFactory;
|
||||||
|
import org.apache.hadoop.fs.FileSystem;
|
||||||
|
import org.apache.hadoop.fs.Path;
|
||||||
|
import org.apache.hadoop.io.Text;
|
||||||
|
import org.apache.hadoop.io.compress.GzipCodec;
|
||||||
|
import org.apache.hadoop.mapred.SequenceFileOutputFormat;
|
||||||
|
import org.apache.spark.SparkConf;
|
||||||
|
import org.apache.spark.api.java.JavaSparkContext;
|
||||||
|
import org.apache.spark.sql.SparkSession;
|
||||||
|
import scala.Tuple2;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.io.Serializable;
|
||||||
|
import java.util.LinkedList;
|
||||||
|
|
||||||
|
public class TransformActions implements Serializable {
|
||||||
|
|
||||||
|
private static final Log log = LogFactory.getLog(TransformActions.class);
|
||||||
|
private static final String SEPARATOR = "/";
|
||||||
|
|
||||||
|
public static void main(String[] args) throws Exception {
|
||||||
|
final ArgumentApplicationParser parser = new ArgumentApplicationParser(
|
||||||
|
IOUtils.toString(MigrateActionSet.class.getResourceAsStream(
|
||||||
|
"/eu/dnetlib/dhp/migration/transform_actionsets_parameters.json")));
|
||||||
|
parser.parseArgument(args);
|
||||||
|
|
||||||
|
new TransformActions().run(parser);
|
||||||
|
}
|
||||||
|
|
||||||
|
private void run(ArgumentApplicationParser parser) throws ISLookUpException, IOException {
|
||||||
|
|
||||||
|
final String isLookupUrl = parser.get("isLookupUrl");
|
||||||
|
log.info("isLookupUrl: " + isLookupUrl);
|
||||||
|
|
||||||
|
final String inputPaths = parser.get("inputPaths");
|
||||||
|
|
||||||
|
if (StringUtils.isBlank(inputPaths)) {
|
||||||
|
throw new RuntimeException("empty inputPaths");
|
||||||
|
}
|
||||||
|
log.info("inputPaths: " + inputPaths);
|
||||||
|
|
||||||
|
final String targetBaseDir = getTargetBaseDir(isLookupUrl);
|
||||||
|
|
||||||
|
try(SparkSession spark = getSparkSession(parser)) {
|
||||||
|
final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext());
|
||||||
|
final FileSystem fs = FileSystem.get(spark.sparkContext().hadoopConfiguration());
|
||||||
|
|
||||||
|
            for (String sourcePath : Lists.newArrayList(Splitter.on(",").split(inputPaths))) {

                LinkedList<String> pathQ = Lists.newLinkedList(Splitter.on(SEPARATOR).split(sourcePath));

                final String rawset = pathQ.pollLast();
                final String actionSetDirectory = pathQ.pollLast();

                final Path targetDirectory = new Path(targetBaseDir + SEPARATOR + actionSetDirectory + SEPARATOR + rawset);

                if (fs.exists(targetDirectory)) {
                    log.info(String.format("found target directory '%s'", targetDirectory));
                    fs.delete(targetDirectory, true);
                    log.info(String.format("deleted target directory '%s'", targetDirectory));
                }

                log.info(String.format("transforming actions from '%s' to '%s'", sourcePath, targetDirectory));

                sc.sequenceFile(sourcePath, Text.class, Text.class)
                    .mapToPair(a -> new Tuple2<>(a._1(), AtomicAction.fromJSON(a._2().toString())))
                    .mapToPair(a -> new Tuple2<>(a._1(), transformAction(a._1().toString(), a._2())))
                    .saveAsHadoopFile(targetDirectory.toString(), Text.class, Text.class, SequenceFileOutputFormat.class, GzipCodec.class);
            }
        }
    }

    private Text transformAction(String atomicaActionId, AtomicAction aa) throws InvalidProtocolBufferException, JsonProcessingException {

        final ObjectMapper mapper = new ObjectMapper();

        if (aa.getTargetValue() != null && aa.getTargetValue().length > 0) {
            Oaf oaf = ProtoConverter.convert(OafProtos.Oaf.parseFrom(aa.getTargetValue()));
            aa.setTargetValue(mapper.writeValueAsString(oaf).getBytes());
        } else {

            if (atomicaActionId.contains("dedupSimilarity")) {

                final String[] splitId = atomicaActionId.split("@");

                String source = splitId[0];
                String target = splitId[2];

                String[] relSemantic = splitId[1].split("_");

                Relation rel = new Relation();
                rel.setSource(source);
                rel.setTarget(target);
                rel.setRelType(relSemantic[0]);
                rel.setSubRelType(relSemantic[1]);
                rel.setRelClass(relSemantic[2]);

                DataInfo d = new DataInfo();
                d.setDeletedbyinference(false);
                d.setInferenceprovenance("deduplication");
                d.setInferred(true);
                d.setInvisible(false);

                Qualifier provenanceaction = new Qualifier();
                provenanceaction.setClassid("deduplication");
                provenanceaction.setClassname("deduplication");
                provenanceaction.setSchemeid("dnet:provenanceActions");
                provenanceaction.setSchemename("dnet:provenanceActions");

                d.setProvenanceaction(provenanceaction);

                rel.setDataInfo(d);

                aa.setTargetValue(mapper.writeValueAsString(rel).getBytes());
            }
        }

        return new Text(mapper.writeValueAsString(aa));
    }

    private String getTargetBaseDir(String isLookupUrl) throws ISLookUpException {
        ISLookUpService isLookUp = ISLookupClientFactory.getLookUpService(isLookupUrl);
        String XQUERY = "collection('/db/DRIVER/ServiceResources/ActionManagerServiceResourceType')//SERVICE_PROPERTIES/PROPERTY[@key = 'basePath']/@value/string()";
        return isLookUp.getResourceProfileByQuery(XQUERY);
    }

    private static SparkSession getSparkSession(ArgumentApplicationParser parser) {
        SparkConf conf = new SparkConf();

        return SparkSession
            .builder()
            .appName(TransformActions.class.getSimpleName())
            .master(parser.get("master"))
            .config(conf)
            .enableHiveSupport()
            .getOrCreate();
    }
}
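The dedupSimilarity branch of transformAction() assumes atomic-action identifiers shaped like source@relType_subRelType_relClass@target. A minimal standalone sketch of that parsing, with a made-up identifier (the real id layout is not shown in this commit; only the split logic mirrors the method above):

// Sketch only: the sample id below is hypothetical, it just matches the
// "<source>@<relType>_<subRelType>_<relClass>@<target>" shape assumed by transformAction().
public class DedupSimilarityIdParsingSketch {
    public static void main(final String[] args) {
        final String atomicActionId = "50|od______1234::abc@resultResult_dedupSimilarity_isSimilarTo@50|od______5678::def";
        final String[] splitId = atomicActionId.split("@");
        final String source = splitId[0];
        final String target = splitId[2];
        final String[] relSemantic = splitId[1].split("_");
        System.out.println("source=" + source + ", target=" + target);
        System.out.println("relType=" + relSemantic[0] + ", subRelType=" + relSemantic[1] + ", relClass=" + relSemantic[2]);
    }
}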
@ -1,4 +1,14 @@
package eu.dnetlib.dhp.migration;
package eu.dnetlib.dhp.migration.step1;

import static eu.dnetlib.dhp.migration.utils.OafMapperUtils.asString;
import static eu.dnetlib.dhp.migration.utils.OafMapperUtils.createOpenaireId;
import static eu.dnetlib.dhp.migration.utils.OafMapperUtils.dataInfo;
import static eu.dnetlib.dhp.migration.utils.OafMapperUtils.field;
import static eu.dnetlib.dhp.migration.utils.OafMapperUtils.journal;
import static eu.dnetlib.dhp.migration.utils.OafMapperUtils.listFields;
import static eu.dnetlib.dhp.migration.utils.OafMapperUtils.listKeyValues;
import static eu.dnetlib.dhp.migration.utils.OafMapperUtils.qualifier;
import static eu.dnetlib.dhp.migration.utils.OafMapperUtils.structuredProperty;

import java.io.Closeable;
import java.io.IOException;
@ -17,18 +27,26 @@ import org.apache.commons.logging.Log;
|
||||||
import org.apache.commons.logging.LogFactory;
|
import org.apache.commons.logging.LogFactory;
|
||||||
|
|
||||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||||
|
import eu.dnetlib.dhp.migration.utils.AbstractMigrationApplication;
|
||||||
|
import eu.dnetlib.dhp.migration.utils.DbClient;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.Context;
|
||||||
import eu.dnetlib.dhp.schema.oaf.DataInfo;
|
import eu.dnetlib.dhp.schema.oaf.DataInfo;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.Dataset;
|
||||||
import eu.dnetlib.dhp.schema.oaf.Datasource;
|
import eu.dnetlib.dhp.schema.oaf.Datasource;
|
||||||
import eu.dnetlib.dhp.schema.oaf.Field;
|
import eu.dnetlib.dhp.schema.oaf.Field;
|
||||||
import eu.dnetlib.dhp.schema.oaf.Journal;
|
import eu.dnetlib.dhp.schema.oaf.Journal;
|
||||||
import eu.dnetlib.dhp.schema.oaf.KeyValue;
|
import eu.dnetlib.dhp.schema.oaf.KeyValue;
|
||||||
import eu.dnetlib.dhp.schema.oaf.Organization;
|
import eu.dnetlib.dhp.schema.oaf.Organization;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.OtherResearchProduct;
|
||||||
import eu.dnetlib.dhp.schema.oaf.Project;
|
import eu.dnetlib.dhp.schema.oaf.Project;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.Publication;
|
||||||
import eu.dnetlib.dhp.schema.oaf.Qualifier;
|
import eu.dnetlib.dhp.schema.oaf.Qualifier;
|
||||||
import eu.dnetlib.dhp.schema.oaf.Relation;
|
import eu.dnetlib.dhp.schema.oaf.Relation;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.Result;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.Software;
|
||||||
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
|
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
|
||||||
|
|
||||||
public class MigrateDbEntitiesApplication extends AbstractMigrationExecutor implements Closeable {
|
public class MigrateDbEntitiesApplication extends AbstractMigrationApplication implements Closeable {
|
||||||
|
|
||||||
private static final Qualifier ENTITYREGISTRY_PROVENANCE_ACTION =
|
private static final Qualifier ENTITYREGISTRY_PROVENANCE_ACTION =
|
||||||
qualifier("sysimport:crosswalk:entityregistry", "sysimport:crosswalk:entityregistry", "dnet:provenance_actions", "dnet:provenance_actions");
|
qualifier("sysimport:crosswalk:entityregistry", "sysimport:crosswalk:entityregistry", "dnet:provenance_actions", "dnet:provenance_actions");
|
||||||
|
@ -50,32 +68,36 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationExecutor impl
|
||||||
final String dbPassword = parser.get("postgresPassword");
|
final String dbPassword = parser.get("postgresPassword");
|
||||||
|
|
||||||
final String hdfsPath = parser.get("hdfsPath");
|
final String hdfsPath = parser.get("hdfsPath");
|
||||||
final String hdfsNameNode = parser.get("namenode");
|
|
||||||
final String hdfsUser = parser.get("hdfsUser");
|
|
||||||
|
|
||||||
try (final MigrateDbEntitiesApplication smdbe = new MigrateDbEntitiesApplication(hdfsPath, hdfsNameNode, hdfsUser, dbUrl, dbUser, dbPassword)) {
|
final boolean processClaims = parser.get("action") != null && parser.get("action").equalsIgnoreCase("claims");
|
||||||
log.info("Processing datasources...");
|
|
||||||
smdbe.execute("queryDatasources.sql", smdbe::processDatasource);
|
|
||||||
|
|
||||||
log.info("Processing projects...");
|
try (final MigrateDbEntitiesApplication smdbe = new MigrateDbEntitiesApplication(hdfsPath, dbUrl, dbUser, dbPassword)) {
|
||||||
smdbe.execute("queryProjects.sql", smdbe::processProject);
|
if (processClaims) {
|
||||||
|
log.info("Processing claims...");
|
||||||
|
smdbe.execute("queryClaims.sql", smdbe::processClaims);
|
||||||
|
} else {
|
||||||
|
log.info("Processing datasources...");
|
||||||
|
smdbe.execute("queryDatasources.sql", smdbe::processDatasource);
|
||||||
|
|
||||||
log.info("Processing orgs...");
|
log.info("Processing projects...");
|
||||||
smdbe.execute("queryOrganizations.sql", smdbe::processOrganization);
|
smdbe.execute("queryProjects.sql", smdbe::processProject);
|
||||||
|
|
||||||
log.info("Processing relations ds <-> orgs ...");
|
log.info("Processing orgs...");
|
||||||
smdbe.execute("queryDatasourceOrganization.sql", smdbe::processDatasourceOrganization);
|
smdbe.execute("queryOrganizations.sql", smdbe::processOrganization);
|
||||||
|
|
||||||
log.info("Processing projects <-> orgs ...");
|
log.info("Processing relations ds <-> orgs ...");
|
||||||
smdbe.execute("queryProjectOrganization.sql", smdbe::processProjectOrganization);
|
smdbe.execute("queryDatasourceOrganization.sql", smdbe::processDatasourceOrganization);
|
||||||
|
|
||||||
|
log.info("Processing projects <-> orgs ...");
|
||||||
|
smdbe.execute("queryProjectOrganization.sql", smdbe::processProjectOrganization);
|
||||||
|
}
|
||||||
log.info("All done.");
|
log.info("All done.");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public MigrateDbEntitiesApplication(final String hdfsPath, final String hdfsNameNode, final String hdfsUser, final String dbUrl, final String dbUser,
|
public MigrateDbEntitiesApplication(final String hdfsPath, final String dbUrl, final String dbUser,
|
||||||
final String dbPassword) throws Exception {
|
final String dbPassword) throws Exception {
|
||||||
super(hdfsPath, hdfsNameNode, hdfsUser);
|
super(hdfsPath);
|
||||||
this.dbClient = new DbClient(dbUrl, dbUser, dbPassword);
|
this.dbClient = new DbClient(dbUrl, dbUser, dbPassword);
|
||||||
this.lastUpdateTimestamp = new Date().getTime();
|
this.lastUpdateTimestamp = new Date().getTime();
|
||||||
}
|
}
|
||||||
|
@ -93,7 +115,7 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationExecutor impl
|
||||||
|
|
||||||
final Datasource ds = new Datasource();
|
final Datasource ds = new Datasource();
|
||||||
|
|
||||||
ds.setId(createOpenaireId(10, rs.getString("datasourceid")));
|
ds.setId(createOpenaireId(10, rs.getString("datasourceid"), true));
|
||||||
ds.setOriginalId(Arrays.asList(rs.getString("datasourceid")));
|
ds.setOriginalId(Arrays.asList(rs.getString("datasourceid")));
|
||||||
ds.setCollectedfrom(listKeyValues(rs.getString("collectedfromid"), rs.getString("collectedfromname")));
|
ds.setCollectedfrom(listKeyValues(rs.getString("collectedfromid"), rs.getString("collectedfromname")));
|
||||||
ds.setPid(new ArrayList<>());
|
ds.setPid(new ArrayList<>());
|
||||||
|
@ -200,7 +222,7 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationExecutor impl
|
||||||
|
|
||||||
final Project p = new Project();
|
final Project p = new Project();
|
||||||
|
|
||||||
p.setId(createOpenaireId(40, rs.getString("projectid")));
|
p.setId(createOpenaireId(40, rs.getString("projectid"), true));
|
||||||
p.setOriginalId(Arrays.asList(rs.getString("projectid")));
|
p.setOriginalId(Arrays.asList(rs.getString("projectid")));
|
||||||
p.setCollectedfrom(listKeyValues(rs.getString("collectedfromid"), rs.getString("collectedfromname")));
|
p.setCollectedfrom(listKeyValues(rs.getString("collectedfromid"), rs.getString("collectedfromname")));
|
||||||
p.setPid(new ArrayList<>());
|
p.setPid(new ArrayList<>());
|
||||||
|
@ -290,7 +312,7 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationExecutor impl
|
||||||
|
|
||||||
final Organization o = new Organization();
|
final Organization o = new Organization();
|
||||||
|
|
||||||
o.setId(createOpenaireId(20, rs.getString("organizationid")));
|
o.setId(createOpenaireId(20, rs.getString("organizationid"), true));
|
||||||
o.setOriginalId(Arrays.asList(rs.getString("organizationid")));
|
o.setOriginalId(Arrays.asList(rs.getString("organizationid")));
|
||||||
o.setCollectedfrom(listKeyValues(rs.getString("collectedfromid"), rs.getString("collectedfromname")));
|
o.setCollectedfrom(listKeyValues(rs.getString("collectedfromid"), rs.getString("collectedfromname")));
|
||||||
o.setPid(new ArrayList<>());
|
o.setPid(new ArrayList<>());
|
||||||
|
@ -354,8 +376,8 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationExecutor impl
|
||||||
|
|
||||||
try {
|
try {
|
||||||
final DataInfo info = prepareDataInfo(rs);
|
final DataInfo info = prepareDataInfo(rs);
|
||||||
final String orgId = createOpenaireId(20, rs.getString("organization"));
|
final String orgId = createOpenaireId(20, rs.getString("organization"), true);
|
||||||
final String dsId = createOpenaireId(10, rs.getString("datasource"));
|
final String dsId = createOpenaireId(10, rs.getString("datasource"), true);
|
||||||
final List<KeyValue> collectedFrom = listKeyValues(rs.getString("collectedfromid"), rs.getString("collectedfromname"));
|
final List<KeyValue> collectedFrom = listKeyValues(rs.getString("collectedfromid"), rs.getString("collectedfromname"));
|
||||||
|
|
||||||
final Relation r1 = new Relation();
|
final Relation r1 = new Relation();
|
||||||
|
@ -377,7 +399,7 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationExecutor impl
|
||||||
r2.setTarget(dsId);
|
r2.setTarget(dsId);
|
||||||
r2.setCollectedFrom(collectedFrom);
|
r2.setCollectedFrom(collectedFrom);
|
||||||
r2.setDataInfo(info);
|
r2.setDataInfo(info);
|
||||||
r1.setLastupdatetimestamp(lastUpdateTimestamp);
|
r2.setLastupdatetimestamp(lastUpdateTimestamp);
|
||||||
emitOaf(r2);
|
emitOaf(r2);
|
||||||
|
|
||||||
// rs.getString("datasource");
|
// rs.getString("datasource");
|
||||||
|
@ -403,8 +425,8 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationExecutor impl
|
||||||
|
|
||||||
try {
|
try {
|
||||||
final DataInfo info = prepareDataInfo(rs);
|
final DataInfo info = prepareDataInfo(rs);
|
||||||
final String orgId = createOpenaireId(20, rs.getString("resporganization"));
|
final String orgId = createOpenaireId(20, rs.getString("resporganization"), true);
|
||||||
final String projectId = createOpenaireId(40, rs.getString("project"));
|
final String projectId = createOpenaireId(40, rs.getString("project"), true);
|
||||||
final List<KeyValue> collectedFrom = listKeyValues(rs.getString("collectedfromid"), rs.getString("collectedfromname"));
|
final List<KeyValue> collectedFrom = listKeyValues(rs.getString("collectedfromid"), rs.getString("collectedfromname"));
|
||||||
|
|
||||||
final Relation r1 = new Relation();
|
final Relation r1 = new Relation();
|
||||||
|
@ -426,7 +448,7 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationExecutor impl
|
||||||
r2.setTarget(projectId);
|
r2.setTarget(projectId);
|
||||||
r2.setCollectedFrom(collectedFrom);
|
r2.setCollectedFrom(collectedFrom);
|
||||||
r2.setDataInfo(info);
|
r2.setDataInfo(info);
|
||||||
r1.setLastupdatetimestamp(lastUpdateTimestamp);
|
r2.setLastupdatetimestamp(lastUpdateTimestamp);
|
||||||
emitOaf(r2);
|
emitOaf(r2);
|
||||||
|
|
||||||
// rs.getString("project");
|
// rs.getString("project");
|
||||||
|
@ -450,6 +472,81 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationExecutor impl
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
    public void processClaims(final ResultSet rs) {

        final DataInfo info =
                dataInfo(false, null, false, false, qualifier("user:claim", "user:claim", "dnet:provenanceActions", "dnet:provenanceActions"), "0.9");

        try {
            if (rs.getString("source_type").equals("context")) {
                final Result r;

                if (rs.getString("target_type").equals("dataset")) {
                    r = new Dataset();
                } else if (rs.getString("target_type").equals("software")) {
                    r = new Software();
                } else if (rs.getString("target_type").equals("other")) {
                    r = new OtherResearchProduct();
                } else {
                    r = new Publication();
                }
                r.setId(createOpenaireId(50, rs.getString("target_id"), false));
                r.setLastupdatetimestamp(lastUpdateTimestamp);
                r.setContext(prepareContext(rs.getString("source_id"), info));
                r.setDataInfo(info);
                emitOaf(r);
            } else {
                final String sourceId = createOpenaireId(rs.getString("source_type"), rs.getString("source_id"), false);
                final String targetId = createOpenaireId(rs.getString("target_type"), rs.getString("target_id"), false);

                final Relation r1 = new Relation();
                final Relation r2 = new Relation();

                if (rs.getString("source_type").equals("project")) {
                    r1.setRelType("resultProject");
                    r1.setSubRelType("outcome");
                    r1.setRelClass("produces");

                    r2.setRelType("resultProject");
                    r2.setSubRelType("outcome");
                    r2.setRelClass("isProducedBy");
                } else {
                    r1.setRelType("resultResult");
                    r1.setSubRelType("relationship");
                    r1.setRelClass("isRelatedTo");

                    r2.setRelType("resultResult");
                    r2.setSubRelType("relationship");
                    r2.setRelClass("isRelatedTo");
                }

                r1.setSource(sourceId);
                r1.setTarget(targetId);
                r1.setDataInfo(info);
                r1.setLastupdatetimestamp(lastUpdateTimestamp);
                emitOaf(r1);

                r2.setSource(targetId);
                r2.setTarget(sourceId);
                r2.setDataInfo(info);
                r2.setLastupdatetimestamp(lastUpdateTimestamp);
                emitOaf(r2);
            }

        } catch (final Exception e) {
            throw new RuntimeException(e);
        }
    }

    private List<Context> prepareContext(final String id, final DataInfo dataInfo) {
        final Context context = new Context();
        context.setId(id);
        context.setDataInfo(Arrays.asList(dataInfo));
        return Arrays.asList(context);
    }
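Each non-context claim row produces two reciprocal relations. A standalone sketch of the relType/subRelType/relClass choices made in processClaims() above (plain strings only; the dhp schema classes and the ResultSet plumbing are deliberately left out):

import java.util.Arrays;
import java.util.List;

// Sketch of the claim-to-relation mapping: project claims become
// produces/isProducedBy, everything else becomes a symmetric isRelatedTo pair.
public class ClaimRelationMappingSketch {

    static List<String[]> relationPair(final String sourceType) {
        if ("project".equals(sourceType)) {
            return Arrays.asList(
                new String[] { "resultProject", "outcome", "produces" },      // source -> target
                new String[] { "resultProject", "outcome", "isProducedBy" }); // target -> source
        }
        return Arrays.asList(
            new String[] { "resultResult", "relationship", "isRelatedTo" },
            new String[] { "resultResult", "relationship", "isRelatedTo" });
    }

    public static void main(final String[] args) {
        for (final String[] r : relationPair("project")) {
            System.out.println(String.join("/", r));
        }
    }
}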
|
||||||
|
|
||||||
private DataInfo prepareDataInfo(final ResultSet rs) throws SQLException {
|
private DataInfo prepareDataInfo(final ResultSet rs) throws SQLException {
|
||||||
final Boolean deletedbyinference = rs.getBoolean("deletedbyinference");
|
final Boolean deletedbyinference = rs.getBoolean("deletedbyinference");
|
||||||
final String inferenceprovenance = rs.getString("inferenceprovenance");
|
final String inferenceprovenance = rs.getString("inferenceprovenance");
|
|
@ -0,0 +1,67 @@
package eu.dnetlib.dhp.migration.step1;

import java.io.Closeable;
import java.io.IOException;
import java.util.Map;
import java.util.Map.Entry;

import org.apache.commons.io.IOUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.migration.utils.AbstractMigrationApplication;
import eu.dnetlib.dhp.migration.utils.MdstoreClient;

public class MigrateMongoMdstoresApplication extends AbstractMigrationApplication implements Closeable {

    private static final Log log = LogFactory.getLog(MigrateMongoMdstoresApplication.class);

    private final MdstoreClient mdstoreClient;

    public static void main(final String[] args) throws Exception {
        final ArgumentApplicationParser parser = new ArgumentApplicationParser(
                IOUtils.toString(MigrateMongoMdstoresApplication.class.getResourceAsStream("/eu/dnetlib/dhp/migration/migrate_mongo_mstores_parameters.json")));
        parser.parseArgument(args);

        final String mongoBaseUrl = parser.get("mongoBaseUrl");
        final String mongoDb = parser.get("mongoDb");

        final String mdFormat = parser.get("mdFormat");
        final String mdLayout = parser.get("mdLayout");
        final String mdInterpretation = parser.get("mdInterpretation");

        final String hdfsPath = parser.get("hdfsPath");

        try (MigrateMongoMdstoresApplication app = new MigrateMongoMdstoresApplication(hdfsPath, mongoBaseUrl, mongoDb)) {
            app.execute(mdFormat, mdLayout, mdInterpretation);
        }
    }

    public MigrateMongoMdstoresApplication(final String hdfsPath, final String mongoBaseUrl, final String mongoDb) throws Exception {
        super(hdfsPath);
        this.mdstoreClient = new MdstoreClient(mongoBaseUrl, mongoDb);
    }

    public void execute(final String format, final String layout, final String interpretation) {
        final Map<String, String> colls = mdstoreClient.validCollections(format, layout, interpretation);
        log.info("Found " + colls.size() + " mdstores");

        for (final Entry<String, String> entry : colls.entrySet()) {
            log.info("Processing mdstore " + entry.getKey() + " (collection: " + entry.getValue() + ")");
            final String currentColl = entry.getValue();

            for (final String xml : mdstoreClient.listRecords(currentColl)) {
                emit(xml, "native_" + format);
            }
        }
    }

    @Override
    public void close() throws IOException {
        super.close();
        mdstoreClient.close();
    }

}
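Each record is emitted with type "native_" + format; the emit() helper lives in AbstractMigrationApplication, which is not part of this diff. Assuming the sequence-file key carries that type after a ':' separator, which is what GenerateEntitiesApplication (introduced later in this commit) strips with StringUtils.substringAfter(id, ":"), a record emitted here for format "oaf" ends up in the "native_oaf" branch of step 2. A small sketch of that round trip, with a hypothetical key layout:

import org.apache.commons.lang3.StringUtils;

// Hypothetical sketch: the "<counter>:<type>" key layout is an assumption, since the
// actual key is built inside AbstractMigrationApplication, which this diff does not show.
public class EmitKeyRoundTripSketch {
    public static void main(final String[] args) {
        final String format = "oaf";
        final String key = "0000123:" + "native_" + format;       // assumed key layout (step 1)
        final String type = StringUtils.substringAfter(key, ":");  // what step 2 does
        System.out.println(type);                                  // -> native_oaf
    }
}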
@ -1,20 +1,24 @@
|
||||||
package eu.dnetlib.dhp.migration;
|
package eu.dnetlib.dhp.migration.step2;
|
||||||
|
|
||||||
|
import static eu.dnetlib.dhp.migration.utils.OafMapperUtils.createOpenaireId;
|
||||||
|
import static eu.dnetlib.dhp.migration.utils.OafMapperUtils.dataInfo;
|
||||||
|
import static eu.dnetlib.dhp.migration.utils.OafMapperUtils.field;
|
||||||
|
import static eu.dnetlib.dhp.migration.utils.OafMapperUtils.journal;
|
||||||
|
import static eu.dnetlib.dhp.migration.utils.OafMapperUtils.keyValue;
|
||||||
|
import static eu.dnetlib.dhp.migration.utils.OafMapperUtils.listFields;
|
||||||
|
import static eu.dnetlib.dhp.migration.utils.OafMapperUtils.oaiIProvenance;
|
||||||
|
import static eu.dnetlib.dhp.migration.utils.OafMapperUtils.qualifier;
|
||||||
|
import static eu.dnetlib.dhp.migration.utils.OafMapperUtils.structuredProperty;
|
||||||
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.sql.SQLException;
|
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
import java.util.Date;
|
import java.util.Date;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.Map.Entry;
|
|
||||||
|
|
||||||
import org.apache.commons.lang3.StringUtils;
|
import org.apache.commons.lang3.StringUtils;
|
||||||
import org.apache.commons.logging.Log;
|
|
||||||
import org.apache.commons.logging.LogFactory;
|
|
||||||
import org.dom4j.Document;
|
import org.dom4j.Document;
|
||||||
import org.dom4j.DocumentException;
|
|
||||||
import org.dom4j.DocumentFactory;
|
import org.dom4j.DocumentFactory;
|
||||||
import org.dom4j.DocumentHelper;
|
import org.dom4j.DocumentHelper;
|
||||||
import org.dom4j.Node;
|
import org.dom4j.Node;
|
||||||
|
@ -37,11 +41,9 @@ import eu.dnetlib.dhp.schema.oaf.Result;
|
||||||
import eu.dnetlib.dhp.schema.oaf.Software;
|
import eu.dnetlib.dhp.schema.oaf.Software;
|
||||||
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
|
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
|
||||||
|
|
||||||
public abstract class AbstractMongoExecutor extends AbstractMigrationExecutor {
|
public abstract class AbstractMdRecordToOafMapper {
|
||||||
|
|
||||||
protected final Map<String, String> code2name = new HashMap<>();
|
protected final Map<String, String> code2name;
|
||||||
|
|
||||||
protected final MdstoreClient mdstoreClient;
|
|
||||||
|
|
||||||
protected static final Qualifier MAIN_TITLE_QUALIFIER = qualifier("main title", "main title", "dnet:dataCite_title", "dnet:dataCite_title");
|
protected static final Qualifier MAIN_TITLE_QUALIFIER = qualifier("main title", "main title", "dnet:dataCite_title", "dnet:dataCite_title");
|
||||||
|
|
||||||
|
@ -51,79 +53,36 @@ public abstract class AbstractMongoExecutor extends AbstractMigrationExecutor {
|
||||||
protected static final Qualifier SOFTWARE_RESULTTYPE_QUALIFIER = qualifier("software", "software", "dnet:result_typologies", "dnet:result_typologies");
|
protected static final Qualifier SOFTWARE_RESULTTYPE_QUALIFIER = qualifier("software", "software", "dnet:result_typologies", "dnet:result_typologies");
|
||||||
protected static final Qualifier OTHER_RESULTTYPE_QUALIFIER = qualifier("other", "other", "dnet:result_typologies", "dnet:result_typologies");
|
protected static final Qualifier OTHER_RESULTTYPE_QUALIFIER = qualifier("other", "other", "dnet:result_typologies", "dnet:result_typologies");
|
||||||
|
|
||||||
private static final Log log = LogFactory.getLog(AbstractMongoExecutor.class);
|
protected AbstractMdRecordToOafMapper(final Map<String, String> code2name) {
|
||||||
|
this.code2name = code2name;
|
||||||
public AbstractMongoExecutor(final String hdfsPath, final String hdfsNameNode, final String hdfsUser, final String mongoBaseUrl,
|
|
||||||
final String mongoDb, final String dbUrl, final String dbUser,
|
|
||||||
final String dbPassword) throws Exception {
|
|
||||||
|
|
||||||
super(hdfsPath, hdfsNameNode, hdfsUser);
|
|
||||||
|
|
||||||
this.mdstoreClient = new MdstoreClient(mongoBaseUrl, mongoDb);
|
|
||||||
loadClassNames(dbUrl, dbUser, dbPassword);
|
|
||||||
|
|
||||||
final Map<String, String> nsContext = new HashMap<>();
|
|
||||||
|
|
||||||
registerNamespaces(nsContext);
|
|
||||||
|
|
||||||
DocumentFactory.getInstance().setXPathNamespaceURIs(nsContext);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private void loadClassNames(final String dbUrl, final String dbUser, final String dbPassword) throws IOException {
|
public List<Oaf> processMdRecord(final String xml) {
|
||||||
|
try {
|
||||||
|
final Map<String, String> nsContext = new HashMap<>();
|
||||||
|
nsContext.put("dr", "http://www.driver-repository.eu/namespace/dr");
|
||||||
|
nsContext.put("dri", "http://www.driver-repository.eu/namespace/dri");
|
||||||
|
nsContext.put("oaf", "http://namespace.openaire.eu/oaf");
|
||||||
|
nsContext.put("oai", "http://www.openarchives.org/OAI/2.0/");
|
||||||
|
nsContext.put("prov", "http://www.openarchives.org/OAI/2.0/provenance");
|
||||||
|
nsContext.put("dc", "http://purl.org/dc/elements/1.1/");
|
||||||
|
nsContext.put("datacite", "http://datacite.org/schema/kernel-3");
|
||||||
|
DocumentFactory.getInstance().setXPathNamespaceURIs(nsContext);
|
||||||
|
|
||||||
log.info("Loading vocabulary terms from db...");
|
final Document doc = DocumentHelper.parseText(xml);
|
||||||
|
|
||||||
try (DbClient dbClient = new DbClient(dbUrl, dbUser, dbPassword)) {
|
final String type = doc.valueOf("//dr:CobjCategory/@type");
|
||||||
code2name.clear();
|
final KeyValue collectedFrom = keyValue(doc.valueOf("//oaf:collectedFrom/@id"), doc.valueOf("//oaf:collectedFrom/@name"));
|
||||||
dbClient.processResults("select code, name from class", rs -> {
|
final KeyValue hostedBy = StringUtils.isBlank(doc.valueOf("//oaf:hostedBy/@id")) ? collectedFrom
|
||||||
try {
|
: keyValue(doc.valueOf("//oaf:hostedBy/@id"), doc.valueOf("//oaf:hostedBy/@name"));
|
||||||
code2name.put(rs.getString("code"), rs.getString("name"));
|
|
||||||
} catch (final SQLException e) {
|
final DataInfo info = prepareDataInfo(doc);
|
||||||
e.printStackTrace();
|
final long lastUpdateTimestamp = new Date().getTime();
|
||||||
}
|
|
||||||
});
|
return createOafs(doc, type, collectedFrom, hostedBy, info, lastUpdateTimestamp);
|
||||||
|
} catch (final Exception e) {
|
||||||
|
throw new RuntimeException(e);
|
||||||
}
|
}
|
||||||
|
|
||||||
log.info("Found " + code2name.size() + " terms.");
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
public void processMdRecords(final String mdFormat, final String mdLayout, final String mdInterpretation) throws DocumentException {
|
|
||||||
|
|
||||||
log.info(String.format("Searching mdstores (format: %s, layout: %s, interpretation: %s)", mdFormat, mdLayout, mdInterpretation));
|
|
||||||
|
|
||||||
final Map<String, String> colls = mdstoreClient.validCollections(mdFormat, mdLayout, mdInterpretation);
|
|
||||||
log.info("Found " + colls.size() + " mdstores");
|
|
||||||
|
|
||||||
for (final Entry<String, String> entry : colls.entrySet()) {
|
|
||||||
log.info("Processing mdstore " + entry.getKey() + " (collection: " + entry.getValue() + ")");
|
|
||||||
final String currentColl = entry.getValue();
|
|
||||||
|
|
||||||
for (final String xml : mdstoreClient.listRecords(currentColl)) {
|
|
||||||
final Document doc = DocumentHelper.parseText(xml);
|
|
||||||
|
|
||||||
final String type = doc.valueOf("//dr:CobjCategory/@type");
|
|
||||||
final KeyValue collectedFrom = keyValue(doc.valueOf("//oaf:collectedFrom/@id"), doc.valueOf("//oaf:collectedFrom/@name"));
|
|
||||||
final KeyValue hostedBy = StringUtils.isBlank(doc.valueOf("//oaf:hostedBy/@id")) ? collectedFrom
|
|
||||||
: keyValue(doc.valueOf("//oaf:hostedBy/@id"), doc.valueOf("//oaf:hostedBy/@name"));
|
|
||||||
|
|
||||||
final DataInfo info = prepareDataInfo(doc);
|
|
||||||
final long lastUpdateTimestamp = new Date().getTime();
|
|
||||||
|
|
||||||
for (final Oaf oaf : createOafs(doc, type, collectedFrom, hostedBy, info, lastUpdateTimestamp)) {
|
|
||||||
emitOaf(oaf);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
log.info("All Done.");
|
|
||||||
}
|
|
||||||
|
|
||||||
protected void registerNamespaces(final Map<String, String> nsContext) {
|
|
||||||
nsContext.put("dr", "http://www.driver-repository.eu/namespace/dr");
|
|
||||||
nsContext.put("dri", "http://www.driver-repository.eu/namespace/dri");
|
|
||||||
nsContext.put("oaf", "http://namespace.openaire.eu/oaf");
|
|
||||||
nsContext.put("oai", "http://www.openarchives.org/OAI/2.0/");
|
|
||||||
nsContext.put("prov", "http://www.openarchives.org/OAI/2.0/provenance");
|
|
||||||
}
|
}
|
||||||
|
|
||||||
protected List<Oaf> createOafs(final Document doc,
|
protected List<Oaf> createOafs(final Document doc,
|
||||||
|
@ -194,10 +153,10 @@ public abstract class AbstractMongoExecutor extends AbstractMigrationExecutor {
|
||||||
|
|
||||||
final List<Oaf> res = new ArrayList<>();
|
final List<Oaf> res = new ArrayList<>();
|
||||||
|
|
||||||
final String docId = createOpenaireId(50, doc.valueOf("//dri:objIdentifier"));
|
final String docId = createOpenaireId(50, doc.valueOf("//dri:objIdentifier"), false);
|
||||||
|
|
||||||
for (final Object o : doc.selectNodes("//oaf:projectid")) {
|
for (final Object o : doc.selectNodes("//oaf:projectid")) {
|
||||||
final String projectId = createOpenaireId(40, ((Node) o).getText());
|
final String projectId = createOpenaireId(40, ((Node) o).getText(), true);
|
||||||
|
|
||||||
final Relation r1 = new Relation();
|
final Relation r1 = new Relation();
|
||||||
r1.setRelType("resultProject");
|
r1.setRelType("resultProject");
|
||||||
|
@ -238,7 +197,7 @@ public abstract class AbstractMongoExecutor extends AbstractMigrationExecutor {
|
||||||
final long lastUpdateTimestamp) {
|
final long lastUpdateTimestamp) {
|
||||||
r.setDataInfo(info);
|
r.setDataInfo(info);
|
||||||
r.setLastupdatetimestamp(lastUpdateTimestamp);
|
r.setLastupdatetimestamp(lastUpdateTimestamp);
|
||||||
r.setId(createOpenaireId(50, doc.valueOf("//dri:objIdentifier")));
|
r.setId(createOpenaireId(50, doc.valueOf("//dri:objIdentifier"), false));
|
||||||
r.setOriginalId(Arrays.asList(doc.valueOf("//dri:objIdentifier")));
|
r.setOriginalId(Arrays.asList(doc.valueOf("//dri:objIdentifier")));
|
||||||
r.setCollectedfrom(Arrays.asList(collectedFrom));
|
r.setCollectedfrom(Arrays.asList(collectedFrom));
|
||||||
r.setPid(prepareListStructProps(doc, "//oaf:identifier", "@identifierType", "dnet:pid_types", "dnet:pid_types", info));
|
r.setPid(prepareListStructProps(doc, "//oaf:identifier", "@identifierType", "dnet:pid_types", "dnet:pid_types", info));
|
||||||
|
@ -398,6 +357,8 @@ public abstract class AbstractMongoExecutor extends AbstractMigrationExecutor {
|
||||||
protected DataInfo prepareDataInfo(final Document doc) {
|
protected DataInfo prepareDataInfo(final Document doc) {
|
||||||
final Node n = doc.selectSingleNode("//oaf:datainfo");
|
final Node n = doc.selectSingleNode("//oaf:datainfo");
|
||||||
|
|
||||||
|
if (n == null) { return null; }
|
||||||
|
|
||||||
final String paClassId = n.valueOf("./oaf:provenanceaction/@classid");
|
final String paClassId = n.valueOf("./oaf:provenanceaction/@classid");
|
||||||
final String paClassName = n.valueOf("./oaf:provenanceaction/@classname");
|
final String paClassName = n.valueOf("./oaf:provenanceaction/@classname");
|
||||||
final String paSchemeId = n.valueOf("./oaf:provenanceaction/@schemeid");
|
final String paSchemeId = n.valueOf("./oaf:provenanceaction/@schemeid");
|
||||||
|
@ -430,10 +391,4 @@ public abstract class AbstractMongoExecutor extends AbstractMigrationExecutor {
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
|
||||||
public void close() throws IOException {
|
|
||||||
super.close();
|
|
||||||
mdstoreClient.close();
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
}
|
|
@ -0,0 +1,172 @@
|
||||||
|
package eu.dnetlib.dhp.migration.step2;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.sql.SQLException;
|
||||||
|
import java.util.Arrays;
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
|
import org.apache.commons.io.IOUtils;
|
||||||
|
import org.apache.commons.lang3.StringUtils;
|
||||||
|
import org.apache.commons.logging.Log;
|
||||||
|
import org.apache.commons.logging.LogFactory;
|
||||||
|
import org.apache.hadoop.fs.FileSystem;
|
||||||
|
import org.apache.hadoop.fs.Path;
|
||||||
|
import org.apache.hadoop.io.Text;
|
||||||
|
import org.apache.spark.api.java.JavaRDD;
|
||||||
|
import org.apache.spark.api.java.JavaSparkContext;
|
||||||
|
import org.apache.spark.sql.SparkSession;
|
||||||
|
|
||||||
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||||
|
import eu.dnetlib.dhp.migration.step1.MigrateMongoMdstoresApplication;
|
||||||
|
import eu.dnetlib.dhp.migration.utils.DbClient;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.Dataset;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.Datasource;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.Oaf;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.Organization;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.OtherResearchProduct;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.Project;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.Publication;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.Relation;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.Software;
|
||||||
|
import scala.Tuple2;
|
||||||
|
|
||||||
|
public class GenerateEntitiesApplication {
|
||||||
|
|
||||||
|
private static final Log log = LogFactory.getLog(GenerateEntitiesApplication.class);
|
||||||
|
|
||||||
|
public static void main(final String[] args) throws Exception {
|
||||||
|
final ArgumentApplicationParser parser = new ArgumentApplicationParser(
|
||||||
|
IOUtils.toString(MigrateMongoMdstoresApplication.class
|
||||||
|
.getResourceAsStream("/eu/dnetlib/dhp/migration/generate_entities_parameters.json")));
|
||||||
|
|
||||||
|
parser.parseArgument(args);
|
||||||
|
|
||||||
|
final String sourcePaths = parser.get("sourcePaths");
|
||||||
|
final String targetPath = parser.get("targetPath");
|
||||||
|
|
||||||
|
final String dbUrl = parser.get("postgresUrl");
|
||||||
|
final String dbUser = parser.get("postgresUser");
|
||||||
|
final String dbPassword = parser.get("postgresPassword");
|
||||||
|
|
||||||
|
final Map<String, String> code2name = loadClassNames(dbUrl, dbUser, dbPassword);
|
||||||
|
|
||||||
|
try (final SparkSession spark = newSparkSession(parser); final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext())) {
|
||||||
|
final List<String> existingSourcePaths = Arrays.stream(sourcePaths.split(",")).filter(p -> exists(sc, p)).collect(Collectors.toList());
|
||||||
|
generateEntities(sc, code2name, existingSourcePaths, targetPath);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private static SparkSession newSparkSession(final ArgumentApplicationParser parser) {
|
||||||
|
return SparkSession
|
||||||
|
.builder()
|
||||||
|
.appName(GenerateEntitiesApplication.class.getSimpleName())
|
||||||
|
.master(parser.get("master"))
|
||||||
|
.getOrCreate();
|
||||||
|
}
|
||||||
|
|
||||||
|
    private static void generateEntities(final JavaSparkContext sc,
            final Map<String, String> code2name,
            final List<String> sourcePaths,
            final String targetPath) {

        log.info("Generate entities from files:");
        sourcePaths.forEach(log::info);

        JavaRDD<String> inputRdd = sc.emptyRDD();

        for (final String sp : sourcePaths) {
            inputRdd = inputRdd.union(sc.sequenceFile(sp, Text.class, Text.class)
                    .map(k -> new Tuple2<>(k._1().toString(), k._2().toString()))
                    .map(k -> convertToListOaf(k._1(), k._2(), code2name))
                    .flatMap(list -> list.iterator())
                    .map(oaf -> oaf.getClass().getSimpleName().toLowerCase() + "|" + convertToJson(oaf)));
        }

        inputRdd.saveAsTextFile(targetPath);
    }

    private static List<Oaf> convertToListOaf(final String id, final String s, final Map<String, String> code2name) {
        final String type = StringUtils.substringAfter(id, ":");

        switch (type.toLowerCase()) {
        case "native_oaf":
            return new OafToOafMapper(code2name).processMdRecord(s);
        case "native_odf":
            return new OdfToOafMapper(code2name).processMdRecord(s);
        case "datasource":
            return Arrays.asList(convertFromJson(s, Datasource.class));
        case "organization":
            return Arrays.asList(convertFromJson(s, Organization.class));
        case "project":
            return Arrays.asList(convertFromJson(s, Project.class));
        case "relation":
            return Arrays.asList(convertFromJson(s, Relation.class));
        case "publication":
            return Arrays.asList(convertFromJson(s, Publication.class));
        case "dataset":
            return Arrays.asList(convertFromJson(s, Dataset.class));
        case "software":
            return Arrays.asList(convertFromJson(s, Software.class));
        case "otherresearchproducts":
        default:
            return Arrays.asList(convertFromJson(s, OtherResearchProduct.class));
        }
    }
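generateEntities() writes each entity as a text line of the form "<simplename>|<json>". A standalone sketch of how such a line can be split back per entity type, which is what the DispatchEntitiesApplication at the end of this commit does on the Spark side (the sample line below is made up):

import org.apache.commons.lang3.StringUtils;

// Sketch of the "<simplename>|<json>" line convention produced by generateEntities().
// Splitting on the first '|' recovers the entity type and the serialized payload.
public class EntityLineSketch {
    public static void main(final String[] args) {
        final String line = "publication|{\"id\":\"50|example::0001\"}"; // hypothetical sample line
        final String type = StringUtils.substringBefore(line, "|");
        final String json = StringUtils.substringAfter(line, "|");
        System.out.println(type + " -> " + json);
    }
}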
||||||
|
|
||||||
|
private static Map<String, String> loadClassNames(final String dbUrl, final String dbUser, final String dbPassword) throws IOException {
|
||||||
|
|
||||||
|
log.info("Loading vocabulary terms from db...");
|
||||||
|
|
||||||
|
final Map<String, String> map = new HashMap<>();
|
||||||
|
|
||||||
|
try (DbClient dbClient = new DbClient(dbUrl, dbUser, dbPassword)) {
|
||||||
|
dbClient.processResults("select code, name from class", rs -> {
|
||||||
|
try {
|
||||||
|
map.put(rs.getString("code"), rs.getString("name"));
|
||||||
|
} catch (final SQLException e) {
|
||||||
|
e.printStackTrace();
|
||||||
|
}
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
log.info("Found " + map.size() + " terms.");
|
||||||
|
|
||||||
|
return map;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
private static String convertToJson(final Oaf oaf) {
|
||||||
|
try {
|
||||||
|
return new ObjectMapper().writeValueAsString(oaf);
|
||||||
|
} catch (final Exception e) {
|
||||||
|
throw new RuntimeException(e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private static Oaf convertFromJson(final String s, final Class<? extends Oaf> clazz) {
|
||||||
|
try {
|
||||||
|
return new ObjectMapper().readValue(s, clazz);
|
||||||
|
} catch (final Exception e) {
|
||||||
|
log.error("Error parsing object of class: " + clazz);
|
||||||
|
log.error(s);
|
||||||
|
throw new RuntimeException(e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private static boolean exists(final JavaSparkContext context, final String pathToFile) {
|
||||||
|
try {
|
||||||
|
final FileSystem hdfs = org.apache.hadoop.fs.FileSystem.get(context.hadoopConfiguration());
|
||||||
|
final Path path = new Path(pathToFile);
|
||||||
|
return hdfs.exists(path);
|
||||||
|
} catch (final IOException e) {
|
||||||
|
throw new RuntimeException(e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
@ -1,16 +1,17 @@
|
||||||
package eu.dnetlib.dhp.migration;
|
package eu.dnetlib.dhp.migration.step2;
|
||||||
|
|
||||||
|
import static eu.dnetlib.dhp.migration.utils.OafMapperUtils.createOpenaireId;
|
||||||
|
import static eu.dnetlib.dhp.migration.utils.OafMapperUtils.field;
|
||||||
|
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
|
||||||
import org.apache.commons.logging.Log;
|
|
||||||
import org.apache.commons.logging.LogFactory;
|
|
||||||
import org.dom4j.Document;
|
import org.dom4j.Document;
|
||||||
import org.dom4j.Node;
|
import org.dom4j.Node;
|
||||||
|
|
||||||
import eu.dnetlib.dhp.migration.pace.PacePerson;
|
import eu.dnetlib.dhp.migration.utils.PacePerson;
|
||||||
import eu.dnetlib.dhp.schema.oaf.Author;
|
import eu.dnetlib.dhp.schema.oaf.Author;
|
||||||
import eu.dnetlib.dhp.schema.oaf.DataInfo;
|
import eu.dnetlib.dhp.schema.oaf.DataInfo;
|
||||||
import eu.dnetlib.dhp.schema.oaf.Field;
|
import eu.dnetlib.dhp.schema.oaf.Field;
|
||||||
|
@ -22,20 +23,10 @@ import eu.dnetlib.dhp.schema.oaf.Qualifier;
|
||||||
import eu.dnetlib.dhp.schema.oaf.Relation;
|
import eu.dnetlib.dhp.schema.oaf.Relation;
|
||||||
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
|
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
|
||||||
|
|
||||||
public class OafMigrationExecutor extends AbstractMongoExecutor {
|
public class OafToOafMapper extends AbstractMdRecordToOafMapper {
|
||||||
|
|
||||||
private static final Log log = LogFactory.getLog(OafMigrationExecutor.class);
|
public OafToOafMapper(final Map<String, String> code2name) {
|
||||||
|
super(code2name);
|
||||||
public OafMigrationExecutor(final String hdfsPath, final String hdfsNameNode, final String hdfsUser, final String mongoBaseUrl, final String mongoDb,
|
|
||||||
final String dbUrl, final String dbUser,
|
|
||||||
final String dbPassword) throws Exception {
|
|
||||||
super(hdfsPath, hdfsNameNode, hdfsUser, mongoBaseUrl, mongoDb, dbUrl, dbUser, dbPassword);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
protected void registerNamespaces(final Map<String, String> nsContext) {
|
|
||||||
super.registerNamespaces(nsContext);
|
|
||||||
nsContext.put("dc", "http://purl.org/dc/elements/1.1/");
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -211,12 +202,12 @@ public class OafMigrationExecutor extends AbstractMongoExecutor {
|
||||||
final KeyValue collectedFrom,
|
final KeyValue collectedFrom,
|
||||||
final DataInfo info,
|
final DataInfo info,
|
||||||
final long lastUpdateTimestamp) {
|
final long lastUpdateTimestamp) {
|
||||||
final String docId = createOpenaireId(50, doc.valueOf("//dri:objIdentifier"));
|
final String docId = createOpenaireId(50, doc.valueOf("//dri:objIdentifier"), false);
|
||||||
|
|
||||||
final List<Oaf> res = new ArrayList<>();
|
final List<Oaf> res = new ArrayList<>();
|
||||||
|
|
||||||
for (final Object o : doc.selectNodes("//*[local-name()='relatedDataset']")) {
|
for (final Object o : doc.selectNodes("//*[local-name()='relatedDataset']")) {
|
||||||
final String otherId = createOpenaireId(50, ((Node) o).getText());
|
final String otherId = createOpenaireId(50, ((Node) o).getText(), false);
|
||||||
|
|
||||||
final Relation r1 = new Relation();
|
final Relation r1 = new Relation();
|
||||||
r1.setRelType("resultResult");
|
r1.setRelType("resultResult");
|
|
@ -1,4 +1,8 @@
|
||||||
package eu.dnetlib.dhp.migration;
|
package eu.dnetlib.dhp.migration.step2;
|
||||||
|
|
||||||
|
import static eu.dnetlib.dhp.migration.utils.OafMapperUtils.createOpenaireId;
|
||||||
|
import static eu.dnetlib.dhp.migration.utils.OafMapperUtils.field;
|
||||||
|
import static eu.dnetlib.dhp.migration.utils.OafMapperUtils.structuredProperty;
|
||||||
|
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
|
@ -6,8 +10,6 @@ import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
|
||||||
import org.apache.commons.lang3.StringUtils;
|
import org.apache.commons.lang3.StringUtils;
|
||||||
import org.apache.commons.logging.Log;
|
|
||||||
import org.apache.commons.logging.LogFactory;
|
|
||||||
import org.dom4j.Document;
|
import org.dom4j.Document;
|
||||||
import org.dom4j.Node;
|
import org.dom4j.Node;
|
||||||
|
|
||||||
|
@ -22,38 +24,28 @@ import eu.dnetlib.dhp.schema.oaf.Qualifier;
|
||||||
import eu.dnetlib.dhp.schema.oaf.Relation;
|
import eu.dnetlib.dhp.schema.oaf.Relation;
|
||||||
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
|
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
|
||||||
|
|
||||||
public class OdfMigrationExecutor extends AbstractMongoExecutor {
|
public class OdfToOafMapper extends AbstractMdRecordToOafMapper {
|
||||||
|
|
||||||
private static final Log log = LogFactory.getLog(OdfMigrationExecutor.class);
|
public OdfToOafMapper(final Map<String, String> code2name) {
|
||||||
|
super(code2name);
|
||||||
public OdfMigrationExecutor(final String hdfsPath, final String hdfsNameNode, final String hdfsUser, final String mongoBaseUrl, final String mongoDb,
|
|
||||||
final String dbUrl, final String dbUser,
|
|
||||||
final String dbPassword) throws Exception {
|
|
||||||
super(hdfsPath, hdfsNameNode, hdfsUser, mongoBaseUrl, mongoDb, dbUrl, dbUser, dbPassword);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
protected void registerNamespaces(final Map<String, String> nsContext) {
|
|
||||||
super.registerNamespaces(nsContext);
|
|
||||||
nsContext.put("dc", "http://datacite.org/schema/kernel-3");
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected List<StructuredProperty> prepareTitles(final Document doc, final DataInfo info) {
|
protected List<StructuredProperty> prepareTitles(final Document doc, final DataInfo info) {
|
||||||
return prepareListStructProps(doc, "//dc:title", MAIN_TITLE_QUALIFIER, info);
|
return prepareListStructProps(doc, "//datacite:title", MAIN_TITLE_QUALIFIER, info);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected List<Author> prepareAuthors(final Document doc, final DataInfo info) {
|
protected List<Author> prepareAuthors(final Document doc, final DataInfo info) {
|
||||||
final List<Author> res = new ArrayList<>();
|
final List<Author> res = new ArrayList<>();
|
||||||
int pos = 1;
|
int pos = 1;
|
||||||
for (final Object o : doc.selectNodes("//dc:creator")) {
|
for (final Object o : doc.selectNodes("//datacite:creator")) {
|
||||||
final Node n = (Node) o;
|
final Node n = (Node) o;
|
||||||
final Author author = new Author();
|
final Author author = new Author();
|
||||||
author.setFullname(n.valueOf("./dc:creatorName"));
|
author.setFullname(n.valueOf("./datacite:creatorName"));
|
||||||
author.setName(n.valueOf("./dc:givenName"));
|
author.setName(n.valueOf("./datacite:givenName"));
|
||||||
author.setSurname(n.valueOf("./dc:familyName"));
|
author.setSurname(n.valueOf("./datacite:familyName"));
|
||||||
author.setAffiliation(prepareListFields(doc, "./dc:affiliation", info));
|
author.setAffiliation(prepareListFields(doc, "./datacite:affiliation", info));
|
||||||
author.setPid(preparePids(doc, info));
|
author.setPid(preparePids(doc, info));
|
||||||
author.setRank(pos++);
|
author.setRank(pos++);
|
||||||
res.add(author);
|
res.add(author);
|
||||||
|
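The XPaths in this mapper switch from the dc: prefix to datacite: because step 2 registers "datacite" for the DataCite kernel-3 namespace in the shared nsContext map of AbstractMdRecordToOafMapper. A minimal dom4j sketch of that lookup, using the same pattern as the mapper (the XML fragment below is made up, not taken from a real mdstore):

import java.util.HashMap;
import java.util.Map;

import org.dom4j.Document;
import org.dom4j.DocumentFactory;
import org.dom4j.DocumentHelper;

// Sketch: register the kernel-3 namespace under the "datacite" prefix, then query with it.
public class DataciteXPathSketch {
    public static void main(final String[] args) throws Exception {
        final Map<String, String> nsContext = new HashMap<>();
        nsContext.put("datacite", "http://datacite.org/schema/kernel-3");
        DocumentFactory.getInstance().setXPathNamespaceURIs(nsContext);

        final String xml = "<resource xmlns=\"http://datacite.org/schema/kernel-3\">"
                + "<creators><creator><creatorName>Doe, Jane</creatorName></creator></creators></resource>";
        final Document doc = DocumentHelper.parseText(xml);
        System.out.println(doc.valueOf("//datacite:creatorName")); // -> Doe, Jane
    }
}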
@ -63,7 +55,7 @@ public class OdfMigrationExecutor extends AbstractMongoExecutor {
|
||||||
|
|
||||||
private List<StructuredProperty> preparePids(final Document doc, final DataInfo info) {
|
private List<StructuredProperty> preparePids(final Document doc, final DataInfo info) {
|
||||||
final List<StructuredProperty> res = new ArrayList<>();
|
final List<StructuredProperty> res = new ArrayList<>();
|
||||||
for (final Object o : doc.selectNodes("./dc:nameIdentifier")) {
|
for (final Object o : doc.selectNodes("./datacite:nameIdentifier")) {
|
||||||
res.add(structuredProperty(((Node) o).getText(), prepareQualifier((Node) o, "./@nameIdentifierScheme", "dnet:pid_types", "dnet:pid_types"), info));
|
res.add(structuredProperty(((Node) o).getText(), prepareQualifier((Node) o, "./@nameIdentifierScheme", "dnet:pid_types", "dnet:pid_types"), info));
|
||||||
}
|
}
|
||||||
return res;
|
return res;
|
||||||
|
@ -72,7 +64,7 @@ public class OdfMigrationExecutor extends AbstractMongoExecutor {
|
||||||
@Override
|
@Override
|
||||||
protected List<Instance> prepareInstances(final Document doc, final DataInfo info, final KeyValue collectedfrom, final KeyValue hostedby) {
|
protected List<Instance> prepareInstances(final Document doc, final DataInfo info, final KeyValue collectedfrom, final KeyValue hostedby) {
|
||||||
final List<Instance> res = new ArrayList<>();
|
final List<Instance> res = new ArrayList<>();
|
||||||
for (final Object o : doc.selectNodes("//dc:alternateIdentifier[@alternateIdentifierType='URL']")) {
|
for (final Object o : doc.selectNodes("//datacite:alternateIdentifier[@alternateIdentifierType='URL']")) {
|
||||||
final Instance instance = new Instance();
|
final Instance instance = new Instance();
|
||||||
instance.setUrl(Arrays.asList(((Node) o).getText().trim()));
|
instance.setUrl(Arrays.asList(((Node) o).getText().trim()));
|
||||||
instance.setInstancetype(prepareQualifier(doc, "//dr:CobjCategory", "dnet:publication_resource", "dnet:publication_resource"));
|
instance.setInstancetype(prepareQualifier(doc, "//dr:CobjCategory", "dnet:publication_resource", "dnet:publication_resource"));
|
||||||
|
@ -98,7 +90,7 @@ public class OdfMigrationExecutor extends AbstractMongoExecutor {
|
||||||
@Override
|
@Override
|
||||||
protected List<StructuredProperty> prepareRelevantDates(final Document doc, final DataInfo info) {
|
protected List<StructuredProperty> prepareRelevantDates(final Document doc, final DataInfo info) {
|
||||||
final List<StructuredProperty> res = new ArrayList<>();
|
final List<StructuredProperty> res = new ArrayList<>();
|
||||||
for (final Object o : doc.selectNodes("//dc:date")) {
|
for (final Object o : doc.selectNodes("//datacite:date")) {
|
||||||
final String dateType = ((Node) o).valueOf("@dateType");
|
final String dateType = ((Node) o).valueOf("@dateType");
|
||||||
if (StringUtils.isBlank(dateType) && !dateType.equalsIgnoreCase("Accepted") && !dateType.equalsIgnoreCase("Issued")
|
if (StringUtils.isBlank(dateType) && !dateType.equalsIgnoreCase("Accepted") && !dateType.equalsIgnoreCase("Issued")
|
||||||
&& !dateType.equalsIgnoreCase("Updated") && !dateType.equalsIgnoreCase("Available")) {
|
&& !dateType.equalsIgnoreCase("Updated") && !dateType.equalsIgnoreCase("Available")) {
|
||||||
|
@ -115,32 +107,32 @@ public class OdfMigrationExecutor extends AbstractMongoExecutor {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected List<Field<String>> prepareContributors(final Document doc, final DataInfo info) {
|
protected List<Field<String>> prepareContributors(final Document doc, final DataInfo info) {
|
||||||
return prepareListFields(doc, "//dc:contributorName", info);
|
return prepareListFields(doc, "//datacite:contributorName", info);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected List<Field<String>> prepareFormats(final Document doc, final DataInfo info) {
|
protected List<Field<String>> prepareFormats(final Document doc, final DataInfo info) {
|
||||||
return prepareListFields(doc, "//dc:format", info);
|
return prepareListFields(doc, "//datacite:format", info);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected Field<String> preparePublisher(final Document doc, final DataInfo info) {
|
protected Field<String> preparePublisher(final Document doc, final DataInfo info) {
|
||||||
return prepareField(doc, "//dc:publisher", info);
|
return prepareField(doc, "//datacite:publisher", info);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected List<Field<String>> prepareDescriptions(final Document doc, final DataInfo info) {
|
protected List<Field<String>> prepareDescriptions(final Document doc, final DataInfo info) {
|
||||||
return prepareListFields(doc, "//dc:description[@descriptionType='Abstract']", info);
|
return prepareListFields(doc, "//datacite:description[@descriptionType='Abstract']", info);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected List<StructuredProperty> prepareSubjects(final Document doc, final DataInfo info) {
|
protected List<StructuredProperty> prepareSubjects(final Document doc, final DataInfo info) {
|
||||||
return prepareListStructProps(doc, "//dc:subject", info);
|
return prepareListStructProps(doc, "//datacite:subject", info);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected Qualifier prepareLanguages(final Document doc) {
|
protected Qualifier prepareLanguages(final Document doc) {
|
||||||
return prepareQualifier(doc, "//dc:language", "dnet:languages", "dnet:languages");
|
return prepareQualifier(doc, "//datacite:language", "dnet:languages", "dnet:languages");
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -150,17 +142,17 @@ public class OdfMigrationExecutor extends AbstractMongoExecutor {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected List<Field<String>> prepareOtherResearchProductContactGroups(final Document doc, final DataInfo info) {
|
protected List<Field<String>> prepareOtherResearchProductContactGroups(final Document doc, final DataInfo info) {
|
||||||
return prepareListFields(doc, "//dc:contributor[@contributorType='ContactGroup']/dc:contributorName", info);
|
return prepareListFields(doc, "//datacite:contributor[@contributorType='ContactGroup']/datacite:contributorName", info);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected List<Field<String>> prepareOtherResearchProductContactPersons(final Document doc, final DataInfo info) {
|
protected List<Field<String>> prepareOtherResearchProductContactPersons(final Document doc, final DataInfo info) {
|
||||||
return prepareListFields(doc, "//dc:contributor[@contributorType='ContactPerson']/dc:contributorName", info);
|
return prepareListFields(doc, "//datacite:contributor[@contributorType='ContactPerson']/datacite:contributorName", info);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected Qualifier prepareSoftwareProgrammingLanguage(final Document doc, final DataInfo info) {
|
protected Qualifier prepareSoftwareProgrammingLanguage(final Document doc, final DataInfo info) {
|
||||||
return prepareQualifier(doc, "//dc:format", "dnet:programming_languages", "dnet:programming_languages");
|
return prepareQualifier(doc, "//datacite:format", "dnet:programming_languages", "dnet:programming_languages");
|
||||||
}
|
}
|

	@Override

@@ -175,7 +167,7 @@ public class OdfMigrationExecutor extends AbstractMongoExecutor {

	@Override
	protected List<Field<String>> prepareSoftwareDocumentationUrls(final Document doc, final DataInfo info) {
-		return prepareListFields(doc, "//dc:relatedIdentifier[@relatedIdentifierType='URL' and @relationType='IsDocumentedBy']", info);
+		return prepareListFields(doc, "//datacite:relatedIdentifier[@relatedIdentifierType='URL' and @relationType='IsDocumentedBy']", info);
	}

	// DATASETS

@@ -184,11 +176,11 @@ public class OdfMigrationExecutor extends AbstractMongoExecutor {
	protected List<GeoLocation> prepareDatasetGeoLocations(final Document doc, final DataInfo info) {
		final List<GeoLocation> res = new ArrayList<>();

-		for (final Object o : doc.selectNodes("//dc:geoLocation")) {
+		for (final Object o : doc.selectNodes("//datacite:geoLocation")) {
			final GeoLocation loc = new GeoLocation();
-			loc.setBox(((Node) o).valueOf("./dc:geoLocationBox"));
-			loc.setPlace(((Node) o).valueOf("./dc:geoLocationPlace"));
-			loc.setPoint(((Node) o).valueOf("./dc:geoLocationPoint"));
+			loc.setBox(((Node) o).valueOf("./datacite:geoLocationBox"));
+			loc.setPlace(((Node) o).valueOf("./datacite:geoLocationPlace"));
+			loc.setPoint(((Node) o).valueOf("./datacite:geoLocationPoint"));
			res.add(loc);
		}
		return res;

@@ -201,17 +193,17 @@ public class OdfMigrationExecutor extends AbstractMongoExecutor {

	@Override
	protected Field<String> prepareDatasetLastMetadataUpdate(final Document doc, final DataInfo info) {
-		return prepareField(doc, "//dc:date[@dateType='Updated']", info);
+		return prepareField(doc, "//datacite:date[@dateType='Updated']", info);
	}

	@Override
	protected Field<String> prepareDatasetVersion(final Document doc, final DataInfo info) {
-		return prepareField(doc, "//dc:version", info);
+		return prepareField(doc, "//datacite:version", info);
	}

	@Override
	protected Field<String> prepareDatasetSize(final Document doc, final DataInfo info) {
-		return prepareField(doc, "//dc:size", info);
+		return prepareField(doc, "//datacite:size", info);
	}

	@Override

@@ -221,18 +213,18 @@ public class OdfMigrationExecutor extends AbstractMongoExecutor {

	@Override
	protected Field<String> prepareDatasetStorageDate(final Document doc, final DataInfo info) {
-		return prepareField(doc, "//dc:date[@dateType='Issued']", info);
+		return prepareField(doc, "//datacite:date[@dateType='Issued']", info);
	}

	@Override
	protected List<Oaf> addOtherResultRels(final Document doc, final KeyValue collectedFrom, final DataInfo info, final long lastUpdateTimestamp) {

-		final String docId = createOpenaireId(50, doc.valueOf("//dri:objIdentifier"));
+		final String docId = createOpenaireId(50, doc.valueOf("//dri:objIdentifier"), false);

		final List<Oaf> res = new ArrayList<>();

-		for (final Object o : doc.selectNodes("//*[local-name() = 'resource']//*[local-name()='relatedIdentifier' and ./@relatedIdentifierType='OPENAIRE']")) {
+		for (final Object o : doc.selectNodes("//datacite:relatedIdentifier[@relatedIdentifierType='OPENAIRE']")) {
-			final String otherId = createOpenaireId(50, ((Node) o).getText());
+			final String otherId = createOpenaireId(50, ((Node) o).getText(), false);
			final String type = ((Node) o).valueOf("@relationType");

			if (type.equals("IsSupplementTo")) {
@@ -0,0 +1,70 @@
package eu.dnetlib.dhp.migration.step3;

import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.SparkSession;

import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.migration.step1.MigrateMongoMdstoresApplication;
import eu.dnetlib.dhp.schema.oaf.Dataset;
import eu.dnetlib.dhp.schema.oaf.Datasource;
import eu.dnetlib.dhp.schema.oaf.Organization;
import eu.dnetlib.dhp.schema.oaf.OtherResearchProduct;
import eu.dnetlib.dhp.schema.oaf.Project;
import eu.dnetlib.dhp.schema.oaf.Publication;
import eu.dnetlib.dhp.schema.oaf.Relation;
import eu.dnetlib.dhp.schema.oaf.Software;

public class DispatchEntitiesApplication {

	private static final Log log = LogFactory.getLog(DispatchEntitiesApplication.class);

	public static void main(final String[] args) throws Exception {
		final ArgumentApplicationParser parser = new ArgumentApplicationParser(
				IOUtils.toString(MigrateMongoMdstoresApplication.class
						.getResourceAsStream("/eu/dnetlib/dhp/migration/dispatch_entities_parameters.json")));
		parser.parseArgument(args);

		try (final SparkSession spark = newSparkSession(parser); final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext())) {

			final String sourcePath = parser.get("sourcePath");
			final String targetPath = parser.get("graphRawPath");

			processEntity(sc, Publication.class, sourcePath, targetPath);
			processEntity(sc, Dataset.class, sourcePath, targetPath);
			processEntity(sc, Software.class, sourcePath, targetPath);
			processEntity(sc, OtherResearchProduct.class, sourcePath, targetPath);
			processEntity(sc, Datasource.class, sourcePath, targetPath);
			processEntity(sc, Organization.class, sourcePath, targetPath);
			processEntity(sc, Project.class, sourcePath, targetPath);
			processEntity(sc, Relation.class, sourcePath, targetPath);
		}
	}

	private static SparkSession newSparkSession(final ArgumentApplicationParser parser) {
		return SparkSession
				.builder()
				.appName(DispatchEntitiesApplication.class.getSimpleName())
				.master(parser.get("master"))
				.getOrCreate();
	}

	private static void processEntity(final JavaSparkContext sc, final Class<?> clazz, final String sourcePath, final String targetPath) {
		final String type = clazz.getSimpleName().toLowerCase();

		log.info(String.format("Processing entities (%s) in file: %s", type, sourcePath));

		sc.textFile(sourcePath)
				.filter(l -> isEntityType(l, type))
				.map(l -> StringUtils.substringAfter(l, "|"))
				.saveAsTextFile(targetPath + "/" + type); // use repartition(XXX) ???
	}

	private static boolean isEntityType(final String line, final String type) {
		return StringUtils.substringBefore(line, "|").equalsIgnoreCase(type);
	}

}
@@ -0,0 +1,77 @@
package eu.dnetlib.dhp.migration.utils;

import java.io.Closeable;
import java.io.IOException;
import java.util.concurrent.atomic.AtomicInteger;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
import org.codehaus.jackson.map.ObjectMapper;

import eu.dnetlib.dhp.schema.oaf.Oaf;

public class AbstractMigrationApplication implements Closeable {

	private final AtomicInteger counter = new AtomicInteger(0);

	private final Text key = new Text();

	private final Text value = new Text();

	private final SequenceFile.Writer writer;

	private final ObjectMapper objectMapper = new ObjectMapper();

	private static final Log log = LogFactory.getLog(AbstractMigrationApplication.class);

	public AbstractMigrationApplication(final String hdfsPath) throws Exception {

		log.info(String.format("Creating SequenceFile Writer, hdfsPath=%s", hdfsPath));

		this.writer = SequenceFile.createWriter(getConf(), SequenceFile.Writer.file(new Path(hdfsPath)), SequenceFile.Writer
				.keyClass(Text.class), SequenceFile.Writer.valueClass(Text.class));
	}

	private Configuration getConf() throws IOException {
		final Configuration conf = new Configuration();
		/*
		 * conf.set("fs.defaultFS", hdfsNameNode); conf.set("fs.hdfs.impl", org.apache.hadoop.hdfs.DistributedFileSystem.class.getName());
		 * conf.set("fs.file.impl", org.apache.hadoop.fs.LocalFileSystem.class.getName()); System.setProperty("HADOOP_USER_NAME", hdfsUser);
		 * System.setProperty("hadoop.home.dir", "/"); FileSystem.get(URI.create(hdfsNameNode), conf);
		 */
		return conf;
	}

	protected void emit(final String s, final String type) {
		try {
			key.set(counter.getAndIncrement() + ":" + type);
			value.set(s);
			writer.append(key, value);
		} catch (final Exception e) {
			throw new RuntimeException(e);
		}
	}

	protected void emitOaf(final Oaf oaf) {
		try {
			emit(objectMapper.writeValueAsString(oaf), oaf.getClass().getSimpleName().toLowerCase());
		} catch (final Exception e) {
			throw new RuntimeException(e);
		}
	}

	public ObjectMapper getObjectMapper() {
		return objectMapper;
	}

	@Override
	public void close() throws IOException {
		writer.hflush();
		writer.close();
	}

}
@@ -1,4 +1,4 @@
-package eu.dnetlib.dhp.migration;
+package eu.dnetlib.dhp.migration.utils;

import java.io.Closeable;
import java.io.IOException;

@@ -28,8 +28,8 @@ public class DbClient implements Closeable {
					StringUtils.isNoneBlank(login, password) ? DriverManager.getConnection(address, login, password) : DriverManager.getConnection(address);
			this.connection.setAutoCommit(false);
		} catch (final Exception e) {
-			log.error(e.getClass().getName() + ": " + e.getMessage());
-			throw new RuntimeException(e);
+			log.error("Connection to postgresDB failed");
+			throw new RuntimeException("Connection to postgresDB failed", e);
		}
		log.info("Opened database successfully");
	}

@@ -44,10 +44,12 @@ public class DbClient implements Closeable {
					consumer.accept(rs);
				}
			} catch (final SQLException e) {
-				throw new RuntimeException(e);
+				log.error("Error executing sql query: " + sql, e);
+				throw new RuntimeException("Error executing sql query", e);
			}
		} catch (final SQLException e1) {
-			throw new RuntimeException(e1);
+			log.error("Error preparing sql statement", e1);
+			throw new RuntimeException("Error preparing sql statement", e1);
		}
	}

@@ -1,4 +1,4 @@
-package eu.dnetlib.dhp.migration;
+package eu.dnetlib.dhp.migration.utils;

import java.io.Closeable;
import java.io.IOException;
@@ -1,24 +1,12 @@
-package eu.dnetlib.dhp.migration;
+package eu.dnetlib.dhp.migration.utils;

-import java.io.Closeable;
-import java.io.IOException;
-import java.net.URI;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Objects;
-import java.util.concurrent.atomic.AtomicInteger;
import java.util.stream.Collectors;

import org.apache.commons.lang3.StringUtils;
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.io.SequenceFile;
-import org.apache.hadoop.io.Text;
-import org.codehaus.jackson.map.ObjectMapper;

import eu.dnetlib.dhp.schema.oaf.DataInfo;
import eu.dnetlib.dhp.schema.oaf.ExtraInfo;

@@ -26,60 +14,12 @@ import eu.dnetlib.dhp.schema.oaf.Field;
import eu.dnetlib.dhp.schema.oaf.Journal;
import eu.dnetlib.dhp.schema.oaf.KeyValue;
import eu.dnetlib.dhp.schema.oaf.OAIProvenance;
-import eu.dnetlib.dhp.schema.oaf.Oaf;
import eu.dnetlib.dhp.schema.oaf.OriginDescription;
import eu.dnetlib.dhp.schema.oaf.Qualifier;
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
import eu.dnetlib.dhp.utils.DHPUtils;

-public class AbstractMigrationExecutor implements Closeable {
+public class OafMapperUtils {

-	private final AtomicInteger counter = new AtomicInteger(0);
-
-	private final Text key = new Text();
-
-	private final Text value = new Text();
-
-	private final ObjectMapper objectMapper = new ObjectMapper();
-
-	private final SequenceFile.Writer writer;
-
-	private static final Log log = LogFactory.getLog(AbstractMigrationExecutor.class);
-
-	public AbstractMigrationExecutor(final String hdfsPath, final String hdfsNameNode, final String hdfsUser) throws Exception {
-
-		log.info(String.format("Creating SequenceFile Writer, hdfsPath=%s, nameNode=%s, user=%s", hdfsPath, hdfsNameNode, hdfsUser));
-
-		this.writer = SequenceFile.createWriter(getConf(hdfsNameNode, hdfsUser), SequenceFile.Writer.file(new Path(hdfsPath)), SequenceFile.Writer
-				.keyClass(Text.class), SequenceFile.Writer.valueClass(Text.class));
-	}
-
-	private Configuration getConf(final String hdfsNameNode, final String hdfsUser) throws IOException {
-		final Configuration conf = new Configuration();
-		conf.set("fs.defaultFS", hdfsNameNode);
-		conf.set("fs.hdfs.impl", org.apache.hadoop.hdfs.DistributedFileSystem.class.getName());
-		conf.set("fs.file.impl", org.apache.hadoop.fs.LocalFileSystem.class.getName());
-		System.setProperty("HADOOP_USER_NAME", hdfsUser);
-		System.setProperty("hadoop.home.dir", "/");
-		FileSystem.get(URI.create(hdfsNameNode), conf);
-		return conf;
-	}
-
-	protected void emitOaf(final Oaf oaf) {
-		try {
-			key.set(counter.getAndIncrement() + ":" + oaf.getClass().getSimpleName().toLowerCase());
-			value.set(objectMapper.writeValueAsString(oaf));
-			writer.append(key, value);
-		} catch (final Exception e) {
-			throw new RuntimeException(e);
-		}
-	}
-
-	@Override
-	public void close() throws IOException {
-		writer.hflush();
-		writer.close();
-	}
-
	public static KeyValue keyValue(final String k, final String v) {
		final KeyValue kv = new KeyValue();

@@ -223,14 +163,33 @@ public class AbstractMigrationExecutor implements Closeable {
		return d;
	}

-	public static String createOpenaireId(final int prefix, final String originalId) {
-		final String nsPrefix = StringUtils.substringBefore(originalId, "::");
-		final String rest = StringUtils.substringAfter(originalId, "::");
-		return String.format("%s|%s::%s", prefix, nsPrefix, DHPUtils.md5(rest));
+	public static String createOpenaireId(final int prefix, final String originalId, final boolean to_md5) {
+		if (to_md5) {
+			final String nsPrefix = StringUtils.substringBefore(originalId, "::");
+			final String rest = StringUtils.substringAfter(originalId, "::");
+			return String.format("%s|%s::%s", prefix, nsPrefix, DHPUtils.md5(rest));
+		} else {
+			return String.format("%s|%s", prefix, originalId);
+		}
+	}
+
+	public static String createOpenaireId(final String type, final String originalId, final boolean to_md5) {
+		switch (type) {
+		case "datasource":
+			return createOpenaireId(10, originalId, to_md5);
+		case "organization":
+			return createOpenaireId(20, originalId, to_md5);
+		case "person":
+			return createOpenaireId(30, originalId, to_md5);
+		case "project":
+			return createOpenaireId(40, originalId, to_md5);
+		default:
+			return createOpenaireId(50, originalId, to_md5);
+		}
	}

	public static String asString(final Object o) {
		return o == null ? "" : o.toString();
	}

}
@ -1,4 +1,4 @@
|
||||||
package eu.dnetlib.dhp.migration.pace;
|
package eu.dnetlib.dhp.migration.utils;
|
||||||
|
|
||||||
import java.nio.charset.Charset;
|
import java.nio.charset.Charset;
|
||||||
import java.text.Normalizer;
|
import java.text.Normalizer;
|
|
@@ -2,7 +2,7 @@
	{
		"paramName": "s",
		"paramLongName": "sourcePath",
-		"paramDescription": "the HDFS source path which contains the sequential file",
+		"paramDescription": "the source path",
		"paramRequired": true
	},
	{

@@ -16,11 +16,5 @@
		"paramLongName": "graphRawPath",
		"paramDescription": "the path of the graph Raw in hdfs",
		"paramRequired": true
-	},
-	{
-		"paramName": "e",
-		"paramLongName": "entity",
-		"paramDescription": "The entity to extract",
-		"paramRequired": true
	}
]
@@ -0,0 +1,39 @@
[
	{
		"paramName": "s",
		"paramLongName": "sourcePaths",
		"paramDescription": "the HDFS source paths which contains the sequential file (comma separated)",
		"paramRequired": true
	},
	{
		"paramName": "mt",
		"paramLongName": "master",
		"paramDescription": "should be local or yarn",
		"paramRequired": true
	},
	{
		"paramName": "t",
		"paramLongName": "targetPath",
		"paramDescription": "the path of the target file",
		"paramRequired": true
	},
	{
		"paramName": "pgurl",
		"paramLongName": "postgresUrl",
		"paramDescription": "postgres url, example: jdbc:postgresql://localhost:5432/testdb",
		"paramRequired": true
	},
	{
		"paramName": "pguser",
		"paramLongName": "postgresUser",
		"paramDescription": "postgres user",
		"paramRequired": false
	},
	{
		"paramName": "pgpasswd",
		"paramLongName": "postgresPassword",
		"paramDescription": "postgres password",
		"paramRequired": false
	}

]
@@ -0,0 +1,10 @@
[
	{"paramName":"is", "paramLongName":"isLookupUrl", "paramDescription": "URL of the isLookUp Service", "paramRequired": true},
	{"paramName":"sn", "paramLongName":"sourceNameNode", "paramDescription": "nameNode of the source cluster", "paramRequired": true},
	{"paramName":"tn", "paramLongName":"targetNameNode", "paramDescription": "namoNode of the target cluster", "paramRequired": true},
	{"paramName":"w", "paramLongName":"workingDirectory", "paramDescription": "working directory", "paramRequired": true},
	{"paramName":"nm", "paramLongName":"distcp_num_maps", "paramDescription": "maximum number of map tasks used in the distcp process", "paramRequired": true},
	{"paramName":"mm", "paramLongName":"distcp_memory_mb", "paramDescription": "memory for distcp action copying actionsets from remote cluster", "paramRequired": true},
	{"paramName":"tt", "paramLongName":"distcp_task_timeout", "paramDescription": "timeout for distcp copying actions from remote cluster", "paramRequired": true},
	{"paramName":"tr", "paramLongName":"transform_only", "paramDescription": "activate tranform-only mode. Only apply transformation step", "paramRequired": true}
]
@@ -6,33 +6,27 @@
		"paramRequired": true
	},
	{
-		"paramName": "n",
-		"paramLongName": "namenode",
-		"paramDescription": "the Name Node URI",
-		"paramRequired": true
-	},
-	{
-		"paramName": "u",
-		"paramLongName": "hdfsUser",
-		"paramDescription": "the user wich create the hdfs seq file",
-		"paramRequired": true
-	},
-	{
-		"paramName": "dburl",
+		"paramName": "pgurl",
		"paramLongName": "postgresUrl",
		"paramDescription": "postgres url, example: jdbc:postgresql://localhost:5432/testdb",
		"paramRequired": true
	},
	{
-		"paramName": "dbuser",
+		"paramName": "pguser",
		"paramLongName": "postgresUser",
		"paramDescription": "postgres user",
		"paramRequired": false
	},
	{
-		"paramName": "dbpasswd",
+		"paramName": "pgpasswd",
		"paramLongName": "postgresPassword",
		"paramDescription": "postgres password",
		"paramRequired": false
+	},
+	{
+		"paramName": "a",
+		"paramLongName": "action",
+		"paramDescription": "process claims",
+		"paramRequired": false
	}
]
@@ -5,18 +5,6 @@
		"paramDescription": "the path where storing the sequential file",
		"paramRequired": true
	},
-	{
-		"paramName": "n",
-		"paramLongName": "namenode",
-		"paramDescription": "the Name Node URI",
-		"paramRequired": true
-	},
-	{
-		"paramName": "u",
-		"paramLongName": "hdfsUser",
-		"paramDescription": "the user wich create the hdfs seq file",
-		"paramRequired": true
-	},
	{
		"paramName": "mongourl",
		"paramLongName": "mongoBaseUrl",

@@ -24,7 +12,7 @@
		"paramRequired": true
	},
	{
-		"paramName": "db",
+		"paramName": "mongodb",
		"paramLongName": "mongoDb",
		"paramDescription": "mongo database",
		"paramRequired": true

@@ -46,23 +34,5 @@
		"paramLongName": "mdInterpretation",
		"paramDescription": "metadata interpretation",
		"paramRequired": true
-	},
-	{
-		"paramName": "pgurl",
-		"paramLongName": "postgresUrl",
-		"paramDescription": "postgres url, example: jdbc:postgresql://localhost:5432/testdb",
-		"paramRequired": true
-	},
-	{
-		"paramName": "pguser",
-		"paramLongName": "postgresUser",
-		"paramDescription": "postgres user",
-		"paramRequired": false
-	},
-	{
-		"paramName": "pgpasswd",
-		"paramLongName": "postgresPassword",
-		"paramDescription": "postgres password",
-		"paramRequired": false
	}
]
@ -1,282 +0,0 @@
|
||||||
<workflow-app name="import Entities from aggretor to HDFS" xmlns="uri:oozie:workflow:0.5">
|
|
||||||
<parameters>
|
|
||||||
<property>
|
|
||||||
<name>workingPath</name>
|
|
||||||
<description>the base path to store hdfs file</description>
|
|
||||||
</property>
|
|
||||||
<property>
|
|
||||||
<name>graphRawPath</name>
|
|
||||||
<description>the graph Raw base path</description>
|
|
||||||
</property>
|
|
||||||
|
|
||||||
<property>
|
|
||||||
<name>postgresURL</name>
|
|
||||||
<description>the postgres URL to access to the database</description>
|
|
||||||
</property>
|
|
||||||
<property>
|
|
||||||
<name>postgresUser</name>
|
|
||||||
<description>the user postgres</description>
|
|
||||||
</property>
|
|
||||||
<property>
|
|
||||||
<name>postgresPassword</name>
|
|
||||||
<description>the password postgres</description>
|
|
||||||
</property>
|
|
||||||
<property>
|
|
||||||
<name>mongourl</name>
|
|
||||||
<description>mongoDB url, example: mongodb://[username:password@]host[:port]</description>
|
|
||||||
</property>
|
|
||||||
<property>
|
|
||||||
<name>mongoDb</name>
|
|
||||||
<description>mongo database</description>
|
|
||||||
</property>
|
|
||||||
<property>
|
|
||||||
<name>sparkDriverMemory</name>
|
|
||||||
<description>memory for driver process</description>
|
|
||||||
</property>
|
|
||||||
<property>
|
|
||||||
<name>sparkExecutorMemory</name>
|
|
||||||
<description>memory for individual executor</description>
|
|
||||||
</property>
|
|
||||||
<property>
|
|
||||||
<name>sparkExecutorCores</name>
|
|
||||||
<description>number of cores used by single executor</description>
|
|
||||||
</property>
|
|
||||||
</parameters>
|
|
||||||
|
|
||||||
<start to="ImportODFEntitiesFromMongoDB"/>
|
|
||||||
|
|
||||||
|
|
||||||
<kill name="Kill">
|
|
||||||
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
|
|
||||||
</kill>
|
|
||||||
|
|
||||||
<action name="ResetWorkingPath">
|
|
||||||
<fs>
|
|
||||||
<delete path='${workingPath}'/>
|
|
||||||
<mkdir path='${workingPath}'/>
|
|
||||||
</fs>
|
|
||||||
<ok to="ImportEntitiesFromPostgres"/>
|
|
||||||
<error to="Kill"/>
|
|
||||||
</action>
|
|
||||||
|
|
||||||
<action name="ImportEntitiesFromPostgres">
|
|
||||||
<java>
|
|
||||||
<job-tracker>${jobTracker}</job-tracker>
|
|
||||||
<name-node>${nameNode}</name-node>
|
|
||||||
<main-class>eu.dnetlib.dhp.migration.MigrateDbEntitiesApplication</main-class>
|
|
||||||
<arg>-p</arg><arg>${workingPath}/db_entities</arg>
|
|
||||||
<arg>-n</arg><arg>${nameNode}</arg>
|
|
||||||
<arg>-u</arg><arg>${hdfsUser}</arg>
|
|
||||||
<arg>-dburl</arg><arg>${postgresURL}</arg>
|
|
||||||
<arg>-dbuser</arg><arg>${postgresUser}</arg>
|
|
||||||
<arg>-dbpasswd</arg><arg>${postgresPassword}</arg>
|
|
||||||
</java>
|
|
||||||
<ok to="ImportODFEntitiesFromMongoDB"/>
|
|
||||||
<error to="Kill"/>
|
|
||||||
</action>
|
|
||||||
|
|
||||||
<action name="ImportODFEntitiesFromMongoDB">
|
|
||||||
<java>
|
|
||||||
<job-tracker>${jobTracker}</job-tracker>
|
|
||||||
<name-node>${nameNode}</name-node>
|
|
||||||
<main-class>eu.dnetlib.dhp.migration.MigrateMongoMdstoresApplication</main-class>
|
|
||||||
<arg>-p</arg><arg>${workingPath}/odf_entities</arg>
|
|
||||||
<arg>-n</arg><arg>${nameNode}</arg>
|
|
||||||
<arg>-u</arg><arg>${hdfsUser}</arg>
|
|
||||||
<arg>-mongourl</arg><arg>${mongourl}</arg>
|
|
||||||
<arg>-db</arg><arg>${mongoDb}</arg>
|
|
||||||
<arg>-f</arg><arg>ODF</arg>
|
|
||||||
<arg>-l</arg><arg>store</arg>
|
|
||||||
<arg>-i</arg><arg>cleaned</arg>
|
|
||||||
<arg>-pgurl</arg><arg>${postgresURL}</arg>
|
|
||||||
<arg>-pguser</arg><arg>${postgresUser}</arg>
|
|
||||||
<arg>-pgpasswd</arg><arg>${postgresPassword}</arg>
|
|
||||||
</java>
|
|
||||||
<ok to="ImportOAFEntitiesFromMongoDB"/>
|
|
||||||
<error to="Kill"/>
|
|
||||||
</action>
|
|
||||||
|
|
||||||
<action name="ImportOAFEntitiesFromMongoDB">
|
|
||||||
<java>
|
|
||||||
<job-tracker>${jobTracker}</job-tracker>
|
|
||||||
<name-node>${nameNode}</name-node>
|
|
||||||
<main-class>eu.dnetlib.dhp.migration.MigrateMongoMdstoresApplication</main-class>
|
|
||||||
<arg>-p</arg><arg>${workingPath}/oaf_entities</arg>
|
|
||||||
<arg>-n</arg><arg>${nameNode}</arg>
|
|
||||||
<arg>-u</arg><arg>${hdfsUser}</arg>
|
|
||||||
<arg>-mongourl</arg><arg>${mongourl}</arg>
|
|
||||||
<arg>-db</arg><arg>${mongoDb}</arg>
|
|
||||||
<arg>-f</arg><arg>OAF</arg>
|
|
||||||
<arg>-l</arg><arg>store</arg>
|
|
||||||
<arg>-i</arg><arg>cleaned</arg>
|
|
||||||
<arg>-pgurl</arg><arg>${postgresURL}</arg>
|
|
||||||
<arg>-pguser</arg><arg>${postgresUser}</arg>
|
|
||||||
<arg>-pgpasswd</arg><arg>${postgresPassword}</arg>
|
|
||||||
</java>
|
|
||||||
<ok to="ExtractPublication"/>
|
|
||||||
<error to="Kill"/>
|
|
||||||
</action>
|
|
||||||
|
|
||||||
|
|
||||||
<action name="ExtractPublication">
|
|
||||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
|
||||||
<job-tracker>${jobTracker}</job-tracker>
|
|
||||||
<name-node>${nameNode}</name-node>
|
|
||||||
<master>yarn-cluster</master>
|
|
||||||
<mode>cluster</mode>
|
|
||||||
<name>ExtractEntities: publication</name>
|
|
||||||
<class>eu.dnetlib.dhp.migration.ExtractEntitiesFromHDFSJob</class>
|
|
||||||
<jar>dhp-aggregation-${projectVersion}.jar</jar>
|
|
||||||
<spark-opts>--executor-memory ${sparkExecutorMemory} --executor-cores ${sparkExecutorCores} --driver-memory=${sparkDriverMemory} --conf spark.extraListeners="com.cloudera.spark.lineage.NavigatorAppListener" --conf spark.sql.queryExecutionListeners="com.cloudera.spark.lineage.NavigatorQueryListener" --conf spark.sql.warehouse.dir="/user/hive/warehouse"</spark-opts>
|
|
||||||
<arg>-mt</arg> <arg>yarn-cluster</arg>
|
|
||||||
<arg>--sourcePath</arg><arg>${workingPath}</arg>
|
|
||||||
<arg>-g</arg><arg>${graphRawPath}/publication</arg>
|
|
||||||
<arg>-e</arg><arg>publication</arg>
|
|
||||||
</spark>
|
|
||||||
<ok to="ExtractDataset"/>
|
|
||||||
<error to="Kill"/>
|
|
||||||
</action>
|
|
||||||
|
|
||||||
|
|
||||||
<action name="ExtractDataset">
|
|
||||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
|
||||||
<job-tracker>${jobTracker}</job-tracker>
|
|
||||||
<name-node>${nameNode}</name-node>
|
|
||||||
<master>yarn-cluster</master>
|
|
||||||
<mode>cluster</mode>
|
|
||||||
<name>ExtractEntities: dataset</name>
|
|
||||||
<class>eu.dnetlib.dhp.migration.ExtractEntitiesFromHDFSJob</class>
|
|
||||||
<jar>dhp-aggregation-${projectVersion}.jar</jar>
|
|
||||||
<spark-opts>--executor-memory ${sparkExecutorMemory} --executor-cores ${sparkExecutorCores} --driver-memory=${sparkDriverMemory} --conf spark.extraListeners="com.cloudera.spark.lineage.NavigatorAppListener" --conf spark.sql.queryExecutionListeners="com.cloudera.spark.lineage.NavigatorQueryListener" --conf spark.sql.warehouse.dir="/user/hive/warehouse"</spark-opts>
|
|
||||||
<arg>-mt</arg> <arg>yarn-cluster</arg>
|
|
||||||
<arg>--sourcePath</arg><arg>${workingPath}</arg>
|
|
||||||
<arg>-g</arg><arg>${graphRawPath}/dataset</arg>
|
|
||||||
<arg>-e</arg><arg>dataset</arg>
|
|
||||||
</spark>
|
|
||||||
<ok to="ExtractSoftware"/>
|
|
||||||
<error to="Kill"/>
|
|
||||||
</action>
|
|
||||||
|
|
||||||
|
|
||||||
<action name="ExtractSoftware">
|
|
||||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
|
||||||
<job-tracker>${jobTracker}</job-tracker>
|
|
||||||
<name-node>${nameNode}</name-node>
|
|
||||||
<master>yarn-cluster</master>
|
|
||||||
<mode>cluster</mode>
|
|
||||||
<name>ExtractEntities: software</name>
|
|
||||||
<class>eu.dnetlib.dhp.migration.ExtractEntitiesFromHDFSJob</class>
|
|
||||||
<jar>dhp-aggregation-${projectVersion}.jar</jar>
|
|
||||||
<spark-opts>--executor-memory ${sparkExecutorMemory} --executor-cores ${sparkExecutorCores} --driver-memory=${sparkDriverMemory} --conf spark.extraListeners="com.cloudera.spark.lineage.NavigatorAppListener" --conf spark.sql.queryExecutionListeners="com.cloudera.spark.lineage.NavigatorQueryListener" --conf spark.sql.warehouse.dir="/user/hive/warehouse"</spark-opts>
|
|
||||||
<arg>-mt</arg> <arg>yarn-cluster</arg>
|
|
||||||
<arg>--sourcePath</arg><arg>${workingPath}</arg>
|
|
||||||
<arg>-g</arg><arg>${graphRawPath}/software</arg>
|
|
||||||
<arg>-e</arg><arg>software</arg>
|
|
||||||
</spark>
|
|
||||||
<ok to="ExtractORP"/>
|
|
||||||
<error to="Kill"/>
|
|
||||||
</action>
|
|
||||||
|
|
||||||
|
|
||||||
<action name="ExtractORP">
|
|
||||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
|
||||||
<job-tracker>${jobTracker}</job-tracker>
|
|
||||||
<name-node>${nameNode}</name-node>
|
|
||||||
<master>yarn-cluster</master>
|
|
||||||
<mode>cluster</mode>
|
|
||||||
<name>ExtractEntities: otherresearchproduct</name>
|
|
||||||
<class>eu.dnetlib.dhp.migration.ExtractEntitiesFromHDFSJob</class>
|
|
||||||
<jar>dhp-aggregation-${projectVersion}.jar</jar>
|
|
||||||
<spark-opts>--executor-memory ${sparkExecutorMemory} --executor-cores ${sparkExecutorCores} --driver-memory=${sparkDriverMemory} --conf spark.extraListeners="com.cloudera.spark.lineage.NavigatorAppListener" --conf spark.sql.queryExecutionListeners="com.cloudera.spark.lineage.NavigatorQueryListener" --conf spark.sql.warehouse.dir="/user/hive/warehouse"</spark-opts>
|
|
||||||
<arg>-mt</arg> <arg>yarn-cluster</arg>
|
|
||||||
<arg>--sourcePath</arg><arg>${workingPath}</arg>
|
|
||||||
<arg>-g</arg><arg>${graphRawPath}/otherresearchproduct</arg>
|
|
||||||
<arg>-e</arg><arg>otherresearchproduct</arg>
|
|
||||||
</spark>
|
|
||||||
<ok to="ExtractDatasource"/>
|
|
||||||
<error to="Kill"/>
|
|
||||||
</action>
|
|
||||||
|
|
||||||
|
|
||||||
<action name="ExtractDatasource">
|
|
||||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
|
||||||
<job-tracker>${jobTracker}</job-tracker>
|
|
||||||
<name-node>${nameNode}</name-node>
|
|
||||||
<master>yarn-cluster</master>
|
|
||||||
<mode>cluster</mode>
|
|
||||||
<name>ExtractEntities: datasource</name>
|
|
||||||
<class>eu.dnetlib.dhp.migration.ExtractEntitiesFromHDFSJob</class>
|
|
||||||
<jar>dhp-aggregation-${projectVersion}.jar</jar>
|
|
||||||
<spark-opts>--executor-memory ${sparkExecutorMemory} --executor-cores ${sparkExecutorCores} --driver-memory=${sparkDriverMemory} --conf spark.extraListeners="com.cloudera.spark.lineage.NavigatorAppListener" --conf spark.sql.queryExecutionListeners="com.cloudera.spark.lineage.NavigatorQueryListener" --conf spark.sql.warehouse.dir="/user/hive/warehouse"</spark-opts>
|
|
||||||
<arg>-mt</arg> <arg>yarn-cluster</arg>
|
|
||||||
<arg>--sourcePath</arg><arg>${workingPath}</arg>
|
|
||||||
<arg>-g</arg><arg>${graphRawPath}/datasource</arg>
|
|
||||||
<arg>-e</arg><arg>datasource</arg>
|
|
||||||
</spark>
|
|
||||||
<ok to="ExtractOrganization"/>
|
|
||||||
<error to="Kill"/>
|
|
||||||
</action>
|
|
||||||
|
|
||||||
|
|
||||||
<action name="ExtractOrganization">
|
|
||||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
|
||||||
<job-tracker>${jobTracker}</job-tracker>
|
|
||||||
<name-node>${nameNode}</name-node>
|
|
||||||
<master>yarn-cluster</master>
|
|
||||||
<mode>cluster</mode>
|
|
||||||
<name>ExtractEntities: organization</name>
|
|
||||||
<class>eu.dnetlib.dhp.migration.ExtractEntitiesFromHDFSJob</class>
|
|
||||||
<jar>dhp-aggregation-${projectVersion}.jar</jar>
|
|
||||||
<spark-opts>--executor-memory ${sparkExecutorMemory} --executor-cores ${sparkExecutorCores} --driver-memory=${sparkDriverMemory} --conf spark.extraListeners="com.cloudera.spark.lineage.NavigatorAppListener" --conf spark.sql.queryExecutionListeners="com.cloudera.spark.lineage.NavigatorQueryListener" --conf spark.sql.warehouse.dir="/user/hive/warehouse"</spark-opts>
|
|
||||||
<arg>-mt</arg> <arg>yarn-cluster</arg>
|
|
||||||
<arg>--sourcePath</arg><arg>${workingPath}</arg>
|
|
||||||
<arg>-g</arg><arg>${graphRawPath}/organization</arg>
|
|
||||||
<arg>-e</arg><arg>organization</arg>
|
|
||||||
</spark>
|
|
||||||
<ok to="ExtractProject"/>
|
|
||||||
<error to="Kill"/>
|
|
||||||
</action>
|
|
||||||
|
|
||||||
|
|
||||||
<action name="ExtractProject">
|
|
||||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
|
||||||
<job-tracker>${jobTracker}</job-tracker>
|
|
||||||
<name-node>${nameNode}</name-node>
|
|
||||||
<master>yarn-cluster</master>
|
|
||||||
<mode>cluster</mode>
|
|
||||||
<name>ExtractEntities: project</name>
|
|
||||||
<class>eu.dnetlib.dhp.migration.ExtractEntitiesFromHDFSJob</class>
|
|
||||||
<jar>dhp-aggregation-${projectVersion}.jar</jar>
|
|
||||||
<spark-opts>--executor-memory ${sparkExecutorMemory} --executor-cores ${sparkExecutorCores} --driver-memory=${sparkDriverMemory} --conf spark.extraListeners="com.cloudera.spark.lineage.NavigatorAppListener" --conf spark.sql.queryExecutionListeners="com.cloudera.spark.lineage.NavigatorQueryListener" --conf spark.sql.warehouse.dir="/user/hive/warehouse"</spark-opts>
|
|
||||||
<arg>-mt</arg> <arg>yarn-cluster</arg>
|
|
||||||
<arg>--sourcePath</arg><arg>${workingPath}</arg>
|
|
||||||
<arg>-g</arg><arg>${graphRawPath}/project</arg>
|
|
||||||
<arg>-e</arg><arg>project</arg>
|
|
||||||
</spark>
|
|
||||||
<ok to="ExtractRelation"/>
|
|
||||||
<error to="Kill"/>
|
|
||||||
</action>
|
|
||||||
|
|
||||||
|
|
||||||
<action name="ExtractRelation">
|
|
||||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
|
||||||
<job-tracker>${jobTracker}</job-tracker>
|
|
||||||
<name-node>${nameNode}</name-node>
|
|
||||||
<master>yarn-cluster</master>
|
|
||||||
<mode>cluster</mode>
|
|
||||||
<name>ExtractEntities: relation</name>
|
|
||||||
<class>eu.dnetlib.dhp.migration.ExtractEntitiesFromHDFSJob</class>
|
|
||||||
<jar>dhp-aggregation-${projectVersion}.jar</jar>
|
|
||||||
<spark-opts>--executor-memory ${sparkExecutorMemory} --executor-cores ${sparkExecutorCores} --driver-memory=${sparkDriverMemory} --conf spark.extraListeners="com.cloudera.spark.lineage.NavigatorAppListener" --conf spark.sql.queryExecutionListeners="com.cloudera.spark.lineage.NavigatorQueryListener" --conf spark.sql.warehouse.dir="/user/hive/warehouse"</spark-opts>
|
|
||||||
<arg>-mt</arg> <arg>yarn-cluster</arg>
|
|
||||||
<arg>--sourcePath</arg><arg>${workingPath}</arg>
|
|
||||||
<arg>-g</arg><arg>${graphRawPath}/relation</arg>
|
|
||||||
<arg>-e</arg><arg>relation</arg>
|
|
||||||
</spark>
|
|
||||||
<ok to="End"/>
|
|
||||||
<error to="Kill"/>
|
|
||||||
</action>
|
|
||||||
|
|
||||||
|
|
||||||
<end name="End"/>
|
|
||||||
</workflow-app>
|
|
|
@@ -0,0 +1 @@
SELECT source_type, source_id, target_type, target_id, semantics FROM claim WHERE approved=TRUE;
@@ -0,0 +1,90 @@
SELECT
	p.id AS projectid,
	p.code AS code,
	p.websiteurl AS websiteurl,
	p.acronym AS acronym,
	p.title AS title,
	p.startdate AS startdate,
	p.enddate AS enddate,
	p.call_identifier AS callidentifier,
	p.keywords AS keywords,
	p.duration AS duration,
	p.ec_sc39 AS ecsc39,
	p.oa_mandate_for_publications AS oamandatepublications,
	p.ec_article29_3 AS ecarticle29_3,
	p.dateofcollection AS dateofcollection,
	p.lastupdate AS dateoftransformation,
	p.inferred AS inferred,
	p.deletedbyinference AS deletedbyinference,
	p.trust AS trust,
	p.inferenceprovenance AS inferenceprovenance,
	p.optional1 AS optional1,
	p.optional2 AS optional2,
	p.jsonextrainfo AS jsonextrainfo,
	p.contactfullname AS contactfullname,
	p.contactfax AS contactfax,
	p.contactphone AS contactphone,
	p.contactemail AS contactemail,
	p.summary AS summary,
	p.currency AS currency,
	p.totalcost AS totalcost,
	p.fundedamount AS fundedamount,
	dc.id AS collectedfromid,
	dc.officialname AS collectedfromname,
	ctc.code || '@@@' || ctc.name || '@@@' || cts.code || '@@@' || cts.name AS contracttype,
	pac.code || '@@@' || pac.name || '@@@' || pas.code || '@@@' || pas.name AS provenanceaction,
	array_agg(DISTINCT i.pid || '###' || i.issuertype) AS pid,
	array_agg(DISTINCT s.name || '###' || sc.code || '@@@' || sc.name || '@@@' || ss.code || '@@@' || ss.name) AS subjects,
	array_agg(DISTINCT fp.path) AS fundingtree
FROM projects p
	LEFT OUTER JOIN class pac ON (pac.code = p.provenanceactionclass)
	LEFT OUTER JOIN scheme pas ON (pas.code = p.provenanceactionscheme)

	LEFT OUTER JOIN projectpids pp ON (pp.project = p.id)
	LEFT OUTER JOIN dsm_identities i ON (i.pid = pp.pid)

	LEFT OUTER JOIN dsm_datasources dc ON (dc.id = p.collectedfrom)

	LEFT OUTER JOIN project_fundingpath pf ON (pf.project = p.id)
	LEFT OUTER JOIN fundingpaths fp ON (fp.id = pf.funding)

	LEFT OUTER JOIN project_subject ps ON (ps.project = p.id)
	LEFT OUTER JOIN subjects s ON (s.id = ps.subject)

	LEFT OUTER JOIN class sc ON (sc.code = s.semanticclass)
	LEFT OUTER JOIN scheme ss ON (ss.code = s.semanticscheme)

	LEFT OUTER JOIN class ctc ON (ctc.code = p.contracttypeclass)
	LEFT OUTER JOIN scheme cts ON (cts.code = p.contracttypescheme)

GROUP BY
	p.id,
	p.code,
	p.websiteurl,
	p.acronym,
	p.title,
	p.startdate,
	p.enddate,
	p.call_identifier,
	p.keywords,
	p.duration,
	p.ec_sc39,
	p.oa_mandate_for_publications,
	p.ec_article29_3,
	p.dateofcollection,
	p.inferred,
	p.deletedbyinference,
	p.trust,
	p.inferenceprovenance,
	p.contactfullname,
	p.contactfax,
	p.contactphone,
	p.contactemail,
	p.summary,
	p.currency,
	p.totalcost,
	p.fundedamount,
	dc.id,
	dc.officialname,
	pac.code, pac.name, pas.code, pas.name,
	ctc.code, ctc.name, cts.code, cts.name;
@@ -0,0 +1,5 @@
[
	{"paramName":"mt", "paramLongName":"master", "paramDescription": "should be local or yarn", "paramRequired": true},
	{"paramName":"is", "paramLongName":"isLookupUrl", "paramDescription": "URL of the isLookUp Service", "paramRequired": true},
	{"paramName":"i", "paramLongName":"inputPaths", "paramDescription": "URL of the isLookUp Service", "paramRequired": true}
]
@@ -0,0 +1,30 @@
<configuration>
	<property>
		<name>jobTracker</name>
		<value>yarnRM</value>
	</property>
	<property>
		<name>nameNode</name>
		<value>hdfs://nameservice1</value>
	</property>
	<property>
		<name>sourceNN</name>
		<value>webhdfs://namenode2.hadoop.dm.openaire.eu:50071</value>
	</property>
	<property>
		<name>oozie.use.system.libpath</name>
		<value>true</value>
	</property>
	<property>
		<name>oozie.action.sharelib.for.spark</name>
		<value>spark2</value>
	</property>
	<property>
		<name>spark2YarnHistoryServerAddress</name>
		<value>http://iis-cdh5-test-gw.ocean.icm.edu.pl:18088</value>
	</property>
	<property>
		<name>spark2EventLogDir</name>
		<value>/user/spark/applicationHistory</value>
	</property>
</configuration>
@ -0,0 +1,111 @@
|
||||||
|
<workflow-app xmlns='uri:oozie:workflow:0.5' name='migrate_actions'>
|
||||||
|
<parameters>
|
||||||
|
<property>
|
||||||
|
<name>sourceNN</name>
|
||||||
|
<description>the source name node</description>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>isLookupUrl</name>
|
||||||
|
<description>the isLookup service endpoint</description>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>workingDirectory</name>
|
||||||
|
<value>/tmp/actionsets</value>
|
||||||
|
<description>working directory</description>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>distcp_memory_mb</name>
|
||||||
|
<value>6144</value>
|
||||||
|
<description>memory for distcp copying actionsets from remote cluster</description>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>distcp_task_timeout</name>
|
||||||
|
<value>60000000</value>
|
||||||
|
<description>timeout for distcp copying actions from remote cluster</description>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>distcp_num_maps</name>
|
||||||
|
<value>1</value>
|
||||||
|
<description>mmaximum number of map tasks used in the distcp process</description>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>transform_only</name>
|
||||||
|
<description>activate tranform-only mode. Only apply transformation step</description>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>sparkDriverMemory</name>
|
||||||
|
<description>memory for driver process</description>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>sparkExecutorMemory</name>
|
||||||
|
<description>memory for individual executor</description>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>sparkExecutorCores</name>
|
||||||
|
<description>number of cores used by single executor</description>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>spark2YarnHistoryServerAddress</name>
|
||||||
|
<description>spark 2.* yarn history server address</description>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>spark2EventLogDir</name>
|
||||||
|
<description>spark 2.* event log dir location</description>
|
||||||
|
</property>
|
||||||
|
</parameters>
|
||||||
|
|
||||||
|
<start to='migrate_actionsets' />
|
||||||
|
|
||||||
|
<action name='migrate_actionsets'>
|
||||||
|
<java>
|
||||||
|
<job-tracker>${jobTracker}</job-tracker>
|
||||||
|
<name-node>${nameNode}</name-node>
|
||||||
|
<main-class>eu.dnetlib.dhp.migration.actions.MigrateActionSet</main-class>
|
||||||
|
<java-opt>-Dmapred.task.timeout=${distcp_task_timeout}</java-opt>
|
||||||
|
<arg>-is</arg><arg>${isLookupUrl}</arg>
|
||||||
|
<arg>-sn</arg><arg>${sourceNN}</arg>
|
||||||
|
<arg>-tn</arg><arg>${nameNode}</arg>
|
||||||
|
<arg>-w</arg><arg>${workingDirectory}</arg>
|
||||||
|
<arg>-nm</arg><arg>${distcp_num_maps}</arg>
|
||||||
|
<arg>-mm</arg><arg>${distcp_memory_mb}</arg>
|
||||||
|
<arg>-tt</arg><arg>${distcp_task_timeout}</arg>
|
||||||
|
<arg>-tr</arg><arg>${transform_only}</arg>
|
||||||
|
<capture-output/>
|
||||||
|
</java>
|
||||||
|
<ok to="transform_actions" />
|
||||||
|
<error to="fail" />
|
||||||
|
</action>
|
||||||
|
|
||||||
|
<action name="transform_actions">
|
||||||
|
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||||
|
<job-tracker>${jobTracker}</job-tracker>
|
||||||
|
<name-node>${nameNode}</name-node>
|
||||||
|
<master>yarn</master>
|
||||||
|
<mode>cluster</mode>
|
||||||
|
<name>transform_actions</name>
|
||||||
|
<class>eu.dnetlib.dhp.migration.actions.TransformActions</class>
|
||||||
|
<jar>dhp-aggregation-${projectVersion}.jar</jar>
|
||||||
|
<spark-opts>
|
||||||
|
--executor-cores ${sparkExecutorCores}
|
||||||
|
--executor-memory ${sparkExecutorMemory}
|
||||||
|
--driver-memory=${sparkDriverMemory}
|
||||||
|
--conf spark.extraListeners="com.cloudera.spark.lineage.NavigatorAppListener"
|
||||||
|
--conf spark.sql.queryExecutionListeners="com.cloudera.spark.lineage.NavigatorQueryListener"
|
||||||
|
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||||
|
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||||
|
</spark-opts>
|
||||||
|
<arg>-mt</arg><arg>yarn</arg>
|
||||||
|
<arg>-is</arg><arg>${isLookupUrl}</arg>
|
||||||
|
<arg>--inputPaths</arg><arg>${wf:actionData('migrate_actionsets')['target_paths']}</arg>
|
||||||
|
</spark>
|
||||||
|
<ok to="end"/>
|
||||||
|
<error to="fail"/>
|
||||||
|
</action>
|
||||||
|
|
||||||
|
<kill name="fail">
|
||||||
|
<message>migrate_actions failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
|
||||||
|
</kill>
|
||||||
|
|
||||||
|
<end name="end" />
|
||||||
|
|
||||||
|
</workflow-app>
|
|
@@ -15,8 +15,4 @@
		<name>oozie.action.sharelib.for.spark</name>
		<value>spark2</value>
	</property>
-	<property>
-		<name>hdfsUser</name>
-		<value>dnet</value>
-	</property>
</configuration>
@ -0,0 +1,169 @@
|
||||||
|
<workflow-app name="import Claims as Graph" xmlns="uri:oozie:workflow:0.5">
|
||||||
|
<parameters>
|
||||||
|
<property>
|
||||||
|
<name>migrationClaimsPathStep1</name>
|
||||||
|
<description>the base path to store hdfs file</description>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>migrationClaimsPathStep2</name>
|
||||||
|
<description>the temporary path to store entities before dispatching</description>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>migrationClaimsPathStep3</name>
|
||||||
|
<description>the graph Raw base path</description>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>postgresURL</name>
|
||||||
|
<description>the postgres URL to access to the database</description>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>postgresUser</name>
|
||||||
|
<description>the user postgres</description>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>postgresPassword</name>
|
||||||
|
<description>the password postgres</description>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>mongoURL</name>
|
||||||
|
<description>mongoDB url, example: mongodb://[username:password@]host[:port]</description>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>mongoDb</name>
|
||||||
|
<description>mongo database</description>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>sparkDriverMemory</name>
|
||||||
|
<description>memory for driver process</description>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>sparkExecutorMemory</name>
|
||||||
|
<description>memory for individual executor</description>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>sparkExecutorCores</name>
|
||||||
|
<description>number of cores used by single executor</description>
|
||||||
|
</property>
|
||||||
|
</parameters>
|
||||||
|
|
||||||
|
<start to="ResetWorkingPath"/>
|
||||||
|
|
||||||
|
<kill name="Kill">
|
||||||
|
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
|
||||||
|
</kill>
|
||||||
|
|
||||||
|
<action name="ResetWorkingPath">
|
||||||
|
<fs>
|
||||||
|
<delete path='${migrationClaimsPathStep1}'/>
|
||||||
|
<mkdir path='${migrationClaimsPathStep1}'/>
|
||||||
|
</fs>
|
||||||
|
<ok to="ImportDBClaims"/>
|
||||||
|
<error to="Kill"/>
|
||||||
|
</action>
|
||||||
|
|
||||||
|
<action name="ImportDBClaims">
|
||||||
|
<java>
|
||||||
|
<job-tracker>${jobTracker}</job-tracker>
|
||||||
|
<name-node>${nameNode}</name-node>
|
||||||
|
<main-class>eu.dnetlib.dhp.migration.step1.MigrateDbEntitiesApplication</main-class>
|
||||||
|
<arg>-p</arg><arg>${migrationClaimsPathStep1}/db_claims</arg>
|
||||||
|
<arg>-pgurl</arg><arg>${postgresURL}</arg>
|
||||||
|
<arg>-pguser</arg><arg>${postgresUser}</arg>
|
||||||
|
<arg>-pgpasswd</arg><arg>${postgresPassword}</arg>
|
||||||
|
<arg>-a</arg><arg>claims</arg>
|
||||||
|
</java>
|
||||||
|
<ok to="ImportODFClaims"/>
|
||||||
|
<error to="Kill"/>
|
||||||
|
</action>
|
||||||
|
|
||||||
|
<action name="ImportODFClaims">
|
||||||
|
<java>
|
||||||
|
<job-tracker>${jobTracker}</job-tracker>
|
||||||
|
<name-node>${nameNode}</name-node>
|
||||||
|
<main-class>eu.dnetlib.dhp.migration.step1.MigrateMongoMdstoresApplication</main-class>
|
||||||
|
<arg>-p</arg><arg>${migrationClaimsPathStep1}/odf_claims</arg>
|
||||||
|
<arg>-mongourl</arg><arg>${mongoURL}</arg>
|
||||||
|
<arg>-mongodb</arg><arg>${mongoDb}</arg>
|
||||||
|
<arg>-f</arg><arg>ODF</arg>
|
||||||
|
<arg>-l</arg><arg>store</arg>
|
||||||
|
<arg>-i</arg><arg>claim</arg>
|
||||||
|
</java>
|
||||||
|
<ok to="ImportOAFClaims"/>
|
||||||
|
<error to="Kill"/>
|
||||||
|
</action>
|
||||||
|
|
||||||
|
<action name="ImportOAFClaims">
|
||||||
|
<java>
|
||||||
|
<job-tracker>${jobTracker}</job-tracker>
|
||||||
|
<name-node>${nameNode}</name-node>
|
||||||
|
<main-class>eu.dnetlib.dhp.migration.step1.MigrateMongoMdstoresApplication</main-class>
|
||||||
|
<arg>-p</arg><arg>${migrationClaimsPathStep1}/oaf_claims</arg>
|
||||||
|
<arg>-mongourl</arg><arg>${mongoURL}</arg>
|
||||||
|
<arg>-mongodb</arg><arg>${mongoDb}</arg>
|
||||||
|
<arg>-f</arg><arg>OAF</arg>
|
||||||
|
<arg>-l</arg><arg>store</arg>
|
||||||
|
<arg>-i</arg><arg>claim</arg>
|
||||||
|
</java>
|
||||||
|
<ok to="ResetClaimEntities"/>
|
||||||
|
<error to="Kill"/>
|
||||||
|
</action>
|
||||||
|
|
||||||
|
<action name="ResetClaimEntities">
|
||||||
|
<fs>
|
||||||
|
<delete path='${migrationClaimsPathStep2}'/>
|
||||||
|
<mkdir path='${migrationClaimsPathStep2}'/>
|
||||||
|
</fs>
|
||||||
|
<ok to="GenerateClaimEntities"/>
|
||||||
|
<error to="Kill"/>
|
||||||
|
</action>
|
||||||
|
|
||||||
|
<action name="GenerateClaimEntities">
|
||||||
|
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||||
|
<job-tracker>${jobTracker}</job-tracker>
|
||||||
|
<name-node>${nameNode}</name-node>
|
||||||
|
<master>yarn-cluster</master>
|
||||||
|
<mode>cluster</mode>
|
||||||
|
<name>GenerateClaimEntities</name>
|
||||||
|
<class>eu.dnetlib.dhp.migration.step2.GenerateEntitiesApplication</class>
|
||||||
|
<jar>dhp-aggregation-${projectVersion}.jar</jar>
|
||||||
|
<spark-opts>--executor-memory ${sparkExecutorMemory} --executor-cores ${sparkExecutorCores} --driver-memory=${sparkDriverMemory} --conf spark.extraListeners="com.cloudera.spark.lineage.NavigatorAppListener" --conf spark.sql.queryExecutionListeners="com.cloudera.spark.lineage.NavigatorQueryListener" --conf spark.sql.warehouse.dir="/user/hive/warehouse"</spark-opts>
|
||||||
|
<arg>-mt</arg> <arg>yarn-cluster</arg>
|
||||||
|
<arg>-s</arg><arg>${migrationClaimsPathStep1}/db_claims,${migrationClaimsPathStep1}/oaf_claims,${migrationClaimsPathStep1}/odf_claims</arg>
|
||||||
|
<arg>-t</arg><arg>${migrationClaimsPathStep2}/claim_entities</arg>
|
||||||
|
<arg>-pgurl</arg><arg>${postgresURL}</arg>
|
||||||
|
<arg>-pguser</arg><arg>${postgresUser}</arg>
|
||||||
|
<arg>-pgpasswd</arg><arg>${postgresPassword}</arg>
|
||||||
|
</spark>
|
||||||
|
<ok to="ResetClaimGraph"/>
|
||||||
|
<error to="Kill"/>
|
||||||
|
</action>
|
||||||
|
|
||||||
|
<action name="ResetClaimGraph">
|
||||||
|
<fs>
|
||||||
|
<delete path='${migrationClaimsPathStep3}'/>
|
||||||
|
<mkdir path='${migrationClaimsPathStep3}'/>
|
||||||
|
</fs>
|
||||||
|
<ok to="GenerateClaimGraph"/>
|
||||||
|
<error to="Kill"/>
|
||||||
|
</action>
|
||||||
|
|
||||||
|
<action name="GenerateClaimGraph">
|
||||||
|
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||||
|
<job-tracker>${jobTracker}</job-tracker>
|
||||||
|
<name-node>${nameNode}</name-node>
|
||||||
|
<master>yarn-cluster</master>
|
||||||
|
<mode>cluster</mode>
|
||||||
|
<name>GenerateClaimGraph</name>
|
||||||
|
<class>eu.dnetlib.dhp.migration.step3.DispatchEntitiesApplication</class>
|
||||||
|
<jar>dhp-aggregation-${projectVersion}.jar</jar>
|
||||||
|
<spark-opts>--executor-memory ${sparkExecutorMemory} --executor-cores ${sparkExecutorCores} --driver-memory=${sparkDriverMemory} --conf spark.extraListeners="com.cloudera.spark.lineage.NavigatorAppListener" --conf spark.sql.queryExecutionListeners="com.cloudera.spark.lineage.NavigatorQueryListener" --conf spark.sql.warehouse.dir="/user/hive/warehouse"</spark-opts>
|
||||||
|
<arg>-mt</arg> <arg>yarn-cluster</arg>
|
||||||
|
<arg>-s</arg><arg>${migrationClaimsPathStep2}/claim_entities</arg>
|
||||||
|
<arg>-g</arg><arg>${migrationClaimsPathStep3}</arg>
|
||||||
|
</spark>
|
||||||
|
<ok to="End"/>
|
||||||
|
<error to="Kill"/>
|
||||||
|
</action>
|
||||||
|
|
||||||
|
<end name="End"/>
|
||||||
|
</workflow-app>
|
|
@@ -0,0 +1,18 @@
<configuration>
	<property>
		<name>jobTracker</name>
		<value>yarnRM</value>
	</property>
	<property>
		<name>nameNode</name>
		<value>hdfs://nameservice1</value>
	</property>
	<property>
		<name>oozie.use.system.libpath</name>
		<value>true</value>
	</property>
	<property>
		<name>oozie.action.sharelib.for.spark</name>
		<value>spark2</value>
	</property>
</configuration>
@@ -0,0 +1,168 @@
<workflow-app name="import regular entities as Graph (all steps)" xmlns="uri:oozie:workflow:0.5">
    <parameters>
        <property>
            <name>migrationPathStep1</name>
            <description>the base path to store hdfs file</description>
        </property>
        <property>
            <name>migrationPathStep2</name>
            <description>the temporary path to store entities before dispatching</description>
        </property>
        <property>
            <name>migrationPathStep3</name>
            <description>the graph Raw base path</description>
        </property>
        <property>
            <name>postgresURL</name>
            <description>the postgres URL to access to the database</description>
        </property>
        <property>
            <name>postgresUser</name>
            <description>the user postgres</description>
        </property>
        <property>
            <name>postgresPassword</name>
            <description>the password postgres</description>
        </property>
        <property>
            <name>mongoURL</name>
            <description>mongoDB url, example: mongodb://[username:password@]host[:port]</description>
        </property>
        <property>
            <name>mongoDb</name>
            <description>mongo database</description>
        </property>
        <property>
            <name>sparkDriverMemory</name>
            <description>memory for driver process</description>
        </property>
        <property>
            <name>sparkExecutorMemory</name>
            <description>memory for individual executor</description>
        </property>
        <property>
            <name>sparkExecutorCores</name>
            <description>number of cores used by single executor</description>
        </property>
    </parameters>

    <start to="ResetWorkingPath"/>

    <kill name="Kill">
        <message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
    </kill>

    <action name="ResetWorkingPath">
        <fs>
            <delete path='${migrationPathStep1}'/>
            <mkdir path='${migrationPathStep1}'/>
        </fs>
        <ok to="ImportDB"/>
        <error to="Kill"/>
    </action>

    <action name="ImportDB">
        <java>
            <job-tracker>${jobTracker}</job-tracker>
            <name-node>${nameNode}</name-node>
            <main-class>eu.dnetlib.dhp.migration.step1.MigrateDbEntitiesApplication</main-class>
            <arg>-p</arg><arg>${migrationPathStep1}/db_records</arg>
            <arg>-pgurl</arg><arg>${postgresURL}</arg>
            <arg>-pguser</arg><arg>${postgresUser}</arg>
            <arg>-pgpasswd</arg><arg>${postgresPassword}</arg>
        </java>
        <ok to="ImportODF"/>
        <error to="Kill"/>
    </action>

    <action name="ImportODF">
        <java>
            <job-tracker>${jobTracker}</job-tracker>
            <name-node>${nameNode}</name-node>
            <main-class>eu.dnetlib.dhp.migration.step1.MigrateMongoMdstoresApplication</main-class>
            <arg>-p</arg><arg>${migrationPathStep1}/odf_records</arg>
            <arg>-mongourl</arg><arg>${mongoURL}</arg>
            <arg>-mongodb</arg><arg>${mongoDb}</arg>
            <arg>-f</arg><arg>ODF</arg>
            <arg>-l</arg><arg>store</arg>
            <arg>-i</arg><arg>cleaned</arg>
        </java>
        <ok to="ImportOAF"/>
        <error to="Kill"/>
    </action>

    <action name="ImportOAF">
        <java>
            <job-tracker>${jobTracker}</job-tracker>
            <name-node>${nameNode}</name-node>
            <main-class>eu.dnetlib.dhp.migration.step1.MigrateMongoMdstoresApplication</main-class>
            <arg>-p</arg><arg>${migrationPathStep1}/oaf_records</arg>
            <arg>-mongourl</arg><arg>${mongoURL}</arg>
            <arg>-mongodb</arg><arg>${mongoDb}</arg>
            <arg>-f</arg><arg>OAF</arg>
            <arg>-l</arg><arg>store</arg>
            <arg>-i</arg><arg>cleaned</arg>
        </java>
        <ok to="ResetEntities"/>
        <error to="Kill"/>
    </action>

    <action name="ResetEntities">
        <fs>
            <delete path='${migrationPathStep2}'/>
            <mkdir path='${migrationPathStep2}'/>
        </fs>
        <ok to="GenerateEntities"/>
        <error to="Kill"/>
    </action>

    <action name="GenerateEntities">
        <spark xmlns="uri:oozie:spark-action:0.2">
            <job-tracker>${jobTracker}</job-tracker>
            <name-node>${nameNode}</name-node>
            <master>yarn-cluster</master>
            <mode>cluster</mode>
            <name>GenerateEntities</name>
            <class>eu.dnetlib.dhp.migration.step2.GenerateEntitiesApplication</class>
            <jar>dhp-aggregation-${projectVersion}.jar</jar>
            <spark-opts>--executor-memory ${sparkExecutorMemory} --executor-cores ${sparkExecutorCores} --driver-memory=${sparkDriverMemory} --conf spark.extraListeners="com.cloudera.spark.lineage.NavigatorAppListener" --conf spark.sql.queryExecutionListeners="com.cloudera.spark.lineage.NavigatorQueryListener" --conf spark.sql.warehouse.dir="/user/hive/warehouse"</spark-opts>
            <arg>-mt</arg> <arg>yarn-cluster</arg>
            <arg>-s</arg><arg>${migrationPathStep1}/db_records,${migrationPathStep1}/oaf_records,${migrationPathStep1}/odf_records</arg>
            <arg>-t</arg><arg>${migrationPathStep2}/all_entities</arg>
            <arg>-pgurl</arg><arg>${postgresURL}</arg>
            <arg>-pguser</arg><arg>${postgresUser}</arg>
            <arg>-pgpasswd</arg><arg>${postgresPassword}</arg>
        </spark>
        <ok to="ResetGraph"/>
        <error to="Kill"/>
    </action>

    <action name="ResetGraph">
        <fs>
            <delete path='${migrationPathStep3}'/>
            <mkdir path='${migrationPathStep3}'/>
        </fs>
        <ok to="GenerateGraph"/>
        <error to="Kill"/>
    </action>

    <action name="GenerateGraph">
        <spark xmlns="uri:oozie:spark-action:0.2">
            <job-tracker>${jobTracker}</job-tracker>
            <name-node>${nameNode}</name-node>
            <master>yarn-cluster</master>
            <mode>cluster</mode>
            <name>GenerateGraph</name>
            <class>eu.dnetlib.dhp.migration.step3.DispatchEntitiesApplication</class>
            <jar>dhp-aggregation-${projectVersion}.jar</jar>
            <spark-opts>--executor-memory ${sparkExecutorMemory} --executor-cores ${sparkExecutorCores} --driver-memory=${sparkDriverMemory} --conf spark.extraListeners="com.cloudera.spark.lineage.NavigatorAppListener" --conf spark.sql.queryExecutionListeners="com.cloudera.spark.lineage.NavigatorQueryListener" --conf spark.sql.warehouse.dir="/user/hive/warehouse"</spark-opts>
            <arg>-mt</arg> <arg>yarn-cluster</arg>
            <arg>-s</arg><arg>${migrationPathStep2}/all_entities</arg>
            <arg>-g</arg><arg>${migrationPathStep3}</arg>
        </spark>
        <ok to="End"/>
        <error to="Kill"/>
    </action>

    <end name="End"/>
</workflow-app>
@@ -0,0 +1,18 @@
<configuration>
    <property>
        <name>jobTracker</name>
        <value>yarnRM</value>
    </property>
    <property>
        <name>nameNode</name>
        <value>hdfs://nameservice1</value>
    </property>
    <property>
        <name>oozie.use.system.libpath</name>
        <value>true</value>
    </property>
    <property>
        <name>oozie.action.sharelib.for.spark</name>
        <value>spark2</value>
    </property>
</configuration>
@@ -0,0 +1,103 @@
<workflow-app name="import regular entities as Graph (step 1)" xmlns="uri:oozie:workflow:0.5">
    <parameters>
        <property>
            <name>migrationPathStep1</name>
            <description>the base path to store hdfs file</description>
        </property>
        <property>
            <name>postgresURL</name>
            <description>the postgres URL to access to the database</description>
        </property>
        <property>
            <name>postgresUser</name>
            <description>the user postgres</description>
        </property>
        <property>
            <name>postgresPassword</name>
            <description>the password postgres</description>
        </property>
        <property>
            <name>mongoURL</name>
            <description>mongoDB url, example: mongodb://[username:password@]host[:port]</description>
        </property>
        <property>
            <name>mongoDb</name>
            <description>mongo database</description>
        </property>
        <property>
            <name>sparkDriverMemory</name>
            <description>memory for driver process</description>
        </property>
        <property>
            <name>sparkExecutorMemory</name>
            <description>memory for individual executor</description>
        </property>
        <property>
            <name>sparkExecutorCores</name>
            <description>number of cores used by single executor</description>
        </property>
    </parameters>

    <start to="ResetWorkingPath"/>

    <kill name="Kill">
        <message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
    </kill>

    <action name="ResetWorkingPath">
        <fs>
            <delete path='${migrationPathStep1}'/>
            <mkdir path='${migrationPathStep1}'/>
        </fs>
        <ok to="ImportDB"/>
        <error to="Kill"/>
    </action>

    <action name="ImportDB">
        <java>
            <job-tracker>${jobTracker}</job-tracker>
            <name-node>${nameNode}</name-node>
            <main-class>eu.dnetlib.dhp.migration.step1.MigrateDbEntitiesApplication</main-class>
            <arg>-p</arg><arg>${migrationPathStep1}/db_records</arg>
            <arg>-pgurl</arg><arg>${postgresURL}</arg>
            <arg>-pguser</arg><arg>${postgresUser}</arg>
            <arg>-pgpasswd</arg><arg>${postgresPassword}</arg>
        </java>
        <ok to="ImportODF"/>
        <error to="Kill"/>
    </action>

    <action name="ImportODF">
        <java>
            <job-tracker>${jobTracker}</job-tracker>
            <name-node>${nameNode}</name-node>
            <main-class>eu.dnetlib.dhp.migration.step1.MigrateMongoMdstoresApplication</main-class>
            <arg>-p</arg><arg>${migrationPathStep1}/odf_records</arg>
            <arg>-mongourl</arg><arg>${mongoURL}</arg>
            <arg>-mongodb</arg><arg>${mongoDb}</arg>
            <arg>-f</arg><arg>ODF</arg>
            <arg>-l</arg><arg>store</arg>
            <arg>-i</arg><arg>cleaned</arg>
        </java>
        <ok to="ImportOAF"/>
        <error to="Kill"/>
    </action>

    <action name="ImportOAF">
        <java>
            <job-tracker>${jobTracker}</job-tracker>
            <name-node>${nameNode}</name-node>
            <main-class>eu.dnetlib.dhp.migration.step1.MigrateMongoMdstoresApplication</main-class>
            <arg>-p</arg><arg>${migrationPathStep1}/oaf_records</arg>
            <arg>-mongourl</arg><arg>${mongoURL}</arg>
            <arg>-mongodb</arg><arg>${mongoDb}</arg>
            <arg>-f</arg><arg>OAF</arg>
            <arg>-l</arg><arg>store</arg>
            <arg>-i</arg><arg>cleaned</arg>
        </java>
        <ok to="End"/>
        <error to="Kill"/>
    </action>

    <end name="End"/>
</workflow-app>
@@ -0,0 +1,18 @@
<configuration>
    <property>
        <name>jobTracker</name>
        <value>yarnRM</value>
    </property>
    <property>
        <name>nameNode</name>
        <value>hdfs://nameservice1</value>
    </property>
    <property>
        <name>oozie.use.system.libpath</name>
        <value>true</value>
    </property>
    <property>
        <name>oozie.action.sharelib.for.spark</name>
        <value>spark2</value>
    </property>
</configuration>
@@ -0,0 +1,74 @@
<workflow-app name="import regular entities as Graph (step 2)" xmlns="uri:oozie:workflow:0.5">
    <parameters>
        <property>
            <name>migrationPathStep1</name>
            <description>the base path to store hdfs file</description>
        </property>
        <property>
            <name>migrationPathStep2</name>
            <description>the temporary path to store entities before dispatching</description>
        </property>
        <property>
            <name>postgresURL</name>
            <description>the postgres URL to access to the database</description>
        </property>
        <property>
            <name>postgresUser</name>
            <description>the user postgres</description>
        </property>
        <property>
            <name>postgresPassword</name>
            <description>the password postgres</description>
        </property>
        <property>
            <name>sparkDriverMemory</name>
            <description>memory for driver process</description>
        </property>
        <property>
            <name>sparkExecutorMemory</name>
            <description>memory for individual executor</description>
        </property>
        <property>
            <name>sparkExecutorCores</name>
            <description>number of cores used by single executor</description>
        </property>
    </parameters>

    <start to="ResetEntities"/>

    <kill name="Kill">
        <message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
    </kill>

    <action name="ResetEntities">
        <fs>
            <delete path='${migrationPathStep2}'/>
            <mkdir path='${migrationPathStep2}'/>
        </fs>
        <ok to="GenerateEntities"/>
        <error to="Kill"/>
    </action>

    <action name="GenerateEntities">
        <spark xmlns="uri:oozie:spark-action:0.2">
            <job-tracker>${jobTracker}</job-tracker>
            <name-node>${nameNode}</name-node>
            <master>yarn-cluster</master>
            <mode>cluster</mode>
            <name>GenerateEntities</name>
            <class>eu.dnetlib.dhp.migration.step2.GenerateEntitiesApplication</class>
            <jar>dhp-aggregation-${projectVersion}.jar</jar>
            <spark-opts>--executor-memory ${sparkExecutorMemory} --executor-cores ${sparkExecutorCores} --driver-memory=${sparkDriverMemory} --conf spark.extraListeners="com.cloudera.spark.lineage.NavigatorAppListener" --conf spark.sql.queryExecutionListeners="com.cloudera.spark.lineage.NavigatorQueryListener" --conf spark.sql.warehouse.dir="/user/hive/warehouse"</spark-opts>
            <arg>-mt</arg> <arg>yarn-cluster</arg>
            <arg>-s</arg><arg>${migrationPathStep1}/db_records,${migrationPathStep1}/oaf_records,${migrationPathStep1}/odf_records</arg>
            <arg>-t</arg><arg>${migrationPathStep2}/all_entities</arg>
            <arg>-pgurl</arg><arg>${postgresURL}</arg>
            <arg>-pguser</arg><arg>${postgresUser}</arg>
            <arg>-pgpasswd</arg><arg>${postgresPassword}</arg>
        </spark>
        <ok to="End"/>
        <error to="Kill"/>
    </action>

    <end name="End"/>
</workflow-app>
@@ -0,0 +1,18 @@
<configuration>
    <property>
        <name>jobTracker</name>
        <value>yarnRM</value>
    </property>
    <property>
        <name>nameNode</name>
        <value>hdfs://nameservice1</value>
    </property>
    <property>
        <name>oozie.use.system.libpath</name>
        <value>true</value>
    </property>
    <property>
        <name>oozie.action.sharelib.for.spark</name>
        <value>spark2</value>
    </property>
</configuration>
@@ -0,0 +1,60 @@
<workflow-app name="import regular entities as Graph (step 3)" xmlns="uri:oozie:workflow:0.5">
    <parameters>
        <property>
            <name>migrationPathStep2</name>
            <description>the temporary path to store entities before dispatching</description>
        </property>
        <property>
            <name>migrationPathStep3</name>
            <description>the graph Raw base path</description>
        </property>
        <property>
            <name>sparkDriverMemory</name>
            <description>memory for driver process</description>
        </property>
        <property>
            <name>sparkExecutorMemory</name>
            <description>memory for individual executor</description>
        </property>
        <property>
            <name>sparkExecutorCores</name>
            <description>number of cores used by single executor</description>
        </property>
    </parameters>

    <start to="ResetGraph"/>

    <kill name="Kill">
        <message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
    </kill>

    <action name="ResetGraph">
        <fs>
            <delete path='${migrationPathStep3}'/>
            <mkdir path='${migrationPathStep3}'/>
        </fs>
        <ok to="GenerateGraph"/>
        <error to="Kill"/>
    </action>

    <action name="GenerateGraph">
        <spark xmlns="uri:oozie:spark-action:0.2">
            <job-tracker>${jobTracker}</job-tracker>
            <name-node>${nameNode}</name-node>
            <master>yarn-cluster</master>
            <mode>cluster</mode>
            <name>GenerateGraph</name>
            <class>eu.dnetlib.dhp.migration.step3.DispatchEntitiesApplication</class>
            <jar>dhp-aggregation-${projectVersion}.jar</jar>
            <spark-opts>--executor-memory ${sparkExecutorMemory} --executor-cores ${sparkExecutorCores} --driver-memory=${sparkDriverMemory} --conf spark.extraListeners="com.cloudera.spark.lineage.NavigatorAppListener" --conf spark.sql.queryExecutionListeners="com.cloudera.spark.lineage.NavigatorQueryListener" --conf spark.sql.warehouse.dir="/user/hive/warehouse"</spark-opts>
            <arg>-mt</arg> <arg>yarn-cluster</arg>
            <arg>-s</arg><arg>${migrationPathStep2}/all_entities</arg>
            <arg>-g</arg><arg>${migrationPathStep3}</arg>
        </spark>
        <ok to="End"/>
        <error to="Kill"/>
    </action>

    <end name="End"/>
</workflow-app>
@@ -1,79 +1,87 @@
 package eu.dnetlib.dhp.collection;

-import com.fasterxml.jackson.databind.ObjectMapper;
-import eu.dnetlib.dhp.model.mdstore.MetadataRecord;
-import eu.dnetlib.dhp.model.mdstore.Provenance;
-import org.apache.commons.io.FileUtils;
-import org.apache.commons.io.IOUtils;
-import org.junit.*;

 import java.io.IOException;
 import java.nio.file.Files;
 import java.nio.file.Path;

+import org.apache.commons.io.FileUtils;
+import org.apache.commons.io.IOUtils;
+import org.junit.After;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+
+import com.fasterxml.jackson.databind.ObjectMapper;
+
+import eu.dnetlib.dhp.model.mdstore.MetadataRecord;
+import eu.dnetlib.dhp.model.mdstore.Provenance;
+
 public class CollectionJobTest {

     private Path testDir;

     @Before
     public void setup() throws IOException {
         testDir = Files.createTempDirectory("dhp-collection");
     }

     @After
     public void teadDown() throws IOException {
         FileUtils.deleteDirectory(testDir.toFile());
     }

     @Test
     public void tesCollection() throws Exception {
-        Provenance provenance = new Provenance("pippo", "puppa", "ns_prefix");
+        final Provenance provenance = new Provenance("pippo", "puppa", "ns_prefix");
         GenerateNativeStoreSparkJob.main(new String[] {
                 "-mt", "local",
                 "-w", "wid",
                 "-e", "XML",
-                "-d", ""+System.currentTimeMillis(),
+                "-d", "" + System.currentTimeMillis(),
                 "-p", new ObjectMapper().writeValueAsString(provenance),
                 "-x", "./*[local-name()='record']/*[local-name()='header']/*[local-name()='identifier']",
                 "-i", this.getClass().getResource("/eu/dnetlib/dhp/collection/native.seq").toString(),
-                "-o", testDir.toString()+"/store",
+                "-o", testDir.toString() + "/store",
                 "-t", "true",
                 "-ru", "",
                 "-rp", "",
                 "-rh", "",
                 "-ro", "",
-                "-rr", ""});
+                "-rr", "" });
         System.out.println(new ObjectMapper().writeValueAsString(provenance));
     }

     @Test
     public void testGenerationMetadataRecord() throws Exception {
         final String xml = IOUtils.toString(this.getClass().getResourceAsStream("./record.xml"));
-
-        MetadataRecord record = GenerateNativeStoreSparkJob.parseRecord(xml, "./*[local-name()='record']/*[local-name()='header']/*[local-name()='identifier']", "XML", new Provenance("foo", "bar", "ns_prefix"), System.currentTimeMillis(), null,null);
-
+        final MetadataRecord record = GenerateNativeStoreSparkJob
+                .parseRecord(xml, "./*[local-name()='record']/*[local-name()='header']/*[local-name()='identifier']", "XML", new Provenance("foo", "bar",
+                        "ns_prefix"), System.currentTimeMillis(), null, null);
         assert record != null;
         System.out.println(record.getId());
         System.out.println(record.getOriginalId());
-
     }

     @Test
-    public void TestEquals () throws IOException {
-
+    public void TestEquals() throws IOException {
         final String xml = IOUtils.toString(this.getClass().getResourceAsStream("./record.xml"));
-        MetadataRecord record = GenerateNativeStoreSparkJob.parseRecord(xml, "./*[local-name()='record']/*[local-name()='header']/*[local-name()='identifier']", "XML", new Provenance("foo", "bar", "ns_prefix"), System.currentTimeMillis(), null,null);
-        MetadataRecord record1 = GenerateNativeStoreSparkJob.parseRecord(xml, "./*[local-name()='record']/*[local-name()='header']/*[local-name()='identifier']", "XML", new Provenance("foo", "bar", "ns_prefix"), System.currentTimeMillis(), null,null);
+        final MetadataRecord record = GenerateNativeStoreSparkJob
+                .parseRecord(xml, "./*[local-name()='record']/*[local-name()='header']/*[local-name()='identifier']", "XML", new Provenance("foo", "bar",
+                        "ns_prefix"), System.currentTimeMillis(), null, null);
+        final MetadataRecord record1 = GenerateNativeStoreSparkJob
+                .parseRecord(xml, "./*[local-name()='record']/*[local-name()='header']/*[local-name()='identifier']", "XML", new Provenance("foo", "bar",
+                        "ns_prefix"), System.currentTimeMillis(), null, null);
         assert record != null;
         record.setBody("ciao");
         assert record1 != null;
         record1.setBody("mondo");
         Assert.assertEquals(record, record1);
-
     }

 }
@@ -18,13 +18,13 @@ public class GraphMappingUtils {
     public final static Map<String, Class> types = Maps.newHashMap();

     static {
         types.put("datasource", Datasource.class);
         types.put("organization", Organization.class);
         types.put("project", Project.class);
         types.put("dataset", Dataset.class);
         types.put("otherresearchproduct", OtherResearchProduct.class);
         types.put("software", Software.class);
         types.put("publication", Publication.class);
         types.put("relation", Relation.class);
     }
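The types map above is what lets the graph importer below deserialize each serialized record generically: it maps a type name to the concrete OAF class, and Jackson does the rest. A minimal, hypothetical sketch of that lookup (the sample JSON and the TypesMapSketch class name are illustrative only, and the sketch is assumed to sit in the same package as GraphMappingUtils):

import com.fasterxml.jackson.databind.ObjectMapper;

public class TypesMapSketch {

    public static void main(String[] args) throws Exception {
        final ObjectMapper mapper = new ObjectMapper();
        // look up the concrete class registered for the "relation" type name
        final Class<?> clazz = GraphMappingUtils.types.get("relation");
        // deserialize a made-up JSON payload into that class
        final Object parsed = mapper.readValue("{\"relType\":\"resultProject\"}", clazz);
        System.out.println(parsed.getClass().getSimpleName()); // Relation
    }
}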
@@ -3,7 +3,7 @@ package eu.dnetlib.dhp.graph;
 import com.fasterxml.jackson.databind.ObjectMapper;
 import eu.dnetlib.dhp.application.ArgumentApplicationParser;
 import org.apache.commons.io.IOUtils;
-import org.apache.hadoop.io.Text;
+import org.apache.spark.SparkConf;
 import org.apache.spark.api.java.JavaSparkContext;
 import org.apache.spark.sql.Encoders;
 import org.apache.spark.sql.SaveMode;
@@ -13,31 +13,40 @@ public class SparkGraphImporterJob {

     public static void main(String[] args) throws Exception {

-        final ArgumentApplicationParser parser = new ArgumentApplicationParser(IOUtils.toString(SparkGraphImporterJob.class.getResourceAsStream("/eu/dnetlib/dhp/graph/input_graph_parameters.json")));
+        final ArgumentApplicationParser parser = new ArgumentApplicationParser(
+                IOUtils.toString(SparkGraphImporterJob.class.getResourceAsStream(
+                        "/eu/dnetlib/dhp/graph/input_graph_parameters.json")));
         parser.parseArgument(args);
-        final SparkSession spark = SparkSession
+
+        try(SparkSession spark = getSparkSession(parser)) {
+
+            final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext());
+            final String inputPath = parser.get("sourcePath");
+            final String hiveDbName = parser.get("hive_db_name");
+
+            spark.sql(String.format("DROP DATABASE IF EXISTS %s CASCADE", hiveDbName));
+            spark.sql(String.format("CREATE DATABASE IF NOT EXISTS %s", hiveDbName));
+
+            // Read the input file and convert it into RDD of serializable object
+            GraphMappingUtils.types.forEach((name, clazz) -> spark.createDataset(sc.textFile(inputPath + "/" + name)
+                    .map(s -> new ObjectMapper().readValue(s, clazz))
+                    .rdd(), Encoders.bean(clazz))
+                    .write()
+                    .mode(SaveMode.Overwrite)
+                    .saveAsTable(hiveDbName + "." + name));
+        }
+    }
+
+    private static SparkSession getSparkSession(ArgumentApplicationParser parser) {
+        SparkConf conf = new SparkConf();
+        conf.set("hive.metastore.uris", parser.get("hive_metastore_uris"));
+
+        return SparkSession
                 .builder()
                 .appName(SparkGraphImporterJob.class.getSimpleName())
                 .master(parser.get("master"))
-                .config("hive.metastore.uris", parser.get("hive_metastore_uris"))
+                .config(conf)
                 .enableHiveSupport()
                 .getOrCreate();

-        final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext());
-        final String inputPath = parser.get("sourcePath");
-        final String hiveDbName = parser.get("hive_db_name");
-
-        spark.sql(String.format("CREATE DATABASE IF NOT EXISTS %s", hiveDbName));
-
-        // Read the input file and convert it into RDD of serializable object
-        GraphMappingUtils.types.forEach((name, clazz) -> {
-            spark.createDataset(sc.sequenceFile(inputPath + "/" + name, Text.class, Text.class)
-                    .map(s -> new ObjectMapper().readValue(s._2().toString(), clazz))
-                    .rdd(), Encoders.bean(clazz))
-                    .write()
-                    .mode(SaveMode.Overwrite)
-                    .saveAsTable(hiveDbName + "." + name);
-        });
-
     }
 }
@@ -0,0 +1,8 @@
CREATE view result as
    select id, dateofcollection, bestaccessright, datainfo, collectedfrom, pid, author, resulttype, language, country, subject, description, dateofacceptance, embargoenddate, resourcetype, context, instance from ${hive_db_name}.publication p
    union all
    select id, dateofcollection, bestaccessright, datainfo, collectedfrom, pid, author, resulttype, language, country, subject, description, dateofacceptance, embargoenddate, resourcetype, context, instance from ${hive_db_name}.dataset d
    union all
    select id, dateofcollection, bestaccessright, datainfo, collectedfrom, pid, author, resulttype, language, country, subject, description, dateofacceptance, embargoenddate, resourcetype, context, instance from ${hive_db_name}.software s
    union all
    select id, dateofcollection, bestaccessright, datainfo, collectedfrom, pid, author, resulttype, language, country, subject, description, dateofacceptance, embargoenddate, resourcetype, context, instance from ${hive_db_name}.otherresearchproduct o;
@@ -37,12 +37,30 @@
             <name>MapGraphIntoDataFrame</name>
             <class>eu.dnetlib.dhp.graph.SparkGraphImporterJob</class>
             <jar>dhp-graph-mapper-${projectVersion}.jar</jar>
-            <spark-opts>--executor-memory ${sparkExecutorMemory} --executor-cores ${sparkExecutorCores} --driver-memory=${sparkDriverMemory} --conf spark.extraListeners="com.cloudera.spark.lineage.NavigatorAppListener" --conf spark.sql.queryExecutionListeners="com.cloudera.spark.lineage.NavigatorQueryListener" --conf spark.sql.warehouse.dir="/user/hive/warehouse"</spark-opts>
+            <spark-opts>
+                --executor-memory ${sparkExecutorMemory}
+                --executor-cores ${sparkExecutorCores}
+                --driver-memory=${sparkDriverMemory}
+                --conf spark.extraListeners="com.cloudera.spark.lineage.NavigatorAppListener"
+                --conf spark.sql.queryExecutionListeners="com.cloudera.spark.lineage.NavigatorQueryListener"
+                --conf spark.sql.warehouse.dir="/user/hive/warehouse"
+            </spark-opts>
             <arg>-mt</arg> <arg>yarn-cluster</arg>
             <arg>--sourcePath</arg><arg>${sourcePath}</arg>
             <arg>--hive_db_name</arg><arg>${hive_db_name}</arg>
             <arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
         </spark>
+        <ok to="PostProcessing"/>
+        <error to="Kill"/>
+    </action>
+
+    <action name="PostProcessing">
+        <hive xmlns="uri:oozie:hive-action:0.2">
+            <job-tracker>${jobTracker}</job-tracker>
+            <name-node>${nameNode}</name-node>
+            <script>/eu/dnetlib/dhp/graph/hive/postprocessing.sql</script>
+            <param>hive_db_name=${hive_db_name}</param>
+        </hive>
         <ok to="End"/>
         <error to="Kill"/>
     </action>
@@ -1,10 +1,12 @@
-sparkDriverMemory=8G
-sparkExecutorMemory=8G
+sparkDriverMemory=10G
+sparkExecutorMemory=15G
 #isLookupUrl=http://services.openaire.eu:8280/is/services/isLookUp
 isLookupUrl=http://beta.services.openaire.eu:8280/is/services/isLookUp?wsdl
 sourcePath=/tmp/db_openaireplus_services.export_dhp.2020.02.03
 outputPath=/tmp/openaire_provision
 format=TMF
 batchSize=2000
+sparkExecutorCoresForJoining=128
 sparkExecutorCoresForIndexing=64
-reuseRecords=true
+reuseRecords=false
+otherDsTypeId=scholarcomminfra, infospace, pubsrepository::mock, entityregistry, entityregistry::projects, entityregistry::repositories, websource
|
||||||
package eu.dnetlib.dhp.graph;
|
package eu.dnetlib.dhp.graph;
|
||||||
|
|
||||||
import com.fasterxml.jackson.annotation.JsonInclude;
|
import com.fasterxml.jackson.databind.DeserializationFeature;
|
||||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
import com.google.common.collect.Iterables;
|
import com.google.common.collect.Iterables;
|
||||||
import com.google.common.collect.Lists;
|
import com.google.common.collect.Maps;
|
||||||
import com.jayway.jsonpath.DocumentContext;
|
import com.jayway.jsonpath.DocumentContext;
|
||||||
import com.jayway.jsonpath.JsonPath;
|
import com.jayway.jsonpath.JsonPath;
|
||||||
import eu.dnetlib.dhp.graph.model.*;
|
import eu.dnetlib.dhp.graph.model.*;
|
||||||
import eu.dnetlib.dhp.graph.utils.ContextMapper;
|
import eu.dnetlib.dhp.graph.utils.ContextMapper;
|
||||||
import eu.dnetlib.dhp.graph.utils.GraphMappingUtils;
|
import eu.dnetlib.dhp.graph.utils.GraphMappingUtils;
|
||||||
|
import eu.dnetlib.dhp.graph.utils.RelationPartitioner;
|
||||||
import eu.dnetlib.dhp.graph.utils.XmlRecordFactory;
|
import eu.dnetlib.dhp.graph.utils.XmlRecordFactory;
|
||||||
import eu.dnetlib.dhp.schema.oaf.*;
|
import eu.dnetlib.dhp.schema.oaf.*;
|
||||||
import org.apache.hadoop.io.Text;
|
import org.apache.hadoop.io.Text;
|
||||||
import org.apache.hadoop.io.compress.GzipCodec;
|
import org.apache.hadoop.io.compress.GzipCodec;
|
||||||
import org.apache.hadoop.mapred.SequenceFileOutputFormat;
|
import org.apache.hadoop.mapred.SequenceFileOutputFormat;
|
||||||
|
import org.apache.spark.SparkContext;
|
||||||
import org.apache.spark.api.java.JavaPairRDD;
|
import org.apache.spark.api.java.JavaPairRDD;
|
||||||
import org.apache.spark.api.java.JavaRDD;
|
import org.apache.spark.api.java.JavaRDD;
|
||||||
import org.apache.spark.api.java.JavaSparkContext;
|
import org.apache.spark.api.java.JavaSparkContext;
|
||||||
import org.apache.spark.api.java.function.PairFunction;
|
import org.apache.spark.api.java.function.PairFunction;
|
||||||
import org.apache.spark.sql.SparkSession;
|
import org.apache.spark.sql.SparkSession;
|
||||||
|
import org.apache.spark.util.LongAccumulator;
|
||||||
import scala.Tuple2;
|
import scala.Tuple2;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.Serializable;
|
import java.io.Serializable;
|
||||||
import java.util.HashSet;
|
import java.util.Map;
|
||||||
import java.util.List;
|
|
||||||
import java.util.stream.Collectors;
|
|
||||||
|
|
||||||
import static eu.dnetlib.dhp.graph.utils.GraphMappingUtils.asRelatedEntity;
|
import static eu.dnetlib.dhp.graph.utils.GraphMappingUtils.asRelatedEntity;
|
||||||
|
|
||||||
|
@ -45,10 +46,12 @@ import static eu.dnetlib.dhp.graph.utils.GraphMappingUtils.asRelatedEntity;
|
||||||
* 3) we only need a subset of fields from the related entities, so we introduce a distinction between E_source = S
|
* 3) we only need a subset of fields from the related entities, so we introduce a distinction between E_source = S
|
||||||
* and E_target = T. Objects in T are heavily pruned by all the unnecessary information
|
* and E_target = T. Objects in T are heavily pruned by all the unnecessary information
|
||||||
*
|
*
|
||||||
* 4) perform the join as (((T join R) union S) groupby S.id) yield S -> [ <T, R> ]
|
* 4) perform the join as (((T.id join R.target) union S) groupby S.id) yield S -> [ <T, R> ]
|
||||||
*/
|
*/
|
||||||
public class GraphJoiner implements Serializable {
|
public class GraphJoiner implements Serializable {
|
||||||
|
|
||||||
|
private Map<String, LongAccumulator> accumulators = Maps.newHashMap();
|
||||||
|
|
||||||
public static final int MAX_RELS = 100;
|
public static final int MAX_RELS = 100;
|
||||||
|
|
||||||
public static final String schemaLocation = "https://www.openaire.eu/schema/1.0/oaf-1.0.xsd";
|
public static final String schemaLocation = "https://www.openaire.eu/schema/1.0/oaf-1.0.xsd";
|
||||||
|
@ -61,24 +64,30 @@ public class GraphJoiner implements Serializable {
|
||||||
|
|
||||||
private String outPath;
|
private String outPath;
|
||||||
|
|
||||||
public GraphJoiner(SparkSession spark, ContextMapper contextMapper, String inputPath, String outPath) {
|
private String otherDsTypeId;
|
||||||
|
|
||||||
|
public GraphJoiner(SparkSession spark, ContextMapper contextMapper, String otherDsTypeId, String inputPath, String outPath) {
|
||||||
this.spark = spark;
|
this.spark = spark;
|
||||||
this.contextMapper = contextMapper;
|
this.contextMapper = contextMapper;
|
||||||
|
this.otherDsTypeId = otherDsTypeId;
|
||||||
this.inputPath = inputPath;
|
this.inputPath = inputPath;
|
||||||
this.outPath = outPath;
|
this.outPath = outPath;
|
||||||
|
|
||||||
|
final SparkContext sc = spark.sparkContext();
|
||||||
|
prepareAccumulators(sc);
|
||||||
}
|
}
|
||||||
|
|
||||||
public GraphJoiner adjacencyLists() {
|
public GraphJoiner adjacencyLists() {
|
||||||
final JavaSparkContext sc = new JavaSparkContext(getSpark().sparkContext());
|
final JavaSparkContext jsc = new JavaSparkContext(getSpark().sparkContext());
|
||||||
|
|
||||||
// read each entity
|
// read each entity
|
||||||
JavaPairRDD<String, TypedRow> datasource = readPathEntity(sc, getInputPath(), "datasource");
|
JavaPairRDD<String, TypedRow> datasource = readPathEntity(jsc, getInputPath(), "datasource");
|
||||||
JavaPairRDD<String, TypedRow> organization = readPathEntity(sc, getInputPath(), "organization");
|
JavaPairRDD<String, TypedRow> organization = readPathEntity(jsc, getInputPath(), "organization");
|
||||||
JavaPairRDD<String, TypedRow> project = readPathEntity(sc, getInputPath(), "project");
|
JavaPairRDD<String, TypedRow> project = readPathEntity(jsc, getInputPath(), "project");
|
||||||
JavaPairRDD<String, TypedRow> dataset = readPathEntity(sc, getInputPath(), "dataset");
|
JavaPairRDD<String, TypedRow> dataset = readPathEntity(jsc, getInputPath(), "dataset");
|
||||||
JavaPairRDD<String, TypedRow> otherresearchproduct = readPathEntity(sc, getInputPath(), "otherresearchproduct");
|
JavaPairRDD<String, TypedRow> otherresearchproduct = readPathEntity(jsc, getInputPath(), "otherresearchproduct");
|
||||||
JavaPairRDD<String, TypedRow> software = readPathEntity(sc, getInputPath(), "software");
|
JavaPairRDD<String, TypedRow> software = readPathEntity(jsc, getInputPath(), "software");
|
||||||
JavaPairRDD<String, TypedRow> publication = readPathEntity(sc, getInputPath(), "publication");
|
JavaPairRDD<String, TypedRow> publication = readPathEntity(jsc, getInputPath(), "publication");
|
||||||
|
|
||||||
// create the union between all the entities
|
// create the union between all the entities
|
||||||
final String entitiesPath = getOutPath() + "/entities";
|
final String entitiesPath = getOutPath() + "/entities";
|
||||||
|
@ -93,31 +102,43 @@ public class GraphJoiner implements Serializable {
|
||||||
.map(GraphMappingUtils::serialize)
|
.map(GraphMappingUtils::serialize)
|
||||||
.saveAsTextFile(entitiesPath, GzipCodec.class);
|
.saveAsTextFile(entitiesPath, GzipCodec.class);
|
||||||
|
|
||||||
JavaPairRDD<String, EntityRelEntity> entities = sc.textFile(entitiesPath)
|
JavaPairRDD<String, EntityRelEntity> entities = jsc.textFile(entitiesPath)
|
||||||
.map(t -> new ObjectMapper().readValue(t, EntityRelEntity.class))
|
.map(t -> new ObjectMapper().readValue(t, EntityRelEntity.class))
|
||||||
.mapToPair(t -> new Tuple2<>(t.getSource().getSourceId(), t));
|
.mapToPair(t -> new Tuple2<>(t.getSource().getSourceId(), t));
|
||||||
|
|
||||||
|
final String relationPath = getOutPath() + "/relation";
|
||||||
// reads the relationships
|
// reads the relationships
|
||||||
final JavaPairRDD<String, EntityRelEntity> relation = readPathRelation(sc, getInputPath())
|
final JavaPairRDD<SortableRelationKey, EntityRelEntity> rels = readPathRelation(jsc, getInputPath())
|
||||||
.filter(r -> !r.getDeleted()) //only consider those that are not virtually deleted
|
.filter(rel -> !rel.getDeleted()) //only consider those that are not virtually deleted
|
||||||
.map(p -> new EntityRelEntity().setRelation(p))
|
.map(p -> new EntityRelEntity().setRelation(p))
|
||||||
.mapToPair(p -> new Tuple2<>(p.getRelation().getSourceId(), p))
|
.mapToPair(p -> new Tuple2<>(SortableRelationKey.from(p), p));
|
||||||
.groupByKey()
|
rels
|
||||||
|
.groupByKey(new RelationPartitioner(rels.getNumPartitions()))
|
||||||
.map(p -> Iterables.limit(p._2(), MAX_RELS))
|
.map(p -> Iterables.limit(p._2(), MAX_RELS))
|
||||||
.flatMap(p -> p.iterator())
|
.flatMap(p -> p.iterator())
|
||||||
|
.map(s -> new ObjectMapper().writeValueAsString(s))
|
||||||
|
.saveAsTextFile(relationPath, GzipCodec.class);
|
||||||
|
|
||||||
|
final JavaPairRDD<String, EntityRelEntity> relation = jsc.textFile(relationPath)
|
||||||
|
.map(s -> new ObjectMapper().readValue(s, EntityRelEntity.class))
|
||||||
.mapToPair(p -> new Tuple2<>(p.getRelation().getTargetId(), p));
|
.mapToPair(p -> new Tuple2<>(p.getRelation().getTargetId(), p));
|
||||||
|
|
||||||
//final String bySource = getOutPath() + "/1_join_by_target";
|
final String bySourcePath = getOutPath() + "/join_by_source";
|
||||||
JavaPairRDD<String, EntityRelEntity> bySource = relation
|
relation
|
||||||
.join(entities
|
.join(entities
|
||||||
.filter(e -> !e._2().getSource().getDeleted())
|
.filter(e -> !e._2().getSource().getDeleted())
|
||||||
.mapToPair(e -> new Tuple2<>(e._1(), asRelatedEntity(e._2()))))
|
.mapToPair(e -> new Tuple2<>(e._1(), asRelatedEntity(e._2()))))
|
||||||
.map(s -> new EntityRelEntity()
|
.map(s -> new EntityRelEntity()
|
||||||
.setRelation(s._2()._1().getRelation())
|
.setRelation(s._2()._1().getRelation())
|
||||||
.setTarget(s._2()._2().getSource()))
|
.setTarget(s._2()._2().getSource()))
|
||||||
|
.map(j -> new ObjectMapper().writeValueAsString(j))
|
||||||
|
.saveAsTextFile(bySourcePath, GzipCodec.class);
|
||||||
|
|
||||||
|
JavaPairRDD<String, EntityRelEntity> bySource = jsc.textFile(bySourcePath)
|
||||||
|
.map(e -> getObjectMapper().readValue(e, EntityRelEntity.class))
|
||||||
.mapToPair(t -> new Tuple2<>(t.getRelation().getSourceId(), t));
|
.mapToPair(t -> new Tuple2<>(t.getRelation().getSourceId(), t));
|
||||||
|
|
||||||
final XmlRecordFactory recordFactory = new XmlRecordFactory(contextMapper, false, schemaLocation, new HashSet<>());
|
final XmlRecordFactory recordFactory = new XmlRecordFactory(accumulators, contextMapper, false, schemaLocation, otherDsTypeId);
|
||||||
entities
|
entities
|
||||||
.union(bySource)
|
.union(bySource)
|
||||||
.groupByKey() // by source id
|
.groupByKey() // by source id
|
||||||
|
@ -130,20 +151,6 @@ public class GraphJoiner implements Serializable {
|
||||||
return this;
|
return this;
|
||||||
}
|
}
|
||||||
|
|
||||||
public GraphJoiner asXML() {
|
|
||||||
final JavaSparkContext sc = new JavaSparkContext(getSpark().sparkContext());
|
|
||||||
final XmlRecordFactory recordFactory = new XmlRecordFactory(contextMapper, true, "", new HashSet<>());
|
|
||||||
final ObjectMapper mapper = new ObjectMapper();
|
|
||||||
|
|
||||||
final String joinedEntitiesPath = getOutPath() + "/1_joined_entities";
|
|
||||||
sc.textFile(joinedEntitiesPath)
|
|
||||||
.map(s -> mapper.readValue(s, JoinedEntity.class))
|
|
||||||
.mapToPair(je -> new Tuple2<>(new Text(je.getEntity().getId()), new Text(recordFactory.build(je))))
|
|
||||||
.saveAsHadoopFile(getOutPath() + "/2_xml", Text.class, Text.class, SequenceFileOutputFormat.class, GzipCodec.class);
|
|
||||||
|
|
||||||
return this;
|
|
||||||
}
|
|
||||||
|
|
||||||
public SparkSession getSpark() {
|
public SparkSession getSpark() {
|
||||||
return spark;
|
return spark;
|
||||||
}
|
}
|
||||||
|
@ -158,24 +165,23 @@ public class GraphJoiner implements Serializable {
|
||||||
|
|
||||||
// HELPERS
|
// HELPERS
|
||||||
|
|
||||||
private OafEntity parseOaf(final String json, final String type) {
|
private OafEntity parseOaf(final String json, final String type, final ObjectMapper mapper) {
|
||||||
final ObjectMapper o = new ObjectMapper();
|
|
||||||
try {
|
try {
|
||||||
switch (GraphMappingUtils.EntityType.valueOf(type)) {
|
switch (GraphMappingUtils.EntityType.valueOf(type)) {
|
||||||
case publication:
|
case publication:
|
||||||
return o.readValue(json, Publication.class);
|
return mapper.readValue(json, Publication.class);
|
||||||
case dataset:
|
case dataset:
|
||||||
return o.readValue(json, Dataset.class);
|
return mapper.readValue(json, Dataset.class);
|
||||||
case otherresearchproduct:
|
case otherresearchproduct:
|
||||||
return o.readValue(json, OtherResearchProduct.class);
|
return mapper.readValue(json, OtherResearchProduct.class);
|
||||||
case software:
|
case software:
|
||||||
return o.readValue(json, Software.class);
|
return mapper.readValue(json, Software.class);
|
||||||
case datasource:
|
case datasource:
|
||||||
return o.readValue(json, Datasource.class);
|
return mapper.readValue(json, Datasource.class);
|
||||||
case organization:
|
case organization:
|
||||||
return o.readValue(json, Organization.class);
|
return mapper.readValue(json, Organization.class);
|
||||||
case project:
|
case project:
|
||||||
return o.readValue(json, Project.class);
|
return mapper.readValue(json, Project.class);
|
||||||
default:
|
default:
|
||||||
throw new IllegalArgumentException("invalid type: " + type);
|
throw new IllegalArgumentException("invalid type: " + type);
|
||||||
}
|
}
|
||||||
|
@ -185,26 +191,26 @@ public class GraphJoiner implements Serializable {
|
||||||
}
|
}
|
||||||
|
|
||||||
private JoinedEntity toJoinedEntity(Tuple2<String, Iterable<EntityRelEntity>> p) {
|
private JoinedEntity toJoinedEntity(Tuple2<String, Iterable<EntityRelEntity>> p) {
|
||||||
final ObjectMapper o = new ObjectMapper();
|
final ObjectMapper mapper = getObjectMapper();
|
||||||
final JoinedEntity j = new JoinedEntity();
|
final JoinedEntity j = new JoinedEntity();
|
||||||
final Links links2 = new Links();
|
final Links links = new Links();
|
||||||
for(EntityRelEntity rel : p._2()) {
|
for(EntityRelEntity rel : p._2()) {
|
||||||
if (rel.hasMainEntity() & j.getEntity() == null) {
|
if (rel.hasMainEntity() & j.getEntity() == null) {
|
||||||
j.setType(rel.getSource().getType());
|
j.setType(rel.getSource().getType());
|
||||||
j.setEntity(parseOaf(rel.getSource().getOaf(), rel.getSource().getType()));
|
j.setEntity(parseOaf(rel.getSource().getOaf(), rel.getSource().getType(), mapper));
|
||||||
}
|
}
|
||||||
if (rel.hasRelatedEntity()) {
|
if (rel.hasRelatedEntity()) {
|
||||||
try {
|
try {
|
||||||
links2.add(
|
links.add(
|
||||||
new eu.dnetlib.dhp.graph.model.Tuple2()
|
new eu.dnetlib.dhp.graph.model.Tuple2()
|
||||||
.setRelation(o.readValue(rel.getRelation().getOaf(), Relation.class))
|
.setRelation(mapper.readValue(rel.getRelation().getOaf(), Relation.class))
|
||||||
.setRelatedEntity(o.readValue(rel.getTarget().getOaf(), RelatedEntity.class)));
|
.setRelatedEntity(mapper.readValue(rel.getTarget().getOaf(), RelatedEntity.class)));
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
throw new IllegalArgumentException(e);
|
throw new IllegalArgumentException(e);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
j.setLinks(links2);
|
j.setLinks(links);
|
||||||
if (j.getEntity() == null) {
|
if (j.getEntity() == null) {
|
||||||
throw new IllegalStateException("missing main entity on '" + p._1() + "'");
|
throw new IllegalStateException("missing main entity on '" + p._1() + "'");
|
||||||
}
|
}
|
||||||
|
@ -250,8 +256,38 @@ public class GraphJoiner implements Serializable {
|
||||||
.setTargetId(json.read("$.target"))
|
.setTargetId(json.read("$.target"))
|
||||||
.setDeleted(json.read("$.dataInfo.deletedbyinference"))
|
.setDeleted(json.read("$.dataInfo.deletedbyinference"))
|
||||||
.setType("relation")
|
.setType("relation")
|
||||||
|
.setRelType("$.relType")
|
||||||
|
.setSubRelType("$.subRelType")
|
||||||
|
.setRelClass("$.relClass")
|
||||||
.setOaf(s);
|
.setOaf(s);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private ObjectMapper getObjectMapper() {
|
||||||
|
return new ObjectMapper().configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false);
|
||||||
|
}
|
||||||
|
|
||||||
|
private void prepareAccumulators(SparkContext sc) {
|
||||||
|
accumulators.put("resultResult_similarity_isAmongTopNSimilarDocuments", sc.longAccumulator("resultResult_similarity_isAmongTopNSimilarDocuments"));
|
||||||
|
accumulators.put("resultResult_similarity_hasAmongTopNSimilarDocuments", sc.longAccumulator("resultResult_similarity_hasAmongTopNSimilarDocuments"));
|
||||||
|
accumulators.put("resultResult_supplement_isSupplementTo", sc.longAccumulator("resultResult_supplement_isSupplementTo"));
|
||||||
|
accumulators.put("resultResult_supplement_isSupplementedBy", sc.longAccumulator("resultResult_supplement_isSupplementedBy"));
|
||||||
|
accumulators.put("resultResult_dedup_isMergedIn", sc.longAccumulator("resultResult_dedup_isMergedIn"));
|
||||||
|
accumulators.put("resultResult_dedup_merges", sc.longAccumulator("resultResult_dedup_merges"));
|
||||||
|
|
||||||
|
accumulators.put("resultResult_publicationDataset_isRelatedTo", sc.longAccumulator("resultResult_publicationDataset_isRelatedTo"));
|
||||||
|
accumulators.put("resultResult_relationship_isRelatedTo", sc.longAccumulator("resultResult_relationship_isRelatedTo"));
|
||||||
|
accumulators.put("resultProject_outcome_isProducedBy", sc.longAccumulator("resultProject_outcome_isProducedBy"));
|
||||||
|
accumulators.put("resultProject_outcome_produces", sc.longAccumulator("resultProject_outcome_produces"));
|
||||||
|
accumulators.put("resultOrganization_affiliation_isAuthorInstitutionOf", sc.longAccumulator("resultOrganization_affiliation_isAuthorInstitutionOf"));
|
||||||
|
|
||||||
|
accumulators.put("resultOrganization_affiliation_hasAuthorInstitution", sc.longAccumulator("resultOrganization_affiliation_hasAuthorInstitution"));
|
||||||
|
accumulators.put("projectOrganization_participation_hasParticipant", sc.longAccumulator("projectOrganization_participation_hasParticipant"));
|
||||||
|
accumulators.put("projectOrganization_participation_isParticipant", sc.longAccumulator("projectOrganization_participation_isParticipant"));
|
||||||
|
accumulators.put("organizationOrganization_dedup_isMergedIn", sc.longAccumulator("organizationOrganization_dedup_isMergedIn"));
|
||||||
|
accumulators.put("organizationOrganization_dedup_merges", sc.longAccumulator("resultProject_outcome_produces"));
|
||||||
|
accumulators.put("datasourceOrganization_provision_isProvidedBy", sc.longAccumulator("datasourceOrganization_provision_isProvidedBy"));
|
||||||
|
accumulators.put("datasourceOrganization_provision_provides", sc.longAccumulator("datasourceOrganization_provision_provides"));
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
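The GraphJoiner Javadoc above describes the adjacency-list construction as (((T.id join R.target) union S) groupby S.id). The following self-contained toy sketch reproduces only that join shape with plain strings standing in for pruned targets (T), relations (R) and source entities (S); the class name, sample ids and payloads are illustrative assumptions, not the project's API.

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaSparkContext;
import scala.Tuple2;

import java.util.Arrays;

public class JoinPatternSketch {

    public static void main(String[] args) {
        final JavaSparkContext sc = new JavaSparkContext(
                new SparkConf().setAppName("join-sketch").setMaster("local[*]"));

        // S: source entities keyed by their id
        final JavaPairRDD<String, String> sources = sc.parallelizePairs(Arrays.asList(
                new Tuple2<>("50|pub1", "publication payload")));
        // T: pruned related entities keyed by their id
        final JavaPairRDD<String, String> targets = sc.parallelizePairs(Arrays.asList(
                new Tuple2<>("40|proj1", "project payload")));
        // R: relations keyed by target id, the value carries the source id
        final JavaPairRDD<String, String> relsByTarget = sc.parallelizePairs(Arrays.asList(
                new Tuple2<>("40|proj1", "50|pub1")));

        // (T.id join R.target), re-keyed by the relation source id, then union S, then group by S.id
        final JavaPairRDD<String, String> joined = relsByTarget
                .join(targets)                                            // targetId -> (sourceId, targetPayload)
                .mapToPair(t -> new Tuple2<>(t._2()._1(), t._2()._2()))   // re-key by source id
                .union(sources)
                .groupByKey()
                .mapValues(vs -> String.join("; ", vs));

        joined.collect().forEach(System.out::println);
        sc.stop();
    }
}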
@@ -24,6 +24,7 @@ public class SparkXmlRecordBuilderJob {
             final String inputPath = parser.get("sourcePath");
             final String outputPath = parser.get("outputPath");
             final String isLookupUrl = parser.get("isLookupUrl");
+            final String otherDsTypeId = parser.get("otherDsTypeId");

             final FileSystem fs = FileSystem.get(spark.sparkContext().hadoopConfiguration());
             if (fs.exists(new Path(outputPath))) {
@@ -31,8 +32,9 @@ public class SparkXmlRecordBuilderJob {
                 fs.mkdirs(new Path(outputPath));
             }

-            new GraphJoiner(spark, ContextMapper.fromIS(isLookupUrl), inputPath, outputPath)
+            new GraphJoiner(spark, ContextMapper.fromIS(isLookupUrl), otherDsTypeId, inputPath, outputPath)
                 .adjacencyLists();
+                //.asXML();
         }
     }
 }
@@ -0,0 +1,99 @@
package eu.dnetlib.dhp.graph.model;

import com.google.common.collect.ComparisonChain;
import com.google.common.collect.Maps;

import java.io.Serializable;
import java.util.Map;

/**
 * Allows to sort relationships according to the priority defined in the weights map.
 */
public class SortableRelationKey implements Comparable<SortableRelationKey>, Serializable {

	private String sourceId;
	private String targetId;

	private String relType;
	private String subRelType;
	private String relClass;

	private final static Map<String, Integer> weights = Maps.newHashMap();

	static {
		weights.put("outcome", 0);
		weights.put("supplement", 1);
		weights.put("publicationDataset", 2);
		weights.put("relationship", 3);
		weights.put("similarity", 4);
		weights.put("affiliation", 5);

		weights.put("provision", 6);
		weights.put("participation", 7);
		weights.put("dedup", 8);
	}

	public static SortableRelationKey from(final EntityRelEntity e) {
		return new SortableRelationKey()
				.setSourceId(e.getRelation().getSourceId())
				.setTargetId(e.getRelation().getTargetId())
				.setRelType(e.getRelation().getRelType())
				.setSubRelType(e.getRelation().getSubRelType())
				.setRelClass(e.getRelation().getRelClass());
	}

	public String getSourceId() {
		return sourceId;
	}

	public SortableRelationKey setSourceId(String sourceId) {
		this.sourceId = sourceId;
		return this;
	}

	public String getTargetId() {
		return targetId;
	}

	public SortableRelationKey setTargetId(String targetId) {
		this.targetId = targetId;
		return this;
	}

	public String getRelType() {
		return relType;
	}

	public SortableRelationKey setRelType(String relType) {
		this.relType = relType;
		return this;
	}

	public String getSubRelType() {
		return subRelType;
	}

	public SortableRelationKey setSubRelType(String subRelType) {
		this.subRelType = subRelType;
		return this;
	}

	public String getRelClass() {
		return relClass;
	}

	public SortableRelationKey setRelClass(String relClass) {
		this.relClass = relClass;
		return this;
	}

	@Override
	public int compareTo(SortableRelationKey o) {
		return ComparisonChain.start()
				.compare(weights.get(getSubRelType()), weights.get(o.getSubRelType()))
				.compare(getSourceId(), o.getSourceId())
				.compare(getTargetId(), o.getTargetId())
				.result();
	}

}
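A minimal usage sketch, not part of the commit, illustrating the ordering induced by the weights map: a key with subRelType "outcome" (weight 0) sorts before one with subRelType "dedup" (weight 8). The source and target identifiers below are made-up placeholders.

import eu.dnetlib.dhp.graph.model.SortableRelationKey;

public class SortableRelationKeyExample {

	public static void main(String[] args) {
		// hypothetical identifiers, used only to populate the keys
		final SortableRelationKey outcome = new SortableRelationKey()
				.setSourceId("50|result_id")
				.setTargetId("40|project_id")
				.setRelType("resultProject")
				.setSubRelType("outcome")
				.setRelClass("isProducedBy");

		final SortableRelationKey dedup = new SortableRelationKey()
				.setSourceId("50|result_id")
				.setTargetId("50|other_result_id")
				.setRelType("organizationOrganization")
				.setSubRelType("dedup")
				.setRelClass("merges");

		// prints true: "outcome" (weight 0) is ordered before "dedup" (weight 8)
		System.out.println(outcome.compareTo(dedup) < 0);
	}
}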
@@ -12,6 +12,10 @@ public class TypedRow implements Serializable {

 	private String type;

+	private String relType;
+	private String subRelType;
+	private String relClass;
+
 	private String oaf;

 	public String getSourceId() {
@@ -50,6 +54,33 @@ public class TypedRow implements Serializable {
 		return this;
 	}

+	public String getRelType() {
+		return relType;
+	}
+
+	public TypedRow setRelType(String relType) {
+		this.relType = relType;
+		return this;
+	}
+
+	public String getSubRelType() {
+		return subRelType;
+	}
+
+	public TypedRow setSubRelType(String subRelType) {
+		this.subRelType = subRelType;
+		return this;
+	}
+
+	public String getRelClass() {
+		return relClass;
+	}
+
+	public TypedRow setRelClass(String relClass) {
+		this.relClass = relClass;
+		return this;
+	}
+
 	public String getOaf() {
 		return oaf;
 	}
@@ -26,6 +26,8 @@ import static org.apache.commons.lang3.StringUtils.*;

 public class GraphMappingUtils {

+	public static final String SEPARATOR = "_";
+
 	public enum EntityType {
 		publication, dataset, otherresearchproduct, software, datasource, organization, project
 	}
@@ -38,34 +40,6 @@ public class GraphMappingUtils {

 	public static Set<String> instanceFieldFilter = Sets.newHashSet("instancetype", "hostedby", "license", "accessright", "collectedfrom", "dateofacceptance", "distributionlocation");

-	private static BiMap<String, String> relClassMapping = HashBiMap.create();
-
-	static {
-		relClassMapping.put("isAuthorInstitutionOf", "hasAuthorInstitution");
-		relClassMapping.put("isMergedIn", "merges");
-		relClassMapping.put("isProducedBy", "produces");
-		relClassMapping.put("hasParticipant", "isParticipant");
-		relClassMapping.put("isProvidedBy", "provides");
-		relClassMapping.put("isRelatedTo", "isRelatedTo");
-		relClassMapping.put("isAmongTopNSimilarDocuments", "hasAmongTopNSimilarDocuments");
-		relClassMapping.put("isRelatedTo", "isRelatedTo");
-		relClassMapping.put("isSupplementTo", "isSupplementedBy");
-	}
-
-	public static String getInverseRelClass(final String relClass) {
-		String res = relClassMapping.get(relClass);
-		if (isNotBlank(res)) {
-			return res;
-		}
-		res = relClassMapping.inverse().get(relClass);
-
-		if (isNotBlank(res)) {
-			return res;
-		}
-
-		throw new IllegalArgumentException("unable to find an inverse relationship class for term: " + relClass);
-	}
-
 	private static final String schemeTemplate = "dnet:%s_%s_relations";

 	private static Map<EntityType, MainEntityType> entityMapping = Maps.newHashMap();
@@ -158,7 +132,7 @@ public class GraphMappingUtils {
 				re.setLegalname(j.read("$.legalname.value"));
 				re.setLegalshortname(j.read("$.legalshortname.value"));
 				re.setCountry(asQualifier(j.read("$.country")));
+				re.setWebsiteurl(j.read("$.websiteurl.value"));
 				break;
 			case project:
 				re.setProjectTitle(j.read("$.title.value"));
@@ -250,5 +224,8 @@ public class GraphMappingUtils {
 		return s;
 	}

+	public static String getRelDescriptor(String relType, String subRelType, String relClass) {
+		return relType + SEPARATOR + subRelType + SEPARATOR + relClass;
+	}
+
 }
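A small sketch, assuming GraphMappingUtils sits in the eu.dnetlib.dhp.graph.utils package, showing how the new getRelDescriptor helper joins relType, subRelType and relClass with SEPARATOR; the resulting string matches the accumulator names registered above.

import eu.dnetlib.dhp.graph.utils.GraphMappingUtils;

public class RelDescriptorExample {

	public static void main(String[] args) {
		// prints "resultOrganization_affiliation_hasAuthorInstitution"
		System.out.println(GraphMappingUtils.getRelDescriptor("resultOrganization", "affiliation", "hasAuthorInstitution"));
	}
}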
@@ -0,0 +1,29 @@
package eu.dnetlib.dhp.graph.utils;

import eu.dnetlib.dhp.graph.model.SortableRelationKey;
import org.apache.spark.Partitioner;
import org.apache.spark.util.Utils;

/**
 * Used in combination with SortableRelationKey: partitions the records by source id, so that
 * relations sharing the same source id can be sorted by the ordering defined in SortableRelationKey.
 */
public class RelationPartitioner extends Partitioner {

	private int numPartitions;

	public RelationPartitioner(int numPartitions) {
		this.numPartitions = numPartitions;
	}

	@Override
	public int numPartitions() {
		return numPartitions;
	}

	@Override
	public int getPartition(Object key) {
		return Utils.nonNegativeMod(((SortableRelationKey) key).getSourceId().hashCode(), numPartitions());
	}

}
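A hedged sketch of how the partitioner could be wired into a Spark job; the method and RDD names are assumptions, not taken from this commit. Pairing each EntityRelEntity with its SortableRelationKey and calling repartitionAndSortWithinPartitions groups relations by source id and sorts them within each partition by the weights defined above.

import eu.dnetlib.dhp.graph.model.EntityRelEntity;
import eu.dnetlib.dhp.graph.model.SortableRelationKey;
import eu.dnetlib.dhp.graph.utils.RelationPartitioner;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import scala.Tuple2;

public class RelationPartitionerSketch {

	// groups relations by source id and, within each partition, sorts them by SortableRelationKey
	public static JavaPairRDD<SortableRelationKey, EntityRelEntity> sortRelations(
			final JavaRDD<EntityRelEntity> relations, final int numPartitions) {
		return relations
				.mapToPair(e -> new Tuple2<>(SortableRelationKey.from(e), e))
				.repartitionAndSortWithinPartitions(new RelationPartitioner(numPartitions));
	}
}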
@@ -3,6 +3,7 @@ package eu.dnetlib.dhp.graph.utils;
 import com.google.common.base.Joiner;
 import com.google.common.base.Splitter;
 import com.google.common.collect.Lists;
+import com.google.common.collect.Maps;
 import com.google.common.collect.Sets;
 import com.mycila.xmltool.XMLDoc;
 import com.mycila.xmltool.XMLTag;
@@ -11,6 +12,8 @@ import eu.dnetlib.dhp.graph.model.RelatedEntity;
 import eu.dnetlib.dhp.graph.model.Tuple2;
 import eu.dnetlib.dhp.schema.oaf.Result;
 import eu.dnetlib.dhp.schema.oaf.*;
+import org.apache.commons.lang3.StringUtils;
+import org.apache.spark.util.LongAccumulator;
 import org.dom4j.Document;
 import org.dom4j.DocumentException;
 import org.dom4j.Element;
@@ -27,6 +30,7 @@ import java.io.Serializable;
 import java.io.StringReader;
 import java.io.StringWriter;
 import java.util.List;
+import java.util.Map;
 import java.util.Set;
 import java.util.stream.Collectors;

@@ -37,37 +41,49 @@ import static org.apache.commons.lang3.StringUtils.substringBefore;

 public class XmlRecordFactory implements Serializable {

+	private Map<String, LongAccumulator> accumulators;
+
 	private Set<String> specialDatasourceTypes;

 	private ContextMapper contextMapper;

 	private String schemaLocation;

-	private Set<String> contextes = Sets.newHashSet();
-
 	private boolean indent = false;

 	public XmlRecordFactory(
 			final ContextMapper contextMapper, final boolean indent,
-			final String schemaLocation, final Set<String> otherDatasourceTypesUForUI) {
+			final String schemaLocation, final String otherDatasourceTypesUForUI) {
+
+		this(Maps.newHashMap(), contextMapper, indent, schemaLocation, otherDatasourceTypesUForUI);
+	}
+
+	public XmlRecordFactory(
+			final Map<String, LongAccumulator> accumulators,
+			final ContextMapper contextMapper, final boolean indent,
+			final String schemaLocation, final String otherDatasourceTypesUForUI) {
+
+		this.accumulators = accumulators;
 		this.contextMapper = contextMapper;
 		this.schemaLocation = schemaLocation;
-		this.specialDatasourceTypes = otherDatasourceTypesUForUI;
+		this.specialDatasourceTypes = Sets.newHashSet(Splitter.on(",").trimResults().split(otherDatasourceTypesUForUI));

 		this.indent = indent;
 	}

 	public String build(final JoinedEntity je) {

+		final Set<String> contexts = Sets.newHashSet();
+
 		final OafEntity entity = je.getEntity();
 		TemplateFactory templateFactory = new TemplateFactory();
 		try {
-			final List<String> metadata = metadata(je.getType(), entity);
+			final List<String> metadata = metadata(je.getType(), entity, contexts);

 			// rels has to be processed before the contexts because they enrich the contextMap with the funding info.
-			final List<String> relations = listRelations(je, templateFactory);
+			final List<String> relations = listRelations(je, templateFactory, contexts);

-			metadata.addAll(buildContexts(getMainType(je.getType())));
+			metadata.addAll(buildContexts(getMainType(je.getType()), contexts));
 			metadata.add(parseDataInfo(entity.getDataInfo()));

 			final String body = templateFactory.buildBody(
@@ -97,10 +113,11 @@ public class XmlRecordFactory implements Serializable {
 		}
 	}

-	private List<String> metadata(final String type, final OafEntity entity) {
+	private List<String> metadata(final String type, final OafEntity entity, final Set<String> contexts) {

 		final List<String> metadata = Lists.newArrayList();

 		if (entity.getCollectedfrom() != null) {
 			metadata.addAll(entity.getCollectedfrom()
 					.stream()
@@ -123,6 +140,17 @@ public class XmlRecordFactory implements Serializable {
 		if (GraphMappingUtils.isResult(type)) {
 			final Result r = (Result) entity;

+			if (r.getContext() != null) {
+				contexts.addAll(r.getContext()
+						.stream()
+						.map(c -> c.getId())
+						.collect(Collectors.toList()));
+				/* FIXME: Workaround for CLARIN mining issue: #3670#note-29 */
+				if (contexts.contains("dh-ch::subcommunity::2")) {
+					contexts.add("clarin");
+				}
+			}
+
 			if (r.getTitle() != null) {
 				metadata.addAll(r.getTitle()
 						.stream()
@@ -235,16 +263,6 @@ public class XmlRecordFactory implements Serializable {
 			}

 			metadata.add(mapQualifier("bestaccessright", getBestAccessright(r)));
-
-			if (r.getContext() != null) {
-				contextes.addAll(r.getContext()
-						.stream()
-						.map(c -> c.getId())
-						.collect(Collectors.toList()));
-				if (contextes.contains("dh-ch::subcommunity::2")) {
-					contextes.add("clarin");
-				}
-			}
 		}

 		switch (EntityType.valueOf(type)) {
@@ -445,7 +463,7 @@ public class XmlRecordFactory implements Serializable {
 			if (ds.getSubjects() != null) {
 				metadata.addAll(ds.getSubjects()
 						.stream()
-						.map(sp -> mapStructuredProperty("subject", sp))
+						.map(sp -> mapStructuredProperty("subjects", sp))
 						.collect(Collectors.toList()));
 			}

@@ -580,7 +598,7 @@ public class XmlRecordFactory implements Serializable {
 			if (p.getFundingtree() != null) {
 				metadata.addAll(p.getFundingtree()
 						.stream()
-						.map(ft -> asXmlElement("fundingtree", ft.getValue()))
+						.map(ft -> ft.getValue())
 						.collect(Collectors.toList()));
 			}

@@ -618,7 +636,7 @@ public class XmlRecordFactory implements Serializable {
 		return bestAccessRight;
 	}

-	private List<String> listRelations(final JoinedEntity je, TemplateFactory templateFactory) {
+	private List<String> listRelations(final JoinedEntity je, TemplateFactory templateFactory, final Set<String> contexts) {
 		final List<String> rels = Lists.newArrayList();

 		for (final Tuple2 link : je.getLinks()) {
@@ -699,7 +717,7 @@ public class XmlRecordFactory implements Serializable {
 			if (re.getFundingtree() != null) {
 				metadata.addAll(re.getFundingtree()
 						.stream()
-						.peek(ft -> fillContextMap(ft))
+						.peek(ft -> fillContextMap(ft, contexts))
 						.map(ft -> getRelFundingTree(ft))
 						.collect(Collectors.toList()));
 			}
@@ -709,13 +727,23 @@ public class XmlRecordFactory implements Serializable {
 			}
 			final DataInfo info = rel.getDataInfo();
+			final String scheme = getScheme(re.getType(), targetType);
+
+			if (StringUtils.isBlank(scheme)) {
+				throw new IllegalArgumentException(String.format("missing scheme for: <%s - %s>", re.getType(), targetType));
+			}
+
+			final String accumulatorName = getRelDescriptor(rel.getRelType(), rel.getSubRelType(), rel.getRelClass());
+			if (accumulators.containsKey(accumulatorName)) {
+				accumulators.get(accumulatorName).add(1);
+			}

 			rels.add(templateFactory.getRel(
 					targetType,
 					rel.getTarget(),
 					Sets.newHashSet(metadata),
-					getInverseRelClass(rel.getRelClass()),
-					getScheme(targetType, re.getType()),
+					rel.getRelClass(),
+					scheme,
 					info));
 		}
 		return rels;
@@ -807,14 +835,14 @@ public class XmlRecordFactory implements Serializable {
 				.collect(Collectors.toList()) : Lists.newArrayList();
 	}

-	private List<String> buildContexts(final String type) {
+	private List<String> buildContexts(final String type, final Set<String> contexts) {
 		final List<String> res = Lists.newArrayList();

 		if ((contextMapper != null) && !contextMapper.isEmpty() && MainEntityType.result.toString().equals(type)) {

 			XMLTag document = XMLDoc.newDocument(true).addRoot("contextRoot");

-			for (final String context : contextes) {
+			for (final String context : contexts) {

 				String id = "";
 				for (final String token : Splitter.on("::").split(context)) {
@@ -882,7 +910,7 @@ public class XmlRecordFactory implements Serializable {
 		return buffer.toString();
 	}

-	private void fillContextMap(final String xmlTree) {
+	private void fillContextMap(final String xmlTree, final Set<String> contexts) {

 		Document fundingPath;
 		try {
@@ -896,7 +924,7 @@ public class XmlRecordFactory implements Serializable {
 			if (funder != null) {

 				final String funderShortName = funder.valueOf("./shortname");
-				contextes.add(funderShortName);
+				contexts.add(funderShortName);

 				contextMapper.put(funderShortName, new ContextDef(funderShortName, funder.valueOf("./name"), "context", "funding"));
 				final Node level0 = fundingPath.selectSingleNode("//funding_level_0");
@@ -905,17 +933,17 @@ public class XmlRecordFactory implements Serializable {
 				contextMapper.put(level0Id, new ContextDef(level0Id, level0.valueOf("./description"), "category", ""));
 				final Node level1 = fundingPath.selectSingleNode("//funding_level_1");
 				if (level1 == null) {
-					contextes.add(level0Id);
+					contexts.add(level0Id);
 				} else {
 					final String level1Id = Joiner.on("::").join(level0Id, level1.valueOf("./name"));
 					contextMapper.put(level1Id, new ContextDef(level1Id, level1.valueOf("./description"), "concept", ""));
 					final Node level2 = fundingPath.selectSingleNode("//funding_level_2");
 					if (level2 == null) {
-						contextes.add(level1Id);
+						contexts.add(level1Id);
 					} else {
 						final String level2Id = Joiner.on("::").join(level1Id, level2.valueOf("./name"));
 						contextMapper.put(level2Id, new ContextDef(level2Id, level2.valueOf("./description"), "concept", ""));
-						contextes.add(level2Id);
+						contexts.add(level2Id);
 					}
 				}
 			}
@@ -928,7 +956,7 @@ public class XmlRecordFactory implements Serializable {

 	@SuppressWarnings("unchecked")
-	private String getRelFundingTree(final String xmlTree) {
+	protected static String getRelFundingTree(final String xmlTree) {
 		String funding = "<funding>";
 		try {
 			final Document ftree = new SAXReader().read(new StringReader(xmlTree));
@@ -949,11 +977,11 @@ public class XmlRecordFactory implements Serializable {
 		return funding;
 	}

-	private String getFunderElement(final Document ftree) {
-		final String funderId = ftree.valueOf("//fundingtree/funder/id/text()");
-		final String funderShortName = ftree.valueOf("//fundingtree/funder/shortname/text()");
-		final String funderName = ftree.valueOf("//fundingtree/funder/name/text()");
-		final String funderJurisdiction = ftree.valueOf("//fundingtree/funder/jurisdiction/text()");
+	private static String getFunderElement(final Document ftree) {
+		final String funderId = ftree.valueOf("//fundingtree/funder/id");
+		final String funderShortName = ftree.valueOf("//fundingtree/funder/shortname");
+		final String funderName = ftree.valueOf("//fundingtree/funder/name");
+		final String funderJurisdiction = ftree.valueOf("//fundingtree/funder/jurisdiction");

 		return "<funder id=\"" + escapeXml(funderId) + "\" shortname=\"" + escapeXml(funderShortName) + "\" name=\"" + escapeXml(funderName)
 				+ "\" jurisdiction=\"" + escapeXml(funderJurisdiction) + "\" />";
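A hedged sketch, with assumed names and not taken from the commit, of the counting pattern that XmlRecordFactory now applies: every accumulator is registered under a relType_subRelType_relClass key and incremented once per serialized relation of that kind, so the totals can be read back from the driver after the job.

import org.apache.spark.sql.SparkSession;
import org.apache.spark.util.LongAccumulator;

import java.util.HashMap;
import java.util.Map;

public class RelationAccumulatorSketch {

	public static void main(String[] args) {
		final SparkSession spark = SparkSession.builder().master("local[*]").appName("accumulator-sketch").getOrCreate();

		final Map<String, LongAccumulator> accumulators = new HashMap<>();
		final String name = "resultProject_outcome_produces";
		accumulators.put(name, spark.sparkContext().longAccumulator(name));

		// XmlRecordFactory does the equivalent of this for every relation it serializes
		if (accumulators.containsKey(name)) {
			accumulators.get(name).add(1);
		}

		System.out.println(name + ": " + accumulators.get(name).value()); // prints 1
		spark.stop();
	}
}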
@@ -1,6 +1,7 @@
 [
 	{"paramName":"mt", "paramLongName":"master", "paramDescription": "should be local or yarn", "paramRequired": true},
 	{"paramName":"is", "paramLongName":"isLookupUrl", "paramDescription": "URL of the isLookUp Service", "paramRequired": true},
 	{"paramName":"o", "paramLongName":"outputPath", "paramDescription": "the path used to store temporary output files", "paramRequired": true},
-	{"paramName":"s", "paramLongName":"sourcePath", "paramDescription": "the path of the sequence file to read", "paramRequired": true}
+	{"paramName":"s", "paramLongName":"sourcePath", "paramDescription": "the path of the sequence file to read", "paramRequired": true},
+	{"paramName":"t", "paramLongName":"otherDsTypeId", "paramDescription": "list of datasource types to populate field datasourcetypeui", "paramRequired": true}
 ]
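The value passed via otherDsTypeId is a comma separated list. A minimal sketch of how it ends up as the specialDatasourceTypes set, mirroring the Splitter call in the new XmlRecordFactory constructor; the datasource type ids below are invented placeholders.

import com.google.common.base.Splitter;
import com.google.common.collect.Sets;

import java.util.Set;

public class OtherDsTypeIdExample {

	public static void main(String[] args) {
		// placeholder values, not taken from the commit
		final String otherDsTypeId = "scholarcomminfra, infospace, pubsrepository::mock";

		// same parsing as the new XmlRecordFactory constructor
		final Set<String> specialDatasourceTypes = Sets.newHashSet(Splitter.on(",").trimResults().split(otherDsTypeId));

		System.out.println(specialDatasourceTypes.contains("infospace")); // prints true
	}
}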
@@ -50,9 +50,10 @@
             <class>eu.dnetlib.dhp.graph.SparkXmlRecordBuilderJob</class>
             <jar>dhp-graph-provision-${projectVersion}.jar</jar>
             <spark-opts>
-                --executor-memory ${sparkExecutorMemory}
-                --executor-cores ${sparkExecutorCores}
-                --driver-memory=${sparkDriverMemory}
+                --executor-cores ${sparkExecutorCoresForJoining}
+                --executor-memory ${sparkExecutorMemoryForJoining}
+                --driver-memory=${sparkDriverMemoryForJoining}
+                --conf spark.dynamicAllocation.maxExecutors=${sparkExecutorCoresForJoining}
                 --conf spark.extraListeners="com.cloudera.spark.lineage.NavigatorAppListener"
                 --conf spark.sql.queryExecutionListeners="com.cloudera.spark.lineage.NavigatorQueryListener"
                 --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
@@ -60,6 +61,7 @@
             </spark-opts>
             <arg>-mt</arg> <arg>yarn</arg>
             <arg>-is</arg> <arg>${isLookupUrl}</arg>
+            <arg>-t</arg> <arg>${otherDsTypeId}</arg>
             <arg>--sourcePath</arg><arg>${sourcePath}</arg>
             <arg>--outputPath</arg><arg>${outputPath}</arg>
         </spark>
@@ -77,8 +79,9 @@
             <class>eu.dnetlib.dhp.graph.SparkXmlIndexingJob</class>
             <jar>dhp-graph-provision-${projectVersion}.jar</jar>
             <spark-opts>
-                --executor-memory ${sparkExecutorMemory}
-                --driver-memory=${sparkDriverMemory}
+                --executor-cores ${sparkExecutorCoresForIndexing}
+                --executor-memory ${sparkExecutorMemoryForIndexing}
+                --driver-memory=${sparkDriverMemoryForIndexing}
                 --conf spark.dynamicAllocation.maxExecutors=${sparkExecutorCoresForIndexing}
                 --conf spark.extraListeners="com.cloudera.spark.lineage.NavigatorAppListener"
                 --conf spark.sql.queryExecutionListeners="com.cloudera.spark.lineage.NavigatorQueryListener"
@@ -0,0 +1,38 @@
package eu.dnetlib.dhp.graph;

import org.junit.Before;

import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;

public class GraphJoinerTest {

	private ClassLoader cl = getClass().getClassLoader();
	private Path workingDir;
	private Path inputDir;
	private Path outputDir;

	@Before
	public void before() throws IOException {
		workingDir = Files.createTempDirectory("promote_action_set");
		inputDir = workingDir.resolve("input");
		outputDir = workingDir.resolve("output");
	}

	private static void copyFiles(Path source, Path target) throws IOException {
		Files.list(source).forEach(f -> {
			try {
				if (Files.isDirectory(f)) {
					Path subTarget = Files.createDirectories(target.resolve(f.getFileName()));
					copyFiles(f, subTarget);
				} else {
					Files.copy(f, target.resolve(f.getFileName()));
				}
			} catch (IOException e) {
				e.printStackTrace();
				throw new RuntimeException(e);
			}
		});
	}
}
pom.xml
@@ -76,7 +76,7 @@
 		<dependency>
 			<groupId>junit</groupId>
 			<artifactId>junit</artifactId>
-			<version>4.12</version>
+			<version>${junit.version}</version>
 			<scope>test</scope>
 		</dependency>

@@ -110,6 +110,12 @@
 			<version>${dhp.hadoop.version}</version>
 			<scope>provided</scope>
 		</dependency>
+		<dependency>
+			<groupId>org.apache.hadoop</groupId>
+			<artifactId>hadoop-distcp</artifactId>
+			<version>${dhp.hadoop.version}</version>
+			<scope>provided</scope>
+		</dependency>
 		<dependency>
 			<groupId>org.apache.spark</groupId>
 			<artifactId>spark-core_2.11</artifactId>
@@ -262,6 +268,16 @@
 			<scope>provided</scope>
 		</dependency>

+		<dependency>
+			<groupId>eu.dnetlib</groupId>
+			<artifactId>dnet-actionmanager-common</artifactId>
+			<version>6.0.5</version>
+		</dependency>
+		<dependency>
+			<groupId>eu.dnetlib</groupId>
+			<artifactId>dnet-openaire-data-protos</artifactId>
+			<version>3.9.8-proto250</version>
+		</dependency>
 		<dependency>
 			<groupId>eu.dnetlib</groupId>
 			<artifactId>dnet-pace-core</artifactId>
@@ -481,6 +497,7 @@
 		<dhp.jackson.version>2.9.6</dhp.jackson.version>
 		<dhp.commons.lang.version>3.5</dhp.commons.lang.version>
 		<scala.version>2.11.12</scala.version>
+		<junit.version>4.12</junit.version>
 		<mongodb.driver.version>3.4.2</mongodb.driver.version>
 	</properties>
 </project>