refactoring

2020-04-27 10:57:50 +02:00 · 2020-04-27 10:57:50 +02:00 · 6135096ef1
parent d30e710165
commit 6135096ef1
3 changed files with 517 additions and 260 deletions
--- a/dhp-workflows/dhp-propagation/src/test/java/eu/dnetlib/dhp/countrypropagation/CountryPropagationJobTest.java
+++ b/dhp-workflows/dhp-propagation/src/test/java/eu/dnetlib/dhp/countrypropagation/CountryPropagationJobTest.java
@ -1,10 +1,13 @@
 package eu.dnetlib.dhp.countrypropagation;

-import com.google.gson.Gson;
-import eu.dnetlib.dhp.schema.common.ModelSupport;
+import com.fasterxml.jackson.databind.ObjectMapper;
 import eu.dnetlib.dhp.schema.oaf.Country;
-import eu.dnetlib.dhp.schema.oaf.Datasource;
 import eu.dnetlib.dhp.schema.oaf.Software;
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.util.ArrayList;
+import java.util.List;
 import org.apache.commons.io.FileUtils;
 import org.apache.spark.SparkConf;
 import org.apache.spark.api.java.JavaRDD;
@ -16,18 +19,8 @@ import org.junit.jupiter.api.BeforeAll;
 import org.junit.jupiter.api.Test;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
-
-import com.fasterxml.jackson.databind.ObjectMapper;
-import scala.Array;
 import scala.Tuple2;

-import java.io.IOException;
-import java.nio.file.Files;
-import java.nio.file.Path;
-import java.util.ArrayList;
-import java.util.List;
-import java.util.stream.Collectors;
-
 public class CountryPropagationJobTest {
    private static final Logger log = LoggerFactory.getLogger(CountryPropagationJobTest.class);

@ -54,13 +47,11 @@ public class CountryPropagationJobTest {
        conf.set("spark.sql.warehouse.dir", workingDir.toString());
        conf.set("hive.metastore.warehouse.dir", workingDir.resolve("warehouse").toString());

-        spark = SparkSession
-                .builder()
-                .appName(CountryPropagationJobTest.class.getSimpleName())
-                .config(conf)
-                .getOrCreate();
-
-
+        spark =
+                SparkSession.builder()
+                        .appName(CountryPropagationJobTest.class.getSimpleName())
+                        .config(conf)
+                        .getOrCreate();
    }

    @AfterAll
@ -69,95 +60,190 @@ public class CountryPropagationJobTest {
        spark.stop();
    }

-   @Test
+    @Test
    public void testCountryPropagationSoftware() throws Exception {
-        SparkCountryPropagationJob2.main(new String[]{
-                "-isSparkSessionManaged", Boolean.FALSE.toString(),
-                "-sourcePath", getClass().getResource("/eu/dnetlib/dhp/countrypropagation/sample/software").getPath(),
-                "-hive_metastore_uris", "",
-            "-writeUpdate","false",
-            "-saveGraph","true",
-            "-resultTableName","eu.dnetlib.dhp.schema.oaf.Software",
-            "-outputPath",workingDir.toString() + "/software",
-            "-preparedInfoPath", getClass().getResource("/eu/dnetlib/dhp/countrypropagation/preparedInfo").getPath(),
-        });
+        SparkCountryPropagationJob2.main(
+                new String[] {
+                    "-isSparkSessionManaged",
+                    Boolean.FALSE.toString(),
+                    "-sourcePath",
+                    getClass()
+                            .getResource("/eu/dnetlib/dhp/countrypropagation/sample/software")
+                            .getPath(),
+                    "-hive_metastore_uris",
+                    "",
+                    "-saveGraph",
+                    "true",
+                    "-resultTableName",
+                    "eu.dnetlib.dhp.schema.oaf.Software",
+                    "-outputPath",
+                    workingDir.toString() + "/software",
+                    "-preparedInfoPath",
+                    getClass()
+                            .getResource("/eu/dnetlib/dhp/countrypropagation/preparedInfo")
+                            .getPath(),
+                });

        final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext());

-        JavaRDD<Software> tmp = sc.textFile(workingDir.toString()+"/software")
-                .map(item -> OBJECT_MAPPER.readValue(item, Software.class));
+        JavaRDD<Software> tmp =
+                sc.textFile(workingDir.toString() + "/software")
+                        .map(item -> OBJECT_MAPPER.readValue(item, Software.class));

-        //tmp.map(s -> new Gson().toJson(s)).foreach(s -> System.out.println(s));
+        // tmp.map(s -> new Gson().toJson(s)).foreach(s -> System.out.println(s));

        Assertions.assertEquals(10, tmp.count());

-       Dataset<Software> verificationDs = spark.createDataset(tmp.rdd(), Encoders.bean(Software.class));
+        Dataset<Software> verificationDs =
+                spark.createDataset(tmp.rdd(), Encoders.bean(Software.class));

-       Assertions.assertEquals(6, verificationDs.filter("size(country) > 0").count());
-       Assertions.assertEquals(3, verificationDs.filter("size(country) = 1").count());
-       Assertions.assertEquals(3, verificationDs.filter("size(country) = 2").count());
-       Assertions.assertEquals(0, verificationDs.filter("size(country) > 2").count());
+        Assertions.assertEquals(6, verificationDs.filter("size(country) > 0").count());
+        Assertions.assertEquals(3, verificationDs.filter("size(country) = 1").count());
+        Assertions.assertEquals(3, verificationDs.filter("size(country) = 2").count());
+        Assertions.assertEquals(0, verificationDs.filter("size(country) > 2").count());

+        Dataset<String> countryExploded =
+                verificationDs
+                        .flatMap(row -> row.getCountry().iterator(), Encoders.bean(Country.class))
+                        .map(c -> c.getClassid(), Encoders.STRING());

-       Dataset<String> countryExploded = verificationDs
-               .flatMap(row -> row.getCountry().iterator(), Encoders.bean(Country.class))
-               .map(c -> c.getClassid(), Encoders.STRING());
+        Assertions.assertEquals(9, countryExploded.count());

-       Assertions.assertEquals(9, countryExploded.count());
+        Assertions.assertEquals(1, countryExploded.filter("value = 'FR'").count());
+        Assertions.assertEquals(1, countryExploded.filter("value = 'TR'").count());
+        Assertions.assertEquals(2, countryExploded.filter("value = 'IT'").count());
+        Assertions.assertEquals(1, countryExploded.filter("value = 'US'").count());
+        Assertions.assertEquals(1, countryExploded.filter("value = 'MX'").count());
+        Assertions.assertEquals(1, countryExploded.filter("value = 'CH'").count());
+        Assertions.assertEquals(2, countryExploded.filter("value = 'JP'").count());

-       Assertions.assertEquals(1, countryExploded.filter("value = 'FR'").count());
-       Assertions.assertEquals(1, countryExploded.filter("value = 'TR'").count());
-       Assertions.assertEquals(2, countryExploded.filter("value = 'IT'").count());
-       Assertions.assertEquals(1, countryExploded.filter("value = 'US'").count());
-       Assertions.assertEquals(1, countryExploded.filter("value = 'MX'").count());
-       Assertions.assertEquals(1, countryExploded.filter("value = 'CH'").count());
-       Assertions.assertEquals(2, countryExploded.filter("value = 'JP'").count());
+        // Flatten each Software record into (result id, country classid) pairs, one per
+        // entry of its country list, so the assertions that follow can check which
+        // country codes are attached to which result.
+        Dataset<Tuple2<String, String>> countryExplodedWithCountryclassid =
+                verificationDs.flatMap(
+                        row -> {
+                            // Parameterized list: the element type is checked at compile time.
+                            List<Tuple2<String, String>> pairs = new ArrayList<>();
+                            for (Country country : row.getCountry()) {
+                                pairs.add(new Tuple2<>(row.getId(), country.getClassid()));
+                            }
+                            return pairs.iterator();
+                        },
+                        Encoders.tuple(Encoders.STRING(), Encoders.STRING()));

-       Dataset<Tuple2<String, String>> countryExplodedWithCountryclassid = verificationDs
-               .flatMap(row -> {
-                   List<Tuple2<String, String>> prova = new ArrayList();
-                   List<Country> country_list = row.getCountry();
-                   country_list.stream().forEach(c -> prova.add(new Tuple2<>(row.getId(), c.getClassid())));
-                   return prova.iterator();
-               }, Encoders.tuple(Encoders.STRING(), Encoders.STRING()));
+        Assertions.assertEquals(9, countryExplodedWithCountryclassid.count());

-       Assertions.assertEquals(9, countryExplodedWithCountryclassid.count());
+        countryExplodedWithCountryclassid.show(false);
+        Assertions.assertEquals(
+                1,
+                countryExplodedWithCountryclassid
+                        .filter(
+                                "_1 = '50|od______1582::6e7a9b21a2feef45673890432af34244' and _2 = 'FR' ")
+                        .count());
+        Assertions.assertEquals(
+                1,
+                countryExplodedWithCountryclassid
+                        .filter(
+                                "_1 = '50|dedup_wf_001::40ea2f24181f6ae77b866ebcbffba523' and _2 = 'TR' ")
+                        .count());
+        Assertions.assertEquals(
+                2,
+                countryExplodedWithCountryclassid
+                        .filter(
+                                "_1 = '50|od______1106::2b7ca9726230be8e862be224fd463ac4' and (_2 = 'IT' or _2 = 'MX') ")
+                        .count());
+        Assertions.assertEquals(
+                2,
+                countryExplodedWithCountryclassid
+                        .filter(
+                                "_1 = '50|od_______935::46a0ad9964171c3dd13373f5427b9a1c' and (_2 = 'IT' or _2 = 'US') ")
+                        .count());
+        Assertions.assertEquals(
+                1,
+                countryExplodedWithCountryclassid
+                        .filter(
+                                "_1 = '50|dedup_wf_001::b67bc915603fc01e445f2b5888ba7218' and _2 = 'JP'")
+                        .count());
+        Assertions.assertEquals(
+                2,
+                countryExplodedWithCountryclassid
+                        .filter(
+                                "_1 = '50|od_______109::f375befa62a741e9250e55bcfa88f9a6' and (_2 = 'CH' or _2 = 'JP') ")
+                        .count());

-       countryExplodedWithCountryclassid.show(false);
-       Assertions.assertEquals(1, countryExplodedWithCountryclassid.filter("_1 = '50|od______1582::6e7a9b21a2feef45673890432af34244' and _2 = 'FR' ").count());
-       Assertions.assertEquals(1, countryExplodedWithCountryclassid.filter("_1 = '50|dedup_wf_001::40ea2f24181f6ae77b866ebcbffba523' and _2 = 'TR' ").count());
-       Assertions.assertEquals(2, countryExplodedWithCountryclassid.filter("_1 = '50|od______1106::2b7ca9726230be8e862be224fd463ac4' and (_2 = 'IT' or _2 = 'MX') ").count());
-       Assertions.assertEquals(2, countryExplodedWithCountryclassid.filter("_1 = '50|od_______935::46a0ad9964171c3dd13373f5427b9a1c' and (_2 = 'IT' or _2 = 'US') ").count());
-       Assertions.assertEquals(1, countryExplodedWithCountryclassid.filter("_1 = '50|dedup_wf_001::b67bc915603fc01e445f2b5888ba7218' and _2 = 'JP'").count());
-       Assertions.assertEquals(2, countryExplodedWithCountryclassid.filter("_1 = '50|od_______109::f375befa62a741e9250e55bcfa88f9a6' and (_2 = 'CH' or _2 = 'JP') ").count());
+        // Flatten each Software record into (result id, country classname) pairs, one
+        // per entry of its country list, so the assertions that follow can check the
+        // country names attached to each result.
+        Dataset<Tuple2<String, String>> countryExplodedWithCountryclassname =
+                verificationDs.flatMap(
+                        row -> {
+                            // Parameterized list: the element type is checked at compile time.
+                            List<Tuple2<String, String>> pairs = new ArrayList<>();
+                            for (Country country : row.getCountry()) {
+                                pairs.add(
+                                        new Tuple2<>(row.getId(), country.getClassname()));
+                            }
+                            return pairs.iterator();
+                        },
+                        Encoders.tuple(Encoders.STRING(), Encoders.STRING()));

-       Dataset<Tuple2<String, String>> countryExplodedWithCountryclassname = verificationDs
-               .flatMap(row -> {
-                   List<Tuple2<String, String>> prova = new ArrayList();
-                   List<Country> country_list = row.getCountry();
-                   country_list.stream().forEach(c -> prova.add(new Tuple2<>(row.getId(), c.getClassname())));
-                   return prova.iterator();
-               }, Encoders.tuple(Encoders.STRING(), Encoders.STRING()));
+        countryExplodedWithCountryclassname.show(false);
+        Assertions.assertEquals(
+                1,
+                countryExplodedWithCountryclassname
+                        .filter(
+                                "_1 = '50|od______1582::6e7a9b21a2feef45673890432af34244' and _2 = 'France' ")
+                        .count());
+        Assertions.assertEquals(
+                1,
+                countryExplodedWithCountryclassname
+                        .filter(
+                                "_1 = '50|dedup_wf_001::40ea2f24181f6ae77b866ebcbffba523' and _2 = 'Turkey' ")
+                        .count());
+        Assertions.assertEquals(
+                2,
+                countryExplodedWithCountryclassname
+                        .filter(
+                                "_1 = '50|od______1106::2b7ca9726230be8e862be224fd463ac4' and (_2 = 'Italy' or _2 = 'Mexico') ")
+                        .count());
+        Assertions.assertEquals(
+                2,
+                countryExplodedWithCountryclassname
+                        .filter(
+                                "_1 = '50|od_______935::46a0ad9964171c3dd13373f5427b9a1c' and (_2 = 'Italy' or _2 = 'United States') ")
+                        .count());
+        Assertions.assertEquals(
+                1,
+                countryExplodedWithCountryclassname
+                        .filter(
+                                "_1 = '50|dedup_wf_001::b67bc915603fc01e445f2b5888ba7218' and _2 = 'Japan' ")
+                        .count());
+        Assertions.assertEquals(
+                2,
+                countryExplodedWithCountryclassname
+                        .filter(
+                                "_1 = '50|od_______109::f375befa62a741e9250e55bcfa88f9a6' and (_2 = 'Switzerland' or _2 = 'Japan') ")
+                        .count());

-       countryExplodedWithCountryclassname.show(false);
-       Assertions.assertEquals(1, countryExplodedWithCountryclassname.filter("_1 = '50|od______1582::6e7a9b21a2feef45673890432af34244' and _2 = 'France' ").count());
-       Assertions.assertEquals(1, countryExplodedWithCountryclassname.filter("_1 = '50|dedup_wf_001::40ea2f24181f6ae77b866ebcbffba523' and _2 = 'Turkey' ").count());
-       Assertions.assertEquals(2, countryExplodedWithCountryclassname.filter("_1 = '50|od______1106::2b7ca9726230be8e862be224fd463ac4' and (_2 = 'Italy' or _2 = 'Mexico') ").count());
-       Assertions.assertEquals(2, countryExplodedWithCountryclassname.filter("_1 = '50|od_______935::46a0ad9964171c3dd13373f5427b9a1c' and (_2 = 'Italy' or _2 = 'United States') ").count());
-       Assertions.assertEquals(1, countryExplodedWithCountryclassname.filter("_1 = '50|dedup_wf_001::b67bc915603fc01e445f2b5888ba7218' and _2 = 'Japan' ").count());
-       Assertions.assertEquals(2, countryExplodedWithCountryclassname.filter("_1 = '50|od_______109::f375befa62a741e9250e55bcfa88f9a6' and (_2 = 'Switzerland' or _2 = 'Japan') ").count());
-
-       Dataset<Tuple2<String, String>> countryExplodedWithCountryProvenance = verificationDs
-               .flatMap(row -> {
-                   List<Tuple2<String, String>> prova = new ArrayList();
-                   List<Country> country_list = row.getCountry();
-                   country_list.stream().forEach(c -> prova.add(new Tuple2<>(row.getId(), c.getDataInfo().getInferenceprovenance())));
-                   return prova.iterator();
-               }, Encoders.tuple(Encoders.STRING(), Encoders.STRING()));
-
-       Assertions.assertEquals(7, countryExplodedWithCountryProvenance.filter("_2 = 'propagation'").count());
-   }
+        // Flatten each Software record into (result id, inference provenance) pairs, one
+        // per country entry, so the number of 'propagation' countries can be counted.
+        Dataset<Tuple2<String, String>> countryExplodedWithCountryProvenance =
+                verificationDs.flatMap(
+                        row -> {
+                            // Parameterized list: the element type is checked at compile time.
+                            List<Tuple2<String, String>> pairs = new ArrayList<>();
+                            for (Country country : row.getCountry()) {
+                                pairs.add(
+                                        new Tuple2<>(
+                                                row.getId(),
+                                                country.getDataInfo().getInferenceprovenance()));
+                            }
+                            return pairs.iterator();
+                        },
+                        Encoders.tuple(Encoders.STRING(), Encoders.STRING()));

+        Assertions.assertEquals(
+                7, countryExplodedWithCountryProvenance.filter("_2 = 'propagation'").count());
+    }
 }
-
-
--- a/dhp-workflows/dhp-propagation/src/test/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/OrcidPropagationJobTest.java
+++ b/dhp-workflows/dhp-propagation/src/test/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/OrcidPropagationJobTest.java
@ -1,8 +1,10 @@
 package eu.dnetlib.dhp.orcidtoresultfromsemrel;

 import com.fasterxml.jackson.databind.ObjectMapper;
-import eu.dnetlib.dhp.resulttoorganizationfrominstrepo.Result2OrganizationJobTest;
 import eu.dnetlib.dhp.schema.oaf.Dataset;
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Path;
 import org.apache.commons.io.FileUtils;
 import org.apache.spark.SparkConf;
 import org.apache.spark.api.java.JavaRDD;
@ -17,23 +19,18 @@ import org.junit.jupiter.api.Test;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;

-import java.io.IOException;
-import java.nio.file.Files;
-import java.nio.file.Path;
-
 public class OrcidPropagationJobTest {

-    private static final Logger log = LoggerFactory.getLogger(Result2OrganizationJobTest.class);
+    private static final Logger log = LoggerFactory.getLogger(OrcidPropagationJobTest.class);

    private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();

-    private static final ClassLoader cl = Result2OrganizationJobTest.class.getClassLoader();
+    private static final ClassLoader cl = OrcidPropagationJobTest.class.getClassLoader();

    private static SparkSession spark;

    private static Path workingDir;

-
    @BeforeAll
    public static void beforeAll() throws IOException {
        workingDir = Files.createTempDirectory(OrcidPropagationJobTest.class.getSimpleName());
@ -49,13 +46,11 @@ public class OrcidPropagationJobTest {
        conf.set("spark.sql.warehouse.dir", workingDir.toString());
        conf.set("hive.metastore.warehouse.dir", workingDir.resolve("warehouse").toString());

-
-        spark = SparkSession
-                .builder()
-                .appName(OrcidPropagationJobTest.class.getSimpleName())
-                .config(conf)
-                .getOrCreate();
-
+        spark =
+                SparkSession.builder()
+                        .appName(OrcidPropagationJobTest.class.getSimpleName())
+                        .config(conf)
+                        .getOrCreate();
    }

    @AfterAll
@ -64,132 +59,194 @@ public class OrcidPropagationJobTest {
        spark.stop();
    }

-
    @Test
-    public void noUpdateTest()throws Exception{
-        SparkOrcidToResultFromSemRelJob3.main(new String[]{
-                "-isTest", Boolean.TRUE.toString(),
-                "-isSparkSessionManaged", Boolean.FALSE.toString(),
-                "-sourcePath", getClass().getResource("/eu/dnetlib/dhp/orcidtoresultfromsemrel/sample/noupdate").getPath(),
-                "-hive_metastore_uris", "",
-                "-saveGraph","true",
-                "-resultTableName","eu.dnetlib.dhp.schema.oaf.Dataset",
-                "-outputPath",workingDir.toString() + "/dataset",
-                "-possibleUpdatesPath", getClass().getResource("/eu/dnetlib/dhp/orcidtoresultfromsemrel/preparedInfo/mergedOrcidAssoc").getPath()
-        });
+    public void noUpdateTest() throws Exception {
+        SparkOrcidToResultFromSemRelJob3.main(
+                new String[] {
+                    "-isTest",
+                    Boolean.TRUE.toString(),
+                    "-isSparkSessionManaged",
+                    Boolean.FALSE.toString(),
+                    "-sourcePath",
+                    getClass()
+                            .getResource("/eu/dnetlib/dhp/orcidtoresultfromsemrel/sample/noupdate")
+                            .getPath(),
+                    "-hive_metastore_uris",
+                    "",
+                    "-saveGraph",
+                    "true",
+                    "-resultTableName",
+                    "eu.dnetlib.dhp.schema.oaf.Dataset",
+                    "-outputPath",
+                    workingDir.toString() + "/dataset",
+                    "-possibleUpdatesPath",
+                    getClass()
+                            .getResource(
+                                    "/eu/dnetlib/dhp/orcidtoresultfromsemrel/preparedInfo/mergedOrcidAssoc")
+                            .getPath()
+                });

        final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext());

-        JavaRDD<Dataset> tmp = sc.textFile(workingDir.toString()+"/dataset")
-                .map(item -> OBJECT_MAPPER.readValue(item, Dataset.class));
+        JavaRDD<Dataset> tmp =
+                sc.textFile(workingDir.toString() + "/dataset")
+                        .map(item -> OBJECT_MAPPER.readValue(item, Dataset.class));

-        //tmp.map(s -> new Gson().toJson(s)).foreach(s -> System.out.println(s));
+        // tmp.map(s -> new Gson().toJson(s)).foreach(s -> System.out.println(s));

        Assertions.assertEquals(10, tmp.count());

-        org.apache.spark.sql.Dataset<Dataset> verificationDataset = spark.createDataset(tmp.rdd(), Encoders.bean(Dataset.class));
+        org.apache.spark.sql.Dataset<Dataset> verificationDataset =
+                spark.createDataset(tmp.rdd(), Encoders.bean(Dataset.class));

        verificationDataset.createOrReplaceTempView("dataset");

-        String query = "select id " +
-                "from dataset " +
-                "lateral view explode(author) a as MyT " +
-                "lateral view explode(MyT.pid) p as MyP " +
-                "where MyP.datainfo.inferenceprovenance = 'propagation'";
+        String query =
+                "select id "
+                        + "from dataset "
+                        + "lateral view explode(author) a as MyT "
+                        + "lateral view explode(MyT.pid) p as MyP "
+                        + "where MyP.datainfo.inferenceprovenance = 'propagation'";

        Assertions.assertEquals(0, spark.sql(query).count());
    }

    @Test
-    public void oneUpdateTest() throws Exception{
-        SparkOrcidToResultFromSemRelJob3.main(new String[]{
-                "-isTest", Boolean.TRUE.toString(),
-                "-isSparkSessionManaged", Boolean.FALSE.toString(),
-                "-sourcePath", getClass().getResource("/eu/dnetlib/dhp/orcidtoresultfromsemrel/sample/oneupdate").getPath(),
-                "-hive_metastore_uris", "",
-                "-saveGraph","true",
-                "-resultTableName","eu.dnetlib.dhp.schema.oaf.Dataset",
-                "-outputPath",workingDir.toString() + "/dataset",
-                "-possibleUpdatesPath", getClass().getResource("/eu/dnetlib/dhp/orcidtoresultfromsemrel/preparedInfo/mergedOrcidAssoc").getPath()
-        });
+    public void oneUpdateTest() throws Exception {
+        SparkOrcidToResultFromSemRelJob3.main(
+                new String[] {
+                    "-isTest",
+                    Boolean.TRUE.toString(),
+                    "-isSparkSessionManaged",
+                    Boolean.FALSE.toString(),
+                    "-sourcePath",
+                    getClass()
+                            .getResource("/eu/dnetlib/dhp/orcidtoresultfromsemrel/sample/oneupdate")
+                            .getPath(),
+                    "-hive_metastore_uris",
+                    "",
+                    "-saveGraph",
+                    "true",
+                    "-resultTableName",
+                    "eu.dnetlib.dhp.schema.oaf.Dataset",
+                    "-outputPath",
+                    workingDir.toString() + "/dataset",
+                    "-possibleUpdatesPath",
+                    getClass()
+                            .getResource(
+                                    "/eu/dnetlib/dhp/orcidtoresultfromsemrel/preparedInfo/mergedOrcidAssoc")
+                            .getPath()
+                });

        final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext());

-        JavaRDD<Dataset> tmp = sc.textFile(workingDir.toString()+"/dataset")
-                .map(item -> OBJECT_MAPPER.readValue(item, Dataset.class));
+        JavaRDD<Dataset> tmp =
+                sc.textFile(workingDir.toString() + "/dataset")
+                        .map(item -> OBJECT_MAPPER.readValue(item, Dataset.class));

-        //tmp.map(s -> new Gson().toJson(s)).foreach(s -> System.out.println(s));
+        // tmp.map(s -> new Gson().toJson(s)).foreach(s -> System.out.println(s));

        Assertions.assertEquals(10, tmp.count());

-        org.apache.spark.sql.Dataset<Dataset> verificationDataset = spark.createDataset(tmp.rdd(), Encoders.bean(Dataset.class));
+        org.apache.spark.sql.Dataset<Dataset> verificationDataset =
+                spark.createDataset(tmp.rdd(), Encoders.bean(Dataset.class));

        verificationDataset.createOrReplaceTempView("dataset");

-        String query = "select id, MyT.name name, MyT.surname surname, MyP.value pid, MyP.qualifier.classid pidType " +
-                "from dataset " +
-                "lateral view explode(author) a as MyT " +
-                "lateral view explode(MyT.pid) p as MyP " +
-                "where MyP.datainfo.inferenceprovenance = 'propagation'";
+        String query =
+                "select id, MyT.name name, MyT.surname surname, MyP.value pid, MyP.qualifier.classid pidType "
+                        + "from dataset "
+                        + "lateral view explode(author) a as MyT "
+                        + "lateral view explode(MyT.pid) p as MyP "
+                        + "where MyP.datainfo.inferenceprovenance = 'propagation'";

        org.apache.spark.sql.Dataset<Row> propagatedAuthors = spark.sql(query);

        Assertions.assertEquals(1, propagatedAuthors.count());

-        Assertions.assertEquals(1, propagatedAuthors.filter("id = '50|dedup_wf_001::95b033c0c3961f6a1cdcd41a99a9632e' " +
-                "and name = 'Vajinder' and surname = 'Kumar' and pidType = 'ORCID'").count());
+        Assertions.assertEquals(
+                1,
+                propagatedAuthors
+                        .filter(
+                                "id = '50|dedup_wf_001::95b033c0c3961f6a1cdcd41a99a9632e' "
+                                        + "and name = 'Vajinder' and surname = 'Kumar' and pidType = 'ORCID'")
+                        .count());

        Assertions.assertEquals(1, propagatedAuthors.filter("pid = '0000-0002-8825-3517'").count());
-
    }

    @Test
-    public void twoUpdatesTest() throws Exception{
-        SparkOrcidToResultFromSemRelJob3.main(new String[]{
-                "-isTest", Boolean.TRUE.toString(),
-                "-isSparkSessionManaged", Boolean.FALSE.toString(),
-                "-sourcePath", getClass().getResource("/eu/dnetlib/dhp/orcidtoresultfromsemrel/sample/twoupdates").getPath(),
-                "-hive_metastore_uris", "",
-                "-saveGraph","true",
-                "-resultTableName","eu.dnetlib.dhp.schema.oaf.Dataset",
-                "-outputPath",workingDir.toString() + "/dataset",
-                "-possibleUpdatesPath", getClass().getResource("/eu/dnetlib/dhp/orcidtoresultfromsemrel/preparedInfo/mergedOrcidAssoc").getPath()
-        });
+    public void twoUpdatesTest() throws Exception {
+        SparkOrcidToResultFromSemRelJob3.main(
+                new String[] {
+                    "-isTest",
+                    Boolean.TRUE.toString(),
+                    "-isSparkSessionManaged",
+                    Boolean.FALSE.toString(),
+                    "-sourcePath",
+                    getClass()
+                            .getResource(
+                                    "/eu/dnetlib/dhp/orcidtoresultfromsemrel/sample/twoupdates")
+                            .getPath(),
+                    "-hive_metastore_uris",
+                    "",
+                    "-saveGraph",
+                    "true",
+                    "-resultTableName",
+                    "eu.dnetlib.dhp.schema.oaf.Dataset",
+                    "-outputPath",
+                    workingDir.toString() + "/dataset",
+                    "-possibleUpdatesPath",
+                    getClass()
+                            .getResource(
+                                    "/eu/dnetlib/dhp/orcidtoresultfromsemrel/preparedInfo/mergedOrcidAssoc")
+                            .getPath()
+                });

        final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext());

-        JavaRDD<Dataset> tmp = sc.textFile(workingDir.toString()+"/dataset")
-                .map(item -> OBJECT_MAPPER.readValue(item, Dataset.class));
+        JavaRDD<Dataset> tmp =
+                sc.textFile(workingDir.toString() + "/dataset")
+                        .map(item -> OBJECT_MAPPER.readValue(item, Dataset.class));

        Assertions.assertEquals(10, tmp.count());

-        org.apache.spark.sql.Dataset<Dataset> verificationDataset = spark.createDataset(tmp.rdd(), Encoders.bean(Dataset.class));
+        org.apache.spark.sql.Dataset<Dataset> verificationDataset =
+                spark.createDataset(tmp.rdd(), Encoders.bean(Dataset.class));

        verificationDataset.createOrReplaceTempView("dataset");

-        String query = "select id, MyT.name name, MyT.surname surname, MyP.value pid, MyP.qualifier.classid pidType " +
-                "from dataset " +
-                "lateral view explode(author) a as MyT " +
-                "lateral view explode(MyT.pid) p as MyP " +
-                "where MyP.datainfo.inferenceprovenance = 'propagation'";
+        String query =
+                "select id, MyT.name name, MyT.surname surname, MyP.value pid, MyP.qualifier.classid pidType "
+                        + "from dataset "
+                        + "lateral view explode(author) a as MyT "
+                        + "lateral view explode(MyT.pid) p as MyP "
+                        + "where MyP.datainfo.inferenceprovenance = 'propagation'";

        org.apache.spark.sql.Dataset<Row> propagatedAuthors = spark.sql(query);

        Assertions.assertEquals(2, propagatedAuthors.count());

-        Assertions.assertEquals(1, propagatedAuthors.filter("name = 'Marc' and surname = 'Schmidtmann'").count());
-        Assertions.assertEquals(1, propagatedAuthors.filter("name = 'Ruediger' and surname = 'Beckhaus'").count());
+        Assertions.assertEquals(
+                1, propagatedAuthors.filter("name = 'Marc' and surname = 'Schmidtmann'").count());
+        Assertions.assertEquals(
+                1, propagatedAuthors.filter("name = 'Ruediger' and surname = 'Beckhaus'").count());

-        query = "select id, MyT.name name, MyT.surname surname, MyP.value pid ,MyP.qualifier.classid pidType " +
-                "from dataset " +
-                "lateral view explode(author) a as MyT " +
-                "lateral view explode(MyT.pid) p as MyP ";
+        query =
+                "select id, MyT.name name, MyT.surname surname, MyP.value pid ,MyP.qualifier.classid pidType "
+                        + "from dataset "
+                        + "lateral view explode(author) a as MyT "
+                        + "lateral view explode(MyT.pid) p as MyP ";

        org.apache.spark.sql.Dataset<Row> authorsExplodedPids = spark.sql(query);

-        Assertions.assertEquals(2, authorsExplodedPids.filter("name = 'Marc' and surname = 'Schmidtmann'").count());
-        Assertions.assertEquals(1, authorsExplodedPids.filter("name = 'Marc' and surname = 'Schmidtmann' and pidType = 'MAG Identifier'").count());
-
+        Assertions.assertEquals(
+                2, authorsExplodedPids.filter("name = 'Marc' and surname = 'Schmidtmann'").count());
+        Assertions.assertEquals(
+                1,
+                authorsExplodedPids
+                        .filter(
+                                "name = 'Marc' and surname = 'Schmidtmann' and pidType = 'MAG Identifier'")
+                        .count());
    }
-
 }
--- a/dhp-workflows/dhp-propagation/src/test/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/Result2OrganizationJobTest.java
+++ b/dhp-workflows/dhp-propagation/src/test/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/Result2OrganizationJobTest.java
@@ -2,6 +2,9 @@ package eu.dnetlib.dhp.resulttoorganizationfrominstrepo;

 import com.fasterxml.jackson.databind.ObjectMapper;
 import eu.dnetlib.dhp.schema.oaf.Relation;
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Path;
 import org.apache.commons.io.FileUtils;
 import org.apache.spark.SparkConf;
 import org.apache.spark.api.java.JavaRDD;
@@ -16,10 +19,6 @@ import org.junit.jupiter.api.Test;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;

-import java.io.IOException;
-import java.nio.file.Files;
-import java.nio.file.Path;
-
 public class Result2OrganizationJobTest {

    private static final Logger log = LoggerFactory.getLogger(Result2OrganizationJobTest.class);
@@ -32,10 +31,11 @@ public class Result2OrganizationJobTest {

    private static Path workingDir;

-
    @BeforeAll
    public static void beforeAll() throws IOException {
-        workingDir = Files.createTempDirectory(SparkResultToOrganizationFromIstRepoJob2.class.getSimpleName());
+        workingDir =
+                Files.createTempDirectory(
+                        SparkResultToOrganizationFromIstRepoJob2.class.getSimpleName());
        log.info("using work dir {}", workingDir);

        SparkConf conf = new SparkConf();
@@ -48,13 +48,11 @@ public class Result2OrganizationJobTest {
        conf.set("spark.sql.warehouse.dir", workingDir.toString());
        conf.set("hive.metastore.warehouse.dir", workingDir.resolve("warehouse").toString());

-
-        spark = SparkSession
-                .builder()
-                .appName(SparkResultToOrganizationFromIstRepoJob2.class.getSimpleName())
-                .config(conf)
-                .getOrCreate();
-
+        spark =
+                SparkSession.builder()
+                        .appName(SparkResultToOrganizationFromIstRepoJob2.class.getSimpleName())
+                        .config(conf)
+                        .getOrCreate();
    }

    @AfterAll
@@ -65,109 +63,225 @@ public class Result2OrganizationJobTest {

    /**
     * No modifications done to the sample sets, so that no possible updates are created
+     *
     * @throws Exception
     */
    @Test
    public void NoUpdateTest() throws Exception {
-        SparkResultToOrganizationFromIstRepoJob2.main(new String[]{
-                "-isTest", Boolean.TRUE.toString(),
-                "-isSparkSessionManaged", Boolean.FALSE.toString(),
-                "-sourcePath", getClass().getResource("/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/sample/noupdate_updatenomix").getPath(),
-                "-hive_metastore_uris", "",
-                "-resultTableName","eu.dnetlib.dhp.schema.oaf.Software",
-                "-writeUpdate", "false",
-                "-saveGraph", "true",
-                "-outputPath", workingDir.toString() + "/relation",
-                "-datasourceOrganizationPath", getClass().getResource("/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/noupdate/preparedInfo/datasourceOrganization").getPath(),
-                "-alreadyLinkedPath",getClass().getResource("/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/noupdate/preparedInfo/alreadyLinked").getPath(),
-        });
+        SparkResultToOrganizationFromIstRepoJob2.main(
+                new String[] {
+                    "-isTest",
+                    Boolean.TRUE.toString(),
+                    "-isSparkSessionManaged",
+                    Boolean.FALSE.toString(),
+                    "-sourcePath",
+                    getClass()
+                            .getResource(
+                                    "/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/sample/noupdate_updatenomix")
+                            .getPath(),
+                    "-hive_metastore_uris",
+                    "",
+                    "-resultTableName",
+                    "eu.dnetlib.dhp.schema.oaf.Software",
+                    "-writeUpdate",
+                    "false",
+                    "-saveGraph",
+                    "true",
+                    "-outputPath",
+                    workingDir.toString() + "/relation",
+                    "-datasourceOrganizationPath",
+                    getClass()
+                            .getResource(
+                                    "/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/noupdate/preparedInfo/datasourceOrganization")
+                            .getPath(),
+                    "-alreadyLinkedPath",
+                    getClass()
+                            .getResource(
+                                    "/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/noupdate/preparedInfo/alreadyLinked")
+                            .getPath(),
+                });

        final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext());

-        JavaRDD<Relation> tmp = sc.textFile(workingDir.toString()+"/relation")
-                .map(item -> OBJECT_MAPPER.readValue(item, Relation.class));
+        JavaRDD<Relation> tmp =
+                sc.textFile(workingDir.toString() + "/relation")
+                        .map(item -> OBJECT_MAPPER.readValue(item, Relation.class));

        Assertions.assertEquals(0, tmp.count());
-
-
    }

    /**
-     * Testing set with modified association between datasource and organization. Copied some hostedby collectedfrom
-     * from the software sample set. No intersection with the already linked (all the possible new relations, will became
-     * new relations)
+     * Testing set with modified association between datasource and organization. Copied some
+     * hostedby collectedfrom from the software sample set. No intersection with the already linked
+     * (all the possible new relations will become new relations)
+     *
     * @throws Exception
     */
    @Test
    public void UpdateNoMixTest() throws Exception {
-        SparkResultToOrganizationFromIstRepoJob2.main(new String[]{
-                "-isTest", Boolean.TRUE.toString(),
-                "-isSparkSessionManaged", Boolean.FALSE.toString(),
-                "-sourcePath", getClass().getResource("/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/sample/noupdate_updatenomix").getPath(),
-                "-hive_metastore_uris", "",
-                "-resultTableName","eu.dnetlib.dhp.schema.oaf.Software",
-                "-writeUpdate", "false",
-                "-saveGraph", "true",
-                "-outputPath", workingDir.toString() + "/relation",
-                "-datasourceOrganizationPath", getClass().getResource("/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/updatenomix/preparedInfo/datasourceOrganization").getPath(),
-                "-alreadyLinkedPath",getClass().getResource("/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/updatenomix/preparedInfo/alreadyLinked").getPath(),
-        });
+        SparkResultToOrganizationFromIstRepoJob2.main(
+                new String[] {
+                    "-isTest",
+                    Boolean.TRUE.toString(),
+                    "-isSparkSessionManaged",
+                    Boolean.FALSE.toString(),
+                    "-sourcePath",
+                    getClass()
+                            .getResource(
+                                    "/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/sample/noupdate_updatenomix")
+                            .getPath(),
+                    "-hive_metastore_uris",
+                    "",
+                    "-resultTableName",
+                    "eu.dnetlib.dhp.schema.oaf.Software",
+                    "-writeUpdate",
+                    "false",
+                    "-saveGraph",
+                    "true",
+                    "-outputPath",
+                    workingDir.toString() + "/relation",
+                    "-datasourceOrganizationPath",
+                    getClass()
+                            .getResource(
+                                    "/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/updatenomix/preparedInfo/datasourceOrganization")
+                            .getPath(),
+                    "-alreadyLinkedPath",
+                    getClass()
+                            .getResource(
+                                    "/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/updatenomix/preparedInfo/alreadyLinked")
+                            .getPath(),
+                });

        final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext());

-        JavaRDD<Relation> tmp = sc.textFile(workingDir.toString()+"/relation")
-                .map(item -> OBJECT_MAPPER.readValue(item, Relation.class));
+        JavaRDD<Relation> tmp =
+                sc.textFile(workingDir.toString() + "/relation")
+                        .map(item -> OBJECT_MAPPER.readValue(item, Relation.class));

        Assertions.assertEquals(20, tmp.count());

-        Dataset<Relation> verificationDs = spark.createDataset(tmp.rdd(), Encoders.bean(Relation.class));
-        Assertions.assertEquals(8, verificationDs.filter("target = '20|dedup_wf_001::5168917a6aeeea55269daeac1af2ecd2'").count());
-        Assertions.assertEquals(1, verificationDs.filter("target = '20|opendoar____::124266ebc4ece2934eb80edfda3f2091'").count());
-        Assertions.assertEquals(1, verificationDs.filter("target = '20|opendoar____::4429502fa1936b0941f4647b69b844c8'").count());
+        Dataset<Relation> verificationDs =
+                spark.createDataset(tmp.rdd(), Encoders.bean(Relation.class));
+        Assertions.assertEquals(
+                8,
+                verificationDs
+                        .filter("target = '20|dedup_wf_001::5168917a6aeeea55269daeac1af2ecd2'")
+                        .count());
+        Assertions.assertEquals(
+                1,
+                verificationDs
+                        .filter("target = '20|opendoar____::124266ebc4ece2934eb80edfda3f2091'")
+                        .count());
+        Assertions.assertEquals(
+                1,
+                verificationDs
+                        .filter("target = '20|opendoar____::4429502fa1936b0941f4647b69b844c8'")
+                        .count());

-        Assertions.assertEquals(2, verificationDs.filter("source = '50|dedup_wf_001::b67bc915603fc01e445f2b5888ba7218' and " +
-                "(target = '20|opendoar____::124266ebc4ece2934eb80edfda3f2091' " +
-                "or target = '20|dedup_wf_001::5168917a6aeeea55269daeac1af2ecd2')").count());
+        Assertions.assertEquals(
+                2,
+                verificationDs
+                        .filter(
+                                "source = '50|dedup_wf_001::b67bc915603fc01e445f2b5888ba7218' and "
+                                        + "(target = '20|opendoar____::124266ebc4ece2934eb80edfda3f2091' "
+                                        + "or target = '20|dedup_wf_001::5168917a6aeeea55269daeac1af2ecd2')")
+                        .count());
    }

    @Test
    public void UpdateMixTest() throws Exception {
-        SparkResultToOrganizationFromIstRepoJob2.main(new String[]{
-                "-isTest", Boolean.TRUE.toString(),
-                "-isSparkSessionManaged", Boolean.FALSE.toString(),
-                "-sourcePath", getClass().getResource("/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/sample/updatemix").getPath(),
-                "-hive_metastore_uris", "",
-                "-resultTableName","eu.dnetlib.dhp.schema.oaf.Software",
-                "-writeUpdate", "false",
-                "-saveGraph", "true",
-                "-outputPath", workingDir.toString() + "/relation",
-                "-datasourceOrganizationPath", getClass().getResource("/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/updatemix/preparedInfo/datasourceOrganization").getPath(),
-                "-alreadyLinkedPath",getClass().getResource("/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/updatemix/preparedInfo/alreadyLinked").getPath(),
-        });
+        SparkResultToOrganizationFromIstRepoJob2.main(
+                new String[] {
+                    "-isTest",
+                    Boolean.TRUE.toString(),
+                    "-isSparkSessionManaged",
+                    Boolean.FALSE.toString(),
+                    "-sourcePath",
+                    getClass()
+                            .getResource(
+                                    "/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/sample/updatemix")
+                            .getPath(),
+                    "-hive_metastore_uris",
+                    "",
+                    "-resultTableName",
+                    "eu.dnetlib.dhp.schema.oaf.Software",
+                    "-writeUpdate",
+                    "false",
+                    "-saveGraph",
+                    "true",
+                    "-outputPath",
+                    workingDir.toString() + "/relation",
+                    "-datasourceOrganizationPath",
+                    getClass()
+                            .getResource(
+                                    "/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/updatemix/preparedInfo/datasourceOrganization")
+                            .getPath(),
+                    "-alreadyLinkedPath",
+                    getClass()
+                            .getResource(
+                                    "/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/updatemix/preparedInfo/alreadyLinked")
+                            .getPath(),
+                });

        final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext());

-        JavaRDD<Relation> tmp = sc.textFile(workingDir.toString()+"/relation")
-                .map(item -> OBJECT_MAPPER.readValue(item, Relation.class));
+        JavaRDD<Relation> tmp =
+                sc.textFile(workingDir.toString() + "/relation")
+                        .map(item -> OBJECT_MAPPER.readValue(item, Relation.class));

-        Dataset<Relation> verificationDs = spark.createDataset(tmp.rdd(), Encoders.bean(Relation.class));
+        Dataset<Relation> verificationDs =
+                spark.createDataset(tmp.rdd(), Encoders.bean(Relation.class));

        Assertions.assertEquals(8, verificationDs.count());

-        Assertions.assertEquals(2, verificationDs.filter("source = '50|od_______109::f375befa62a741e9250e55bcfa88f9a6'").count());
-        Assertions.assertEquals(1, verificationDs.filter("source = '50|dedup_wf_001::b67bc915603fc01e445f2b5888ba7218'").count());
-        Assertions.assertEquals(1, verificationDs.filter("source = '50|dedup_wf_001::40ea2f24181f6ae77b866ebcbffba523'").count());
+        Assertions.assertEquals(
+                2,
+                verificationDs
+                        .filter("source = '50|od_______109::f375befa62a741e9250e55bcfa88f9a6'")
+                        .count());
+        Assertions.assertEquals(
+                1,
+                verificationDs
+                        .filter("source = '50|dedup_wf_001::b67bc915603fc01e445f2b5888ba7218'")
+                        .count());
+        Assertions.assertEquals(
+                1,
+                verificationDs
+                        .filter("source = '50|dedup_wf_001::40ea2f24181f6ae77b866ebcbffba523'")
+                        .count());

-        Assertions.assertEquals(1, verificationDs.filter("source = '20|wt__________::a72760363ca885e6bef165804770e00c'").count());
+        Assertions.assertEquals(
+                1,
+                verificationDs
+                        .filter("source = '20|wt__________::a72760363ca885e6bef165804770e00c'")
+                        .count());

-        Assertions.assertEquals(4, verificationDs.filter("relclass = 'hasAuthorInstitution' and substring(source, 1,2) = '50'").count());
-        Assertions.assertEquals(4, verificationDs.filter("relclass = 'isAuthorInstitutionOf' and substring(source, 1,2) = '20'").count());
+        Assertions.assertEquals(
+                4,
+                verificationDs
+                        .filter(
+                                "relclass = 'hasAuthorInstitution' and substring(source, 1,2) = '50'")
+                        .count());
+        Assertions.assertEquals(
+                4,
+                verificationDs
+                        .filter(
+                                "relclass = 'isAuthorInstitutionOf' and substring(source, 1,2) = '20'")
+                        .count());

-        Assertions.assertEquals(4, verificationDs.filter("relclass = 'hasAuthorInstitution' and " +
-                "substring(source, 1,2) = '50' and substring(target, 1, 2) = '20'").count());
-        Assertions.assertEquals(4, verificationDs.filter("relclass = 'isAuthorInstitutionOf' and " +
-                "substring(source, 1,2) = '20' and substring(target, 1, 2) = '50'").count());
+        Assertions.assertEquals(
+                4,
+                verificationDs
+                        .filter(
+                                "relclass = 'hasAuthorInstitution' and "
+                                        + "substring(source, 1,2) = '50' and substring(target, 1, 2) = '20'")
+                        .count());
+        Assertions.assertEquals(
+                4,
+                verificationDs
+                        .filter(
+                                "relclass = 'isAuthorInstitutionOf' and "
+                                        + "substring(source, 1,2) = '20' and substring(target, 1, 2) = '50'")
+                        .count());
    }
-
-
 }