92 changed files with 2656 additions and 13805 deletions
--- a/.gitignore
+++ b/.gitignore
@ -19,5 +19,3 @@
 /build
 spark-warehouse
 /dhp-workflows/dhp-graph-mapper/job-override.properties
-test.properties
-
--- a/dhp-build/dhp-build-assembly-resources/pom.xml
+++ b/dhp-build/dhp-build-assembly-resources/pom.xml
@ -6,7 +6,7 @@
    <parent>
        <groupId>eu.dnetlib</groupId>
        <artifactId>dhp-build</artifactId>
-        <version>4.1.13-SNAPSHOT</version>
+        <version>4.1.7</version>
    </parent>

    <artifactId>dhp-build-assembly-resources</artifactId>
--- a/dhp-build/dhp-build-properties-maven-plugin/pom.xml
+++ b/dhp-build/dhp-build-properties-maven-plugin/pom.xml
@ -6,11 +6,10 @@
    <parent>
        <groupId>eu.dnetlib</groupId>
        <artifactId>dhp-build</artifactId>
-        <version>4.1.13-SNAPSHOT</version>
+        <version>4.1.7</version>
    </parent>

    <artifactId>dhp-build-properties-maven-plugin</artifactId>
-    <version>4.1.13-SNAPSHOT</version>
    <packaging>maven-plugin</packaging>

    <description>This module is a maven plugin implementing custom properties substitutions in the build lifecycle</description>
@ -20,19 +19,16 @@
            <groupId>org.apache.maven</groupId>
            <artifactId>maven-plugin-api</artifactId>
            <version>3.6.3</version>
-            <scope>provided</scope>
        </dependency>
        <dependency>
            <groupId>org.apache.maven</groupId>
            <artifactId>maven-project</artifactId>
            <version>2.2.1</version>
-            <scope>provided</scope>
        </dependency>
        <dependency>
            <groupId>org.apache.maven</groupId>
            <artifactId>maven-artifact</artifactId>
            <version>2.2.1</version>
-            <scope>provided</scope>
        </dependency>

        <dependency>
@ -104,29 +100,6 @@
                </configuration>
            </plugin>
        </plugins>
-
-        <pluginManagement>
-            <plugins>
-                <plugin>
-                    <groupId>org.apache.maven.plugins</groupId>
-                    <artifactId>maven-plugin-plugin</artifactId>
-                    <version>3.2</version>
-                    <configuration>
-                        <skipErrorNoDescriptorsFound>true</skipErrorNoDescriptorsFound>
-                    </configuration>
-                    <executions>
-                        <execution>
-                            <id>mojo-descriptor</id>
-                            <phase>process-classes</phase>
-                            <goals>
-                                <goal>descriptor</goal>
-                            </goals>
-                        </execution>
-                    </executions>
-                </plugin>
-            </plugins>
-        </pluginManagement>
-
    </build>

 </project>
--- a/dhp-build/dhp-build-properties-maven-plugin/src/test/java/eu/dnetlib/maven/plugin/properties/GenerateOoziePropertiesMojoTest.java
+++ b/dhp-build/dhp-build-properties-maven-plugin/src/test/java/eu/dnetlib/maven/plugin/properties/GenerateOoziePropertiesMojoTest.java
@ -8,8 +8,6 @@ import static org.junit.jupiter.api.Assertions.assertNull;

 import org.junit.jupiter.api.*;

-import java.nio.file.Paths;
-
 /** @author mhorst, claudio.atzori */
 public class GenerateOoziePropertiesMojoTest {

@ -68,7 +66,7 @@ public class GenerateOoziePropertiesMojoTest {
 		clearSystemProperties();

 		// given
-		String workflowSourceDir = Paths.get("eu/dnetlib/dhp/").toString();
+		String workflowSourceDir = "eu/dnetlib/dhp/";
 		System.setProperty(PROPERTY_NAME_WF_SOURCE_DIR, workflowSourceDir);

 		// execute
@ -83,14 +81,14 @@ public class GenerateOoziePropertiesMojoTest {

 		clearSystemProperties();
 		// given
-		String workflowSourceDir = Paths.get("eu/dnetlib/dhp/wf/transformers").toString();
+		String workflowSourceDir = "eu/dnetlib/dhp/wf/transformers";
 		System.setProperty(PROPERTY_NAME_WF_SOURCE_DIR, workflowSourceDir);

 		// execute
 		mojo.execute();

 		// assert
-		assertEquals(Paths.get("wf/transformers").toString(), System.getProperty(PROPERTY_NAME_SANDBOX_NAME));
+		assertEquals("wf/transformers", System.getProperty(PROPERTY_NAME_SANDBOX_NAME));
 	}

 	@Test
@ -98,13 +96,13 @@ public class GenerateOoziePropertiesMojoTest {

 		clearSystemProperties();
 		// given
-		String workflowSourceDir = Paths.get("wf/transformers").toString();
+		String workflowSourceDir = "wf/transformers";
 		System.setProperty(PROPERTY_NAME_WF_SOURCE_DIR, workflowSourceDir);

 		// execute
 		mojo.execute();

 		// assert
-		assertEquals(Paths.get("wf/transformers").toString(), System.getProperty(PROPERTY_NAME_SANDBOX_NAME));
+		assertEquals("wf/transformers", System.getProperty(PROPERTY_NAME_SANDBOX_NAME));
 	}
 }
--- a/dhp-build/dhp-build-properties-maven-plugin/test.properties
+++ b/dhp-build/dhp-build-properties-maven-plugin/test.properties
@ -0,0 +1,2 @@
+# Mon May 03 16:05:14 CEST 2021
+projectPropertyKey=projectPropertyValue
--- a/dhp-build/dhp-code-style/pom.xml
+++ b/dhp-build/dhp-code-style/pom.xml
@ -5,7 +5,7 @@

    <groupId>eu.dnetlib</groupId>
    <artifactId>dhp-code-style</artifactId>
-    <version>4.1.13-SNAPSHOT</version>
+    <version>4.1.7</version>

    <packaging>jar</packaging>

--- a/dhp-build/pom.xml
+++ b/dhp-build/pom.xml
@ -4,7 +4,7 @@
 	<parent>
 		<groupId>eu.dnetlib</groupId>
 		<artifactId>dnet-dedup</artifactId>
-		<version>4.1.13-SNAPSHOT</version>
+		<version>4.1.7</version>
 		<relativePath>../pom.xml</relativePath>
 	</parent>
 	<artifactId>dhp-build</artifactId>
--- a/dnet-dedup-test/job-override.properties
+++ b/dnet-dedup-test/job-override.properties
@ -1,6 +1,5 @@
-useTree = true
-entitiesPath = /user/michele.debonis/lda_experiments/authors_pubmed
-workingPath = /user/michele.debonis/authors_dedup/gt2_dedup
-numPartitions = 1000
-dedupConfPath = /user/michele.debonis/lda_experiments/authors.fdup.gt2.conf.json
-groundTruthFieldJPath = $.orcid
+entitiesPath = /tmp/prod_provision/graph/01_graph_raw/publication
+workingPath = /user/michele.debonis/erf_test/workingdir
+dedupConfPath = /user/michele.debonis/erf_test/pubs.tree.conf.json
+numPartitions = 20
+useTree = true
--- a/dnet-dedup-test/pom.xml
+++ b/dnet-dedup-test/pom.xml
@ -6,7 +6,7 @@
    <parent>
        <groupId>eu.dnetlib</groupId>
        <artifactId>dnet-dedup</artifactId>
-        <version>4.1.13-SNAPSHOT</version>
+        <version>4.1.7</version>
        <relativePath>../pom.xml</relativePath>
    </parent>

@ -132,17 +132,6 @@
            <groupId>com.jayway.jsonpath</groupId>
            <artifactId>json-path</artifactId>
        </dependency>
-        <dependency>
-            <groupId>org.mockito</groupId>
-            <artifactId>mockito-core</artifactId>
-            <scope>test</scope>
-        </dependency>
-
-        <dependency>
-            <groupId>org.mockito</groupId>
-            <artifactId>mockito-junit-jupiter</artifactId>
-            <scope>test</scope>
-        </dependency>

    </dependencies>

--- a/dnet-dedup-test/src/main/java/eu/dnetlib/Deduper.java
+++ b/dnet-dedup-test/src/main/java/eu/dnetlib/Deduper.java
@ -1,24 +1,37 @@
 package eu.dnetlib;

+import com.fasterxml.jackson.databind.ObjectMapper;
 import com.google.common.hash.Hashing;
-import eu.dnetlib.graph.JavaGraphProcessor;
+import eu.dnetlib.graph.GraphProcessor;
 import eu.dnetlib.pace.config.DedupConfig;
+import eu.dnetlib.pace.config.WfConfig;
+import eu.dnetlib.pace.model.Field;
 import eu.dnetlib.pace.model.MapDocument;
-import eu.dnetlib.pace.util.BlockProcessorForTesting;
+import eu.dnetlib.pace.model.MapDocumentComparator;
+import eu.dnetlib.pace.tree.JsonListMatch;
+import eu.dnetlib.pace.tree.LevensteinTitle;
+import eu.dnetlib.pace.tree.SizeMatch;
+import eu.dnetlib.pace.tree.TitleVersionMatch;
+import eu.dnetlib.pace.tree.support.TreeProcessor;
+import eu.dnetlib.pace.util.BlockProcessor;
 import eu.dnetlib.pace.util.MapDocumentUtil;
+import eu.dnetlib.pace.util.Reporter;
 import eu.dnetlib.pace.utils.Utility;
 import eu.dnetlib.reporter.SparkReporter;
 import eu.dnetlib.support.Block;
 import eu.dnetlib.support.ConnectedComponent;
 import eu.dnetlib.support.Relation;
+import org.apache.commons.lang3.StringUtils;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.spark.api.java.JavaPairRDD;
 import org.apache.spark.api.java.JavaRDD;
 import org.apache.spark.api.java.JavaSparkContext;
 import org.apache.spark.api.java.function.MapFunction;
+import org.apache.spark.api.java.function.PairFlatMapFunction;
 import org.apache.spark.api.java.function.PairFunction;
 import org.apache.spark.graphx.Edge;
+import org.apache.spark.rdd.RDD;
 import org.apache.spark.sql.Dataset;
 import org.apache.spark.sql.Encoders;
 import org.apache.spark.sql.SaveMode;
@ -26,6 +39,7 @@ import org.apache.spark.sql.SparkSession;
 import org.apache.spark.util.LongAccumulator;
 import scala.Serializable;
 import scala.Tuple2;
+import scala.math.Ordering;

 import java.nio.charset.Charset;
 import java.util.*;
@ -57,13 +71,14 @@ public class Deduper implements Serializable {
                .reduceByKey((b1, b2) -> Block.from(b1, b2, of, maxQueueSize));
    }

-    public static Iterator<Tuple2<String, String>> ccToMergeRel(Tuple2<String, List<String>> cc, DedupConfig dedupConf) {
-        return cc._2()
+    public static Iterator<Tuple2<String, String>> ccToMergeRel(ConnectedComponent cc, DedupConfig dedupConf) {
+        return cc
+                .getDocs()
                .stream()
                .flatMap(
                        id -> {
                            List<Tuple2<String, String>> tmp = new ArrayList<>();
-                            tmp.add(new Tuple2<>(cc._1(), id));
+                            tmp.add(new Tuple2<>(cc.getCcId(), id));
                            return tmp.stream();
                        })
                .iterator();
@ -83,15 +98,15 @@ public class Deduper implements Serializable {
    }

    public static JavaRDD<Relation> computeRelations(
-            JavaSparkContext context, JavaPairRDD<String, Block> blocks, DedupConfig config, boolean useTree, boolean noMatch) {
+            JavaSparkContext context, JavaPairRDD<String, Block> blocks, DedupConfig config) {
        Map<String, LongAccumulator> accumulators = Utility.constructAccumulator(config, context.sc());

        return blocks
                .flatMapToPair(
                        it -> {
                            final SparkReporter reporter = new SparkReporter(accumulators);
-                            new BlockProcessorForTesting(config)
-                                    .processSortedBlock(it._1(), it._2().getDocuments(), reporter, useTree, noMatch);
+                            new BlockProcessor(config)
+                                    .processSortedBlock(it._1(), it._2().getDocuments(), reporter);
                            return reporter.getRelations().iterator();
                        })
                .mapToPair(it -> new Tuple2<>(it._1() + it._2(), new Relation(it._1(), it._2(), "simRel")))
@ -99,7 +114,138 @@ public class Deduper implements Serializable {
                .map(Tuple2::_2);
    }

-    public static void createSimRels(DedupConfig dedupConf, SparkSession spark, String entitiesPath, String simRelsPath, boolean useTree, boolean noMatch){
+    /**
+     * Builds the comparison queue of a block: documents ordered by a MapDocumentComparator on the
+     * configured order field, keeping only the first document seen for each identifier.
+     * NOTE(review): the "<=" test lets the queue grow to queueMaxSize + 1 entries - confirm intended.
+     */
+    public static Queue<MapDocument> prepareQueue(final Iterable<MapDocument> documents, DedupConfig config) {
+        final WfConfig wf = config.getWf();
+        final int queueMaxSize = wf.getQueueMaxSize();
+        final Set<String> seen = new HashSet<>();
+        final Queue<MapDocument> queue = new PriorityQueue<>(100, new MapDocumentComparator(wf.getOrderField()));
+
+        for (final MapDocument doc : documents) {
+            // Set.add returns false when the identifier was already registered
+            if (queue.size() <= queueMaxSize && seen.add(doc.getIdentifier())) {
+                queue.add(doc);
+            }
+        }
+        return queue;
+    }
+
+    /**
+     * Computes "simRel" relations between the documents of each block with a hard-coded publication
+     * scoring. Every pivot removed from the block queue is compared with the documents still queued,
+     * stopping at the first skip-listed id or once the candidates already visited for that pivot
+     * exceed the sliding window size. Matches are emitted in both directions, then de-duplicated.
+     *
+     * @param context not used by this method
+     * @param blocks  blocks to process
+     * @param config  dedup configuration (order field, queue size, sliding window, skip list)
+     * @return one "simRel" relation per distinct source + target key
+     */
+    public static JavaRDD<Relation> computePublicationRelations(
+            JavaSparkContext context, JavaPairRDD<String, Block> blocks, DedupConfig config) {
+
+        return blocks
+                .flatMapToPair((PairFlatMapFunction<Tuple2<String, Block>, String, String>)
+                        it -> publicationPairs(it._2(), config).iterator())
+                .mapToPair(it -> new Tuple2<>(it._1() + it._2(), new Relation(it._1(), it._2(), "simRel")))
+                .reduceByKey((a, b) -> a)
+                .map(Tuple2::_2);
+    }
+
+    /** Compares each pivot removed from the block queue with the documents still queued. */
+    private static List<Tuple2<String, String>> publicationPairs(Block block, DedupConfig config) {
+        final List<Tuple2<String, String>> relations = new ArrayList<>();
+        if (block.getDocuments().size() <= 1) {
+            return relations;
+        }
+        final WfConfig wf = config.getWf();
+        final Queue<MapDocument> queue = prepareQueue(block.getDocuments(), config);
+        while (!queue.isEmpty()) {
+            final MapDocument pivot = queue.remove();
+            final String idPivot = pivot.getIdentifier();
+            final Field fieldsPivot = pivot.values(wf.getOrderField());
+            final String fieldPivot = (fieldsPivot == null) || fieldsPivot.isEmpty() ? "" : fieldsPivot.stringValue();
+            if (fieldPivot == null) {
+                continue;
+            }
+
+            int i = 0;
+            for (final MapDocument curr : queue) {
+                final String idCurr = curr.getIdentifier();
+                if (wf.getSkipList().contains(StringUtils.substringBetween(idCurr, "|", "::"))) {
+                    break;
+                }
+                // count the visited candidates, otherwise the sliding window limit can never stop the scan
+                if (i++ > wf.getSlidingWindowSize()) {
+                    break;
+                }
+                final Field fieldsCurr = curr.values(wf.getOrderField());
+                final boolean hasOrderValue = fieldsCurr != null && !fieldsCurr.isEmpty() && fieldsCurr.stringValue() != null;
+                if (!idCurr.equals(idPivot) && hasOrderValue && publicationScore(pivot, curr, config) >= 0.99) {
+                    relations.add(new Tuple2<>(idPivot, idCurr));
+                    relations.add(new Tuple2<>(idCurr, idPivot));
+                }
+            }
+        }
+
+        return relations;
+    }
+
+    /** Scores a pair: +10 on pid match, -1 on title-version/author-size mismatch, plus the LevensteinTitle result. */
+    private static double publicationScore(MapDocument a, MapDocument b, DedupConfig config) {
+        final Map<String, String> params = new HashMap<>();
+        params.put("jpath_value", "$.value");
+        params.put("jpath_classid", "$.qualifier.classid");
+
+        final double pid = new JsonListMatch(params).compare(a.getFieldMap().get("pid"), b.getFieldMap().get("pid"), config);
+        final double version = new TitleVersionMatch(params).compare(a.getFieldMap().get("title"), b.getFieldMap().get("title"), config);
+        final double size = new SizeMatch(params).compare(a.getFieldMap().get("authors"), b.getFieldMap().get("authors"), config);
+        final double title = new LevensteinTitle(params).compare(a.getFieldMap().get("title"), b.getFieldMap().get("title"), config);
+
+        // high score because the pair should match when the pid comparison exceeds the 0.5 threshold
+        double score = pid > 0.5 ? 10.0 : 0.0;
+        // penalise unless both comparators returned exactly 1.0 or -1.0
+        if (!((version == 1.0 || version == -1.0) && (size == 1.0 || size == -1.0))) {
+            score -= 1.0;
+        }
+        return score + title;
+    }
+
+    /**
+     * Decides whether two publications are similar with a hard-coded score: +1 when the pid
+     * JsonListMatch exceeds 0.5, -1 unless TitleVersionMatch and SizeMatch both return exactly
+     * 1.0 or -1.0, plus the LevensteinTitle result on the titles.
+     * NOTE(review): computePublicationRelations weights the pid match 10.0, not 1.0 - confirm.
+     *
+     * @return true when the resulting score is at least 0.99
+     */
+    public static boolean comparePublications(MapDocument a, MapDocument b, DedupConfig config){
+        final Map<String, String> params = new HashMap<>();
+        params.put("jpath_value", "$.value");
+        params.put("jpath_classid", "$.qualifier.classid");
+
+        final double pidMatch = new JsonListMatch(params)
+                .compare(a.getFieldMap().get("pid"), b.getFieldMap().get("pid"), config);
+        final double versionMatch = new TitleVersionMatch(params)
+                .compare(a.getFieldMap().get("title"), b.getFieldMap().get("title"), config);
+        final double sizeMatch = new SizeMatch(params)
+                .compare(a.getFieldMap().get("authors"), b.getFieldMap().get("authors"), config);
+        final double titleMatch = new LevensteinTitle(params)
+                .compare(a.getFieldMap().get("title"), b.getFieldMap().get("title"), config);
+
+        final boolean versionDecided = versionMatch == 1.0 || versionMatch == -1.0;
+        final boolean sizeDecided = sizeMatch == 1.0 || sizeMatch == -1.0;
+
+        final double pidScore = pidMatch > 0.5 ? 1.0 : 0.0;
+        final double penalty = versionDecided && sizeDecided ? 0.0 : 1.0;
+        return pidScore - penalty + titleMatch >= 0.99;
+    }
+
+    public static void createSimRels(DedupConfig dedupConf, SparkSession spark, String entitiesPath, String simRelsPath){

        JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());

@ -115,7 +261,7 @@ public class Deduper implements Serializable {
        JavaPairRDD<String, Block> blocks = Deduper.createSortedBlocks(mapDocuments, dedupConf);

        // create relations by comparing only elements in the same group
-        JavaRDD<Relation> relations = Deduper.computeRelations(sc, blocks, dedupConf, useTree, noMatch);
+        JavaRDD<Relation> relations = Deduper.computeRelations(sc, blocks, dedupConf);

        // save the simrel in the workingdir
        spark
@ -136,19 +282,21 @@ public class Deduper implements Serializable {
                .map(s -> MapDocumentUtil.getJPathString(dedupConf.getWf().getIdPath(), s))
                .mapToPair((PairFunction<String, Object, String>) s -> new Tuple2<>(hash(s), s));

-        final JavaRDD<Edge<String>> edgeRdd = spark
+        final RDD<Edge<String>> edgeRdd = spark
                .read()
                .load(simRelsPath)
                .as(Encoders.bean(Relation.class))
                .javaRDD()
-                .map(Relation::toEdgeRdd);
+                .map(Relation::toEdgeRdd)
+                .rdd();

-        JavaPairRDD<String, List<String>> ccs = JavaGraphProcessor
-                .findCCs(vertexes, edgeRdd, dedupConf.getWf().getMaxIterations());
+        JavaRDD<ConnectedComponent> ccs = GraphProcessor
+                .findCCs(vertexes.rdd(), edgeRdd, maxIterations)
+                .toJavaRDD();

        JavaRDD<Relation> mergeRel = ccs
-                .filter(cc -> cc._2().size() > 1)
-                .flatMap(cc -> Deduper.ccToMergeRel(cc, dedupConf))
+                .filter(k -> k.getDocs().size() > 1)
+                .flatMap(cc -> ccToMergeRel(cc, dedupConf))
                .map(it -> new Relation(it._1(), it._2(), "mergeRel"));

        final Dataset<Relation> mergeRels = spark
@ -159,7 +307,7 @@ public class Deduper implements Serializable {
        mergeRels.write().mode(SaveMode.Overwrite).parquet(mergeRelsPath);
    }

-    public static void createDedupEntity(DedupConfig dedupConf, String simRelsPath, String mergeRelsPath, String entitiesPath, SparkSession spark, String dedupEntityPath){
+    public static void createDedupEntity(DedupConfig dedupConf, String mergeRelsPath, String entitiesPath, SparkSession spark, String dedupEntityPath){

        JavaPairRDD<String, String> entities = spark
                .read()
@ -170,15 +318,7 @@ public class Deduper implements Serializable {
                .toJavaRDD()
                .mapToPair(t -> t);

-        // <source_raw_id, relation(source, target)>
-        JavaPairRDD<String, Relation> simRels = spark
-                .read()
-                .load(simRelsPath)
-                .as(Encoders.bean(Relation.class))
-                .toJavaRDD()
-                .mapToPair(r-> new Tuple2<>(r.getSource(), r));
-
-        // <raw_id, relation(dedup_id, raw_id)>
+        // <source, target>: source is the dedup_id, target is the id of the mergedIn
        JavaPairRDD<String, Relation> mergeRels = spark
                .read()
                .load(mergeRelsPath)
@ -191,22 +331,7 @@ public class Deduper implements Serializable {
                .groupByKey()
                .map(t-> entityMerger(t._1(), t._2().iterator()));

-        JavaPairRDD<String, Iterable<Relation>> simRelsWithDedupId = simRels
-                .join(mergeRels)
-                .mapToPair(x -> new Tuple2<>(x._2()._2().getSource(), x._2()._1()))
-                .groupByKey();
-
-        JavaRDD<ConnectedComponent> groupEntity = mergeRels.join(entities)
-                .mapToPair(t -> new Tuple2<>(t._2()._1().getSource(), t._2()._2()))
-                .groupByKey()
-                .join(simRelsWithDedupId)
-                .map(x -> new ConnectedComponent(
-                        x._1(),
-                        x._2()._1(),
-                        x._2()._2())
-                );
-
-        groupEntity.saveAsTextFile(dedupEntityPath);
+        dedupEntities.saveAsTextFile(dedupEntityPath);
    }

 }
--- a/dnet-dedup-test/src/main/java/eu/dnetlib/graph/JavaGraphProcessor.java
+++ b/dnet-dedup-test/src/main/java/eu/dnetlib/graph/JavaGraphProcessor.java
@ -1,56 +0,0 @@
-package eu.dnetlib.graph;
-
-import com.clearspring.analytics.util.Lists;
-import com.google.common.collect.Sets;
-import eu.dnetlib.pace.utils.Utility;
-import eu.dnetlib.support.ConnectedComponent;
-import org.apache.spark.api.java.JavaPairRDD;
-import org.apache.spark.api.java.JavaRDD;
-import org.apache.spark.graphx.*;
-import org.apache.spark.rdd.RDD;
-import org.apache.spark.storage.StorageLevel;
-import scala.Tuple2;
-import scala.reflect.ClassTag;
-import scala.reflect.ClassTag$;
-
-import java.util.List;
-
-public class JavaGraphProcessor {
-
-    //<ccId, list(json)>
-    public static JavaPairRDD<String, List<String>> findCCs(JavaPairRDD<Object, String> vertexes, JavaRDD<Edge<String>> edges, int maxIterations) {
-
-        ClassTag<String> stringTag = ClassTag$.MODULE$.apply(String.class);
-        Graph<String, String> graph =
-                Graph.apply(
-                        vertexes.rdd(),
-                        edges.rdd(),
-                        "",
-                        StorageLevel.MEMORY_ONLY(),
-                        StorageLevel.MEMORY_ONLY(),
-                        stringTag,
-                        stringTag
-                );
-
-        GraphOps<String, String> graphOps = new GraphOps<>(graph, stringTag, stringTag);
-        JavaRDD<Tuple2<Object, Object>> cc = graphOps.connectedComponents(maxIterations).vertices().toJavaRDD();
-
-        JavaPairRDD<Object, String> joinResult = vertexes
-                .leftOuterJoin(cc.mapToPair(x -> x))
-                .mapToPair(x -> {
-                    if (!x._2()._2().isPresent()) {
-                        return new Tuple2<>(x._1(), x._2()._1());
-                    } else {
-                        return new Tuple2<>(x._2()._2(), x._2()._1());
-                    }
-                });
-
-        return joinResult
-                .groupByKey()
-                .map(x -> Lists.newArrayList(x._2()))
-                .zipWithUniqueId()
-                .mapToPair(x -> new Tuple2<>("dedup______::" + x._2().toString(), x._1()));
-
-    }
-
-}
--- a/dnet-dedup-test/src/main/java/eu/dnetlib/jobs/AbstractSparkJob.java
+++ b/dnet-dedup-test/src/main/java/eu/dnetlib/jobs/AbstractSparkJob.java
@ -19,7 +19,6 @@ import java.io.BufferedReader;
 import java.io.IOException;
 import java.io.InputStreamReader;
 import java.io.Serializable;
-import java.nio.charset.StandardCharsets;
 import java.util.stream.Collectors;

 public abstract class AbstractSparkJob implements Serializable {
@ -60,7 +59,7 @@ public abstract class AbstractSparkJob implements Serializable {

        Path path=new Path(filePath);
        FileSystem fs = FileSystem.get(new Configuration());
-        BufferedReader br=new BufferedReader(new InputStreamReader(fs.open(path), StandardCharsets.UTF_8));
+        BufferedReader br=new BufferedReader(new InputStreamReader(fs.open(path)));
        try {
            return String.join("", br.lines().collect(Collectors.toList()));
        } finally {
--- a/dnet-dedup-test/src/main/java/eu/dnetlib/jobs/SparkComputeStatistics.java
+++ b/dnet-dedup-test/src/main/java/eu/dnetlib/jobs/SparkComputeStatistics.java
@ -1,36 +1,20 @@
 package eu.dnetlib.jobs;

-import eu.dnetlib.Deduper;
-import eu.dnetlib.pace.config.DedupConfig;
-import eu.dnetlib.pace.config.Type;
-import eu.dnetlib.pace.model.FieldValueImpl;
-import eu.dnetlib.pace.model.MapDocument;
-import eu.dnetlib.pace.util.MapDocumentUtil;
-import eu.dnetlib.pace.utils.Utility;
 import eu.dnetlib.support.ArgumentApplicationParser;
-import eu.dnetlib.support.Block;
-import eu.dnetlib.support.ConnectedComponent;
 import eu.dnetlib.support.Relation;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FSDataOutputStream;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.spark.SparkConf;
-import org.apache.spark.api.java.JavaPairRDD;
 import org.apache.spark.api.java.JavaRDD;
-import org.apache.spark.api.java.JavaSparkContext;
-import org.apache.spark.api.java.function.PairFunction;
 import org.apache.spark.sql.Encoders;
 import org.apache.spark.sql.SparkSession;
-import org.codehaus.jackson.map.ObjectMapper;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
-import scala.Tuple2;

 import java.io.IOException;
-import java.util.List;
 import java.util.Optional;
-import java.util.stream.Collectors;

 public class SparkComputeStatistics extends AbstractSparkJob {

@ -58,42 +42,18 @@ public class SparkComputeStatistics extends AbstractSparkJob {

        @Override
        public void run() throws IOException {
-            //https://towardsdatascience.com/7-evaluation-metrics-for-clustering-algorithms-bdc537ff54d2#:~:text=There%20are%20two%20types%20of,to%20all%20unsupervised%20learning%20results)
+
            // read oozie parameters
            final String entitiesPath = parser.get("entitiesPath");
            final String workingPath = parser.get("workingPath");
-            final String dedupConfPath = parser.get("dedupConfPath");
-            final String groundTruthFieldJPath = parser.get("groundTruthFieldJPath");
            final int numPartitions = Optional
                    .ofNullable(parser.get("numPartitions"))
                    .map(Integer::valueOf)
                    .orElse(NUM_PARTITIONS);

-            log.info("entitiesPath:          '{}'", entitiesPath);
-            log.info("workingPath:           '{}'", workingPath);
-            log.info("numPartitions:         '{}'", numPartitions);
-            log.info("dedupConfPath:         '{}'", dedupConfPath);
-            log.info("groundTruthFieldJPath: '{}'", groundTruthFieldJPath);
-
-            JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
-            DedupConfig dedupConfig = loadDedupConfig(dedupConfPath);
-
-            JavaPairRDD<String, MapDocument> mapDocuments = sc
-                    .textFile(entitiesPath)
-                    .repartition(numPartitions)
-                    .mapToPair(
-                            (PairFunction<String, String, MapDocument>) s -> {
-                                MapDocument d = MapDocumentUtil.asMapDocumentWithJPath(dedupConfig, s);
-                                //put in the map the groundTruthField used to compute statistics
-                                d.getFieldMap().put("groundTruth", new FieldValueImpl(Type.String, "groundTruth", MapDocumentUtil.getJPathString(groundTruthFieldJPath, s)));
-                                return new Tuple2<>(d.getIdentifier(), d);
-                            });
-
-            JavaRDD<String> entities = mapDocuments.map(d -> d._2().getFieldMap().get("groundTruth").stringValue());
-
-            // create blocks
-            JavaRDD<List<String>> blocks = Deduper.createSortedBlocks(mapDocuments, dedupConfig)
-                    .map(b -> b._2().getDocuments().stream().map(d -> d.getFieldMap().get("groundTruth").stringValue()).collect(Collectors.toList()));
+            log.info("entitiesPath:  '{}'", entitiesPath);
+            log.info("workingPath:   '{}'", workingPath);
+            log.info("numPartitions: '{}'", numPartitions);

            // <source, target>: source is the dedup_id, target is the id of the mergedIn
            JavaRDD<Relation> mergerels = spark
@ -108,38 +68,15 @@ public class SparkComputeStatistics extends AbstractSparkJob {
                    .as(Encoders.bean(Relation.class))
                    .toJavaRDD();

-            JavaRDD<List<String>> groups = sc.textFile(workingPath + "/groupentities")
-                    .map(e -> new ObjectMapper().readValue(e, ConnectedComponent.class))
-                    .map(e -> e.getDocs().stream().map(d -> MapDocumentUtil.getJPathString(groundTruthFieldJPath, d)).collect(Collectors.toList()));
-
-            long entities_number = entities.count();
-            long blocks_number = blocks.count();
-            double blocks_randIndex = randIndex(blocks);
            long simrels_number = simrels.count();
            long mergerels_number = mergerels.count();
-            double groups_randIndex = randIndex(groups);
-            long groups_number = groups.count();
-            long groundtruth_number = entities.filter(e -> !e.isEmpty()).count();
-            long correct_groups = groups.filter(x -> x.stream().distinct().count()==1).count();
-            long wrong_groups = groups_number - correct_groups;
+            long connected_components = mergerels.groupBy(Relation::getSource).count();

-            String print =
-                    "Entities : " + entities_number + "\n" +
-                    "Ground Truth : " + groundtruth_number + "\n" +
-                    "Blocks : " + blocks_number + "\n" +
-                    "Blocks RI : " + blocks_randIndex + "\n" +
-                    "SimRels : " + simrels_number + "\n" +
-                    "MergeRels : " + mergerels_number + "\n" +
-                    "Groups : " + groups_number + " (correct: " + correct_groups + ", wrong: " + wrong_groups + ")\n" +
-                    "Groups RI : " + groups_randIndex;
-
-            System.out.println(print);
-
-            writeStatsFileToHDFS(groundtruth_number, entities_number, blocks_randIndex, groups_randIndex, blocks_number, simrels_number, mergerels_number, groups_number, workingPath + "/stats_file.txt");
+            writeStatsFileToHDFS(simrels_number, mergerels_number, connected_components, workingPath + "/stats_file");

        }

-        public static void writeStatsFileToHDFS(long groundtruth_number, long entities_number, double blocks_randIndex, double groups_randIndex, long blocks_number, long simrels_number, long mergerels_number, long groups_number, String filePath) throws IOException {
+        public static void writeStatsFileToHDFS(long simrels_number, long mergerels_number, long connected_components, String filePath) throws IOException {
            Configuration conf = new Configuration();

            FileSystem fs = FileSystem.get(conf);
@ -156,14 +93,9 @@ public class SparkComputeStatistics extends AbstractSparkJob {
                }

                String print =
-                        "Entities : " + entities_number + "\n" +
-                        "Ground Truth : " + groundtruth_number + "\n" +
-                        "Blocks : " + blocks_number + "\n" +
-                        "Blocks RI : " + blocks_randIndex + "\n" +
-                        "SimRels : " + simrels_number + "\n" +
-                        "MergeRels : " + mergerels_number + "\n" +
-                        "Groups : " + groups_number + "\n" +
-                        "Groups RI : " + groups_randIndex;
+                        "Similarity Relations : " + simrels_number + "\n" +
+                        "Merge Relations : " + mergerels_number + "\n" +
+                        "Connected Components : " + connected_components;

                // Create file to write
                FSDataOutputStream out = fs.create(outFile);
@ -177,31 +109,5 @@ public class SparkComputeStatistics extends AbstractSparkJob {
                e.printStackTrace();
            }
        }
-
-        //TODO find another maesure that takes into account all the elements outside of the group too
-        //RandIndex = number of pairwise correct predictions/total number of possible pairs (in the same cluster) -> bounded between 0 and 1
-        public double randIndex(JavaRDD<List<String>> clusters) {
-
-            Tuple2<Integer, Integer> reduce = clusters.map(c -> {
-                        int num = 0;
-                        for (String id : c.stream().distinct().filter(s -> !s.isEmpty()).collect(Collectors.toList())) {
-                            int n = (int) c.stream().filter(i -> i.equals(id)).count();
-                            num += binomialCoefficient(n);
-                        }
-                        int den = binomialCoefficient(c.size());
-                        return new Tuple2<>(num, den);
-                    })
-                    .reduce((a, b) -> new Tuple2<>(a._1() + b._1(), a._2() + b._2()));
-
-            return (double)reduce._1()/ reduce._2();
-        }
-
-        private static int binomialCoefficient(int n)
-        {
-            return n*(n-1)/2;
-        }
-
-        //V-measure = harmonic mean of homogeneity and completeness, homogeneity = each cluster contains only members of a single class, completeness = all members of a given class are assigned to the same cluster
-
 }

--- a/dnet-dedup-test/src/main/java/eu/dnetlib/jobs/SparkCreateDedupEntity.java
+++ b/dnet-dedup-test/src/main/java/eu/dnetlib/jobs/SparkCreateDedupEntity.java
@ -7,7 +7,6 @@ import eu.dnetlib.pace.utils.Utility;
 import eu.dnetlib.support.ArgumentApplicationParser;
 import eu.dnetlib.support.ConnectedComponent;
 import eu.dnetlib.support.Relation;
-import org.apache.hadoop.io.compress.GzipCodec;
 import org.apache.spark.SparkConf;
 import org.apache.spark.api.java.JavaPairRDD;
 import org.apache.spark.api.java.JavaRDD;
@ -17,32 +16,29 @@ import org.apache.spark.sql.SparkSession;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import scala.Tuple2;
-import scala.Tuple3;

 import java.io.IOException;
-import java.util.*;
-import java.util.stream.Collectors;
-import java.util.stream.StreamSupport;
+import java.util.Optional;

-public class SparkCreateGroupEntity extends AbstractSparkJob {
+public class SparkCreateDedupEntity extends AbstractSparkJob {

-        private static final Logger log = LoggerFactory.getLogger(SparkCreateGroupEntity.class);
+        private static final Logger log = LoggerFactory.getLogger(eu.dnetlib.jobs.SparkCreateDedupEntity.class);

-        public SparkCreateGroupEntity(ArgumentApplicationParser parser, SparkSession spark) {
+        public SparkCreateDedupEntity(ArgumentApplicationParser parser, SparkSession spark) {
            super(parser, spark);
        }

        public static void main(String[] args) throws Exception {

            ArgumentApplicationParser parser = new ArgumentApplicationParser(
-                    Utility.readResource("/jobs/parameters/createGroupEntity_parameters.json", SparkCreateGroupEntity.class)
+                    Utility.readResource("/jobs/parameters/createDedupEntity_parameters.json", SparkCreateDedupEntity.class)
            );

            parser.parseArgument(args);

            SparkConf conf = new SparkConf();

-            new SparkCreateGroupEntity(
+            new SparkCreateDedupEntity(
                    parser,
                    getSparkSession(conf)
            ).run();
@ -65,9 +61,8 @@ public class SparkCreateGroupEntity extends AbstractSparkJob {
            log.info("dedupConfPath: '{}'", dedupConfPath);
            log.info("numPartitions: '{}'", numPartitions);

-            DedupConfig dedupConf = DedupConfig.load(readFileFromHDFS(dedupConfPath));
+            DedupConfig dedupConf = DedupConfig.load(readResource("/jobs/parameters/createDedupEntity_parameters.json", SparkCreateDedupEntity.class));

-            // <raw_id, json>
            JavaPairRDD<String, String> entities = spark
                    .read()
                    .textFile(entitiesPath)
@ -77,15 +72,7 @@ public class SparkCreateGroupEntity extends AbstractSparkJob {
                    .toJavaRDD()
                    .mapToPair(t -> t);

-            // <source_raw_id, relation(source, target)>
-            JavaPairRDD<String, Relation> simRels = spark
-                    .read()
-                    .load(workingPath + "/simrels")
-                    .as(Encoders.bean(Relation.class))
-                    .toJavaRDD()
-                    .mapToPair(r-> new Tuple2<>(r.getSource(), r));
-
-            // <raw_id, relation(dedup_id, raw_id)>
+            // <source, target>: source is the dedup_id, target is the id of the mergedIn
            JavaPairRDD<String, Relation> mergeRels = spark
                    .read()
                    .load(workingPath + "/mergerels")
@ -93,23 +80,12 @@ public class SparkCreateGroupEntity extends AbstractSparkJob {
                    .toJavaRDD()
                    .mapToPair(r -> new Tuple2<>(r.getTarget(), r));

-            // <dedup_id, simrel>
-            JavaPairRDD<String, Iterable<Relation>> simRelsWithDedupId = simRels
-                    .join(mergeRels)
-                    .mapToPair(x -> new Tuple2<>(x._2()._2().getSource(), x._2()._1()))
-                    .groupByKey();
-
-            JavaRDD<ConnectedComponent> groupEntity = mergeRels.join(entities)
+            JavaRDD<ConnectedComponent> dedupEntities = mergeRels.join(entities)
                    .mapToPair(t -> new Tuple2<>(t._2()._1().getSource(), t._2()._2()))
                    .groupByKey()
-                    .join(simRelsWithDedupId)
-                    .map(x -> new ConnectedComponent(
-                            x._1(),
-                            x._2()._1(),
-                            x._2()._2())
-                    );
+                    .map(t-> Deduper.entityMerger(t._1(), t._2().iterator()));

-            groupEntity.saveAsTextFile(workingPath + "/groupentities", GzipCodec.class);
+            dedupEntities.saveAsTextFile(workingPath + "dedupentity");

        }

--- a/dnet-dedup-test/src/main/java/eu/dnetlib/jobs/SparkCreateMergeRels.java
+++ b/dnet-dedup-test/src/main/java/eu/dnetlib/jobs/SparkCreateMergeRels.java
@ -1,7 +1,7 @@
 package eu.dnetlib.jobs;

 import eu.dnetlib.Deduper;
-import eu.dnetlib.graph.JavaGraphProcessor;
+import eu.dnetlib.graph.GraphProcessor;
 import eu.dnetlib.pace.config.DedupConfig;
 import eu.dnetlib.pace.util.MapDocumentUtil;
 import eu.dnetlib.pace.utils.Utility;
@ -24,7 +24,6 @@ import org.slf4j.LoggerFactory;
 import scala.Tuple2;

 import java.io.IOException;
-import java.util.List;
 import java.util.Optional;

 import static eu.dnetlib.Deduper.hash;
@ -79,18 +78,20 @@ public class SparkCreateMergeRels extends AbstractSparkJob {
                .map(s -> MapDocumentUtil.getJPathString(dedupConf.getWf().getIdPath(), s))
                .mapToPair((PairFunction<String, Object, String>) s -> new Tuple2<>(hash(s), s));

-        final JavaRDD<Edge<String>> edgeRdd = spark
+        final RDD<Edge<String>> edgeRdd = spark
                .read()
                .load(workingPath + "/simrels")
                .as(Encoders.bean(Relation.class))
                .javaRDD()
-                .map(Relation::toEdgeRdd);
+                .map(Relation::toEdgeRdd)
+                .rdd();

-        JavaPairRDD<String, List<String>> ccs = JavaGraphProcessor
-                .findCCs(vertexes, edgeRdd, dedupConf.getWf().getMaxIterations());
+        JavaRDD<ConnectedComponent> ccs = GraphProcessor
+                .findCCs(vertexes.rdd(), edgeRdd, dedupConf.getWf().getMaxIterations())
+                .toJavaRDD();

        JavaRDD<Relation> mergeRel = ccs
-                .filter(cc -> cc._2().size() > 1)
+                .filter(k -> k.getDocs().size() > 1)
                .flatMap(cc -> Deduper.ccToMergeRel(cc, dedupConf))
                .map(it -> new Relation(it._1(), it._2(), "mergeRel"));

--- a/dnet-dedup-test/src/main/java/eu/dnetlib/jobs/SparkCreateSimRels.java
+++ b/dnet-dedup-test/src/main/java/eu/dnetlib/jobs/SparkCreateSimRels.java
@ -14,7 +14,6 @@ import org.apache.spark.api.java.JavaPairRDD;
 import org.apache.spark.api.java.JavaRDD;
 import org.apache.spark.api.java.JavaSparkContext;
 import org.apache.spark.api.java.function.PairFunction;
-import org.apache.spark.sql.Encoder;
 import org.apache.spark.sql.Encoders;
 import org.apache.spark.sql.SaveMode;
 import org.apache.spark.sql.SparkSession;
@ -73,7 +72,6 @@ public class SparkCreateSimRels extends AbstractSparkJob {

        JavaPairRDD<String, MapDocument> mapDocuments = sc
                .textFile(entitiesPath)
-                .repartition(numPartitions)
                .mapToPair(
                        (PairFunction<String, String, MapDocument>) s -> {
                            MapDocument d = MapDocumentUtil.asMapDocumentWithJPath(dedupConfig, s);
@ -84,7 +82,12 @@ public class SparkCreateSimRels extends AbstractSparkJob {
        JavaPairRDD<String, Block> blocks = Deduper.createSortedBlocks(mapDocuments, dedupConfig);

        // create relations by comparing only elements in the same group
-        JavaRDD<Relation> relations = Deduper.computeRelations(sc, blocks, dedupConfig, useTree, false);
+        JavaRDD<Relation> relations;
+
+        if (useTree)
+            relations = Deduper.computeRelations(sc, blocks, dedupConfig);
+        else
+            relations = Deduper.computePublicationRelations(sc, blocks, dedupConfig);

        // save the simrel in the workingdir
        spark
--- a/dnet-dedup-test/src/main/java/eu/dnetlib/support/Block.java
+++ b/dnet-dedup-test/src/main/java/eu/dnetlib/support/Block.java
@ -1,7 +1,10 @@
 package eu.dnetlib.support;

 import java.io.Serializable;
-import java.util.*;
+import java.util.ArrayList;
+import java.util.Comparator;
+import java.util.Iterator;
+import java.util.List;
 import java.util.stream.Collectors;
 import java.util.stream.Stream;
 import java.util.stream.StreamSupport;
@ -9,7 +12,6 @@ import java.util.stream.StreamSupport;
 import com.google.common.collect.Lists;

 import eu.dnetlib.pace.model.MapDocument;
-import org.codehaus.jackson.annotate.JsonIgnore;

 public class Block implements Serializable {

@ -21,11 +23,6 @@ public class Block implements Serializable {
        super();
    }

-    public Block(String key, List<MapDocument> documents) {
-        this.key = key;
-        this.documents = documents;
-    }
-
    public Block(String key, Iterable<MapDocument> documents) {
        this.key = key;
        this.documents = Lists.newArrayList(documents);
--- a/dnet-dedup-test/src/main/java/eu/dnetlib/support/ConnectedComponent.java
+++ b/dnet-dedup-test/src/main/java/eu/dnetlib/support/ConnectedComponent.java
@ -5,35 +5,54 @@ import java.io.Serializable;
 import java.util.HashSet;
 import java.util.Set;

-import com.google.common.collect.Sets;
+import eu.dnetlib.pace.utils.Utility;
+import org.apache.commons.lang.StringUtils;
+import org.codehaus.jackson.annotate.JsonIgnore;
+
+import com.fasterxml.jackson.databind.ObjectMapper;

 import eu.dnetlib.pace.util.PaceException;
-import org.codehaus.jackson.map.ObjectMapper;

 public class ConnectedComponent implements Serializable {

    private HashSet<String> docs;
    private String ccId;
-    private HashSet<Relation> simrels;

    public ConnectedComponent() {
    }

-    public ConnectedComponent(String ccId, Set<String> docs, Set<Relation> simrels) {
-        this.docs = new HashSet<>(docs);
-        this.ccId = ccId;
-        this.simrels = new HashSet<>(simrels);
-    }
-
    public ConnectedComponent(Set<String> docs) {
        this.docs = new HashSet<>(docs);
-        //initialization of id and relations missing
+        createID();
    }

-    public ConnectedComponent(String ccId, Iterable<String> docs, Iterable<Relation> simrels) {
-        this.ccId = ccId;
-        this.docs = Sets.newHashSet(docs);
-        this.simrels = Sets.newHashSet(simrels);
+    /**
+     * Computes the identifier of this connected component.
+     *
+     * When the component holds more than one document, {@code ccId} is set to
+     * {@code "dedup::"} followed by {@code Utility.md5} of {@link #getMin()} and returned.
+     * Otherwise {@code ccId} is left untouched and the first document id is returned;
+     * with an empty {@code docs} set the call to {@code iterator().next()} throws.
+     *
+     * @return the newly assigned component id, or the single document id
+     */
+    public String createID() {
+        // Zero or one document: nothing to merge, hand back the document id as-is.
+        if (docs.size() <= 1) {
+            return docs.iterator().next();
+        }
+        ccId = "dedup::" + Utility.md5(getMin());
+        return ccId;
+    }
+
+    /**
+     * Returns the smallest document id according to {@link String#compareTo(String)}.
+     *
+     * A blank running value is always replaced by the next id visited, so the result
+     * is the empty string when {@code docs} is empty. Ids are visited in the iteration
+     * order of {@code docs}.
+     *
+     * @return the selected id, or {@code ""} when there are no documents
+     */
+    @JsonIgnore
+    public String getMin() {
+
+        // Smallest id seen so far; a blank value means nothing has been chosen yet.
+        String smallest = "";
+        for (final String candidate : docs) {
+            final boolean unset = StringUtils.isBlank(smallest);
+            if (unset || smallest.compareTo(candidate) > 0) {
+                // String.valueOf stores the text "null" for a null id rather than null itself.
+                smallest = String.valueOf(candidate);
+            }
+        }
+        return smallest;
+    }

    @Override
@ -61,12 +80,4 @@ public class ConnectedComponent implements Serializable {
    public void setCcId(String ccId) {
        this.ccId = ccId;
    }
-
-    public void setSimrels(HashSet<Relation> simrels) {
-        this.simrels = simrels;
-    }
-
-    public HashSet<Relation> getSimrels() {
-        return simrels;
-    }
 }
--- a/dnet-dedup-test/src/main/resources/dedup/oozie_app/workflow.xml
+++ b/dnet-dedup-test/src/main/resources/dedup/oozie_app/workflow.xml
@ -16,10 +16,6 @@
            <name>dedupConfPath</name>
            <description>path for the dedup configuration file</description>
        </property>
-        <property>
-            <name>groundTruthFieldJPath</name>
-            <description>jpath of the field to be used as ground truth</description>
-        </property>
        <property>
            <name>sparkDriverMemory</name>
            <description>memory for driver process</description>
@ -135,34 +131,7 @@
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
                --conf spark.sql.shuffle.partitions=3840
-                --conf spark.dynamicAllocation.enabled=true
-            </spark-opts>
-            <arg>--entitiesPath</arg><arg>${entitiesPath}</arg>
-            <arg>--workingPath</arg><arg>${workingPath}</arg>
-            <arg>--numPartitions</arg><arg>${numPartitions}</arg>
-            <arg>--dedupConfPath</arg><arg>${dedupConfPath}</arg>
-        </spark>
-        <ok to="CreateGroupEntities"/>
-        <error to="Kill"/>
-    </action>
-
-
-    <action name="CreateGroupEntities">
-        <spark xmlns="uri:oozie:spark-action:0.2">
-            <master>yarn</master>
-            <mode>cluster</mode>
-            <name>Create Group Entities</name>
-            <class>eu.dnetlib.jobs.SparkCreateGroupEntity</class>
-            <jar>dnet-dedup-test-${projectVersion}.jar</jar>
-            <spark-opts>
-                --executor-memory=${sparkExecutorMemory}
-                --executor-cores=${sparkExecutorCores}
-                --driver-memory=${sparkDriverMemory}
-                --conf spark.extraListeners=${spark2ExtraListeners}
-                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
-                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
-                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
-                --conf spark.sql.shuffle.partitions=3840
+                --conf spark.dynamicAllocation.enabled=false
            </spark-opts>
            <arg>--entitiesPath</arg><arg>${entitiesPath}</arg>
            <arg>--workingPath</arg><arg>${workingPath}</arg>
@ -193,12 +162,36 @@
            <arg>--entitiesPath</arg><arg>${entitiesPath}</arg>
            <arg>--workingPath</arg><arg>${workingPath}</arg>
            <arg>--numPartitions</arg><arg>${numPartitions}</arg>
-            <arg>--dedupConfPath</arg><arg>${dedupConfPath}</arg>
-            <arg>--groundTruthFieldJPath</arg><arg>${groundTruthFieldJPath}</arg>
        </spark>
        <ok to="End"/>
        <error to="Kill"/>
    </action>

+    <!--<action name="CreateDedupEntities">-->
+        <!--<spark xmlns="uri:oozie:spark-action:0.2">-->
+            <!--<master>yarn</master>-->
+            <!--<mode>cluster</mode>-->
+            <!--<name>Create Dedup Entities</name>-->
+            <!--<class>eu.dnetlib.jobs.SparkCreateDedupEntity</class>-->
+            <!--<jar>dnet-dedup-test-${projectVersion}.jar</jar>-->
+            <!--<spark-opts>-->
+                <!--&#45;&#45;executor-memory=${sparkExecutorMemory}-->
+                <!--&#45;&#45;executor-cores=${sparkExecutorCores}-->
+                <!--&#45;&#45;driver-memory=${sparkDriverMemory}-->
+                <!--&#45;&#45;conf spark.extraListeners=${spark2ExtraListeners}-->
+                <!--&#45;&#45;conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}-->
+                <!--&#45;&#45;conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}-->
+                <!--&#45;&#45;conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}-->
+                <!--&#45;&#45;conf spark.sql.shuffle.partitions=3840-->
+            <!--</spark-opts>-->
+            <!--<arg>&#45;&#45;entitiesPath</arg><arg>${entitiesPath}</arg>-->
+            <!--<arg>&#45;&#45;workingPath</arg><arg>${workingPath}</arg>-->
+            <!--<arg>&#45;&#45;numPartitions</arg><arg>${numPartitions}</arg>-->
+            <!--<arg>&#45;&#45;dedupConfPath</arg><arg>${dedupConfPath}</arg>-->
+        <!--</spark>-->
+        <!--<ok to="End"/>-->
+        <!--<error to="Kill"/>-->
+    <!--</action>-->
+
    <end name="End"/>
 </workflow-app>
--- a/dnet-dedup-test/src/main/resources/jobs/parameters/computeStatistics_parameters.json
+++ b/dnet-dedup-test/src/main/resources/jobs/parameters/computeStatistics_parameters.json
@ -16,17 +16,5 @@
    "paramLongName": "numPartitions",
    "paramDescription": "number of partitions for the similarity relations intermediate phases",
    "paramRequired": false
-  },
-  {
-    "paramName": "dc",
-    "paramLongName": "dedupConfPath",
-    "paramDescription": "dedup configuration to be used",
-    "paramRequired": true
-  },
-  {
-    "paramName": "gt",
-    "paramLongName": "groundTruthFieldJPath",
-    "paramDescription": "field to be used as groundtruth",
-    "paramRequired": true
  }
 ]
--- a/dnet-dedup-test/src/main/resources/jobs/parameters/createDedupEntity_parameters.json
+++ b/dnet-dedup-test/src/main/resources/jobs/parameters/createDedupEntity_parameters.json
--- a/dnet-dedup-test/src/test/java/eu/dnetlib/pace/DedupLocalTest.java
+++ b/dnet-dedup-test/src/test/java/eu/dnetlib/pace/DedupLocalTest.java
--- a/dnet-dedup-test/src/test/java/eu/dnetlib/pace/DedupTestUtils.java
+++ b/dnet-dedup-test/src/test/java/eu/dnetlib/pace/DedupTestUtils.java
@ -1,57 +1,37 @@
 package eu.dnetlib.pace;

-import eu.dnetlib.pace.config.Type;
-import eu.dnetlib.pace.model.FieldListImpl;
 import eu.dnetlib.pace.model.MapDocument;
 import eu.dnetlib.support.ConnectedComponent;
-import org.apache.commons.io.IOUtils;
 import org.apache.spark.api.java.JavaRDD;
-import org.codehaus.jackson.map.ObjectMapper;
-import scala.Tuple2;

-import java.io.*;
-import java.util.Arrays;
 import java.util.List;
-import java.util.Map;
-import java.util.stream.Collectors;

 public abstract class DedupTestUtils {

-    public static String prepareTable(MapDocument doc) {

-        String ret = "<table>";
-
-        for(String fieldName: doc.getFieldMap().keySet()) {
-            if (doc.getFieldMap().get(fieldName).getType().equals(Type.String)) {
-                ret += "<tr><th>" + fieldName + "</th><td>" + doc.getFieldMap().get(fieldName).stringValue() + "</td></tr>";
-            }
-            else if (doc.getFieldMap().get(fieldName).getType().equals(Type.List)) {
-                ret += "<tr><th>" + fieldName + "</th><td>[" + ((FieldListImpl)doc.getFieldMap().get(fieldName)).stringList().stream().collect(Collectors.joining(";")) + "]</td></tr>";
-            }
-        }
-
-        return ret + "</table>";
-
-    }
-
-    public static void prepareGraphParams(List<String> vertexes, List<Tuple2<String, String>> edgesTuple, String filePath, String templateFilePath, Map<String, MapDocument> mapDocuments) {
-
-        List<Node> nodes = vertexes.stream().map(v -> new Node(v.substring(3, 20).replaceAll("_", ""), vertexes.indexOf(v), prepareTable(mapDocuments.get(v)))).collect(Collectors.toList());
-        List<Edge> edges = edgesTuple.stream().map(e -> new Edge(vertexes.indexOf(e._1()), vertexes.indexOf(e._2()))).collect(Collectors.toList());
-
-        try(FileWriter fw = new FileWriter(filePath)) {
-            String fullText = IOUtils.toString(new FileReader(templateFilePath));
-
-            String s = fullText
-                    .replaceAll("%nodes%", new ObjectMapper().writeValueAsString(nodes))
-                    .replaceAll("%edges%", new ObjectMapper().writeValueAsString(edges));
-
-            IOUtils.write(s, fw);
-        } catch (IOException e) {
-            e.printStackTrace();
-        }
-
-    }
+//    public static void printStatistics(JavaRDD<ConnectedComponent> ccs){
+//        final JavaRDD<ConnectedComponent> connectedComponents = ccs.filter(cc -> cc.getDocs().size()>1);
+//        final JavaRDD<ConnectedComponent> nonDeduplicated = ccs.filter(cc -> cc.getDocs().size()==1);
+//
+//        //print deduped
+//        connectedComponents.map(cc -> {
+//            StringBuilder sb = new StringBuilder();
+//            for (MapDocument m : cc.getDocs()){
+//                sb.append(m.getFieldMap().get("originalId").stringValue() + " - "+ m.getFieldMap().get("legalname").stringValue() + "\n");
+//            }
+//            return sb.toString();
+//        }).foreach(s -> System.out.println("*******\n" + s + "*******\n"));
+//
+//        //print nondeduped
+//        nonDeduplicated.foreach(cc -> {
+//            System.out.println(cc.getId() + " - " + cc.getFieldMap().get("legalname").stringValue());
+//        });
+//
+//        System.out.println("Non duplicates: " + nonDeduplicated.count());
+//        System.out.println("Duplicates: " + connectedComponents.flatMap(cc -> cc.getDocs().iterator()).count());
+//        System.out.println("Connected Components: " + connectedComponents.count());
+//
+//    }

    public static String getOrganizationLegalname(MapDocument mapDocument){
        return mapDocument.getFieldMap().get("legalname").stringValue();
@ -67,65 +47,3 @@ public abstract class DedupTestUtils {
    }

 }
-
-class Node{
-    String label;
-    int id;
-    String title;
-
-    public Node(String label, int id, String title) {
-        this.label = label;
-        this.id = id;
-        this.title = title;
-    }
-
-    public String getLabel() {
-        return label;
-    }
-
-    public void setLabel(String label) {
-        this.label = label;
-    }
-
-    public int getId() {
-        return id;
-    }
-
-    public void setId(int id) {
-        this.id = id;
-    }
-
-    public String getTitle() {
-        return title;
-    }
-
-    public void setTitle(String title) {
-        this.title = title;
-    }
-}
-
-class Edge{
-    int from;
-    int to;
-
-    public Edge(int from, int to) {
-        this.from = from;
-        this.to = to;
-    }
-
-    public int getFrom() {
-        return from;
-    }
-
-    public void setFrom(int from) {
-        this.from = from;
-    }
-
-    public int getTo() {
-        return to;
-    }
-
-    public void setTo(int to) {
-        this.to = to;
-    }
-}
--- a/dnet-dedup-test/src/test/java/eu/dnetlib/pace/clustering/ClusteringCombinerTest.java
+++ b/dnet-dedup-test/src/test/java/eu/dnetlib/pace/clustering/ClusteringCombinerTest.java
@ -0,0 +1,39 @@
+package eu.dnetlib.pace.clustering;
+
+import eu.dnetlib.pace.config.Config;
+import eu.dnetlib.pace.model.MapDocument;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+
+import org.junit.jupiter.api.*;
+
+public class ClusteringCombinerTest  {
+
+
+	// TODO: re-implement these tests against the new configuration
+//	private static final Log log = LogFactory.getLog(ClusteringCombinerTest.class);
+//
+//	private Config config;
+//
+//	@Before
+//	public void setUp() {
+//		config = getOrganizationTestConf();
+//	}
+//
+//	@Test
+//	public void testCombine() {
+//
+//		final MapDocument organization = organization(config, "A", "University of Turin", "UNITO");
+//		log.info("University of Turin");
+//		log.info(ClusteringCombiner.combine(organization, config));
+//	}
+//
+//	@Test
+//	public void testCombineBlacklistAware() {
+//
+//		final MapDocument organization = organization(config, "A", "University of Turin", "UNITO");
+//		log.info("University of Turin");
+//		log.info(BlacklistAwareClusteringCombiner.filterAndCombine(organization, config));
+//	}
+
+}
--- a/dnet-dedup-test/src/test/resources/eu/dnetlib/pace/config/authors.fdup.conf.json
+++ b/dnet-dedup-test/src/test/resources/eu/dnetlib/pace/config/authors.fdup.conf.json
@ -1,134 +0,0 @@
-{
-  "wf": {
-    "threshold": "0.99",
-    "dedupRun": "001",
-    "entityType": "author",
-    "subEntityType": "author",
-    "subEntityValue": "author",
-    "orderField": "fullname",
-    "queueMaxSize": "200",
-    "groupMaxSize": "100",
-    "maxChildren": "100",
-    "slidingWindowSize": "50",
-    "rootBuilder": [
-      "result",
-      "resultProject_outcome_isProducedBy",
-      "resultResult_publicationDataset_isRelatedTo",
-      "resultResult_similarity_isAmongTopNSimilarDocuments",
-      "resultResult_similarity_hasAmongTopNSimilarDocuments",
-      "resultOrganization_affiliation_isAffiliatedWith",
-      "resultResult_part_hasPart",
-      "resultResult_part_isPartOf",
-      "resultResult_supplement_isSupplementTo",
-      "resultResult_supplement_isSupplementedBy",
-      "resultResult_version_isVersionOf"
-    ],
-    "includeChildren": "true",
-    "maxIterations": 20,
-    "idPath": "$.id"
-  },
-  "pace": {
-    "clustering" : [
-      { "name" : "lnfi", "fields" : [ "fullname" ], "params" : {} }
-    ],
-    "decisionTree": {
-      "start": {
-        "fields": [
-          {
-            "field": "orcid",
-            "comparator": "exactMatch",
-            "weight": 1.0,
-            "countIfUndefined": "true",
-            "params": {}
-          }
-        ],
-        "threshold": 1.0,
-        "aggregation": "MAX",
-        "positive": "MATCH",
-        "negative": "NO_MATCH",
-        "undefined": "orcids",
-        "ignoreUndefined": "true"
-      },
-      "orcids": {
-        "fields": [
-          {
-            "field": "orcids",
-            "comparator": "stringListMatch",
-            "weight": 1.0,
-            "countIfUndefined": "true",
-            "params": {"type": "count"}
-          }
-        ],
-        "threshold": 3.0,
-        "aggregation": "MAX",
-        "positive": "MATCH",
-        "negative": "coauthors",
-        "undefined": "coauthors",
-        "ignoreUndefined": "true"
-      },
-      "coauthors": {
-        "fields": [
-          {
-            "field": "coauthors",
-            "comparator": "authorsMatch",
-            "weight": 1.0,
-            "countIfUndefined": "true",
-            "params": {"type": "count"}
-          }
-        ],
-        "threshold": 1.0,
-        "aggregation": "MAX",
-        "positive": "topicsMatch",
-        "negative": "NO_MATCH",
-        "undefined": "topicsMatch",
-        "ignoreUndefined": "true"
-      },
-      "topicsMatch": {
-        "fields": [
-          {
-            "field": "topics",
-            "comparator": "cosineSimilarity",
-            "weight": 1.0,
-            "countIfUndefined": "true",
-            "params": {}
-          }
-        ],
-        "threshold": 0.5,
-        "aggregation": "MAX",
-        "positive": "MATCH",
-        "negative": "NO_MATCH",
-        "undefined": "NO_MATCH",
-        "ignoreUndefined": "false"
-      }
-    },
-    "model": [
-      {
-        "name": "topics",
-        "type": "DoubleArray",
-        "path": "$.topics"
-      },
-      {
-        "name": "fullname",
-        "type": "String",
-        "path": "$.fullname"
-      },
-      {
-        "name": "orcid",
-        "type": "String",
-        "path": "$.orcid"
-      },
-      {
-        "name": "coauthors",
-        "type": "List",
-        "path": "$.coAuthors[*].fullname"
-      },
-      {
-        "name": "orcids",
-        "type": "List",
-        "path": "$.coAuthors[*].orcid"
-      }
-    ],
-    "blacklists": {},
-    "synonyms": {}
-  }
-}
--- a/dnet-dedup-test/src/test/resources/eu/dnetlib/pace/config/authors.fdup.soft.conf.json
+++ b/dnet-dedup-test/src/test/resources/eu/dnetlib/pace/config/authors.fdup.soft.conf.json
@ -1,134 +0,0 @@
-{
-  "wf": {
-    "threshold": "0.99",
-    "dedupRun": "001",
-    "entityType": "author",
-    "subEntityType": "author",
-    "subEntityValue": "author",
-    "orderField": "fullname",
-    "queueMaxSize": "200",
-    "groupMaxSize": "100",
-    "maxChildren": "100",
-    "slidingWindowSize": "50",
-    "rootBuilder": [
-      "result",
-      "resultProject_outcome_isProducedBy",
-      "resultResult_publicationDataset_isRelatedTo",
-      "resultResult_similarity_isAmongTopNSimilarDocuments",
-      "resultResult_similarity_hasAmongTopNSimilarDocuments",
-      "resultOrganization_affiliation_isAffiliatedWith",
-      "resultResult_part_hasPart",
-      "resultResult_part_isPartOf",
-      "resultResult_supplement_isSupplementTo",
-      "resultResult_supplement_isSupplementedBy",
-      "resultResult_version_isVersionOf"
-    ],
-    "includeChildren": "true",
-    "maxIterations": 20,
-    "idPath": "$.id"
-  },
-  "pace": {
-    "clustering" : [
-      { "name" : "lnfi", "fields" : [ "fullname" ], "params" : {} }
-    ],
-    "decisionTree": {
-      "start": {
-        "fields": [
-          {
-            "field": "orcid",
-            "comparator": "exactMatch",
-            "weight": 1.0,
-            "countIfUndefined": "true",
-            "params": {}
-          }
-        ],
-        "threshold": 1.0,
-        "aggregation": "MAX",
-        "positive": "MATCH",
-        "negative": "NO_MATCH",
-        "undefined": "orcids",
-        "ignoreUndefined": "true"
-      },
-      "orcids": {
-        "fields": [
-          {
-            "field": "orcids",
-            "comparator": "stringListMatch",
-            "weight": 1.0,
-            "countIfUndefined": "true",
-            "params": {"type": "count"}
-          }
-        ],
-        "threshold": 3.0,
-        "aggregation": "MAX",
-        "positive": "MATCH",
-        "negative": "coauthors",
-        "undefined": "coauthors",
-        "ignoreUndefined": "true"
-      },
-      "coauthors": {
-        "fields": [
-          {
-            "field": "coauthors",
-            "comparator": "authorsMatch",
-            "weight": 1.0,
-            "countIfUndefined": "true",
-            "params": {"type": "count"}
-          }
-        ],
-        "threshold": 1.0,
-        "aggregation": "MAX",
-        "positive": "topicsMatch",
-        "negative": "NO_MATCH",
-        "undefined": "topicsMatch",
-        "ignoreUndefined": "true"
-      },
-      "topicsMatch": {
-        "fields": [
-          {
-            "field": "topics",
-            "comparator": "cosineSimilarity",
-            "weight": 1.0,
-            "countIfUndefined": "true",
-            "params": {}
-          }
-        ],
-        "threshold": 1.0,
-        "aggregation": "MAX",
-        "positive": "MATCH",
-        "negative": "NO_MATCH",
-        "undefined": "NO_MATCH",
-        "ignoreUndefined": "false"
-      }
-    },
-    "model": [
-      {
-        "name": "topics",
-        "type": "DoubleArray",
-        "path": "$.topics"
-      },
-      {
-        "name": "fullname",
-        "type": "String",
-        "path": "$.fullname"
-      },
-      {
-        "name": "orcid",
-        "type": "String",
-        "path": "$.orcid"
-      },
-      {
-        "name": "coauthors",
-        "type": "List",
-        "path": "$.coAuthors[*].fullname"
-      },
-      {
-        "name": "orcids",
-        "type": "List",
-        "path": "$.coAuthors[*].orcid"
-      }
-    ],
-    "blacklists": {},
-    "synonyms": {}
-  }
-}
--- a/dnet-dedup-test/src/test/resources/eu/dnetlib/pace/config/ds.tree.conf.json
+++ b/dnet-dedup-test/src/test/resources/eu/dnetlib/pace/config/ds.tree.conf.json
@ -1,87 +0,0 @@
-{
-  "wf" : {
-    "threshold" : "0.99",
-    "dedupRun" : "001",
-    "entityType" : "datasource",
-    "orderField" : "englishname",
-    "queueMaxSize" : "2000",
-    "groupMaxSize" : "50",
-    "slidingWindowSize" : "200",
-    "idPath":"$.id",
-    "rootBuilder" : [ "organization", "projectOrganization_participation_isParticipant", "datasourceOrganization_provision_isProvidedBy" ],
-    "includeChildren" : "true",
-    "maxIterations": "20"
-  },
-  "pace" : {
-    "clustering" : [
-      { "name" : "sortedngrampairs", "fields" : [ "officialname", "englishname" ], "params" : { "max" : 2, "ngramLen" : "3", "collapseOn:name": "0"} },
-      { "name" : "suffixprefix", "fields" : [ "officialname", "englishname" ], "params" : { "max" : 1, "len" : "3", "collapseOn:name": "0" } },
-      {"name" : "ngrams", "fields" : ["officialname", "englishname"], "params" : {"ngramLen": 4, "max" : 2, "maxPerToken": 2, "minNgramLen": 1, "collapseOn:name": "0"}},
-      { "name" : "urlclustering", "fields" : [ "websiteurl" ], "params" : { } }
-    ],
-    "decisionTree" : {
-      "start": {
-        "fields": [
-          {
-            "field": "websiteurl",
-            "comparator": "domainExactMatch",
-            "weight": 1,
-            "countIfUndefined": "false",
-            "params": {}
-          }
-        ],
-        "threshold": 1,
-        "aggregation": "AVG",
-        "positive": "layer2",
-        "negative": "NO_MATCH",
-        "undefined": "layer2",
-        "ignoreUndefined": "true"
-      },
-      "layer2": {
-        "fields": [
-          {
-            "field": "officialname",
-            "comparator": "levensteinTitle",
-            "weight": 1.0,
-            "countIfUndefined": "true",
-            "params": {
-              "threshold": 0.9
-            }
-          },
-          {
-            "field": "englishname",
-            "comparator": "levensteinTitle",
-            "weight": 1.0,
-            "countIfUndefined": "true",
-            "params": {
-              "threshold": 0.9
-            }
-          },
-          {
-            "field": "officialname",
-            "comparator": "levensteinTitle",
-            "weight": 1.0,
-            "countIfUndefined": "true",
-            "params": {
-              "crossCompare": "englishname",
-              "threshold": 0.9
-            }
-          }
-        ],
-        "threshold": 0.9,
-        "aggregation": "MAX",
-        "positive": "MATCH",
-        "negative": "NO_MATCH",
-        "undefined": "NO_MATCH",
-        "ignoreUndefined": "true"
-      }
-    },
-    "model" : [
-      { "name" : "englishname", "type" : "String", "path" : "$.englishname" },
-      { "name" : "officialname", "type" : "String", "path" : "$.officialname" },
-      { "name" : "websiteurl", "type" : "URL", "path" : "$.websiteurl" }
-    ],
-    "blacklists" : {},
-    "synonyms": {}
-  }
-}
--- a/dnet-dedup-test/src/test/resources/eu/dnetlib/pace/config/organization.current.conf.json
+++ b/dnet-dedup-test/src/test/resources/eu/dnetlib/pace/config/organization.current.conf.json
@ -3,9 +3,8 @@
    "threshold" : "0.99",
    "dedupRun" : "001",
    "entityType" : "organization",
-    "subEntityValue": "organization",
    "orderField" : "legalname",
-    "queueMaxSize" : "100000",
+    "queueMaxSize" : "2000",
    "groupMaxSize" : "50",
    "slidingWindowSize" : "200",
    "idPath":"$.id",
@ -144,10 +143,10 @@
      }
    },
    "model" : [
-      { "name" : "country", "type" : "String", "path" : "$.country.classid"},
-      { "name" : "legalshortname", "type" : "String", "path" : "$.legalshortname.value"},
-      { "name" : "legalname", "type" : "String", "path" : "$.legalname.value" },
-      { "name" : "websiteurl", "type" : "URL", "path" : "$.websiteurl.value" },
+      { "name" : "country", "type" : "String", "path" : "$.organization.metadata.country.classid"},
+      { "name" : "legalshortname", "type" : "String", "path" : "$.organization.metadata.legalshortname.value"},
+      { "name" : "legalname", "type" : "String", "path" : "$.organization.metadata.legalname.value" },
+      { "name" : "websiteurl", "type" : "URL", "path" : "$.organization.metadata.websiteurl.value" },
      { "name" : "gridid", "type" : "String", "path" : "$.pid[?(@.qualifier.classid =='grid')].value"},
      { "name" : "originalId", "type" : "String", "path" : "$.id" }
    ],
@ -155,7 +154,7 @@
      "legalname" : []
    },
    "synonyms": {
-      "key::1": ["university","università", "universitas", "università studi","universitario","universitaria","université", "universite", "universitaire","universitaires","universidad","universitade","Universität","universitaet","Uniwersytet","университет","universiteit","πανεπιστήμιο","universitesi","universiteti", "universiti", "Πανεπιστήμιο", "panepistemio"],
+      "key::1": ["university","università", "universitas", "università studi","universitario","universitaria","université", "universite", "universitaire","universitaires","universidad","universitade","Universität","universitaet","Uniwersytet","университет","universiteit","πανεπιστήμιο","universitesi","universiteti", "universiti"],
      "key::2": ["studies","studi","études","estudios","estudos","Studien","studia","исследования","studies","σπουδές"],
      "key::3": ["advanced","superiore","supérieur","supérieure","supérieurs","supérieures","avancado","avancados","fortgeschrittene","fortgeschritten","zaawansowany","передовой","gevorderd","gevorderde","προχωρημένος","προχωρημένη","προχωρημένο","προχωρημένες","προχωρημένα","wyzsza"],
      "key::4": ["institute","istituto","institut","instituto","instituto","Institut","instytut","институт","instituut","ινστιτούτο"],
@ -164,7 +163,7 @@
      "key::7": ["college","collegio","colegio","faculdade","Hochschule","Szkoła Wyższa","Высшая школа","κολλέγιο"],
      "key::8": ["foundation","fondazione","fondation","fundación","fundação","Stiftung","Fundacja","фонд","stichting","ίδρυμα","idryma"],
      "key::9": ["center","centro","centre","centro","centro","zentrum","centrum","центр","centrum","κέντρο"],
-      "key::10": ["national","nazionale","national","nationale","nationaux","nationales","nacional","nacional","national","krajowy","национальный","nationaal","nationale","εθνικό", "eθνικό"],
+      "key::10": ["national","nazionale","national","nationale","nationaux","nationales","nacional","nacional","national","krajowy","национальный","nationaal","nationale","εθνικό"],
      "key::11": ["association","associazione","association","asociación","associação","Verein","verband","stowarzyszenie","ассоциация","associatie"],
      "key::12": ["society","societa","société","sociedad","sociedade","gesellschaft","społeczeństwo","общество","maatschappij","κοινωνία"],
      "key::13": ["international","internazionale","international","internacional","internacional","international","międzynarodowy","Международный","internationaal","internationale","διεθνής","διεθνή","διεθνές"],
--- a/dnet-dedup-test/src/test/resources/eu/dnetlib/pace/config/pub.instancetype.tree.conf.json
+++ b/dnet-dedup-test/src/test/resources/eu/dnetlib/pace/config/pub.instancetype.tree.conf.json
@ -1,442 +0,0 @@
-{
-  "wf": {
-    "threshold": "0.99",
-    "dedupRun": "001",
-    "entityType": "result",
-    "subEntityType": "resulttype",
-    "subEntityValue": "publication",
-    "orderField": "title",
-    "queueMaxSize": "200",
-    "groupMaxSize": "100",
-    "maxChildren": "100",
-    "slidingWindowSize": "50",
-    "rootBuilder": [
-      "result",
-      "resultProject_outcome_isProducedBy",
-      "resultResult_publicationDataset_isRelatedTo",
-      "resultResult_similarity_isAmongTopNSimilarDocuments",
-      "resultResult_similarity_hasAmongTopNSimilarDocuments",
-      "resultOrganization_affiliation_isAffiliatedWith",
-      "resultResult_part_hasPart",
-      "resultResult_part_isPartOf",
-      "resultResult_supplement_isSupplementTo",
-      "resultResult_supplement_isSupplementedBy",
-      "resultResult_version_isVersionOf"
-    ],
-    "includeChildren": "true",
-    "maxIterations": 20,
-    "idPath": "$.id"
-  },
-  "pace": {
-    "clustering": [
-      {
-        "name": "wordsStatsSuffixPrefixChain",
-        "fields": [
-          "title"
-        ],
-        "params": {
-          "mod": "10"
-        }
-      },
-      {
-        "name": "lowercase",
-        "fields": [
-          "doi",
-          "altdoi"
-        ],
-        "params": {
-          "collapseOn:pid": "0"
-        }
-      }
-    ],
-    "decisionTree": {
-      "start": {
-        "fields": [
-          {
-            "field": "instance",
-            "comparator": "instanceTypeMatch",
-            "weight": 1.0,
-            "countIfUndefined": "false",
-            "params": {}
-          }
-        ],
-        "threshold": 0.5,
-        "aggregation": "MAX",
-        "positive": "layer1",
-        "negative": "NO_MATCH",
-        "undefined": "layer1",
-        "ignoreUndefined": "true"
-      },
-      "layer1": {
-        "fields": [
-          {
-            "field": "pid",
-            "comparator": "jsonListMatch",
-            "weight": 1.0,
-            "countIfUndefined": "false",
-            "params": {
-              "jpath_value": "$.value",
-              "jpath_classid": "$.qualifier.classid"
-            }
-          },
-          {
-            "field": "pid",
-            "comparator": "jsonListMatch",
-            "weight": 1.0,
-            "countIfUndefined": "false",
-            "params": {
-              "jpath_value": "$.value",
-              "jpath_classid": "$.qualifier.classid",
-              "crossCompare": "alternateid"
-            }
-          }
-        ],
-        "threshold": 0.5,
-        "aggregation": "MAX",
-        "positive": "layer2",
-        "negative": "layer3",
-        "undefined": "layer3",
-        "ignoreUndefined": "true"
-      },
-      "layer2": {
-        "fields": [
-          {
-            "field": "title",
-            "comparator": "levensteinTitle",
-            "weight": 1.0,
-            "countIfUndefined": "true",
-            "params": {}
-          }
-        ],
-        "threshold": 0.9,
-        "aggregation": "AVG",
-        "positive": "MATCH",
-        "negative": "NO_MATCH",
-        "undefined": "NO_MATCH",
-        "ignoreUndefined": "true"
-      },
-      "layer3": {
-        "fields": [
-          {
-            "field": "title",
-            "comparator": "titleVersionMatch",
-            "weight": 1.0,
-            "countIfUndefined": "false",
-            "params": {}
-          },
-          {
-            "field": "authors",
-            "comparator": "sizeMatch",
-            "weight": 1.0,
-            "countIfUndefined": "false",
-            "params": {}
-          }
-        ],
-        "threshold": 1.0,
-        "aggregation": "AND",
-        "positive": "layer4",
-        "negative": "NO_MATCH",
-        "undefined": "layer4",
-        "ignoreUndefined": "false"
-      },
-      "layer4": {
-        "fields": [
-          {
-            "field": "title",
-            "comparator": "levensteinTitle",
-            "weight": 1.0,
-            "countIfUndefined": "true",
-            "params": {}
-          }
-        ],
-        "threshold": 0.99,
-        "aggregation": "AVG",
-        "positive": "MATCH",
-        "negative": "NO_MATCH",
-        "undefined": "NO_MATCH",
-        "ignoreUndefined": "true"
-      }
-    },
-    "model": [
-      {
-        "name": "doi",
-        "type": "String",
-        "path": "$.instance[*].pid[?(@.qualifier.classid == 'doi')].value"
-      },
-      {
-        "name": "altdoi",
-        "type": "String",
-        "path": "$.instance[*].alternateIdentifier[?(@.qualifier.classid == 'doi')].value"
-      },
-      {
-        "name": "pid",
-        "type": "JSON",
-        "path": "$.instance[*].pid[*]",
-        "overrideMatch": "true"
-      },
-      {
-        "name": "alternateid",
-        "type": "JSON",
-        "path": "$.instance[*].alternateIdentifier[*]",
-        "overrideMatch": "true"
-      },
-      {
-        "name": "title",
-        "type": "String",
-        "path": "$.title[?(@.qualifier.classid == 'main title')].value",
-        "length": 250,
-        "size": 5
-      },
-      {
-        "name": "authors",
-        "type": "List",
-        "path": "$.author[*].fullname",
-        "size": 200
-      },
-      {
-        "name": "resulttype",
-        "type": "String",
-        "path": "$.resulttype.classid"
-      },
-      {
-        "name": "instance",
-        "type": "List",
-        "path": "$.instance[*].instancetype.classname"
-      }
-    ],
-    "blacklists": {
-      "title": [
-        "(?i)^Data Management Plan",
-        "^Inside Front Cover$",
-        "(?i)^Poster presentations$",
-        "^THE ASSOCIATION AND THE GENERAL MEDICAL COUNCIL$",
-        "^Problems with perinatal pathology\\.?$",
-        "(?i)^Cases? of Puerperal Convulsions$",
-        "(?i)^Operative Gyna?ecology$",
-        "(?i)^Mind the gap\\!?\\:?$",
-        "^Chronic fatigue syndrome\\.?$",
-        "^Cartas? ao editor Letters? to the Editor$",
-        "^Note from the Editor$",
-        "^Anesthesia Abstract$",
-        "^Annual report$",
-        "(?i)^“?THE RADICAL PREVENTION OF VENEREAL DISEASE\\.?”?$",
-        "(?i)^Graph and Table of Infectious Diseases?$",
-        "^Presentation$",
-        "(?i)^Reviews and Information on Publications$",
-        "(?i)^PUBLIC HEALTH SERVICES?$",
-        "(?i)^COMBINED TEXT-?BOOK OF OBSTETRICS AND GYN(Æ|ae)COLOGY$",
-        "(?i)^Adrese autora$",
-        "(?i)^Systematic Part .*\\. Catalogus Fossilium Austriae, Band 2: Echinoidea neogenica$",
-        "(?i)^Acknowledgement to Referees$",
-        "(?i)^Behçet's disease\\.?$",
-        "(?i)^Isolation and identification of restriction endonuclease.*$",
-        "(?i)^CEREBROVASCULAR DISEASES?.?$",
-        "(?i)^Screening for abdominal aortic aneurysms?\\.?$",
-        "^Event management$",
-        "(?i)^Breakfast and Crohn's disease.*\\.?$",
-        "^Cálculo de concentraciones en disoluciones acuosas. Ejercicio interactivo\\..*\\.$",
-        "(?i)^Genetic and functional analyses of SHANK2 mutations suggest a multiple hit model of Autism spectrum disorders?\\.?$",
-        "^Gushi hakubutsugaku$",
-        "^Starobosanski nadpisi u Bosni i Hercegovini \\(.*\\)$",
-        "^Intestinal spirocha?etosis$",
-        "^Treatment of Rodent Ulcer$",
-        "(?i)^\\W*Cloud Computing\\W*$",
-        "^Compendio mathematico : en que se contienen todas las materias mas principales de las Ciencias que tratan de la cantidad$",
-        "^Free Communications, Poster Presentations: Session [A-F]$",
-        "^“The Historical Aspects? of Quackery\\.?”$",
-        "^A designated centre for people with disabilities operated by St John of God Community Services (Limited|Ltd), Louth$",
-        "^P(er|re)-Mile Premiums for Auto Insurance\\.?$",
-        "(?i)^Case Report$",
-        "^Boletín Informativo$",
-        "(?i)^Glioblastoma Multiforme$",
-        "(?i)^Nuevos táxones animales descritos en la península Ibérica y Macaronesia desde 1994 \\(.*\\)$",
-        "^Zaměstnanecké výhody$",
-        "(?i)^The Economics of Terrorism and Counter-Terrorism: A Survey \\(Part .*\\)$",
-        "(?i)^Carotid body tumours?\\.?$",
-        "(?i)^\\[Españoles en Francia : La condición Emigrante.*\\]$",
-        "^Avant-propos$",
-        "(?i)^St\\. Patrick's Cathedral, Dublin, County Dublin - Head(s)? and Capital(s)?$",
-        "(?i)^St\\. Patrick's Cathedral, Dublin, County Dublin - Bases?$",
-        "(?i)^PUBLIC HEALTH VERSUS THE STATE$",
-        "^Viñetas de Cortázar$",
-        "(?i)^Search for heavy neutrinos and W(\\[|_|\\(|_\\{|-)?R(\\]|\\)|\\})? bosons with right-handed couplings in a left-right symmetric model in pp collisions at.*TeV(\\.)?$",
-        "(?i)^Measurement of the pseudorapidity and centrality dependence of the transverse energy density in Pb(-?)Pb collisions at.*tev(\\.?)$",
-        "(?i)^Search for resonances decaying into top-quark pairs using fully hadronic decays in pp collisions with ATLAS at.*TeV$",
-        "(?i)^Search for neutral minimal supersymmetric standard model Higgs bosons decaying to tau pairs in pp collisions at.*tev$",
-        "(?i)^Relatório de Estágio (de|em) Angiologia e Cirurgia Vascular$",
-        "^Aus der AGMB$",
-        "^Znanstveno-stručni prilozi$",
-        "(?i)^Zhodnocení finanční situace podniku a návrhy na zlepšení$",
-        "(?i)^Evaluation of the Financial Situation in the Firm and Proposals to its Improvement$",
-        "(?i)^Hodnocení finanční situace podniku a návrhy na její zlepšení$",
-        "^Finanční analýza podniku$",
-        "^Financial analysis( of business)?$",
-        "(?i)^Textbook of Gyn(a)?(Æ)?(e)?cology$",
-        "^Jikken nihon shūshinsho$",
-        "(?i)^CORONER('|s)(s|') INQUESTS$",
-        "(?i)^(Μελέτη παραγόντων )?risk management( για ανάπτυξη και εφαρμογή ενός πληροφοριακού συστήματος| και ανάπτυξη συστήματος)?$",
-        "(?i)^Consultants' contract(s)?$",
-        "(?i)^Upute autorima$",
-        "(?i)^Bijdrage tot de Kennis van den Godsdienst der Dajaks van Lan(d|f)ak en Tajan$",
-        "^Joshi shin kokubun$",
-        "^Kōtō shōgaku dokuhon nōson'yō$",
-        "^Jinjō shōgaku shōka$",
-        "^Shōgaku shūjichō$",
-        "^Nihon joshi dokuhon$",
-        "^Joshi shin dokuhon$",
-        "^Chūtō kanbun dokuhon$",
-        "^Wabun dokuhon$",
-        "(?i)^(Analysis of economy selected village or town|Rozbor hospodaření vybrané obce či města)$",
-        "(?i)^cardiac rehabilitation$",
-        "(?i)^Analytical summary$",
-        "^Thesaurus resolutionum Sacrae Congregationis Concilii$",
-        "(?i)^Sumario analítico(\\s{1})?(Analitic summary)?$",
-        "^Prikazi i osvrti$",
-        "^Rodinný dům s provozovnou$",
-        "^Family house with an establishment$",
-        "^Shinsei chūtō shin kokugun$",
-        "^Pulmonary alveolar proteinosis(\\.?)$",
-        "^Shinshū kanbun$",
-        "^Viñeta(s?) de Rodríguez$",
-        "(?i)^RUBRIKA UREDNIKA$",
-        "^A Matching Model of the Academic Publication Market$",
-        "^Yōgaku kōyō$",
-        "^Internetový marketing$",
-        "^Internet marketing$",
-        "^Chūtō kokugo dokuhon$",
-        "^Kokugo dokuhon$",
-        "^Antibiotic Cover for Dental Extraction(s?)$",
-        "^Strategie podniku$",
-        "^Strategy of an Enterprise$",
-        "(?i)^respiratory disease(s?)(\\.?)$",
-        "^Award(s?) for Gallantry in Civil Defence$",
-        "^Podniková kultura$",
-        "^Corporate Culture$",
-        "^Severe hyponatraemia in hospital inpatient(s?)(\\.?)$",
-        "^Pracovní motivace$",
-        "^Work Motivation$",
-        "^Kaitei kōtō jogaku dokuhon$",
-        "^Konsolidovaná účetní závěrka$",
-        "^Consolidated Financial Statements$",
-        "(?i)^intracranial tumour(s?)$",
-        "^Climate Change Mitigation Options and Directed Technical Change: A Decentralized Equilibrium Analysis$",
-        "^\\[CERVECERIAS MAHOU(\\.|\\:) INTERIOR\\] \\[Material gráfico\\]$",
-        "^Housing Market Dynamics(\\:|\\.) On the Contribution of Income Shocks and Credit Constraint(s?)$",
-        "^\\[Funciones auxiliares de la música en Radio París,.*\\]$",
-        "^Úroveň motivačního procesu jako způsobu vedení lidí$",
-        "^The level of motivation process as a leadership$",
-        "^Pay-beds in N(\\.?)H(\\.?)S(\\.?) Hospitals$",
-        "(?i)^news and events$",
-        "(?i)^NOVOSTI I DOGAĐAJI$",
-        "^Sansū no gakushū$",
-        "^Posouzení informačního systému firmy a návrh změn$",
-        "^Information System Assessment and Proposal for ICT Modification$",
-        "^Stresové zatížení pracovníků ve vybrané profesi$",
-        "^Stress load in a specific job$",
-        "^Sunday: Poster Sessions, Pt.*$",
-        "^Monday: Poster Sessions, Pt.*$",
-        "^Wednesday: Poster Sessions, Pt.*",
-        "^Tuesday: Poster Sessions, Pt.*$",
-        "^Analýza reklamy$",
-        "^Analysis of advertising$",
-        "^Shōgaku shūshinsho$",
-        "^Shōgaku sansū$",
-        "^Shintei joshi kokubun$",
-        "^Taishō joshi kokubun dokuhon$",
-        "^Joshi kokubun$",
-        "^Účetní uzávěrka a účetní závěrka v ČR$",
-        "(?i)^The \"?Causes\"? of Cancer$",
-        "^Normas para la publicación de artículos$",
-        "^Editor('|s)(s|') [Rr]eply$",
-        "^Editor(’|s)(s|’) letter$",
-        "^Redaktoriaus žodis$",
-        "^DISCUSSION ON THE PRECEDING PAPER$",
-        "^Kōtō shōgaku shūshinsho jidōyō$",
-        "^Shōgaku nihon rekishi$",
-        "^(Theory of the flow of action currents in isolated myelinated nerve fibers).*$",
-        "^Préface$",
-        "^Occupational [Hh]ealth [Ss]ervices.$",
-        "^In Memoriam Professor Toshiyuki TAKESHIMA$",
-        "^Účetní závěrka ve vybraném podniku.*$",
-        "^Financial statements in selected company$",
-        "^Abdominal [Aa]ortic [Aa]neurysms.*$",
-        "^Pseudomyxoma peritonei$",
-        "^Kazalo autora$",
-        "(?i)^uvodna riječ$",
-        "^Motivace jako způsob vedení lidí$",
-        "^Motivation as a leadership$",
-        "^Polyfunkční dům$",
-        "^Multi\\-funkcional building$",
-        "^Podnikatelský plán$",
-        "(?i)^Podnikatelský záměr$",
-        "(?i)^Business Plan$",
-        "^Oceňování nemovitostí$",
-        "^Marketingová komunikace$",
-        "^Marketing communication$",
-        "^Sumario Analítico$",
-        "^Riječ uredništva$",
-        "^Savjetovanja i priredbe$",
-        "^Índice$",
-        "^(Starobosanski nadpisi).*$",
-        "^Vzdělávání pracovníků v organizaci$",
-        "^Staff training in organization$",
-        "^(Life Histories of North American Geometridae).*$",
-        "^Strategická analýza podniku$",
-        "^Strategic Analysis of an Enterprise$",
-        "^Sadržaj$",
-        "^Upute suradnicima$",
-        "^Rodinný dům$",
-        "(?i)^Fami(l)?ly house$",
-        "^Upute autorima$",
-        "^Strategic Analysis$",
-        "^Finanční analýza vybraného podniku$",
-        "^Finanční analýza$",
-        "^Riječ urednika$",
-        "(?i)^Content(s?)$",
-        "(?i)^Inhalt$",
-        "^Jinjō shōgaku shūshinsho jidōyō$",
-        "(?i)^Index$",
-        "^Chūgaku kokubun kyōkasho$",
-        "^Retrato de una mujer$",
-        "^Retrato de un hombre$",
-        "^Kōtō shōgaku dokuhon$",
-        "^Shotōka kokugo$",
-        "^Shōgaku dokuhon$",
-        "^Jinjō shōgaku kokugo dokuhon$",
-        "^Shinsei kokugo dokuhon$",
-        "^Teikoku dokuhon$",
-        "^Instructions to Authors$",
-        "^KİTAP TAHLİLİ$",
-        "^PRZEGLĄD PIŚMIENNICTWA$",
-        "(?i)^Presentación$",
-        "^İçindekiler$",
-        "(?i)^Tabl?e of contents$",
-        "^(CODICE DEL BEATO DE LOS REYES FERNANDO I Y SANCHA).*$",
-        "^(\\[MADRID\\. BIBL\\. NAC\\. N.*KING FERDINAND I.*FROM SAN ISIDORO DE LEON\\. FACUNDUS SCRIPSIT DATED.*\\]).*",
-        "^Editorial( Board)?$",
-        "(?i)^Editorial \\(English\\)$",
-        "^Editörden$",
-        "^(Corpus Oral Dialectal \\(COD\\)\\.).*$",
-        "^(Kiri Karl Morgensternile).*$",
-        "^(\\[Eksliibris Aleksandr).*\\]$",
-        "^(\\[Eksliibris Aleksandr).*$",
-        "^(Eksliibris Aleksandr).*$",
-        "^(Kiri A\\. de Vignolles).*$",
-        "^(2 kirja Karl Morgensternile).*$",
-        "^(Pirita kloostri idaosa arheoloogilised).*$",
-        "^(Kiri tundmatule).*$",
-        "^(Kiri Jenaer Allgemeine Literaturzeitung toimetusele).*$",
-        "^(Eksliibris Nikolai Birukovile).*$",
-        "^(Eksliibris Nikolai Issakovile).*$",
-        "^(WHP Cruise Summary Information of section).*$",
-        "^(Measurement of the top quark\\-pair production cross section with ATLAS in pp collisions at).*$",
-        "^(Measurement of the spin\\-dependent structure function).*",
-        "(?i)^.*authors['’′]? reply\\.?$",
-        "(?i)^.*authors['’′]? response\\.?$",
-        "^Data [mM]anagement [sS]ervices\\.$",
-        "Research and Advanced Technology for Digital Libraries"
-      ]
-    },
-    "synonyms": {}
-  }
-}
--- a/dnet-dedup-test/src/test/resources/eu/dnetlib/pace/config/pub.new.tree.conf.json
+++ b/dnet-dedup-test/src/test/resources/eu/dnetlib/pace/config/pub.new.tree.conf.json
@ -1,476 +0,0 @@
-{
-  "wf": {
-    "threshold": "0.99",
-    "dedupRun": "001",
-    "entityType": "result",
-    "subEntityType": "resulttype",
-    "subEntityValue": "publication",
-    "orderField": "title",
-    "queueMaxSize": "200",
-    "groupMaxSize": "100",
-    "maxChildren": "100",
-    "slidingWindowSize": "50",
-    "rootBuilder": [
-      "result",
-      "resultProject_outcome_isProducedBy",
-      "resultResult_publicationDataset_isRelatedTo",
-      "resultResult_similarity_isAmongTopNSimilarDocuments",
-      "resultResult_similarity_hasAmongTopNSimilarDocuments",
-      "resultOrganization_affiliation_isAffiliatedWith",
-      "resultResult_part_hasPart",
-      "resultResult_part_isPartOf",
-      "resultResult_supplement_isSupplementTo",
-      "resultResult_supplement_isSupplementedBy",
-      "resultResult_version_isVersionOf"
-    ],
-    "includeChildren": "true",
-    "maxIterations": 20,
-    "idPath": "$.id"
-  },
-  "pace": {
-    "clustering": [
-      {
-        "name": "wordsStatsSuffixPrefixChain",
-        "fields": [
-          "title"
-        ],
-        "params": {
-          "mod": "10"
-        }
-      },
-      {
-        "name": "lowercase",
-        "fields": [
-          "doi",
-          "altdoi"
-        ],
-        "params": {
-          "collapseOn:pid": "0"
-        }
-      }
-    ],
-    "decisionTree": {
-      "start": {
-        "fields": [
-          {
-            "field": "pid",
-            "comparator": "jsonListMatch",
-            "weight": 1.0,
-            "countIfUndefined": "false",
-            "params": {
-              "jpath_value": "$.value",
-              "jpath_classid": "$.qualifier.classid",
-              "mode": "count"
-            }
-          }
-        ],
-        "threshold": 1.0,
-        "aggregation": "MAX",
-        "positive": "MATCH",
-        "negative": "NO_MATCH",
-        "undefined": "instanceTypeCheck",
-        "ignoreUndefined": "false"
-      },
-      "instanceTypeCheck": {
-        "fields": [
-          {
-            "field": "instance",
-            "comparator": "instanceTypeMatch",
-            "weight": 1.0,
-            "countIfUndefined": "false",
-            "params": {}
-          }
-        ],
-        "threshold": 0.5,
-        "aggregation": "MAX",
-        "positive": "pidVSaltid",
-        "negative": "NO_MATCH",
-        "undefined": "pidVSaltid",
-        "ignoreUndefined": "true"
-      },
-      "pidVSaltid": {
-        "fields": [
-          {
-            "field": "pid",
-            "comparator": "jsonListMatch",
-            "weight": 1.0,
-            "countIfUndefined": "false",
-            "params": {
-              "jpath_value": "$.value",
-              "jpath_classid": "$.qualifier.classid",
-              "crossCompare": "alternateid",
-              "mode": "count"
-            }
-          }
-        ],
-        "threshold": 1.0,
-        "aggregation": "MAX",
-        "positive": "softCheck",
-        "negative": "earlyExits",
-        "undefined": "earlyExits",
-        "ignoreUndefined": "true"
-      },
-      "softCheck": {
-        "fields": [
-          {
-            "field": "title",
-            "comparator": "levensteinTitle",
-            "weight": 1.0,
-            "countIfUndefined": "true",
-            "params": {}
-          }
-        ],
-        "threshold": 0.9,
-        "aggregation": "AVG",
-        "positive": "MATCH",
-        "negative": "NO_MATCH",
-        "undefined": "NO_MATCH",
-        "ignoreUndefined": "true"
-      },
-      "earlyExits": {
-        "fields": [
-          {
-            "field": "title",
-            "comparator": "titleVersionMatch",
-            "weight": 1.0,
-            "countIfUndefined": "false",
-            "params": {}
-          },
-          {
-            "field": "authors",
-            "comparator": "sizeMatch",
-            "weight": 1.0,
-            "countIfUndefined": "false",
-            "params": {}
-          }
-        ],
-        "threshold": 1.0,
-        "aggregation": "AND",
-        "positive": "strongCheck",
-        "negative": "NO_MATCH",
-        "undefined": "strongCheck",
-        "ignoreUndefined": "false"
-      },
-      "strongCheck": {
-        "fields": [
-          {
-            "field": "title",
-            "comparator": "levensteinTitle",
-            "weight": 1.0,
-            "countIfUndefined": "true",
-            "params": {}
-          }
-        ],
-        "threshold": 0.99,
-        "aggregation": "AVG",
-        "positive": "surnames",
-        "negative": "NO_MATCH",
-        "undefined": "NO_MATCH",
-        "ignoreUndefined": "true"
-      },
-      "surnames": {
-        "fields": [
-          {
-            "field": "authors",
-            "comparator": "authorsMatch",
-            "weight": 1.0,
-            "countIfUndefined": "false",
-            "params": {
-              "surname_th": 0.75,
-              "fullname_th": 0.75,
-              "size_th": 20,
-              "mode": "surname"
-            }
-          }
-        ],
-        "threshold": 0.6,
-        "aggregation": "MAX",
-        "positive": "MATCH",
-        "negative": "NO_MATCH",
-        "undefined": "MATCH",
-        "ignoreUndefined": "true"
-      }
-    },
-    "model": [
-      {
-        "name": "doi",
-        "type": "String",
-        "path": "$.instance[*].pid[?(@.qualifier.classid == 'doi')].value"
-      },
-      {
-        "name": "altdoi",
-        "type": "String",
-        "path": "$.instance[*].alternateIdentifier[?(@.qualifier.classid == 'doi')].value"
-      },
-      {
-        "name": "pid",
-        "type": "JSON",
-        "path": "$.instance[*].pid[*]",
-        "overrideMatch": "true"
-      },
-      {
-        "name": "alternateid",
-        "type": "JSON",
-        "path": "$.instance[*].alternateIdentifier[*]",
-        "overrideMatch": "true"
-      },
-      {
-        "name": "title",
-        "type": "StringConcat",
-        "path": "$.title[?(@.qualifier.classid == 'main title')].value|||$.title[?(@.qualifier.classid == 'subtitle')].value",
-        "length": 250,
-        "size": 5
-      },
-      {
-        "name": "authors",
-        "type": "List",
-        "path": "$.author[*].fullname",
-        "size": 200
-      },
-      {
-        "name": "resulttype",
-        "type": "String",
-        "path": "$.resulttype.classid"
-      },
-      {
-        "name": "instance",
-        "type": "List",
-        "path": "$.instance[*].instancetype.classname"
-      }
-    ],
-    "blacklists": {
-      "title": [
-        "(?i)^Data Management Plan",
-        "^Inside Front Cover$",
-        "(?i)^Poster presentations$",
-        "^THE ASSOCIATION AND THE GENERAL MEDICAL COUNCIL$",
-        "^Problems with perinatal pathology\\.?$",
-        "(?i)^Cases? of Puerperal Convulsions$",
-        "(?i)^Operative Gyna?ecology$",
-        "(?i)^Mind the gap\\!?\\:?$",
-        "^Chronic fatigue syndrome\\.?$",
-        "^Cartas? ao editor Letters? to the Editor$",
-        "^Note from the Editor$",
-        "^Anesthesia Abstract$",
-        "^Annual report$",
-        "(?i)^“?THE RADICAL PREVENTION OF VENEREAL DISEASE\\.?”?$",
-        "(?i)^Graph and Table of Infectious Diseases?$",
-        "^Presentation$",
-        "(?i)^Reviews and Information on Publications$",
-        "(?i)^PUBLIC HEALTH SERVICES?$",
-        "(?i)^COMBINED TEXT-?BOOK OF OBSTETRICS AND GYN(Æ|ae)COLOGY$",
-        "(?i)^Adrese autora$",
-        "(?i)^Systematic Part .*\\. Catalogus Fossilium Austriae, Band 2: Echinoidea neogenica$",
-        "(?i)^Acknowledgement to Referees$",
-        "(?i)^Behçet's disease\\.?$",
-        "(?i)^Isolation and identification of restriction endonuclease.*$",
-        "(?i)^CEREBROVASCULAR DISEASES?.?$",
-        "(?i)^Screening for abdominal aortic aneurysms?\\.?$",
-        "^Event management$",
-        "(?i)^Breakfast and Crohn's disease.*\\.?$",
-        "^Cálculo de concentraciones en disoluciones acuosas. Ejercicio interactivo\\..*\\.$",
-        "(?i)^Genetic and functional analyses of SHANK2 mutations suggest a multiple hit model of Autism spectrum disorders?\\.?$",
-        "^Gushi hakubutsugaku$",
-        "^Starobosanski nadpisi u Bosni i Hercegovini \\(.*\\)$",
-        "^Intestinal spirocha?etosis$",
-        "^Treatment of Rodent Ulcer$",
-        "(?i)^\\W*Cloud Computing\\W*$",
-        "^Compendio mathematico : en que se contienen todas las materias mas principales de las Ciencias que tratan de la cantidad$",
-        "^Free Communications, Poster Presentations: Session [A-F]$",
-        "^“The Historical Aspects? of Quackery\\.?”$",
-        "^A designated centre for people with disabilities operated by St John of God Community Services (Limited|Ltd), Louth$",
-        "^P(er|re)-Mile Premiums for Auto Insurance\\.?$",
-        "(?i)^Case Report$",
-        "^Boletín Informativo$",
-        "(?i)^Glioblastoma Multiforme$",
-        "(?i)^Nuevos táxones animales descritos en la península Ibérica y Macaronesia desde 1994 \\(.*\\)$",
-        "^Zaměstnanecké výhody$",
-        "(?i)^The Economics of Terrorism and Counter-Terrorism: A Survey \\(Part .*\\)$",
-        "(?i)^Carotid body tumours?\\.?$",
-        "(?i)^\\[Españoles en Francia : La condición Emigrante.*\\]$",
-        "^Avant-propos$",
-        "(?i)^St\\. Patrick's Cathedral, Dublin, County Dublin - Head(s)? and Capital(s)?$",
-        "(?i)^St\\. Patrick's Cathedral, Dublin, County Dublin - Bases?$",
-        "(?i)^PUBLIC HEALTH VERSUS THE STATE$",
-        "^Viñetas de Cortázar$",
-        "(?i)^Search for heavy neutrinos and W(\\[|_|\\(|_\\{|-)?R(\\]|\\)|\\})? bosons with right-handed couplings in a left-right symmetric model in pp collisions at.*TeV(\\.)?$",
-        "(?i)^Measurement of the pseudorapidity and centrality dependence of the transverse energy density in Pb(-?)Pb collisions at.*tev(\\.?)$",
-        "(?i)^Search for resonances decaying into top-quark pairs using fully hadronic decays in pp collisions with ATLAS at.*TeV$",
-        "(?i)^Search for neutral minimal supersymmetric standard model Higgs bosons decaying to tau pairs in pp collisions at.*tev$",
-        "(?i)^Relatório de Estágio (de|em) Angiologia e Cirurgia Vascular$",
-        "^Aus der AGMB$",
-        "^Znanstveno-stručni prilozi$",
-        "(?i)^Zhodnocení finanční situace podniku a návrhy na zlepšení$",
-        "(?i)^Evaluation of the Financial Situation in the Firm and Proposals to its Improvement$",
-        "(?i)^Hodnocení finanční situace podniku a návrhy na její zlepšení$",
-        "^Finanční analýza podniku$",
-        "^Financial analysis( of business)?$",
-        "(?i)^Textbook of Gyn(a)?(Æ)?(e)?cology$",
-        "^Jikken nihon shūshinsho$",
-        "(?i)^CORONER('|s)(s|') INQUESTS$",
-        "(?i)^(Μελέτη παραγόντων )?risk management( για ανάπτυξη και εφαρμογή ενός πληροφοριακού συστήματος| και ανάπτυξη συστήματος)?$",
-        "(?i)^Consultants' contract(s)?$",
-        "(?i)^Upute autorima$",
-        "(?i)^Bijdrage tot de Kennis van den Godsdienst der Dajaks van Lan(d|f)ak en Tajan$",
-        "^Joshi shin kokubun$",
-        "^Kōtō shōgaku dokuhon nōson'yō$",
-        "^Jinjō shōgaku shōka$",
-        "^Shōgaku shūjichō$",
-        "^Nihon joshi dokuhon$",
-        "^Joshi shin dokuhon$",
-        "^Chūtō kanbun dokuhon$",
-        "^Wabun dokuhon$",
-        "(?i)^(Analysis of economy selected village or town|Rozbor hospodaření vybrané obce či města)$",
-        "(?i)^cardiac rehabilitation$",
-        "(?i)^Analytical summary$",
-        "^Thesaurus resolutionum Sacrae Congregationis Concilii$",
-        "(?i)^Sumario analítico(\\s{1})?(Analitic summary)?$",
-        "^Prikazi i osvrti$",
-        "^Rodinný dům s provozovnou$",
-        "^Family house with an establishment$",
-        "^Shinsei chūtō shin kokugun$",
-        "^Pulmonary alveolar proteinosis(\\.?)$",
-        "^Shinshū kanbun$",
-        "^Viñeta(s?) de Rodríguez$",
-        "(?i)^RUBRIKA UREDNIKA$",
-        "^A Matching Model of the Academic Publication Market$",
-        "^Yōgaku kōyō$",
-        "^Internetový marketing$",
-        "^Internet marketing$",
-        "^Chūtō kokugo dokuhon$",
-        "^Kokugo dokuhon$",
-        "^Antibiotic Cover for Dental Extraction(s?)$",
-        "^Strategie podniku$",
-        "^Strategy of an Enterprise$",
-        "(?i)^respiratory disease(s?)(\\.?)$",
-        "^Award(s?) for Gallantry in Civil Defence$",
-        "^Podniková kultura$",
-        "^Corporate Culture$",
-        "^Severe hyponatraemia in hospital inpatient(s?)(\\.?)$",
-        "^Pracovní motivace$",
-        "^Work Motivation$",
-        "^Kaitei kōtō jogaku dokuhon$",
-        "^Konsolidovaná účetní závěrka$",
-        "^Consolidated Financial Statements$",
-        "(?i)^intracranial tumour(s?)$",
-        "^Climate Change Mitigation Options and Directed Technical Change: A Decentralized Equilibrium Analysis$",
-        "^\\[CERVECERIAS MAHOU(\\.|\\:) INTERIOR\\] \\[Material gráfico\\]$",
-        "^Housing Market Dynamics(\\:|\\.) On the Contribution of Income Shocks and Credit Constraint(s?)$",
-        "^\\[Funciones auxiliares de la música en Radio París,.*\\]$",
-        "^Úroveň motivačního procesu jako způsobu vedení lidí$",
-        "^The level of motivation process as a leadership$",
-        "^Pay-beds in N(\\.?)H(\\.?)S(\\.?) Hospitals$",
-        "(?i)^news and events$",
-        "(?i)^NOVOSTI I DOGAĐAJI$",
-        "^Sansū no gakushū$",
-        "^Posouzení informačního systému firmy a návrh změn$",
-        "^Information System Assessment and Proposal for ICT Modification$",
-        "^Stresové zatížení pracovníků ve vybrané profesi$",
-        "^Stress load in a specific job$",
-        "^Sunday: Poster Sessions, Pt.*$",
-        "^Monday: Poster Sessions, Pt.*$",
-        "^Wednesday: Poster Sessions, Pt.*",
-        "^Tuesday: Poster Sessions, Pt.*$",
-        "^Analýza reklamy$",
-        "^Analysis of advertising$",
-        "^Shōgaku shūshinsho$",
-        "^Shōgaku sansū$",
-        "^Shintei joshi kokubun$",
-        "^Taishō joshi kokubun dokuhon$",
-        "^Joshi kokubun$",
-        "^Účetní uzávěrka a účetní závěrka v ČR$",
-        "(?i)^The \"?Causes\"? of Cancer$",
-        "^Normas para la publicación de artículos$",
-        "^Editor('|s)(s|') [Rr]eply$",
-        "^Editor(’|s)(s|’) letter$",
-        "^Redaktoriaus žodis$",
-        "^DISCUSSION ON THE PRECEDING PAPER$",
-        "^Kōtō shōgaku shūshinsho jidōyō$",
-        "^Shōgaku nihon rekishi$",
-        "^(Theory of the flow of action currents in isolated myelinated nerve fibers).*$",
-        "^Préface$",
-        "^Occupational [Hh]ealth [Ss]ervices.$",
-        "^In Memoriam Professor Toshiyuki TAKESHIMA$",
-        "^Účetní závěrka ve vybraném podniku.*$",
-        "^Financial statements in selected company$",
-        "^Abdominal [Aa]ortic [Aa]neurysms.*$",
-        "^Pseudomyxoma peritonei$",
-        "^Kazalo autora$",
-        "(?i)^uvodna riječ$",
-        "^Motivace jako způsob vedení lidí$",
-        "^Motivation as a leadership$",
-        "^Polyfunkční dům$",
-        "^Multi\\-funkcional building$",
-        "^Podnikatelský plán$",
-        "(?i)^Podnikatelský záměr$",
-        "(?i)^Business Plan$",
-        "^Oceňování nemovitostí$",
-        "^Marketingová komunikace$",
-        "^Marketing communication$",
-        "^Sumario Analítico$",
-        "^Riječ uredništva$",
-        "^Savjetovanja i priredbe$",
-        "^Índice$",
-        "^(Starobosanski nadpisi).*$",
-        "^Vzdělávání pracovníků v organizaci$",
-        "^Staff training in organization$",
-        "^(Life Histories of North American Geometridae).*$",
-        "^Strategická analýza podniku$",
-        "^Strategic Analysis of an Enterprise$",
-        "^Sadržaj$",
-        "^Upute suradnicima$",
-        "^Rodinný dům$",
-        "(?i)^Fami(l)?ly house$",
-        "^Upute autorima$",
-        "^Strategic Analysis$",
-        "^Finanční analýza vybraného podniku$",
-        "^Finanční analýza$",
-        "^Riječ urednika$",
-        "(?i)^Content(s?)$",
-        "(?i)^Inhalt$",
-        "^Jinjō shōgaku shūshinsho jidōyō$",
-        "(?i)^Index$",
-        "^Chūgaku kokubun kyōkasho$",
-        "^Retrato de una mujer$",
-        "^Retrato de un hombre$",
-        "^Kōtō shōgaku dokuhon$",
-        "^Shotōka kokugo$",
-        "^Shōgaku dokuhon$",
-        "^Jinjō shōgaku kokugo dokuhon$",
-        "^Shinsei kokugo dokuhon$",
-        "^Teikoku dokuhon$",
-        "^Instructions to Authors$",
-        "^KİTAP TAHLİLİ$",
-        "^PRZEGLĄD PIŚMIENNICTWA$",
-        "(?i)^Presentación$",
-        "^İçindekiler$",
-        "(?i)^Tabl?e of contents$",
-        "^(CODICE DEL BEATO DE LOS REYES FERNANDO I Y SANCHA).*$",
-        "^(\\[MADRID\\. BIBL\\. NAC\\. N.*KING FERDINAND I.*FROM SAN ISIDORO DE LEON\\. FACUNDUS SCRIPSIT DATED.*\\]).*",
-        "^Editorial( Board)?$",
-        "(?i)^Editorial \\(English\\)$",
-        "^Editörden$",
-        "^(Corpus Oral Dialectal \\(COD\\)\\.).*$",
-        "^(Kiri Karl Morgensternile).*$",
-        "^(\\[Eksliibris Aleksandr).*\\]$",
-        "^(\\[Eksliibris Aleksandr).*$",
-        "^(Eksliibris Aleksandr).*$",
-        "^(Kiri A\\. de Vignolles).*$",
-        "^(2 kirja Karl Morgensternile).*$",
-        "^(Pirita kloostri idaosa arheoloogilised).*$",
-        "^(Kiri tundmatule).*$",
-        "^(Kiri Jenaer Allgemeine Literaturzeitung toimetusele).*$",
-        "^(Eksliibris Nikolai Birukovile).*$",
-        "^(Eksliibris Nikolai Issakovile).*$",
-        "^(WHP Cruise Summary Information of section).*$",
-        "^(Measurement of the top quark\\-pair production cross section with ATLAS in pp collisions at).*$",
-        "^(Measurement of the spin\\-dependent structure function).*",
-        "(?i)^.*authors['’′]? reply\\.?$",
-        "(?i)^.*authors['’′]? response\\.?$",
-        "^Data [mM]anagement [sS]ervices\\.$",
-        "Research and Advanced Technology for Digital Libraries"
-      ]
-    },
-    "synonyms": {}
-  }
-}
--- a/dnet-dedup-test/src/test/resources/eu/dnetlib/pace/config/pub.prod.tree.conf.json
+++ b/dnet-dedup-test/src/test/resources/eu/dnetlib/pace/config/pub.prod.tree.conf.json
@ -1,348 +0,0 @@
-{
-  "wf": {
-    "threshold": "0.99",
-    "dedupRun": "001",
-    "entityType": "result",
-    "subEntityType": "resulttype",
-    "subEntityValue": "publication",
-    "orderField": "title",
-    "queueMaxSize": "200",
-    "groupMaxSize": "100",
-    "maxChildren": "100",
-    "slidingWindowSize": "50",
-    "rootBuilder": [
-      "result",
-      "resultProject_outcome_isProducedBy",
-      "resultResult_publicationDataset_isRelatedTo",
-      "resultResult_similarity_isAmongTopNSimilarDocuments",
-      "resultResult_similarity_hasAmongTopNSimilarDocuments",
-      "resultOrganization_affiliation_isAffiliatedWith",
-      "resultResult_part_hasPart",
-      "resultResult_part_isPartOf",
-      "resultResult_supplement_isSupplementTo",
-      "resultResult_supplement_isSupplementedBy",
-      "resultResult_version_isVersionOf"
-    ],
-    "includeChildren": "true",
-    "maxIterations": 20,
-    "idPath": "$.id"
-  },
-  "pace": {
-    "clustering": [
-      {
-        "name": "wordsStatsSuffixPrefixChain",
-        "fields": [
-          "title"
-        ],
-        "params": {
-          "mod": "10"
-        }
-      },
-      {
-        "name": "lowercase",
-        "fields": [
-          "doi",
-          "altdoi"
-        ],
-        "params": {
-          "collapseOn:pid": "0"
-        }
-      }
-    ],
-    "decisionTree": {
-      "start": {
-        "fields": [
-          {
-            "field": "title",
-            "comparator": "levensteinTitle",
-            "weight": 1.0,
-            "countIfUndefined": "true",
-            "params": {}
-          }
-        ],
-        "threshold": 0.9,
-        "aggregation": "AVG",
-        "positive": "MATCH",
-        "negative": "MATCH",
-        "undefined": "MATCH",
-        "ignoreUndefined": "true"
-      }
-    },
-    "model": [
-      {
-        "name": "doi",
-        "type": "String",
-        "path": "$.instance[*].pid[?(@.qualifier.classid == 'doi')].value"
-      },
-      {
-        "name": "altdoi",
-        "type": "String",
-        "path": "$.instance[*].alternateIdentifier[?(@.qualifier.classid == 'doi')].value"
-      },
-      {
-        "name": "pid",
-        "type": "JSON",
-        "path": "$.instance[*].pid[*]",
-        "overrideMatch": "true"
-      },
-      {
-        "name": "alternateid",
-        "type": "JSON",
-        "path": "$.instance[*].alternateIdentifier[*]",
-        "overrideMatch": "true"
-      },
-      {
-        "name": "title",
-        "type": "String",
-        "path": "$.title[?(@.qualifier.classid == 'main title')].value",
-        "length": 250,
-        "size": 5
-      },
-      {
-        "name": "authors",
-        "type": "List",
-        "path": "$.author[*].fullname",
-        "size": 200
-      },
-      {
-        "name": "resulttype",
-        "type": "String",
-        "path": "$.resulttype.classid"
-      }
-    ],
-    "blacklists": {
-      "title": [
-        "(?i)^Data Management Plan",
-        "^Inside Front Cover$",
-        "(?i)^Poster presentations$",
-        "^THE ASSOCIATION AND THE GENERAL MEDICAL COUNCIL$",
-        "^Problems with perinatal pathology\\.?$",
-        "(?i)^Cases? of Puerperal Convulsions$",
-        "(?i)^Operative Gyna?ecology$",
-        "(?i)^Mind the gap\\!?\\:?$",
-        "^Chronic fatigue syndrome\\.?$",
-        "^Cartas? ao editor Letters? to the Editor$",
-        "^Note from the Editor$",
-        "^Anesthesia Abstract$",
-        "^Annual report$",
-        "(?i)^“?THE RADICAL PREVENTION OF VENEREAL DISEASE\\.?”?$",
-        "(?i)^Graph and Table of Infectious Diseases?$",
-        "^Presentation$",
-        "(?i)^Reviews and Information on Publications$",
-        "(?i)^PUBLIC HEALTH SERVICES?$",
-        "(?i)^COMBINED TEXT-?BOOK OF OBSTETRICS AND GYN(Æ|ae)COLOGY$",
-        "(?i)^Adrese autora$",
-        "(?i)^Systematic Part .*\\. Catalogus Fossilium Austriae, Band 2: Echinoidea neogenica$",
-        "(?i)^Acknowledgement to Referees$",
-        "(?i)^Behçet's disease\\.?$",
-        "(?i)^Isolation and identification of restriction endonuclease.*$",
-        "(?i)^CEREBROVASCULAR DISEASES?.?$",
-        "(?i)^Screening for abdominal aortic aneurysms?\\.?$",
-        "^Event management$",
-        "(?i)^Breakfast and Crohn's disease.*\\.?$",
-        "^Cálculo de concentraciones en disoluciones acuosas. Ejercicio interactivo\\..*\\.$",
-        "(?i)^Genetic and functional analyses of SHANK2 mutations suggest a multiple hit model of Autism spectrum disorders?\\.?$",
-        "^Gushi hakubutsugaku$",
-        "^Starobosanski nadpisi u Bosni i Hercegovini \\(.*\\)$",
-        "^Intestinal spirocha?etosis$",
-        "^Treatment of Rodent Ulcer$",
-        "(?i)^\\W*Cloud Computing\\W*$",
-        "^Compendio mathematico : en que se contienen todas las materias mas principales de las Ciencias que tratan de la cantidad$",
-        "^Free Communications, Poster Presentations: Session [A-F]$",
-        "^“The Historical Aspects? of Quackery\\.?”$",
-        "^A designated centre for people with disabilities operated by St John of God Community Services (Limited|Ltd), Louth$",
-        "^P(er|re)-Mile Premiums for Auto Insurance\\.?$",
-        "(?i)^Case Report$",
-        "^Boletín Informativo$",
-        "(?i)^Glioblastoma Multiforme$",
-        "(?i)^Nuevos táxones animales descritos en la península Ibérica y Macaronesia desde 1994 \\(.*\\)$",
-        "^Zaměstnanecké výhody$",
-        "(?i)^The Economics of Terrorism and Counter-Terrorism: A Survey \\(Part .*\\)$",
-        "(?i)^Carotid body tumours?\\.?$",
-        "(?i)^\\[Españoles en Francia : La condición Emigrante.*\\]$",
-        "^Avant-propos$",
-        "(?i)^St\\. Patrick's Cathedral, Dublin, County Dublin - Head(s)? and Capital(s)?$",
-        "(?i)^St\\. Patrick's Cathedral, Dublin, County Dublin - Bases?$",
-        "(?i)^PUBLIC HEALTH VERSUS THE STATE$",
-        "^Viñetas de Cortázar$",
-        "(?i)^Search for heavy neutrinos and W(\\[|_|\\(|_\\{|-)?R(\\]|\\)|\\})? bosons with right-handed couplings in a left-right symmetric model in pp collisions at.*TeV(\\.)?$",
-        "(?i)^Measurement of the pseudorapidity and centrality dependence of the transverse energy density in Pb(-?)Pb collisions at.*tev(\\.?)$",
-        "(?i)^Search for resonances decaying into top-quark pairs using fully hadronic decays in pp collisions with ATLAS at.*TeV$",
-        "(?i)^Search for neutral minimal supersymmetric standard model Higgs bosons decaying to tau pairs in pp collisions at.*tev$",
-        "(?i)^Relatório de Estágio (de|em) Angiologia e Cirurgia Vascular$",
-        "^Aus der AGMB$",
-        "^Znanstveno-stručni prilozi$",
-        "(?i)^Zhodnocení finanční situace podniku a návrhy na zlepšení$",
-        "(?i)^Evaluation of the Financial Situation in the Firm and Proposals to its Improvement$",
-        "(?i)^Hodnocení finanční situace podniku a návrhy na její zlepšení$",
-        "^Finanční analýza podniku$",
-        "^Financial analysis( of business)?$",
-        "(?i)^Textbook of Gyn(a)?(Æ)?(e)?cology$",
-        "^Jikken nihon shūshinsho$",
-        "(?i)^CORONER('|s)(s|') INQUESTS$",
-        "(?i)^(Μελέτη παραγόντων )?risk management( για ανάπτυξη και εφαρμογή ενός πληροφοριακού συστήματος| και ανάπτυξη συστήματος)?$",
-        "(?i)^Consultants' contract(s)?$",
-        "(?i)^Upute autorima$",
-        "(?i)^Bijdrage tot de Kennis van den Godsdienst der Dajaks van Lan(d|f)ak en Tajan$",
-        "^Joshi shin kokubun$",
-        "^Kōtō shōgaku dokuhon nōson'yō$",
-        "^Jinjō shōgaku shōka$",
-        "^Shōgaku shūjichō$",
-        "^Nihon joshi dokuhon$",
-        "^Joshi shin dokuhon$",
-        "^Chūtō kanbun dokuhon$",
-        "^Wabun dokuhon$",
-        "(?i)^(Analysis of economy selected village or town|Rozbor hospodaření vybrané obce či města)$",
-        "(?i)^cardiac rehabilitation$",
-        "(?i)^Analytical summary$",
-        "^Thesaurus resolutionum Sacrae Congregationis Concilii$",
-        "(?i)^Sumario analítico(\\s{1})?(Analitic summary)?$",
-        "^Prikazi i osvrti$",
-        "^Rodinný dům s provozovnou$",
-        "^Family house with an establishment$",
-        "^Shinsei chūtō shin kokugun$",
-        "^Pulmonary alveolar proteinosis(\\.?)$",
-        "^Shinshū kanbun$",
-        "^Viñeta(s?) de Rodríguez$",
-        "(?i)^RUBRIKA UREDNIKA$",
-        "^A Matching Model of the Academic Publication Market$",
-        "^Yōgaku kōyō$",
-        "^Internetový marketing$",
-        "^Internet marketing$",
-        "^Chūtō kokugo dokuhon$",
-        "^Kokugo dokuhon$",
-        "^Antibiotic Cover for Dental Extraction(s?)$",
-        "^Strategie podniku$",
-        "^Strategy of an Enterprise$",
-        "(?i)^respiratory disease(s?)(\\.?)$",
-        "^Award(s?) for Gallantry in Civil Defence$",
-        "^Podniková kultura$",
-        "^Corporate Culture$",
-        "^Severe hyponatraemia in hospital inpatient(s?)(\\.?)$",
-        "^Pracovní motivace$",
-        "^Work Motivation$",
-        "^Kaitei kōtō jogaku dokuhon$",
-        "^Konsolidovaná účetní závěrka$",
-        "^Consolidated Financial Statements$",
-        "(?i)^intracranial tumour(s?)$",
-        "^Climate Change Mitigation Options and Directed Technical Change: A Decentralized Equilibrium Analysis$",
-        "^\\[CERVECERIAS MAHOU(\\.|\\:) INTERIOR\\] \\[Material gráfico\\]$",
-        "^Housing Market Dynamics(\\:|\\.) On the Contribution of Income Shocks and Credit Constraint(s?)$",
-        "^\\[Funciones auxiliares de la música en Radio París,.*\\]$",
-        "^Úroveň motivačního procesu jako způsobu vedení lidí$",
-        "^The level of motivation process as a leadership$",
-        "^Pay-beds in N(\\.?)H(\\.?)S(\\.?) Hospitals$",
-        "(?i)^news and events$",
-        "(?i)^NOVOSTI I DOGAĐAJI$",
-        "^Sansū no gakushū$",
-        "^Posouzení informačního systému firmy a návrh změn$",
-        "^Information System Assessment and Proposal for ICT Modification$",
-        "^Stresové zatížení pracovníků ve vybrané profesi$",
-        "^Stress load in a specific job$",
-        "^Sunday: Poster Sessions, Pt.*$",
-        "^Monday: Poster Sessions, Pt.*$",
-        "^Wednesday: Poster Sessions, Pt.*",
-        "^Tuesday: Poster Sessions, Pt.*$",
-        "^Analýza reklamy$",
-        "^Analysis of advertising$",
-        "^Shōgaku shūshinsho$",
-        "^Shōgaku sansū$",
-        "^Shintei joshi kokubun$",
-        "^Taishō joshi kokubun dokuhon$",
-        "^Joshi kokubun$",
-        "^Účetní uzávěrka a účetní závěrka v ČR$",
-        "(?i)^The \"?Causes\"? of Cancer$",
-        "^Normas para la publicación de artículos$",
-        "^Editor('|s)(s|') [Rr]eply$",
-        "^Editor(’|s)(s|’) letter$",
-        "^Redaktoriaus žodis$",
-        "^DISCUSSION ON THE PRECEDING PAPER$",
-        "^Kōtō shōgaku shūshinsho jidōyō$",
-        "^Shōgaku nihon rekishi$",
-        "^(Theory of the flow of action currents in isolated myelinated nerve fibers).*$",
-        "^Préface$",
-        "^Occupational [Hh]ealth [Ss]ervices.$",
-        "^In Memoriam Professor Toshiyuki TAKESHIMA$",
-        "^Účetní závěrka ve vybraném podniku.*$",
-        "^Financial statements in selected company$",
-        "^Abdominal [Aa]ortic [Aa]neurysms.*$",
-        "^Pseudomyxoma peritonei$",
-        "^Kazalo autora$",
-        "(?i)^uvodna riječ$",
-        "^Motivace jako způsob vedení lidí$",
-        "^Motivation as a leadership$",
-        "^Polyfunkční dům$",
-        "^Multi\\-funkcional building$",
-        "^Podnikatelský plán$",
-        "(?i)^Podnikatelský záměr$",
-        "(?i)^Business Plan$",
-        "^Oceňování nemovitostí$",
-        "^Marketingová komunikace$",
-        "^Marketing communication$",
-        "^Sumario Analítico$",
-        "^Riječ uredništva$",
-        "^Savjetovanja i priredbe$",
-        "^Índice$",
-        "^(Starobosanski nadpisi).*$",
-        "^Vzdělávání pracovníků v organizaci$",
-        "^Staff training in organization$",
-        "^(Life Histories of North American Geometridae).*$",
-        "^Strategická analýza podniku$",
-        "^Strategic Analysis of an Enterprise$",
-        "^Sadržaj$",
-        "^Upute suradnicima$",
-        "^Rodinný dům$",
-        "(?i)^Fami(l)?ly house$",
-        "^Upute autorima$",
-        "^Strategic Analysis$",
-        "^Finanční analýza vybraného podniku$",
-        "^Finanční analýza$",
-        "^Riječ urednika$",
-        "(?i)^Content(s?)$",
-        "(?i)^Inhalt$",
-        "^Jinjō shōgaku shūshinsho jidōyō$",
-        "(?i)^Index$",
-        "^Chūgaku kokubun kyōkasho$",
-        "^Retrato de una mujer$",
-        "^Retrato de un hombre$",
-        "^Kōtō shōgaku dokuhon$",
-        "^Shotōka kokugo$",
-        "^Shōgaku dokuhon$",
-        "^Jinjō shōgaku kokugo dokuhon$",
-        "^Shinsei kokugo dokuhon$",
-        "^Teikoku dokuhon$",
-        "^Instructions to Authors$",
-        "^KİTAP TAHLİLİ$",
-        "^PRZEGLĄD PIŚMIENNICTWA$",
-        "(?i)^Presentación$",
-        "^İçindekiler$",
-        "(?i)^Tabl?e of contents$",
-        "^(CODICE DEL BEATO DE LOS REYES FERNANDO I Y SANCHA).*$",
-        "^(\\[MADRID\\. BIBL\\. NAC\\. N.*KING FERDINAND I.*FROM SAN ISIDORO DE LEON\\. FACUNDUS SCRIPSIT DATED.*\\]).*",
-        "^Editorial( Board)?$",
-        "(?i)^Editorial \\(English\\)$",
-        "^Editörden$",
-        "^(Corpus Oral Dialectal \\(COD\\)\\.).*$",
-        "^(Kiri Karl Morgensternile).*$",
-        "^(\\[Eksliibris Aleksandr).*\\]$",
-        "^(\\[Eksliibris Aleksandr).*$",
-        "^(Eksliibris Aleksandr).*$",
-        "^(Kiri A\\. de Vignolles).*$",
-        "^(2 kirja Karl Morgensternile).*$",
-        "^(Pirita kloostri idaosa arheoloogilised).*$",
-        "^(Kiri tundmatule).*$",
-        "^(Kiri Jenaer Allgemeine Literaturzeitung toimetusele).*$",
-        "^(Eksliibris Nikolai Birukovile).*$",
-        "^(Eksliibris Nikolai Issakovile).*$",
-        "^(WHP Cruise Summary Information of section).*$",
-        "^(Measurement of the top quark\\-pair production cross section with ATLAS in pp collisions at).*$",
-        "^(Measurement of the spin\\-dependent structure function).*",
-        "(?i)^.*authors['’′]? reply\\.?$",
-        "(?i)^.*authors['’′]? response\\.?$",
-        "^Data [mM]anagement [sS]ervices\\.$",
-        "Research and Advanced Technology for Digital Libraries"
-      ]
-    },
-    "synonyms": {}
-  }
-}
--- a/dnet-dedup-test/src/test/resources/eu/dnetlib/pace/config/publication.current.conf.json
+++ b/dnet-dedup-test/src/test/resources/eu/dnetlib/pace/config/publication.current.conf.json
@ -6,10 +6,10 @@
    "subEntityType": "resulttype",
    "subEntityValue": "publication",
    "orderField": "title",
-    "queueMaxSize": "200",
+    "queueMaxSize": "2000",
    "groupMaxSize": "100",
    "maxChildren": "100",
-    "slidingWindowSize": "50",
+    "slidingWindowSize": "200",
    "rootBuilder": [
      "result",
      "resultProject_outcome_isProducedBy",
@ -28,26 +28,10 @@
    "idPath": "$.id"
  },
  "pace": {
-    "clustering": [
-      {
-        "name": "wordsStatsSuffixPrefixChain",
-        "fields": [
-          "title"
-        ],
-        "params": {
-          "mod": "10"
-        }
-      },
-      {
-        "name": "lowercase",
-        "fields": [
-          "doi",
-          "altdoi"
-        ],
-        "params": {
-          "collapseOn:pid": "0"
-        }
-      }
+    "clustering" : [
+      { "name" : "ngrampairs", "fields" : [ "title" ], "params" : { "max" : "1", "ngramLen" : "3"} },
+      { "name" : "suffixprefix", "fields" : [ "title" ], "params" : { "max" : "1", "len" : "3" } },
+      { "name" : "lowercase", "fields" : [ "doi" ], "params" : { } }
    ],
    "decisionTree": {
      "start": {
@ -59,75 +43,18 @@
            "countIfUndefined": "false",
            "params": {
              "jpath_value": "$.value",
-              "jpath_classid": "$.qualifier.classid",
-              "mode": "count"
+              "jpath_classid": "$.qualifier.classid"
            }
          }
        ],
-        "threshold": 1.0,
-        "aggregation": "MAX",
-        "positive": "MATCH",
-        "negative": "instanceTypeCheck",
-        "undefined": "instanceTypeCheck",
-        "ignoreUndefined": "false"
-      },
-      "instanceTypeCheck": {
-        "fields": [
-          {
-            "field": "instance",
-            "comparator": "instanceTypeMatch",
-            "weight": 1.0,
-            "countIfUndefined": "false",
-            "params": {}
-          }
-        ],
        "threshold": 0.5,
-        "aggregation": "MAX",
-        "positive": "pidVSaltid",
-        "negative": "NO_MATCH",
-        "undefined": "pidVSaltid",
-        "ignoreUndefined": "true"
-      },
-      "pidVSaltid": {
-        "fields": [
-          {
-            "field": "pid",
-            "comparator": "jsonListMatch",
-            "weight": 1.0,
-            "countIfUndefined": "false",
-            "params": {
-              "jpath_value": "$.value",
-              "jpath_classid": "$.qualifier.classid",
-              "crossCompare": "alternateid",
-              "mode": "count"
-            }
-          }
-        ],
-        "threshold": 1.0,
-        "aggregation": "MAX",
-        "positive": "softCheck",
-        "negative": "earlyExits",
-        "undefined": "earlyExits",
-        "ignoreUndefined": "true"
-      },
-      "softCheck": {
-        "fields": [
-          {
-            "field": "title",
-            "comparator": "levensteinTitle",
-            "weight": 1.0,
-            "countIfUndefined": "true",
-            "params": {}
-          }
-        ],
-        "threshold": 0.9,
        "aggregation": "AVG",
        "positive": "MATCH",
-        "negative": "NO_MATCH",
-        "undefined": "NO_MATCH",
+        "negative": "layer2",
+        "undefined": "layer2",
        "ignoreUndefined": "true"
      },
-      "earlyExits": {
+      "layer2": {
        "fields": [
          {
            "field": "title",
@ -146,12 +73,12 @@
        ],
        "threshold": 1.0,
        "aggregation": "AND",
-        "positive": "strongCheck",
+        "positive": "layer3",
        "negative": "NO_MATCH",
-        "undefined": "strongCheck",
+        "undefined": "layer3",
        "ignoreUndefined": "false"
      },
-      "strongCheck": {
+      "layer3": {
        "fields": [
          {
            "field": "title",
@ -163,30 +90,9 @@
        ],
        "threshold": 0.99,
        "aggregation": "AVG",
-        "positive": "surnames",
-        "negative": "NO_MATCH",
-        "undefined": "NO_MATCH",
-        "ignoreUndefined": "true"
-      },
-      "surnames": {
-        "fields": [
-          {
-            "field": "authors",
-            "comparator": "authorsMatch",
-            "weight": 1.0,
-            "countIfUndefined": "false",
-            "params": {
-              "surname_th": 0.75,
-              "fullname_th": 0.75,
-              "mode": "full"
-            }
-          }
-        ],
-        "threshold": 0.6,
-        "aggregation": "MAX",
        "positive": "MATCH",
        "negative": "NO_MATCH",
-        "undefined": "MATCH",
+        "undefined": "NO_MATCH",
        "ignoreUndefined": "true"
      }
    },
@ -194,29 +100,18 @@
      {
        "name": "doi",
        "type": "String",
-        "path": "$.instance[*].pid[?(@.qualifier.classid == 'doi')].value"
-      },
-      {
-        "name": "altdoi",
-        "type": "String",
-        "path": "$.instance[*].alternateIdentifier[?(@.qualifier.classid == 'doi')].value"
+        "path": "$.pid[?(@.qualifier.classid == 'doi')].value"
      },
      {
        "name": "pid",
        "type": "JSON",
-        "path": "$.instance[*].pid[*]",
-        "overrideMatch": "true"
-      },
-      {
-        "name": "alternateid",
-        "type": "JSON",
-        "path": "$.instance[*].alternateIdentifier[*]",
+        "path": "$.pid",
        "overrideMatch": "true"
      },
      {
        "name": "title",
-        "type": "StringConcat",
-        "path": "$.title[?(@.qualifier.classid == 'main title')].value|||$.title[?(@.qualifier.classid == 'subtitle')].value",
+        "type": "String",
+        "path": "$.title[?(@.qualifier.classid == 'main title')].value",
        "length": 250,
        "size": 5
      },
@ -230,16 +125,10 @@
        "name": "resulttype",
        "type": "String",
        "path": "$.resulttype.classid"
-      },
-      {
-        "name": "instance",
-        "type": "List",
-        "path": "$.instance[*].instancetype.classname"
      }
    ],
    "blacklists": {
      "title": [
-        "(?i)^Data Management Plan",
        "^Inside Front Cover$",
        "(?i)^Poster presentations$",
        "^THE ASSOCIATION AND THE GENERAL MEDICAL COUNCIL$",
@ -465,16 +354,7 @@
        "^(Measurement of the top quark\\-pair production cross section with ATLAS in pp collisions at).*$",
        "^(Measurement of the spin\\-dependent structure function).*",
        "(?i)^.*authors['’′]? reply\\.?$",
-        "(?i)^.*authors['’′]? response\\.?$",
-        "^Data [mM]anagement [sS]ervices\\.$",
-        "Research and Advanced Technology for Digital Libraries",
-        "(?i)^risky business$",
-        "(?i)^great expectations\\.?$",
-        "(?i)^what's in a name\\?$",
-        "(?i)^decisions, decisions\\.?$",
-        "(?i)^update to our reader, reviewer, and author communities.*",
-        "(?i)^lest we forget$",
-        "(?i)^measure for measure$"
+        "(?i)^.*authors['’′]? response\\.?$"
      ]
    },
    "synonyms": {}
--- a/dnet-dedup-test/src/test/resources/eu/dnetlib/pace/config/pubs.fdup.exp.json
+++ b/dnet-dedup-test/src/test/resources/eu/dnetlib/pace/config/pubs.fdup.exp.json
@ -1,381 +0,0 @@
-{
-  "wf": {
-    "threshold": "0.99",
-    "dedupRun": "001",
-    "entityType": "result",
-    "subEntityType": "resulttype",
-    "subEntityValue": "publication",
-    "orderField": "title",
-    "queueMaxSize": "200",
-    "groupMaxSize": "100",
-    "maxChildren": "100",
-    "slidingWindowSize": "100",
-    "rootBuilder": [
-      "result",
-      "resultProject_outcome_isProducedBy",
-      "resultResult_publicationDataset_isRelatedTo",
-      "resultResult_similarity_isAmongTopNSimilarDocuments",
-      "resultResult_similarity_hasAmongTopNSimilarDocuments",
-      "resultOrganization_affiliation_isAffiliatedWith",
-      "resultResult_part_hasPart",
-      "resultResult_part_isPartOf",
-      "resultResult_supplement_isSupplementTo",
-      "resultResult_supplement_isSupplementedBy",
-      "resultResult_version_isVersionOf"
-    ],
-    "includeChildren": "true",
-    "maxIterations": 20,
-    "idPath": "$.id"
-  },
-  "pace": {
-    "clustering" : [
-      { "name" : "suffixprefix", "fields" : [ "title" ], "params" : { "max" : "1", "len" : "3" } },
-      { "name" : "lowercase", "fields" : [ "doi", "altdoi" ], "params" : { "collapseOn:pid": "0"} }
-    ],
-    "decisionTree": {
-      "start": {
-        "fields": [
-          {
-            "field": "pid",
-            "comparator": "jsonListMatch",
-            "weight": 1.0,
-            "countIfUndefined": "false",
-            "params": {
-              "jpath_value": "$.value",
-              "jpath_classid": "$.qualifier.classid",
-               "mode": "count"
-            }
-          }
-        ],
-        "threshold": 1.0,
-        "aggregation": "MAX",
-        "positive": "MATCH",
-        "negative": "versionCheck",
-        "undefined": "versionCheck",
-        "ignoreUndefined": "true"
-      },
-      "versionCheck": {
-        "fields": [
-          {
-            "field": "title",
-            "comparator": "titleVersionMatch",
-            "weight": 1.0,
-            "countIfUndefined": "false",
-            "params": {}
-          }
-        ],
-        "threshold": 1.0,
-        "aggregation": "MAX",
-        "positive": "titleCheck",
-        "negative": "NO_MATCH",
-        "undefined": "titleCheck",
-        "ignoreUndefined": "false"
-      },
-      "titleCheck": {
-        "fields": [
-          {
-            "field": "title",
-            "comparator": "levensteinTitle",
-            "weight": 1.0,
-            "countIfUndefined": "true",
-            "params": {}
-          }
-        ],
-        "threshold": 0.9,
-        "aggregation": "MAX",
-        "positive": "authorsCheck",
-        "negative": "NO_MATCH",
-        "undefined": "NO_MATCH",
-        "ignoreUndefined": "true"
-      },
-      "authorsCheck": {
-        "fields": [
-          {
-            "field": "authors",
-            "comparator": "authorsMatch",
-            "weight": 1.0,
-            "countIfUndefined": "false",
-            "params": {}
-          }
-        ],
-        "threshold": 0.6,
-        "aggregation": "MAX",
-        "positive": "MATCH",
-        "negative": "NO_MATCH",
-        "undefined": "MATCH",
-        "ignoreUndefined": "false"
-      }
-    },
-    "model": [
-      {
-        "name": "doi",
-        "type": "String",
-        "path": "$.instance[*].pid[?(@.qualifier.classid == 'doi')].value"
-      },
-      {
-        "name": "altdoi",
-        "type": "String",
-        "path": "$.instance[*].alternateIdentifier[?(@.qualifier.classid == 'doi')].value"
-      },
-      {
-        "name": "pid",
-        "type": "JSON",
-        "path": "$.instance[*].pid[*]",
-        "overrideMatch": "true"
-      },
-      {
-        "name": "title",
-        "type": "String",
-        "path": "$.title[?(@.qualifier.classid == 'main title')].value",
-        "length": 250,
-        "size": 5
-      },
-      {
-        "name": "authors",
-        "type": "List",
-        "path": "$.author[*].fullname",
-        "size": 200
-      },
-      {
-        "name": "resulttype",
-        "type": "String",
-        "path": "$.resulttype.classid"
-      }
-    ],
-    "blacklists": {
-      "title": [
-        "(?i)^Data Management Plan",
-        "^Inside Front Cover$",
-        "(?i)^Poster presentations$",
-        "^THE ASSOCIATION AND THE GENERAL MEDICAL COUNCIL$",
-        "^Problems with perinatal pathology\\.?$",
-        "(?i)^Cases? of Puerperal Convulsions$",
-        "(?i)^Operative Gyna?ecology$",
-        "(?i)^Mind the gap\\!?\\:?$",
-        "^Chronic fatigue syndrome\\.?$",
-        "^Cartas? ao editor Letters? to the Editor$",
-        "^Note from the Editor$",
-        "^Anesthesia Abstract$",
-        "^Annual report$",
-        "(?i)^“?THE RADICAL PREVENTION OF VENEREAL DISEASE\\.?”?$",
-        "(?i)^Graph and Table of Infectious Diseases?$",
-        "^Presentation$",
-        "(?i)^Reviews and Information on Publications$",
-        "(?i)^PUBLIC HEALTH SERVICES?$",
-        "(?i)^COMBINED TEXT-?BOOK OF OBSTETRICS AND GYN(Æ|ae)COLOGY$",
-        "(?i)^Adrese autora$",
-        "(?i)^Systematic Part .*\\. Catalogus Fossilium Austriae, Band 2: Echinoidea neogenica$",
-        "(?i)^Acknowledgement to Referees$",
-        "(?i)^Behçet's disease\\.?$",
-        "(?i)^Isolation and identification of restriction endonuclease.*$",
-        "(?i)^CEREBROVASCULAR DISEASES?.?$",
-        "(?i)^Screening for abdominal aortic aneurysms?\\.?$",
-        "^Event management$",
-        "(?i)^Breakfast and Crohn's disease.*\\.?$",
-        "^Cálculo de concentraciones en disoluciones acuosas. Ejercicio interactivo\\..*\\.$",
-        "(?i)^Genetic and functional analyses of SHANK2 mutations suggest a multiple hit model of Autism spectrum disorders?\\.?$",
-        "^Gushi hakubutsugaku$",
-        "^Starobosanski nadpisi u Bosni i Hercegovini \\(.*\\)$",
-        "^Intestinal spirocha?etosis$",
-        "^Treatment of Rodent Ulcer$",
-        "(?i)^\\W*Cloud Computing\\W*$",
-        "^Compendio mathematico : en que se contienen todas las materias mas principales de las Ciencias que tratan de la cantidad$",
-        "^Free Communications, Poster Presentations: Session [A-F]$",
-        "^“The Historical Aspects? of Quackery\\.?”$",
-        "^A designated centre for people with disabilities operated by St John of God Community Services (Limited|Ltd), Louth$",
-        "^P(er|re)-Mile Premiums for Auto Insurance\\.?$",
-        "(?i)^Case Report$",
-        "^Boletín Informativo$",
-        "(?i)^Glioblastoma Multiforme$",
-        "(?i)^Nuevos táxones animales descritos en la península Ibérica y Macaronesia desde 1994 \\(.*\\)$",
-        "^Zaměstnanecké výhody$",
-        "(?i)^The Economics of Terrorism and Counter-Terrorism: A Survey \\(Part .*\\)$",
-        "(?i)^Carotid body tumours?\\.?$",
-        "(?i)^\\[Españoles en Francia : La condición Emigrante.*\\]$",
-        "^Avant-propos$",
-        "(?i)^St\\. Patrick's Cathedral, Dublin, County Dublin - Head(s)? and Capital(s)?$",
-        "(?i)^St\\. Patrick's Cathedral, Dublin, County Dublin - Bases?$",
-        "(?i)^PUBLIC HEALTH VERSUS THE STATE$",
-        "^Viñetas de Cortázar$",
-        "(?i)^Search for heavy neutrinos and W(\\[|_|\\(|_\\{|-)?R(\\]|\\)|\\})? bosons with right-handed couplings in a left-right symmetric model in pp collisions at.*TeV(\\.)?$",
-        "(?i)^Measurement of the pseudorapidity and centrality dependence of the transverse energy density in Pb(-?)Pb collisions at.*tev(\\.?)$",
-        "(?i)^Search for resonances decaying into top-quark pairs using fully hadronic decays in pp collisions with ATLAS at.*TeV$",
-        "(?i)^Search for neutral minimal supersymmetric standard model Higgs bosons decaying to tau pairs in pp collisions at.*tev$",
-        "(?i)^Relatório de Estágio (de|em) Angiologia e Cirurgia Vascular$",
-        "^Aus der AGMB$",
-        "^Znanstveno-stručni prilozi$",
-        "(?i)^Zhodnocení finanční situace podniku a návrhy na zlepšení$",
-        "(?i)^Evaluation of the Financial Situation in the Firm and Proposals to its Improvement$",
-        "(?i)^Hodnocení finanční situace podniku a návrhy na její zlepšení$",
-        "^Finanční analýza podniku$",
-        "^Financial analysis( of business)?$",
-        "(?i)^Textbook of Gyn(a)?(Æ)?(e)?cology$",
-        "^Jikken nihon shūshinsho$",
-        "(?i)^CORONER('|s)(s|') INQUESTS$",
-        "(?i)^(Μελέτη παραγόντων )?risk management( για ανάπτυξη και εφαρμογή ενός πληροφοριακού συστήματος| και ανάπτυξη συστήματος)?$",
-        "(?i)^Consultants' contract(s)?$",
-        "(?i)^Upute autorima$",
-        "(?i)^Bijdrage tot de Kennis van den Godsdienst der Dajaks van Lan(d|f)ak en Tajan$",
-        "^Joshi shin kokubun$",
-        "^Kōtō shōgaku dokuhon nōson'yō$",
-        "^Jinjō shōgaku shōka$",
-        "^Shōgaku shūjichō$",
-        "^Nihon joshi dokuhon$",
-        "^Joshi shin dokuhon$",
-        "^Chūtō kanbun dokuhon$",
-        "^Wabun dokuhon$",
-        "(?i)^(Analysis of economy selected village or town|Rozbor hospodaření vybrané obce či města)$",
-        "(?i)^cardiac rehabilitation$",
-        "(?i)^Analytical summary$",
-        "^Thesaurus resolutionum Sacrae Congregationis Concilii$",
-        "(?i)^Sumario analítico(\\s{1})?(Analitic summary)?$",
-        "^Prikazi i osvrti$",
-        "^Rodinný dům s provozovnou$",
-        "^Family house with an establishment$",
-        "^Shinsei chūtō shin kokugun$",
-        "^Pulmonary alveolar proteinosis(\\.?)$",
-        "^Shinshū kanbun$",
-        "^Viñeta(s?) de Rodríguez$",
-        "(?i)^RUBRIKA UREDNIKA$",
-        "^A Matching Model of the Academic Publication Market$",
-        "^Yōgaku kōyō$",
-        "^Internetový marketing$",
-        "^Internet marketing$",
-        "^Chūtō kokugo dokuhon$",
-        "^Kokugo dokuhon$",
-        "^Antibiotic Cover for Dental Extraction(s?)$",
-        "^Strategie podniku$",
-        "^Strategy of an Enterprise$",
-        "(?i)^respiratory disease(s?)(\\.?)$",
-        "^Award(s?) for Gallantry in Civil Defence$",
-        "^Podniková kultura$",
-        "^Corporate Culture$",
-        "^Severe hyponatraemia in hospital inpatient(s?)(\\.?)$",
-        "^Pracovní motivace$",
-        "^Work Motivation$",
-        "^Kaitei kōtō jogaku dokuhon$",
-        "^Konsolidovaná účetní závěrka$",
-        "^Consolidated Financial Statements$",
-        "(?i)^intracranial tumour(s?)$",
-        "^Climate Change Mitigation Options and Directed Technical Change: A Decentralized Equilibrium Analysis$",
-        "^\\[CERVECERIAS MAHOU(\\.|\\:) INTERIOR\\] \\[Material gráfico\\]$",
-        "^Housing Market Dynamics(\\:|\\.) On the Contribution of Income Shocks and Credit Constraint(s?)$",
-        "^\\[Funciones auxiliares de la música en Radio París,.*\\]$",
-        "^Úroveň motivačního procesu jako způsobu vedení lidí$",
-        "^The level of motivation process as a leadership$",
-        "^Pay-beds in N(\\.?)H(\\.?)S(\\.?) Hospitals$",
-        "(?i)^news and events$",
-        "(?i)^NOVOSTI I DOGAĐAJI$",
-        "^Sansū no gakushū$",
-        "^Posouzení informačního systému firmy a návrh změn$",
-        "^Information System Assessment and Proposal for ICT Modification$",
-        "^Stresové zatížení pracovníků ve vybrané profesi$",
-        "^Stress load in a specific job$",
-        "^Sunday: Poster Sessions, Pt.*$",
-        "^Monday: Poster Sessions, Pt.*$",
-        "^Wednesday: Poster Sessions, Pt.*",
-        "^Tuesday: Poster Sessions, Pt.*$",
-        "^Analýza reklamy$",
-        "^Analysis of advertising$",
-        "^Shōgaku shūshinsho$",
-        "^Shōgaku sansū$",
-        "^Shintei joshi kokubun$",
-        "^Taishō joshi kokubun dokuhon$",
-        "^Joshi kokubun$",
-        "^Účetní uzávěrka a účetní závěrka v ČR$",
-        "(?i)^The \"?Causes\"? of Cancer$",
-        "^Normas para la publicación de artículos$",
-        "^Editor('|s)(s|') [Rr]eply$",
-        "^Editor(’|s)(s|’) letter$",
-        "^Redaktoriaus žodis$",
-        "^DISCUSSION ON THE PRECEDING PAPER$",
-        "^Kōtō shōgaku shūshinsho jidōyō$",
-        "^Shōgaku nihon rekishi$",
-        "^(Theory of the flow of action currents in isolated myelinated nerve fibers).*$",
-        "^Préface$",
-        "^Occupational [Hh]ealth [Ss]ervices.$",
-        "^In Memoriam Professor Toshiyuki TAKESHIMA$",
-        "^Účetní závěrka ve vybraném podniku.*$",
-        "^Financial statements in selected company$",
-        "^Abdominal [Aa]ortic [Aa]neurysms.*$",
-        "^Pseudomyxoma peritonei$",
-        "^Kazalo autora$",
-        "(?i)^uvodna riječ$",
-        "^Motivace jako způsob vedení lidí$",
-        "^Motivation as a leadership$",
-        "^Polyfunkční dům$",
-        "^Multi\\-funkcional building$",
-        "^Podnikatelský plán$",
-        "(?i)^Podnikatelský záměr$",
-        "(?i)^Business Plan$",
-        "^Oceňování nemovitostí$",
-        "^Marketingová komunikace$",
-        "^Marketing communication$",
-        "^Sumario Analítico$",
-        "^Riječ uredništva$",
-        "^Savjetovanja i priredbe$",
-        "^Índice$",
-        "^(Starobosanski nadpisi).*$",
-        "^Vzdělávání pracovníků v organizaci$",
-        "^Staff training in organization$",
-        "^(Life Histories of North American Geometridae).*$",
-        "^Strategická analýza podniku$",
-        "^Strategic Analysis of an Enterprise$",
-        "^Sadržaj$",
-        "^Upute suradnicima$",
-        "^Rodinný dům$",
-        "(?i)^Fami(l)?ly house$",
-        "^Upute autorima$",
-        "^Strategic Analysis$",
-        "^Finanční analýza vybraného podniku$",
-        "^Finanční analýza$",
-        "^Riječ urednika$",
-        "(?i)^Content(s?)$",
-        "(?i)^Inhalt$",
-        "^Jinjō shōgaku shūshinsho jidōyō$",
-        "(?i)^Index$",
-        "^Chūgaku kokubun kyōkasho$",
-        "^Retrato de una mujer$",
-        "^Retrato de un hombre$",
-        "^Kōtō shōgaku dokuhon$",
-        "^Shotōka kokugo$",
-        "^Shōgaku dokuhon$",
-        "^Jinjō shōgaku kokugo dokuhon$",
-        "^Shinsei kokugo dokuhon$",
-        "^Teikoku dokuhon$",
-        "^Instructions to Authors$",
-        "^KİTAP TAHLİLİ$",
-        "^PRZEGLĄD PIŚMIENNICTWA$",
-        "(?i)^Presentación$",
-        "^İçindekiler$",
-        "(?i)^Tabl?e of contents$",
-        "^(CODICE DEL BEATO DE LOS REYES FERNANDO I Y SANCHA).*$",
-        "^(\\[MADRID\\. BIBL\\. NAC\\. N.*KING FERDINAND I.*FROM SAN ISIDORO DE LEON\\. FACUNDUS SCRIPSIT DATED.*\\]).*",
-        "^Editorial( Board)?$",
-        "(?i)^Editorial \\(English\\)$",
-        "^Editörden$",
-        "^(Corpus Oral Dialectal \\(COD\\)\\.).*$",
-        "^(Kiri Karl Morgensternile).*$",
-        "^(\\[Eksliibris Aleksandr).*\\]$",
-        "^(\\[Eksliibris Aleksandr).*$",
-        "^(Eksliibris Aleksandr).*$",
-        "^(Kiri A\\. de Vignolles).*$",
-        "^(2 kirja Karl Morgensternile).*$",
-        "^(Pirita kloostri idaosa arheoloogilised).*$",
-        "^(Kiri tundmatule).*$",
-        "^(Kiri Jenaer Allgemeine Literaturzeitung toimetusele).*$",
-        "^(Eksliibris Nikolai Birukovile).*$",
-        "^(Eksliibris Nikolai Issakovile).*$",
-        "^(WHP Cruise Summary Information of section).*$",
-        "^(Measurement of the top quark\\-pair production cross section with ATLAS in pp collisions at).*$",
-        "^(Measurement of the spin\\-dependent structure function).*",
-        "(?i)^.*authors['’′]? reply\\.?$",
-        "(?i)^.*authors['’′]? response\\.?$",
-        "^Data [mM]anagement [sS]ervices\\.$",
-        "Research and Advanced Technology for Digital Libraries",
-        "Food and Nutrition"
-      ]
-    },
-    "synonyms": {}
-  }
-}
--- a/dnet-dedup-test/src/test/resources/eu/dnetlib/pace/config/sw.tree.conf.json
+++ b/dnet-dedup-test/src/test/resources/eu/dnetlib/pace/config/sw.tree.conf.json
@ -1,150 +0,0 @@
-{
-  "wf" : {
-    "threshold" : "0.99",
-    "dedupRun" : "001",
-    "entityType" : "result",
-    "subEntityType" : "resulttype",
-    "subEntityValue" : "software",
-    "orderField" : "title",
-    "queueMaxSize" : "200",
-    "groupMaxSize" : "100",
-    "maxChildren" : "100",
-    "slidingWindowSize" : "50",
-    "rootBuilder" : [ "result", "resultProject_outcome_isProducedBy", "resultResult_publicationDataset_isRelatedTo", "resultResult_similarity_isAmongTopNSimilarDocuments", "resultResult_similarity_hasAmongTopNSimilarDocuments", "resultOrganization_affiliation_hasAuthorInstitution", "resultResult_part_hasPart", "resultResult_part_isPartOf", "resultResult_supplement_isSupplementTo", "resultResult_supplement_isSupplementedBy", "resultResult_version_isVersionOf" ],
-    "includeChildren" : "true"
-  },
-  "pace" : {
-    "clustering" : [
-      { "name" : "wordsStatsSuffixPrefixChain", "fields" : [ "title" ], "params" : { "mod" : "10" } },
-      { "name" : "lowercase", "fields" : [ "doi", "altdoi" ], "params" : { "collapseOn:pid":"0"} },
-      { "name" : "ngrams", "fields" : [ "title" ], "params" : {"ngramLen": 3, "max": 4, "maxPerToken":1, "minNgramLen":3}},
-      { "name" : "urlclustering", "fields": [ "url" ], "params" : {}}
-    ],
-    "decisionTree": {
-      "start": {
-        "fields": [
-          {
-            "field": "doi",
-            "comparator": "exactMatch",
-            "weight": 1,
-            "countIfUndefined": "false",
-            "params": {}
-          }
-        ],
-        "threshold": 1,
-        "aggregation": "MAX",
-        "positive": "MATCH",
-        "negative": "titleCheck",
-        "undefined": "titleCheck",
-        "ignoreUndefined": "false"
-      },
-      "titleCheck": {
-        "fields": [
-          {
-            "field": "title",
-            "comparator": "levensteinTitleIgnoreVersion",
-            "weight": 1,
-            "countIfUndefined": "false",
-            "params": {}
-          }
-        ],
-        "threshold": 0.95,
-        "aggregation": "AVG",
-        "positive": "pidCheck",
-        "negative": "NO_MATCH",
-        "undefined": "NO_MATCH",
-        "ignoreUndefined": "false"
-      },
-      "pidCheck": {
-        "fields": [
-          {
-            "field": "altdoi",
-            "comparator": "exactMatch",
-            "weight": 1,
-            "countIfUndefined": "false",
-            "params": {}
-          },
-          {
-            "field": "doi",
-            "comparator": "exactMatch",
-            "weight": 1,
-            "countIfUndefined": "false",
-            "params": {"crossCompare": "altdoi"}
-          },
-          {
-            "field": "url",
-            "comparator": "exactMatch",
-            "weight": 1,
-            "countIfUndefined": "false",
-            "params": {}
-          }
-        ],
-        "threshold": 1,
-        "aggregation": "OR",
-        "positive": "MATCH",
-        "negative": "authorsCheck",
-        "undefined": "authorsCheck",
-        "ignoreUndefined": "false"
-      },
-      "authorsCheck": {
-        "fields": [
-          {
-            "field": "authors",
-            "comparator": "authorsMatch",
-            "weight": 1.0,
-            "countIfUndefined": "false",
-            "params": {
-              "surname_th": 0.70,
-              "fullname_th": 0.70,
-              "size_th": 20,
-              "mode": "surname"
-            }
-          }
-        ],
-        "threshold": 1,
-        "aggregation": "AVG",
-        "positive": "MATCH",
-        "negative": "NO_MATCH",
-        "undefined": "MATCH",
-        "ignoreUndefined": "false"
-      }
-    },
-    "model" : [
-      {
-        "name" : "doi",
-        "type" : "String",
-        "path" : "$.instance.pid[?(@.qualifier.classid == 'doi')].value"
-      },
-      {
-        "name" : "altdoi",
-        "type" : "String",
-        "path" : "$.instance.alternateIdentifier[?(@.qualifier.classid == 'doi')].value"
-      },
-      {
-        "name" : "title",
-        "type" : "String",
-        "path" : "$.title[?(@.qualifier.classid == 'main title')].value",
-        "length" : 250,
-        "size" : 5
-      },
-      {
-        "name" : "url",
-        "type" : "String",
-        "path" : "$.instance.url"
-      },
-      {
-        "name" : "resulttype",
-        "type" : "String",
-        "path" : "$.resulttype.classid"
-      },
-      {
-        "name": "authors",
-        "type": "List",
-        "path": "$.author[*].fullname",
-        "size": 200
-      }
-    ],
-    "blacklists" : {},
-    "synonyms": {}
-  }
-}
--- a/dnet-dedup-test/src/test/resources/eu/dnetlib/pace/config/test.pub.tree.conf.json
+++ b/dnet-dedup-test/src/test/resources/eu/dnetlib/pace/config/test.pub.tree.conf.json
@ -1,403 +0,0 @@
-{
-  "wf": {
-    "threshold": "0.99",
-    "dedupRun": "001",
-    "entityType": "result",
-    "subEntityType": "resulttype",
-    "subEntityValue": "publication",
-    "orderField": "title",
-    "queueMaxSize": "200",
-    "groupMaxSize": "100",
-    "maxChildren": "100",
-    "slidingWindowSize": "50",
-    "rootBuilder": [
-      "result",
-      "resultProject_outcome_isProducedBy",
-      "resultResult_publicationDataset_isRelatedTo",
-      "resultResult_similarity_isAmongTopNSimilarDocuments",
-      "resultResult_similarity_hasAmongTopNSimilarDocuments",
-      "resultOrganization_affiliation_isAffiliatedWith",
-      "resultResult_part_hasPart",
-      "resultResult_part_isPartOf",
-      "resultResult_supplement_isSupplementTo",
-      "resultResult_supplement_isSupplementedBy",
-      "resultResult_version_isVersionOf"
-    ],
-    "includeChildren": "true",
-    "maxIterations": 20,
-    "idPath": "$.id"
-  },
-  "pace": {
-    "clustering" : [
-      { "name" : "wordsStatsSuffixPrefixChain", "fields" : [ "title" ], "params" : { "mod" : "10" } },
-      { "name" : "lowercase", "fields" : [ "doi", "altdoi" ], "params" : { "collapseOn:pid": "0"} }
-    ],
-    "decisionTree": {
-      "start": {
-        "fields": [
-          {
-            "field": "title",
-            "comparator": "titleVersionMatch",
-            "weight": 1.0,
-            "countIfUndefined": "false",
-            "params": {}
-          },
-          {
-            "field": "authors",
-            "comparator": "sizeMatch",
-            "weight": 1.0,
-            "countIfUndefined": "true",
-            "params": {}
-          }
-        ],
-        "threshold": 1.0,
-        "aggregation": "AND",
-        "positive": "pid_check",
-        "negative": "NO_MATCH",
-        "undefined": "NO_MATCH",
-        "ignoreUndefined": "false"
-      },
-      "pid_check": {
-        "fields": [
-          {
-            "field": "pid",
-            "comparator": "jsonListMatch",
-            "weight": 1.0,
-            "countIfUndefined": "false",
-            "params": {
-              "jpath_value": "$.value",
-              "jpath_classid": "$.qualifier.classid"
-            }
-          },
-          {
-            "field": "pid",
-            "comparator": "jsonListMatch",
-            "weight": 1.0,
-            "countIfUndefined": "false",
-            "params": {
-              "jpath_value": "$.value",
-              "jpath_classid": "$.qualifier.classid",
-              "crossCompare": "alternateid"
-            }
-          }
-        ],
-        "threshold": 0.5,
-        "aggregation": "MAX",
-        "positive": "title_check_low_th",
-        "negative": "title_check_high_th",
-        "undefined": "title_check_high_th",
-        "ignoreUndefined": "true"
-      },
-      "title_check_low_th": {
-        "fields": [
-          {
-            "field": "title",
-            "comparator": "levensteinTitle",
-            "weight": 1.0,
-            "countIfUndefined": "true",
-            "params": {}
-          }
-        ],
-        "threshold": 0.9,
-        "aggregation": "AVG",
-        "positive": "MATCH",
-        "negative": "NO_MATCH",
-        "undefined": "NO_MATCH",
-        "ignoreUndefined": "true"
-      },
-      "title_check_high_th": {
-        "fields": [
-          {
-            "field": "title",
-            "comparator": "levensteinTitle",
-            "weight": 1.0,
-            "countIfUndefined": "true",
-            "params": {}
-          }
-        ],
-        "threshold": 0.99,
-        "aggregation": "AVG",
-        "positive": "MATCH",
-        "negative": "NO_MATCH",
-        "undefined": "NO_MATCH",
-        "ignoreUndefined": "true"
-      }
-    },
-    "model": [
-      {
-        "name": "doi",
-        "type": "String",
-        "path": "$.instance[*].pid[?(@.qualifier.classid == 'doi')].value"
-      },
-      {
-        "name": "altdoi",
-        "type": "String",
-        "path": "$.instance[*].alternateIdentifier[?(@.qualifier.classid == 'doi')].value"
-      },
-      {
-        "name": "pid",
-        "type": "JSON",
-        "path": "$.instance[*].pid[*]",
-        "overrideMatch": "true"
-      },
-      {
-        "name": "alternateid",
-        "type": "JSON",
-        "path": "$.instance[*].alternateIdentifier[*]",
-        "overrideMatch": "true"
-      },
-      {
-        "name": "title",
-        "type": "String",
-        "path": "$.title[?(@.qualifier.classid == 'main title')].value",
-        "length": 250,
-        "size": 5
-      },
-      {
-        "name": "authors",
-        "type": "List",
-        "path": "$.author[*].fullname",
-        "size": 200
-      },
-      {
-        "name": "resulttype",
-        "type": "String",
-        "path": "$.resulttype.classid"
-      }
-    ],
-    "blacklists": {
-      "title": [
-        "(?i)^Data Management Plan",
-        "^Inside Front Cover$",
-        "(?i)^Poster presentations$",
-        "^THE ASSOCIATION AND THE GENERAL MEDICAL COUNCIL$",
-        "^Problems with perinatal pathology\\.?$",
-        "(?i)^Cases? of Puerperal Convulsions$",
-        "(?i)^Operative Gyna?ecology$",
-        "(?i)^Mind the gap\\!?\\:?$",
-        "^Chronic fatigue syndrome\\.?$",
-        "^Cartas? ao editor Letters? to the Editor$",
-        "^Note from the Editor$",
-        "^Anesthesia Abstract$",
-        "^Annual report$",
-        "(?i)^“?THE RADICAL PREVENTION OF VENEREAL DISEASE\\.?”?$",
-        "(?i)^Graph and Table of Infectious Diseases?$",
-        "^Presentation$",
-        "(?i)^Reviews and Information on Publications$",
-        "(?i)^PUBLIC HEALTH SERVICES?$",
-        "(?i)^COMBINED TEXT-?BOOK OF OBSTETRICS AND GYN(Æ|ae)COLOGY$",
-        "(?i)^Adrese autora$",
-        "(?i)^Systematic Part .*\\. Catalogus Fossilium Austriae, Band 2: Echinoidea neogenica$",
-        "(?i)^Acknowledgement to Referees$",
-        "(?i)^Behçet's disease\\.?$",
-        "(?i)^Isolation and identification of restriction endonuclease.*$",
-        "(?i)^CEREBROVASCULAR DISEASES?.?$",
-        "(?i)^Screening for abdominal aortic aneurysms?\\.?$",
-        "^Event management$",
-        "(?i)^Breakfast and Crohn's disease.*\\.?$",
-        "^Cálculo de concentraciones en disoluciones acuosas. Ejercicio interactivo\\..*\\.$",
-        "(?i)^Genetic and functional analyses of SHANK2 mutations suggest a multiple hit model of Autism spectrum disorders?\\.?$",
-        "^Gushi hakubutsugaku$",
-        "^Starobosanski nadpisi u Bosni i Hercegovini \\(.*\\)$",
-        "^Intestinal spirocha?etosis$",
-        "^Treatment of Rodent Ulcer$",
-        "(?i)^\\W*Cloud Computing\\W*$",
-        "^Compendio mathematico : en que se contienen todas las materias mas principales de las Ciencias que tratan de la cantidad$",
-        "^Free Communications, Poster Presentations: Session [A-F]$",
-        "^“The Historical Aspects? of Quackery\\.?”$",
-        "^A designated centre for people with disabilities operated by St John of God Community Services (Limited|Ltd), Louth$",
-        "^P(er|re)-Mile Premiums for Auto Insurance\\.?$",
-        "(?i)^Case Report$",
-        "^Boletín Informativo$",
-        "(?i)^Glioblastoma Multiforme$",
-        "(?i)^Nuevos táxones animales descritos en la península Ibérica y Macaronesia desde 1994 \\(.*\\)$",
-        "^Zaměstnanecké výhody$",
-        "(?i)^The Economics of Terrorism and Counter-Terrorism: A Survey \\(Part .*\\)$",
-        "(?i)^Carotid body tumours?\\.?$",
-        "(?i)^\\[Españoles en Francia : La condición Emigrante.*\\]$",
-        "^Avant-propos$",
-        "(?i)^St\\. Patrick's Cathedral, Dublin, County Dublin - Head(s)? and Capital(s)?$",
-        "(?i)^St\\. Patrick's Cathedral, Dublin, County Dublin - Bases?$",
-        "(?i)^PUBLIC HEALTH VERSUS THE STATE$",
-        "^Viñetas de Cortázar$",
-        "(?i)^Search for heavy neutrinos and W(\\[|_|\\(|_\\{|-)?R(\\]|\\)|\\})? bosons with right-handed couplings in a left-right symmetric model in pp collisions at.*TeV(\\.)?$",
-        "(?i)^Measurement of the pseudorapidity and centrality dependence of the transverse energy density in Pb(-?)Pb collisions at.*tev(\\.?)$",
-        "(?i)^Search for resonances decaying into top-quark pairs using fully hadronic decays in pp collisions with ATLAS at.*TeV$",
-        "(?i)^Search for neutral minimal supersymmetric standard model Higgs bosons decaying to tau pairs in pp collisions at.*tev$",
-        "(?i)^Relatório de Estágio (de|em) Angiologia e Cirurgia Vascular$",
-        "^Aus der AGMB$",
-        "^Znanstveno-stručni prilozi$",
-        "(?i)^Zhodnocení finanční situace podniku a návrhy na zlepšení$",
-        "(?i)^Evaluation of the Financial Situation in the Firm and Proposals to its Improvement$",
-        "(?i)^Hodnocení finanční situace podniku a návrhy na její zlepšení$",
-        "^Finanční analýza podniku$",
-        "^Financial analysis( of business)?$",
-        "(?i)^Textbook of Gyn(a)?(Æ)?(e)?cology$",
-        "^Jikken nihon shūshinsho$",
-        "(?i)^CORONER('|s)(s|') INQUESTS$",
-        "(?i)^(Μελέτη παραγόντων )?risk management( για ανάπτυξη και εφαρμογή ενός πληροφοριακού συστήματος| και ανάπτυξη συστήματος)?$",
-        "(?i)^Consultants' contract(s)?$",
-        "(?i)^Upute autorima$",
-        "(?i)^Bijdrage tot de Kennis van den Godsdienst der Dajaks van Lan(d|f)ak en Tajan$",
-        "^Joshi shin kokubun$",
-        "^Kōtō shōgaku dokuhon nōson'yō$",
-        "^Jinjō shōgaku shōka$",
-        "^Shōgaku shūjichō$",
-        "^Nihon joshi dokuhon$",
-        "^Joshi shin dokuhon$",
-        "^Chūtō kanbun dokuhon$",
-        "^Wabun dokuhon$",
-        "(?i)^(Analysis of economy selected village or town|Rozbor hospodaření vybrané obce či města)$",
-        "(?i)^cardiac rehabilitation$",
-        "(?i)^Analytical summary$",
-        "^Thesaurus resolutionum Sacrae Congregationis Concilii$",
-        "(?i)^Sumario analítico(\\s{1})?(Analitic summary)?$",
-        "^Prikazi i osvrti$",
-        "^Rodinný dům s provozovnou$",
-        "^Family house with an establishment$",
-        "^Shinsei chūtō shin kokugun$",
-        "^Pulmonary alveolar proteinosis(\\.?)$",
-        "^Shinshū kanbun$",
-        "^Viñeta(s?) de Rodríguez$",
-        "(?i)^RUBRIKA UREDNIKA$",
-        "^A Matching Model of the Academic Publication Market$",
-        "^Yōgaku kōyō$",
-        "^Internetový marketing$",
-        "^Internet marketing$",
-        "^Chūtō kokugo dokuhon$",
-        "^Kokugo dokuhon$",
-        "^Antibiotic Cover for Dental Extraction(s?)$",
-        "^Strategie podniku$",
-        "^Strategy of an Enterprise$",
-        "(?i)^respiratory disease(s?)(\\.?)$",
-        "^Award(s?) for Gallantry in Civil Defence$",
-        "^Podniková kultura$",
-        "^Corporate Culture$",
-        "^Severe hyponatraemia in hospital inpatient(s?)(\\.?)$",
-        "^Pracovní motivace$",
-        "^Work Motivation$",
-        "^Kaitei kōtō jogaku dokuhon$",
-        "^Konsolidovaná účetní závěrka$",
-        "^Consolidated Financial Statements$",
-        "(?i)^intracranial tumour(s?)$",
-        "^Climate Change Mitigation Options and Directed Technical Change: A Decentralized Equilibrium Analysis$",
-        "^\\[CERVECERIAS MAHOU(\\.|\\:) INTERIOR\\] \\[Material gráfico\\]$",
-        "^Housing Market Dynamics(\\:|\\.) On the Contribution of Income Shocks and Credit Constraint(s?)$",
-        "^\\[Funciones auxiliares de la música en Radio París,.*\\]$",
-        "^Úroveň motivačního procesu jako způsobu vedení lidí$",
-        "^The level of motivation process as a leadership$",
-        "^Pay-beds in N(\\.?)H(\\.?)S(\\.?) Hospitals$",
-        "(?i)^news and events$",
-        "(?i)^NOVOSTI I DOGAĐAJI$",
-        "^Sansū no gakushū$",
-        "^Posouzení informačního systému firmy a návrh změn$",
-        "^Information System Assessment and Proposal for ICT Modification$",
-        "^Stresové zatížení pracovníků ve vybrané profesi$",
-        "^Stress load in a specific job$",
-        "^Sunday: Poster Sessions, Pt.*$",
-        "^Monday: Poster Sessions, Pt.*$",
-        "^Wednesday: Poster Sessions, Pt.*",
-        "^Tuesday: Poster Sessions, Pt.*$",
-        "^Analýza reklamy$",
-        "^Analysis of advertising$",
-        "^Shōgaku shūshinsho$",
-        "^Shōgaku sansū$",
-        "^Shintei joshi kokubun$",
-        "^Taishō joshi kokubun dokuhon$",
-        "^Joshi kokubun$",
-        "^Účetní uzávěrka a účetní závěrka v ČR$",
-        "(?i)^The \"?Causes\"? of Cancer$",
-        "^Normas para la publicación de artículos$",
-        "^Editor('|s)(s|') [Rr]eply$",
-        "^Editor(’|s)(s|’) letter$",
-        "^Redaktoriaus žodis$",
-        "^DISCUSSION ON THE PRECEDING PAPER$",
-        "^Kōtō shōgaku shūshinsho jidōyō$",
-        "^Shōgaku nihon rekishi$",
-        "^(Theory of the flow of action currents in isolated myelinated nerve fibers).*$",
-        "^Préface$",
-        "^Occupational [Hh]ealth [Ss]ervices.$",
-        "^In Memoriam Professor Toshiyuki TAKESHIMA$",
-        "^Účetní závěrka ve vybraném podniku.*$",
-        "^Financial statements in selected company$",
-        "^Abdominal [Aa]ortic [Aa]neurysms.*$",
-        "^Pseudomyxoma peritonei$",
-        "^Kazalo autora$",
-        "(?i)^uvodna riječ$",
-        "^Motivace jako způsob vedení lidí$",
-        "^Motivation as a leadership$",
-        "^Polyfunkční dům$",
-        "^Multi\\-funkcional building$",
-        "^Podnikatelský plán$",
-        "(?i)^Podnikatelský záměr$",
-        "(?i)^Business Plan$",
-        "^Oceňování nemovitostí$",
-        "^Marketingová komunikace$",
-        "^Marketing communication$",
-        "^Sumario Analítico$",
-        "^Riječ uredništva$",
-        "^Savjetovanja i priredbe$",
-        "^Índice$",
-        "^(Starobosanski nadpisi).*$",
-        "^Vzdělávání pracovníků v organizaci$",
-        "^Staff training in organization$",
-        "^(Life Histories of North American Geometridae).*$",
-        "^Strategická analýza podniku$",
-        "^Strategic Analysis of an Enterprise$",
-        "^Sadržaj$",
-        "^Upute suradnicima$",
-        "^Rodinný dům$",
-        "(?i)^Fami(l)?ly house$",
-        "^Upute autorima$",
-        "^Strategic Analysis$",
-        "^Finanční analýza vybraného podniku$",
-        "^Finanční analýza$",
-        "^Riječ urednika$",
-        "(?i)^Content(s?)$",
-        "(?i)^Inhalt$",
-        "^Jinjō shōgaku shūshinsho jidōyō$",
-        "(?i)^Index$",
-        "^Chūgaku kokubun kyōkasho$",
-        "^Retrato de una mujer$",
-        "^Retrato de un hombre$",
-        "^Kōtō shōgaku dokuhon$",
-        "^Shotōka kokugo$",
-        "^Shōgaku dokuhon$",
-        "^Jinjō shōgaku kokugo dokuhon$",
-        "^Shinsei kokugo dokuhon$",
-        "^Teikoku dokuhon$",
-        "^Instructions to Authors$",
-        "^KİTAP TAHLİLİ$",
-        "^PRZEGLĄD PIŚMIENNICTWA$",
-        "(?i)^Presentación$",
-        "^İçindekiler$",
-        "(?i)^Tabl?e of contents$",
-        "^(CODICE DEL BEATO DE LOS REYES FERNANDO I Y SANCHA).*$",
-        "^(\\[MADRID\\. BIBL\\. NAC\\. N.*KING FERDINAND I.*FROM SAN ISIDORO DE LEON\\. FACUNDUS SCRIPSIT DATED.*\\]).*",
-        "^Editorial( Board)?$",
-        "(?i)^Editorial \\(English\\)$",
-        "^Editörden$",
-        "^(Corpus Oral Dialectal \\(COD\\)\\.).*$",
-        "^(Kiri Karl Morgensternile).*$",
-        "^(\\[Eksliibris Aleksandr).*\\]$",
-        "^(\\[Eksliibris Aleksandr).*$",
-        "^(Eksliibris Aleksandr).*$",
-        "^(Kiri A\\. de Vignolles).*$",
-        "^(2 kirja Karl Morgensternile).*$",
-        "^(Pirita kloostri idaosa arheoloogilised).*$",
-        "^(Kiri tundmatule).*$",
-        "^(Kiri Jenaer Allgemeine Literaturzeitung toimetusele).*$",
-        "^(Eksliibris Nikolai Birukovile).*$",
-        "^(Eksliibris Nikolai Issakovile).*$",
-        "^(WHP Cruise Summary Information of section).*$",
-        "^(Measurement of the top quark\\-pair production cross section with ATLAS in pp collisions at).*$",
-        "^(Measurement of the spin\\-dependent structure function).*",
-        "(?i)^.*authors['’′]? reply\\.?$",
-        "(?i)^.*authors['’′]? response\\.?$",
-        "^Data [mM]anagement [sS]ervices\\.$",
-        "Research and Advanced Technology for Digital Libraries"
-      ]
-    },
-    "synonyms": {}
-  }
-}
--- a/dnet-dedup-test/src/test/resources/eu/dnetlib/pace/examples/authors.dump.json
+++ b/dnet-dedup-test/src/test/resources/eu/dnetlib/pace/examples/authors.dump.json
--- a/dnet-dedup-test/src/test/resources/eu/dnetlib/pace/examples/ds.to.fix.json
+++ b/dnet-dedup-test/src/test/resources/eu/dnetlib/pace/examples/ds.to.fix.json
@ -1,4 +0,0 @@
-{"websiteurl": "https://fairsharing.org", "englishname": "FAIRsharing", "officialname": "FAIRsharing", "id": "eosc________::oxford_e-research_centre::oxford_e-research_centre.fairsharing"}
-{"websiteurl": "https://FAIRsharing.org", "englishname": "FAIRsharing", "officialname": "FAIRsharing", "id": "fairsharing_::2521"}
-{"websiteurl": "https://fairsharing.org/", "englishname": "formerly: biosharing", "officialname": "FAIRsharing", "id": "re3data_____::r3d100010142"}
-{"websiteurl": "https://fairsharing.org/", "englishname": "FAIRsharing", "officialname": "FAIRsharing", "id": "openaire____::fairsharing"}
--- a/dnet-dedup-test/src/test/resources/eu/dnetlib/pace/examples/openorgs.to.fix.json
+++ b/dnet-dedup-test/src/test/resources/eu/dnetlib/pace/examples/openorgs.to.fix.json
--- a/dnet-dedup-test/src/test/resources/eu/dnetlib/pace/examples/publications.to.fix.json
+++ b/dnet-dedup-test/src/test/resources/eu/dnetlib/pace/examples/publications.to.fix.json
--- a/dnet-dedup-test/src/test/resources/eu/dnetlib/pace/examples/software.dump.2000.json
+++ b/dnet-dedup-test/src/test/resources/eu/dnetlib/pace/examples/software.dump.2000.json
--- a/dnet-dedup-test/src/test/resources/eu/dnetlib/pace/examples/software.to.fix.json
+++ b/dnet-dedup-test/src/test/resources/eu/dnetlib/pace/examples/software.to.fix.json
--- a/dnet-dedup-test/src/test/resources/eu/dnetlib/pace/examples/softwares.dump.2000.json
+++ b/dnet-dedup-test/src/test/resources/eu/dnetlib/pace/examples/softwares.dump.2000.json
--- a/dnet-dedup-test/src/test/resources/eu/dnetlib/pace/parameters/computeStatistics_parameters.json
+++ b/dnet-dedup-test/src/test/resources/eu/dnetlib/pace/parameters/computeStatistics_parameters.json
@ -1,32 +0,0 @@
-[
-  {
-    "paramName": "e",
-    "paramLongName": "entitiesPath",
-    "paramDescription": "the input entities",
-    "paramRequired": true
-  },
-  {
-    "paramName": "w",
-    "paramLongName": "workingPath",
-    "paramDescription": "path of the working directory",
-    "paramRequired": true
-  },
-  {
-    "paramName": "np",
-    "paramLongName": "numPartitions",
-    "paramDescription": "number of partitions for the similarity relations intermediate phases",
-    "paramRequired": false
-  },
-  {
-    "paramName": "dc",
-    "paramLongName": "dedupConfPath",
-    "paramDescription": "dedup configuration to be used",
-    "paramRequired": false
-  },
-  {
-    "paramName": "gt",
-    "paramLongName": "groundTruthFieldJPath",
-    "paramDescription": "field to be used as groundtruth",
-    "paramRequired": false
-  }
-]
--- a/dnet-dedup-test/src/test/resources/eu/dnetlib/pace/parameters/createDedupEntity_parameters.json
+++ b/dnet-dedup-test/src/test/resources/eu/dnetlib/pace/parameters/createDedupEntity_parameters.json
--- a/dnet-dedup-test/src/test/resources/eu/dnetlib/pace/parameters/createSimRels_parameters.json
+++ b/dnet-dedup-test/src/test/resources/eu/dnetlib/pace/parameters/createSimRels_parameters.json
@ -22,11 +22,5 @@
    "paramLongName": "dedupConfPath",
    "paramDescription": "path of the dedup configuration",
    "paramRequired": true
-  },
-  {
-    "paramName": "ut",
-    "paramLongName": "useTree",
-    "paramDescription": "chose the tree configuration or not",
-    "paramRequired": true
  }
 ]
--- a/dnet-dedup-test/src/test/resources/graph_visualization_tool/graph_template.html
+++ b/dnet-dedup-test/src/test/resources/graph_visualization_tool/graph_template.html
@ -1,70 +0,0 @@
-<html>
-<head>
-    <script type="text/javascript" src="https://unpkg.com/vis-network/standalone/umd/vis-network.min.js"></script>
-
-    <style type="text/css">
-        #mynetwork {
-            width: 1000px;
-            height: 700px;
-            border: 1px solid lightgray;
-        }
-
-        th, td {
-            font-size: 10px;
-        }
-    </style>
-</head>
-<body>
-<div id="mynetwork"></div>
-
-<script type="text/javascript">
-
-    // HTML parsing with all XSS goodness
-    function htmlTitle(html) {
-        const container = document.createElement("div");
-        container.innerHTML = html;
-        return container;
-    }
-
-    var nodesArray = %nodes%;
-    var edgesArray = %edges%;
-
-    for (var i = 0; i < nodesArray.length; i++) {
-        nodesArray[i].title = htmlTitle(nodesArray[i].title);
-    };
-
-    // create an array with nodes
-    var nodes = new vis.DataSet(nodesArray);
-
-    // create an array with edges
-    var edges = new vis.DataSet(edgesArray);
-
-    // HTML parsing with all XSS goodness
-    function htmlTitle(html) {
-        const container = document.createElement("div");
-        container.innerHTML = html;
-        return container;
-    }
-
-    // create a network
-    var container = document.getElementById('mynetwork');
-
-    // provide the data in the vis format
-    var data = {
-        nodes: nodes,
-        edges: edges
-    };
-
-    var options = {
-        physics:{enabled: false},
-        edges:{physics:false},
-        nodes:{font:{size:10}},
-        layout: {improvedLayout:true}
-    };
-
-    // initialize your network!
-    var network = new vis.Network(container, data, options);
-
-</script>
-</body>
-</html>
--- a/dnet-dedup.ipr
+++ b/dnet-dedup.ipr
@ -1,113 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-
-<!--
-Licensed to the Apache Software Foundation (ASF) under one
-or more contributor license agreements.  See the NOTICE file
-distributed with this work for additional information
-regarding copyright ownership.  The ASF licenses this file
-to you under the Apache License, Version 2.0 (the
-"License"); you may not use this file except in compliance
-with the License.  You may obtain a copy of the License at
-
-  http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing,
-software distributed under the License is distributed on an
-"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-KIND, either express or implied.  See the License for the
-specific language governing permissions and limitations
-under the License.
-->
-<project version="4" relativePaths="false"> 
-  <component name="ProjectRootManager" version="2" assert-keyword="true" project-jdk-name="1.8" jdk-15="true"/>  
-  <component name="CodeStyleManager"> 
-    <option name="USE_DEFAULT_CODE_STYLE_SCHEME" value="true"/>  
-    <option name="CODE_STYLE_SCHEME" value=""/> 
-  </component>  
-  <component name="libraryTable"/>  
-  <component name="CompilerConfiguration"> 
-    <option name="DEFAULT_COMPILER" value="Javac"/>  
-    <option name="CLEAR_OUTPUT_DIRECTORY" value="false"/>  
-    <!--
-    <wildcardResourcePatterns>
-      <entry name="${wildcardResourcePattern}"/>
-    </wildcardResourcePatterns>
-    -->  
-    <wildcardResourcePatterns>
-      <entry name="!?*.java"/>
-    </wildcardResourcePatterns>
-  </component>  
-  <component name="JavacSettings"> 
-    <option name="DEBUGGING_INFO" value="true"/>  
-    <option name="GENERATE_NO_WARNINGS" value="false"/>  
-    <option name="DEPRECATION" value="true"/>  
-    <option name="ADDITIONAL_OPTIONS_STRING" value=""/>  
-    <option name="MAXIMUM_HEAP_SIZE" value="128"/>  
-    <option name="USE_GENERICS_COMPILER" value="false"/> 
-  </component>  
-  <component name="JikesSettings"> 
-    <option name="DEBUGGING_INFO" value="true"/>  
-    <option name="DEPRECATION" value="true"/>  
-    <option name="GENERATE_NO_WARNINGS" value="false"/>  
-    <option name="GENERATE_MAKE_FILE_DEPENDENCIES" value="false"/>  
-    <option name="DO_FULL_DEPENDENCE_CHECK" value="false"/>  
-    <option name="IS_INCREMENTAL_MODE" value="false"/>  
-    <option name="IS_EMACS_ERRORS_MODE" value="true"/>  
-    <option name="ADDITIONAL_OPTIONS_STRING" value=""/>  
-    <option name="MAXIMUM_HEAP_SIZE" value="128"/> 
-  </component>  
-  <component name="AntConfiguration"> 
-    <option name="IS_AUTOSCROLL_TO_SOURCE" value="false"/>  
-    <option name="FILTER_TARGETS" value="false"/> 
-  </component>  
-  <component name="JavadocGenerationManager"> 
-    <option name="OUTPUT_DIRECTORY"/>  
-    <option name="OPTION_SCOPE" value="protected"/>  
-    <option name="OPTION_HIERARCHY" value="false"/>  
-    <option name="OPTION_NAVIGATOR" value="false"/>  
-    <option name="OPTION_INDEX" value="false"/>  
-    <option name="OPTION_SEPARATE_INDEX" value="false"/>  
-    <option name="OPTION_USE_1_1" value="false"/>  
-    <option name="OPTION_DOCUMENT_TAG_USE" value="false"/>  
-    <option name="OPTION_DOCUMENT_TAG_AUTHOR" value="false"/>  
-    <option name="OPTION_DOCUMENT_TAG_VERSION" value="false"/>  
-    <option name="OPTION_DOCUMENT_TAG_DEPRECATED" value="false"/>  
-    <option name="OPTION_DEPRECATED_LIST" value="false"/>  
-    <option name="OTHER_OPTIONS"/>  
-    <option name="HEAP_SIZE"/>  
-    <option name="OPEN_IN_BROWSER" value="false"/> 
-  </component>  
-  <component name="JUnitProjectSettings"> 
-    <option name="TEST_RUNNER" value="UI"/> 
-  </component>  
-  <component name="EntryPointsManager"> 
-    <entry_points/> 
-  </component>  
-  <component name="DataSourceManager"/>  
-  <component name="ExportToHTMLSettings"> 
-    <option name="PRINT_LINE_NUMBERS" value="false"/>  
-    <option name="OPEN_IN_BROWSER" value="false"/>  
-    <option name="OUTPUT_DIRECTORY"/> 
-  </component>  
-  <component name="ImportConfiguration"> 
-    <option name="VENDOR"/>  
-    <option name="RELEASE_TAG"/>  
-    <option name="LOG_MESSAGE"/>  
-    <option name="CHECKOUT_AFTER_IMPORT" value="true"/> 
-  </component>  
-  <component name="ProjectModuleManager"> 
-    <modules> 
-      <!-- module filepath="$$PROJECT_DIR$$/${pom.artifactId}.iml"/ -->  
-      <module filepath="$PROJECT_DIR$/dnet-dedup.iml"/>
-      <module filepath="$PROJECT_DIR$/dnet-pace-core/dnet-pace-core.iml"/>
-      <module filepath="$PROJECT_DIR$/dnet-dedup-test/dnet-dedup-test.iml"/>
-      <module filepath="$PROJECT_DIR$/dhp-build/dhp-code-style/dhp-code-style.iml"/>
-      <module filepath="$PROJECT_DIR$/dhp-build/dhp-build-assembly-resources/dhp-build-assembly-resources.iml"/>
-      <module filepath="$PROJECT_DIR$/dhp-build/dhp-build-properties-maven-plugin/dhp-build-properties-maven-plugin.iml"/>
-      <module filepath="$PROJECT_DIR$/dhp-build/dhp-build.iml"/>
-    </modules> 
-  </component>  
-  <UsedPathMacros> 
-    <!--<macro name="cargo"></macro>--> 
-  </UsedPathMacros> 
-</project>
--- a/dnet-dedup.iws
+++ b/dnet-dedup.iws
@ -1,418 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-
-<!--
-Licensed to the Apache Software Foundation (ASF) under one
-or more contributor license agreements.  See the NOTICE file
-distributed with this work for additional information
-regarding copyright ownership.  The ASF licenses this file
-to you under the Apache License, Version 2.0 (the
-"License"); you may not use this file except in compliance
-with the License.  You may obtain a copy of the License at
-
-  http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing,
-software distributed under the License is distributed on an
-"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-KIND, either express or implied.  See the License for the
-specific language governing permissions and limitations
-under the License.
-->
-<project version="4" relativePaths="false"> 
-  <component name="LvcsProjectConfiguration"> 
-    <option name="ADD_LABEL_ON_PROJECT_OPEN" value="true"/>  
-    <option name="ADD_LABEL_ON_PROJECT_COMPILATION" value="true"/>  
-    <option name="ADD_LABEL_ON_FILE_PACKAGE_COMPILATION" value="true"/>  
-    <option name="ADD_LABEL_ON_PROJECT_MAKE" value="true"/>  
-    <option name="ADD_LABEL_ON_RUNNING" value="true"/>  
-    <option name="ADD_LABEL_ON_DEBUGGING" value="true"/>  
-    <option name="ADD_LABEL_ON_UNIT_TEST_PASSED" value="true"/>  
-    <option name="ADD_LABEL_ON_UNIT_TEST_FAILED" value="true"/> 
-  </component>  
-  <component name="PropertiesComponent"> 
-    <property name="MemberChooser.copyJavadoc" value="false"/>  
-    <property name="GoToClass.includeLibraries" value="false"/>  
-    <property name="MemberChooser.showClasses" value="true"/>  
-    <property name="MemberChooser.sorted" value="false"/>  
-    <property name="GoToFile.includeJavaFiles" value="false"/>  
-    <property name="GoToClass.toSaveIncludeLibraries" value="false"/> 
-  </component>  
-  <component name="ToolWindowManager"> 
-    <frame x="-4" y="-4" width="1032" height="746" extended-state="6"/>  
-    <editor active="false"/>  
-    <layout> 
-      <window_info id="CVS" active="false" anchor="bottom" auto_hide="false" internal_type="docked" type="docked" visible="false" weight="0.33" order="-1"/>  
-      <window_info id="TODO" active="false" anchor="bottom" auto_hide="false" internal_type="docked" type="docked" visible="false" weight="0.33" order="7"/>  
-      <window_info id="Project" active="false" anchor="left" auto_hide="false" internal_type="docked" type="docked" visible="false" weight="0.25" order="0"/>  
-      <window_info id="Find" active="false" anchor="bottom" auto_hide="false" internal_type="docked" type="docked" visible="false" weight="0.33" order="1"/>  
-      <window_info id="Structure" active="false" anchor="left" auto_hide="false" internal_type="docked" type="docked" visible="false" weight="0.25" order="1"/>  
-      <window_info id="Messages" active="false" anchor="bottom" auto_hide="false" internal_type="docked" type="docked" visible="false" weight="0.33" order="-1"/>  
-      <window_info id="Inspection" active="false" anchor="bottom" auto_hide="false" internal_type="docked" type="docked" visible="false" weight="0.4" order="6"/>  
-      <window_info id="Aspects" active="false" anchor="right" auto_hide="false" internal_type="docked" type="docked" visible="false" weight="0.33" order="-1"/>  
-      <window_info id="Ant Build" active="false" anchor="right" auto_hide="false" internal_type="docked" type="docked" visible="false" weight="0.25" order="1"/>  
-      <window_info id="Run" active="false" anchor="bottom" auto_hide="false" internal_type="docked" type="docked" visible="false" weight="0.33" order="2"/>  
-      <window_info id="Hierarchy" active="false" anchor="right" auto_hide="false" internal_type="docked" type="docked" visible="false" weight="0.25" order="2"/>  
-      <window_info id="Debug" active="false" anchor="bottom" auto_hide="false" internal_type="docked" type="docked" visible="false" weight="0.4" order="4"/>  
-      <window_info id="Commander" active="false" anchor="right" auto_hide="false" internal_type="sliding" type="sliding" visible="false" weight="0.4" order="0"/>  
-      <window_info id="Web" active="false" anchor="left" auto_hide="false" internal_type="docked" type="docked" visible="false" weight="0.25" order="2"/>  
-      <window_info id="Message" active="false" anchor="bottom" auto_hide="false" internal_type="docked" type="docked" visible="false" weight="0.33" order="0"/>  
-      <window_info id="EJB" active="false" anchor="bottom" auto_hide="false" internal_type="docked" type="docked" visible="false" weight="0.25" order="3"/>  
-      <window_info id="Cvs" active="false" anchor="bottom" auto_hide="false" internal_type="docked" type="docked" visible="false" weight="0.25" order="5"/> 
-    </layout> 
-  </component>  
-  <component name="ErrorTreeViewConfiguration"> 
-    <option name="IS_AUTOSCROLL_TO_SOURCE" value="false"/>  
-    <option name="HIDE_WARNINGS" value="false"/> 
-  </component>  
-  <component name="StructureViewFactory"> 
-    <option name="SORT_MODE" value="0"/>  
-    <option name="GROUP_INHERITED" value="true"/>  
-    <option name="AUTOSCROLL_MODE" value="true"/>  
-    <option name="SHOW_FIELDS" value="true"/>  
-    <option name="AUTOSCROLL_FROM_SOURCE" value="false"/>  
-    <option name="GROUP_GETTERS_AND_SETTERS" value="true"/>  
-    <option name="SHOW_INHERITED" value="false"/>  
-    <option name="HIDE_NOT_PUBLIC" value="false"/> 
-  </component>  
-  <component name="ProjectViewSettings"> 
-    <navigator currentView="ProjectPane" flattenPackages="false" showMembers="false" showStructure="false" autoscrollToSource="false" splitterProportion="0.5"/>  
-    <view id="ProjectPane"> 
-      <expanded_node type="directory" url="file://$PROJECT_DIR$"/> 
-    </view>  
-    <view id="SourcepathPane"/>  
-    <view id="ClasspathPane"/> 
-  </component>  
-  <component name="Commander"> 
-    <leftPanel view="Project"/>  
-    <rightPanel view="Project"/>  
-    <splitter proportion="0.5"/> 
-  </component>  
-  <component name="AspectsView"/>  
-  <component name="SelectInManager"/>  
-  <component name="HierarchyBrowserManager"> 
-    <option name="SHOW_PACKAGES" value="false"/>  
-    <option name="IS_AUTOSCROLL_TO_SOURCE" value="false"/>  
-    <option name="SORT_ALPHABETICALLY" value="false"/> 
-  </component>  
-  <component name="TodoView" selected-index="0"> 
-    <todo-panel id="selected-file"> 
-      <are-packages-shown value="false"/>  
-      <flatten-packages value="false"/>  
-      <is-autoscroll-to-source value="true"/> 
-    </todo-panel>  
-    <todo-panel id="all"> 
-      <are-packages-shown value="true"/>  
-      <flatten-packages value="false"/>  
-      <is-autoscroll-to-source value="true"/> 
-    </todo-panel> 
-  </component>  
-  <component name="editorManager"/>  
-  <component name="editorHistoryManager"/>  
-  <component name="DaemonCodeAnalyzer"> 
-    <disable_hints/> 
-  </component>  
-  <component name="InspectionManager"> 
-    <option name="AUTOSCROLL_TO_SOURCE" value="false"/>  
-    <option name="SPLITTER_PROPORTION" value="0.5"/>  
-    <profile name="Default"/> 
-  </component>  
-  <component name="BookmarkManager"/>  
-  <component name="DebuggerManager"> 
-    <line_breakpoints/>  
-    <exception_breakpoints> 
-      <breakpoint_any> 
-        <option name="NOTIFY_CAUGHT" value="true"/>  
-        <option name="NOTIFY_UNCAUGHT" value="true"/>  
-        <option name="ENABLED" value="false"/>  
-        <option name="SUSPEND_VM" value="true"/>  
-        <option name="COUNT_FILTER_ENABLED" value="false"/>  
-        <option name="COUNT_FILTER" value="0"/>  
-        <option name="CONDITION_ENABLED" value="false"/>  
-        <option name="CONDITION"/>  
-        <option name="LOG_ENABLED" value="false"/>  
-        <option name="LOG_EXPRESSION_ENABLED" value="false"/>  
-        <option name="LOG_MESSAGE"/>  
-        <option name="CLASS_FILTERS_ENABLED" value="false"/>  
-        <option name="INVERSE_CLASS_FILLTERS" value="false"/>  
-        <option name="SUSPEND_POLICY" value="SuspendAll"/> 
-      </breakpoint_any> 
-    </exception_breakpoints>  
-    <field_breakpoints/>  
-    <method_breakpoints/> 
-  </component>  
-  <component name="DebuggerSettings"> 
-    <option name="TRACING_FILTERS_ENABLED" value="true"/>  
-    <option name="TOSTRING_CLASSES_ENABLED" value="false"/>  
-    <option name="VALUE_LOOKUP_DELAY" value="700"/>  
-    <option name="DEBUGGER_TRANSPORT" value="0"/>  
-    <option name="FORCE_CLASSIC_VM" value="true"/>  
-    <option name="HIDE_DEBUGGER_ON_PROCESS_TERMINATION" value="false"/>  
-    <option name="SKIP_SYNTHETIC_METHODS" value="true"/>  
-    <option name="SKIP_CONSTRUCTORS" value="false"/>  
-    <option name="STEP_THREAD_SUSPEND_POLICY" value="SuspendThread"/>  
-    <default_breakpoint_settings> 
-      <option name="NOTIFY_CAUGHT" value="true"/>  
-      <option name="NOTIFY_UNCAUGHT" value="true"/>  
-      <option name="WATCH_MODIFICATION" value="true"/>  
-      <option name="WATCH_ACCESS" value="true"/>  
-      <option name="WATCH_ENTRY" value="true"/>  
-      <option name="WATCH_EXIT" value="true"/>  
-      <option name="ENABLED" value="true"/>  
-      <option name="SUSPEND_VM" value="true"/>  
-      <option name="COUNT_FILTER_ENABLED" value="false"/>  
-      <option name="COUNT_FILTER" value="0"/>  
-      <option name="CONDITION_ENABLED" value="false"/>  
-      <option name="CONDITION"/>  
-      <option name="LOG_ENABLED" value="false"/>  
-      <option name="LOG_EXPRESSION_ENABLED" value="false"/>  
-      <option name="LOG_MESSAGE"/>  
-      <option name="CLASS_FILTERS_ENABLED" value="false"/>  
-      <option name="INVERSE_CLASS_FILLTERS" value="false"/>  
-      <option name="SUSPEND_POLICY" value="SuspendAll"/> 
-    </default_breakpoint_settings>  
-    <filter> 
-      <option name="PATTERN" value="com.sun.*"/>  
-      <option name="ENABLED" value="true"/> 
-    </filter>  
-    <filter> 
-      <option name="PATTERN" value="java.*"/>  
-      <option name="ENABLED" value="true"/> 
-    </filter>  
-    <filter> 
-      <option name="PATTERN" value="javax.*"/>  
-      <option name="ENABLED" value="true"/> 
-    </filter>  
-    <filter> 
-      <option name="PATTERN" value="org.omg.*"/>  
-      <option name="ENABLED" value="true"/> 
-    </filter>  
-    <filter> 
-      <option name="PATTERN" value="sun.*"/>  
-      <option name="ENABLED" value="true"/> 
-    </filter>  
-    <filter> 
-      <option name="PATTERN" value="junit.*"/>  
-      <option name="ENABLED" value="true"/> 
-    </filter> 
-  </component>  
-  <component name="CompilerWorkspaceConfiguration"> 
-    <option name="COMPILE_IN_BACKGROUND" value="false"/>  
-    <option name="AUTO_SHOW_ERRORS_IN_EDITOR" value="true"/> 
-  </component>  
-  <component name="RunManager"> 
-    <activeType name="Application"/>  
-    <configuration selected="false" default="true" type="Applet" factoryName="Applet"> 
-      <module name=""/>  
-      <option name="MAIN_CLASS_NAME"/>  
-      <option name="HTML_FILE_NAME"/>  
-      <option name="HTML_USED" value="false"/>  
-      <option name="WIDTH" value="400"/>  
-      <option name="HEIGHT" value="300"/>  
-      <option name="POLICY_FILE" value="$APPLICATION_HOME_DIR$/bin/appletviewer.policy"/>  
-      <option name="VM_PARAMETERS"/> 
-    </configuration>  
-    <configuration selected="false" default="true" type="Remote" factoryName="Remote"> 
-      <option name="USE_SOCKET_TRANSPORT" value="true"/>  
-      <option name="SERVER_MODE" value="false"/>  
-      <option name="SHMEM_ADDRESS" value="javadebug"/>  
-      <option name="HOST" value="localhost"/>  
-      <option name="PORT" value="5005"/> 
-    </configuration>  
-    <configuration selected="false" default="true" type="Application" factoryName="Application"> 
-      <option name="MAIN_CLASS_NAME"/>  
-      <option name="VM_PARAMETERS"/>  
-      <option name="PROGRAM_PARAMETERS"/>  
-      <option name="WORKING_DIRECTORY" value="$PROJECT_DIR$"/>  
-      <module name=""/> 
-    </configuration>  
-    <configuration selected="false" default="true" type="JUnit" factoryName="JUnit"> 
-      <module name=""/>  
-      <option name="PACKAGE_NAME"/>  
-      <option name="MAIN_CLASS_NAME"/>  
-      <option name="METHOD_NAME"/>  
-      <option name="TEST_OBJECT" value="class"/>  
-      <option name="VM_PARAMETERS"/>  
-      <option name="PARAMETERS"/>  
-      <option name="WORKING_DIRECTORY" value="$PROJECT_DIR$"/>  
-      <option name="ADDITIONAL_CLASS_PATH"/>  
-      <option name="TEST_SEARCH_SCOPE"> 
-        <value defaultName="wholeProject"/> 
-      </option> 
-    </configuration> 
-  </component>  
-  <component name="VcsManagerConfiguration"> 
-    <option name="ACTIVE_VCS_NAME" value="git"/>  
-    <option name="STATE" value="0"/> 
-  </component>  
-  <component name="VssConfiguration"> 
-    <CheckoutOptions> 
-      <option name="COMMENT" value=""/>  
-      <option name="DO_NOT_GET_LATEST_VERSION" value="false"/>  
-      <option name="REPLACE_WRITABLE" value="false"/>  
-      <option name="RECURSIVE" value="false"/> 
-    </CheckoutOptions>  
-    <CheckinOptions> 
-      <option name="COMMENT" value=""/>  
-      <option name="KEEP_CHECKED_OUT" value="false"/>  
-      <option name="RECURSIVE" value="false"/> 
-    </CheckinOptions>  
-    <AddOptions> 
-      <option name="COMMENT" value=""/>  
-      <option name="STORE_ONLY_LATEST_VERSION" value="false"/>  
-      <option name="CHECK_OUT_IMMEDIATELY" value="false"/>  
-      <option name="FILE_TYPE" value="0"/> 
-    </AddOptions>  
-    <UndocheckoutOptions> 
-      <option name="MAKE_WRITABLE" value="false"/>  
-      <option name="REPLACE_LOCAL_COPY" value="0"/>  
-      <option name="RECURSIVE" value="false"/> 
-    </UndocheckoutOptions>  
-    <DiffOptions> 
-      <option name="IGNORE_WHITE_SPACE" value="false"/>  
-      <option name="IGNORE_CASE" value="false"/> 
-    </DiffOptions>  
-    <GetOptions> 
-      <option name="REPLACE_WRITABLE" value="0"/>  
-      <option name="MAKE_WRITABLE" value="false"/>  
-      <option name="RECURSIVE" value="false"/> 
-    </GetOptions>  
-    <option name="CLIENT_PATH" value=""/>  
-    <option name="SRCSAFEINI_PATH" value=""/>  
-    <option name="USER_NAME" value=""/>  
-    <option name="PWD" value=""/>  
-    <option name="SHOW_CHECKOUT_OPTIONS" value="true"/>  
-    <option name="SHOW_ADD_OPTIONS" value="true"/>  
-    <option name="SHOW_UNDOCHECKOUT_OPTIONS" value="true"/>  
-    <option name="SHOW_DIFF_OPTIONS" value="true"/>  
-    <option name="SHOW_GET_OPTIONS" value="true"/>  
-    <option name="USE_EXTERNAL_DIFF" value="false"/>  
-    <option name="EXTERNAL_DIFF_PATH" value=""/>  
-    <option name="REUSE_LAST_COMMENT" value="false"/>  
-    <option name="PUT_FOCUS_INTO_COMMENT" value="false"/>  
-    <option name="SHOW_CHECKIN_OPTIONS" value="true"/>  
-    <option name="LAST_COMMIT_MESSAGE" value=""/>  
-    <option name="CHECKIN_DIALOG_SPLITTER_PROPORTION" value="0.8"/> 
-  </component>  
-  <component name="CheckinPanelState"/>  
-  <component name="WebViewSettings"> 
-    <webview flattenPackages="false" showMembers="false" autoscrollToSource="false"/> 
-  </component>  
-  <component name="EjbViewSettings"> 
-    <EjbView showMembers="false" autoscrollToSource="false"/> 
-  </component>  
-  <component name="AppServerRunManager"/>  
-  <component name="StarteamConfiguration"> 
-    <option name="SERVER" value=""/>  
-    <option name="PORT" value="49201"/>  
-    <option name="USER" value=""/>  
-    <option name="PASSWORD" value=""/>  
-    <option name="PROJECT" value=""/>  
-    <option name="VIEW" value=""/>  
-    <option name="ALTERNATIVE_WORKING_PATH" value=""/>  
-    <option name="PUT_FOCUS_INTO_COMMENT" value="false"/>  
-    <option name="SHOW_CHECKIN_OPTIONS" value="true"/>  
-    <option name="LAST_COMMIT_MESSAGE" value=""/>  
-    <option name="CHECKIN_DIALOG_SPLITTER_PROPORTION" value="0.8"/> 
-  </component>  
-  <component name="Cvs2Configuration"> 
-    <option name="ON_FILE_ADDING" value="0"/>  
-    <option name="ON_FILE_REMOVING" value="0"/>  
-    <option name="PRUNE_EMPTY_DIRECTORIES" value="true"/>  
-    <option name="SHOW_UPDATE_OPTIONS" value="true"/>  
-    <option name="SHOW_ADD_OPTIONS" value="true"/>  
-    <option name="SHOW_REMOVE_OPTIONS" value="true"/>  
-    <option name="MERGING_MODE" value="0"/>  
-    <option name="MERGE_WITH_BRANCH1_NAME" value="HEAD"/>  
-    <option name="MERGE_WITH_BRANCH2_NAME" value="HEAD"/>  
-    <option name="RESET_STICKY" value="false"/>  
-    <option name="CREATE_NEW_DIRECTORIES" value="true"/>  
-    <option name="DEFAULT_TEXT_FILE_SUBSTITUTION" value="kv"/>  
-    <option name="PROCESS_UNKNOWN_FILES" value="false"/>  
-    <option name="PROCESS_DELETED_FILES" value="false"/>  
-    <option name="SHOW_EDIT_DIALOG" value="true"/>  
-    <option name="RESERVED_EDIT" value="false"/>  
-    <option name="FILE_HISTORY_SPLITTER_PROPORTION" value="0.6"/>  
-    <option name="SHOW_CHECKOUT_OPTIONS" value="true"/>  
-    <option name="CHECKOUT_DATE_OR_REVISION_SETTINGS"> 
-      <value> 
-        <option name="BRANCH" value=""/>  
-        <option name="DATE" value=""/>  
-        <option name="USE_BRANCH" value="false"/>  
-        <option name="USE_DATE" value="false"/> 
-      </value> 
-    </option>  
-    <option name="UPDATE_DATE_OR_REVISION_SETTINGS"> 
-      <value> 
-        <option name="BRANCH" value=""/>  
-        <option name="DATE" value=""/>  
-        <option name="USE_BRANCH" value="false"/>  
-        <option name="USE_DATE" value="false"/> 
-      </value> 
-    </option>  
-    <option name="SHOW_CHANGES_REVISION_SETTINGS"> 
-      <value> 
-        <option name="BRANCH" value=""/>  
-        <option name="DATE" value=""/>  
-        <option name="USE_BRANCH" value="false"/>  
-        <option name="USE_DATE" value="false"/> 
-      </value> 
-    </option>  
-    <option name="SHOW_OUTPUT" value="false"/>  
-    <option name="SHOW_FILE_HISTORY_AS_TREE" value="false"/>  
-    <option name="UPDATE_GROUP_BY_PACKAGES" value="false"/>  
-    <option name="ADD_WATCH_INDEX" value="0"/>  
-    <option name="REMOVE_WATCH_INDEX" value="0"/>  
-    <option name="UPDATE_KEYWORD_SUBSTITUTION"/>  
-    <option name="MAKE_NEW_FILES_READONLY" value="false"/>  
-    <option name="SHOW_CORRUPTED_PROJECT_FILES" value="0"/>  
-    <option name="TAG_AFTER_FILE_COMMIT" value="false"/>  
-    <option name="TAG_AFTER_FILE_COMMIT_NAME" value=""/>  
-    <option name="TAG_AFTER_PROJECT_COMMIT" value="false"/>  
-    <option name="TAG_AFTER_PROJECT_COMMIT_NAME" value=""/>  
-    <option name="PUT_FOCUS_INTO_COMMENT" value="false"/>  
-    <option name="SHOW_CHECKIN_OPTIONS" value="true"/>  
-    <option name="FORCE_NON_EMPTY_COMMENT" value="false"/>  
-    <option name="LAST_COMMIT_MESSAGE" value=""/>  
-    <option name="SAVE_LAST_COMMIT_MESSAGE" value="true"/>  
-    <option name="CHECKIN_DIALOG_SPLITTER_PROPORTION" value="0.8"/>  
-    <option name="OPTIMIZE_IMPORTS_BEFORE_PROJECT_COMMIT" value="false"/>  
-    <option name="OPTIMIZE_IMPORTS_BEFORE_FILE_COMMIT" value="false"/>  
-    <option name="REFORMAT_BEFORE_PROJECT_COMMIT" value="false"/>  
-    <option name="REFORMAT_BEFORE_FILE_COMMIT" value="false"/>  
-    <option name="FILE_HISTORY_DIALOG_COMMENTS_SPLITTER_PROPORTION" value="0.8"/>  
-    <option name="FILE_HISTORY_DIALOG_SPLITTER_PROPORTION" value="0.5"/> 
-  </component>  
-  <component name="CvsTabbedWindow"/>  
-  <component name="SvnConfiguration"> 
-    <option name="USER" value=""/>  
-    <option name="PASSWORD" value=""/>  
-    <option name="AUTO_ADD_FILES" value="0"/>  
-    <option name="AUTO_DEL_FILES" value="0"/> 
-  </component>  
-  <component name="PerforceConfiguration"> 
-    <option name="PORT" value="magic:1666"/>  
-    <option name="USER" value=""/>  
-    <option name="PASSWORD" value=""/>  
-    <option name="CLIENT" value=""/>  
-    <option name="TRACE" value="false"/>  
-    <option name="PERFORCE_STATUS" value="true"/>  
-    <option name="CHANGELIST_OPTION" value="false"/>  
-    <option name="SYSTEMROOT" value=""/>  
-    <option name="P4_EXECUTABLE" value="p4"/>  
-    <option name="SHOW_BRANCH_HISTORY" value="false"/>  
-    <option name="GENERATE_COMMENT" value="false"/>  
-    <option name="SYNC_OPTION" value="Sync"/>  
-    <option name="PUT_FOCUS_INTO_COMMENT" value="false"/>  
-    <option name="SHOW_CHECKIN_OPTIONS" value="true"/>  
-    <option name="FORCE_NON_EMPTY_COMMENT" value="true"/>  
-    <option name="LAST_COMMIT_MESSAGE" value=""/>  
-    <option name="SAVE_LAST_COMMIT_MESSAGE" value="true"/>  
-    <option name="CHECKIN_DIALOG_SPLITTER_PROPORTION" value="0.8"/>  
-    <option name="OPTIMIZE_IMPORTS_BEFORE_PROJECT_COMMIT" value="false"/>  
-    <option name="OPTIMIZE_IMPORTS_BEFORE_FILE_COMMIT" value="false"/>  
-    <option name="REFORMAT_BEFORE_PROJECT_COMMIT" value="false"/>  
-    <option name="REFORMAT_BEFORE_FILE_COMMIT" value="false"/>  
-    <option name="FILE_HISTORY_DIALOG_COMMENTS_SPLITTER_PROPORTION" value="0.8"/>  
-    <option name="FILE_HISTORY_DIALOG_SPLITTER_PROPORTION" value="0.5"/> 
-  </component> 
-</project>
--- a/dnet-pace-core/pom.xml
+++ b/dnet-pace-core/pom.xml
@ -6,7 +6,7 @@
 	<parent>
 		<groupId>eu.dnetlib</groupId>
 		<artifactId>dnet-dedup</artifactId>
-		<version>4.1.13-SNAPSHOT</version>
+		<version>4.1.7</version>
        <relativePath>../pom.xml</relativePath>
 	</parent>

@ -67,10 +67,7 @@
 			<artifactId>json-path</artifactId>
 		</dependency>

-		<dependency>
-			<groupId>com.ibm.icu</groupId>
-			<artifactId>icu4j</artifactId>
-		</dependency>
+

 	</dependencies>

--- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/BlacklistAwareClusteringCombiner.java
+++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/BlacklistAwareClusteringCombiner.java
@ -1,59 +1,59 @@
 package eu.dnetlib.pace.clustering;

-import com.google.common.collect.Maps;
-import eu.dnetlib.pace.config.Config;
-import eu.dnetlib.pace.model.Document;
-import eu.dnetlib.pace.model.Field;
-import eu.dnetlib.pace.model.FieldListImpl;
-import eu.dnetlib.pace.model.MapDocument;
-
 import java.util.Collection;
 import java.util.List;
 import java.util.Map;
 import java.util.Map.Entry;
-import java.util.regex.Pattern;
+import java.util.Set;
+
+import com.google.common.collect.Iterables;
+import com.google.common.collect.Lists;
+import com.google.common.collect.Maps;
+
+import eu.dnetlib.pace.config.Config;
+import eu.dnetlib.pace.model.Document;
+import eu.dnetlib.pace.model.Field;
+import eu.dnetlib.pace.model.FieldListImpl;
+import eu.dnetlib.pace.model.MapDocument;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;

 public class BlacklistAwareClusteringCombiner extends ClusteringCombiner {

-    public static Collection<String> filterAndCombine(final MapDocument a, final Config conf) {
-        Document filtered = filter(a, conf.blacklists());
-        return combine(filtered, conf);
-    }
+	private static final Log log = LogFactory.getLog(BlacklistAwareClusteringCombiner.class);

-    private static MapDocument filter(final MapDocument a, final Map<String, List<Pattern>> blacklists) {
-        if (blacklists == null || blacklists.isEmpty()) {
-            return a;
-        }
+	public static Collection<String> filterAndCombine(final MapDocument a, final Config conf) {

-        final Map<String, Field> filtered = Maps.newHashMap(a.getFieldMap());
+		final Document filtered = new BlacklistAwareClusteringCombiner().filter(a, conf.blacklists());
+		return combine(filtered, conf);
+	}

-        for (final Entry<String, List<Pattern>> e : blacklists.entrySet()) {
-            Field fields = a.getFieldMap().get(e.getKey());
-            if (fields != null) {
-                final FieldListImpl fl = new FieldListImpl();
+	private MapDocument filter(final MapDocument a, final Map<String, List<String>> blacklists) {
+		final Map<String, Field> filtered = Maps.newHashMap(a.getFieldMap());
+		if (blacklists != null) {
+			for (final Entry<String, Field> e : filtered.entrySet()) {

-                for (Field f : fields) {
-                    if (!isBlackListed(f.stringValue(), e.getValue())) {
-                        fl.add(f);
-                    }
-                }
-
-                filtered.put(e.getKey(), fl);
-            }
-        }
-
-        return new MapDocument(a.getIdentifier(), filtered);
-    }
-
-    private static boolean isBlackListed(String value, List<Pattern> blacklist) {
-        for (Pattern pattern : blacklist) {
-            if (pattern.matcher(value).matches()) {
-                return true;
-            }
-        }
-
-        return false;
-    }
+				final FieldListImpl fl = new FieldListImpl();
+				fl.addAll(Lists.newArrayList(Iterables.filter(e.getValue(), new FieldFilter(e.getKey(), blacklists))));
+				filtered.put(e.getKey(), fl);
+			}
+		}
+		return new MapDocument(a.getIdentifier(), filtered);
+	}

+	/**
+	 * Tries to match the value against the regex blacklist of the given field.
+	 * NOTE(review): no caller is visible in this class, filter() relies on FieldFilter instead; confirm it is still needed.
+	 * @param fieldName name of the field, used as key into {@code blacklists}
+	 * @param value the field value, which must match a regex entirely (see {@link String#matches})
+	 * @param blacklists regexes to test, grouped by field name
+	 * @return true if the value matches at least one regex, false otherwise
+	 */
+	protected boolean regexMatches(final String fieldName, final String value, final Map<String, Set<String>> blacklists) {
+		if (!blacklists.containsKey(fieldName)) return false;
+		for (final String pattern : blacklists.get(fieldName)) {
+			if (value.matches(pattern)) return true;
+		}
+		return false;
+	}
 }
-
--- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/ClusteringCombiner.java
+++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/ClusteringCombiner.java
@ -20,6 +20,10 @@ public class ClusteringCombiner {
 	private static String COLLAPSE_ON= "collapseOn";

 	public static Collection<String> combine(final Document a, final Config conf) {
+		return new ClusteringCombiner().doCombine(a, conf);
+	}
+
+	private Collection<String> doCombine(final Document a, final Config conf) {
 		final Collection<String> res = Sets.newLinkedHashSet();
 		for (final ClusteringDef cd : conf.clusterings()) {
 			for (final String fieldName : cd.getFields()) {
@ -47,7 +51,7 @@ public class ClusteringCombiner {
 		return res;
 	}

-	private static String getPrefix(ClusteringDef cd, String fieldName) {
+	private String getPrefix(ClusteringDef cd, String fieldName) {
 		return cd.getName()+ SEPARATOR +
 				cd.getParams().keySet()
 						.stream()
--- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/FieldFilter.java
+++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/FieldFilter.java
@ -0,0 +1,48 @@
+package eu.dnetlib.pace.clustering;
+
+import java.util.List;
+import java.util.Map;
+
+import com.google.common.base.Predicate;
+
+import eu.dnetlib.pace.model.Field;
+import org.apache.commons.lang3.StringUtils;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+
+/** Guava predicate accepting (apply returns true) only the field values that match no blacklist regex of their field. */
+public class FieldFilter implements Predicate<Field> {
+
+	private static final Log log = LogFactory.getLog(FieldFilter.class);
+
+	private final Map<String, List<String>> blacklists;
+	private final String fieldName;
+
+	public FieldFilter(final String fieldName, final Map<String, List<String>> blacklists) {
+		this.fieldName = fieldName;
+		this.blacklists = blacklists;
+	}
+
+	@Override
+	public boolean apply(final Field f) {
+		return !regexMatches(fieldName, f.stringValue(), blacklists);
+	}
+
+	/**
+	 * Tries to match the value against the regex blacklist of the given field; blank regexes are skipped.
+	 *
+	 * @param fieldName name of the field, used as key into {@code blacklists}
+	 * @param value the field value, which must match a regex entirely (see {@link String#matches})
+	 * @param blacklists regexes to test, grouped by field name
+	 * @return true if the value matches at least one non-blank regex, false otherwise
+	 */
+	protected boolean regexMatches(final String fieldName, final String value, final Map<String, List<String>> blacklists) {
+		if (blacklists.containsKey(fieldName)) {
+			for (final String regex : blacklists.get(fieldName)) {
+				if (StringUtils.isBlank(regex)) continue; // a blank entry must not mask the regexes listed after it
+				if (value.matches(regex)) return true;
+			}
+		}
+		return false;
+	}
+}
--- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/KeywordsClustering.java
+++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/KeywordsClustering.java
@ -41,7 +41,7 @@ public class KeywordsClustering extends AbstractClusteringFunction {
    public Collection<String> apply(final Config conf, List<Field> fields) {
        return fields.stream().filter(f -> !f.isEmpty())
                .map(Field::stringValue)
-                .map(this::cleanup)
+                .map(this::cleanup) //TODO can I add this to the AbstractClusteringFunction without overriding the method here?
                .map(this::normalize)
                .map(s -> filterAllStopWords(s))
                .map(s -> doApply(conf, s))
--- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/LastNameFirstInitial.java
+++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/LastNameFirstInitial.java
@ -1,77 +0,0 @@
-package eu.dnetlib.pace.clustering;
-
-import com.google.common.collect.Lists;
-import eu.dnetlib.pace.config.Config;
-import eu.dnetlib.pace.model.Field;
-import eu.dnetlib.pace.model.Person;
-import org.apache.commons.lang3.StringUtils;
-
-import java.util.*;
-import java.util.stream.Collectors;
-
-@ClusteringClass("lnfi")
-public class LastNameFirstInitial extends AbstractClusteringFunction{
-
-    private boolean DEFAULT_AGGRESSIVE = true;
-
-    public LastNameFirstInitial(final Map<String, Integer> params) {
-        super(params);
-    }
-
-    @Override
-    public Collection<String> apply(Config conf, List<Field> fields) {
-        return fields.stream().filter(f -> !f.isEmpty())
-                .map(Field::stringValue)
-                .map(this::normalize)
-                .map(s -> doApply(conf, s))
-                .map(c -> filterBlacklisted(c, ngramBlacklist))
-                .flatMap(c -> c.stream())
-                .filter(StringUtils::isNotBlank)
-                .collect(Collectors.toCollection(HashSet::new));
-    }
-
-    @Override
-    protected String normalize(final String s) {
-        return fixAliases(transliterate(nfd(unicodeNormalization(s))))
-                // do not compact the regexes in a single expression, would cause StackOverflowError in case of large input strings
-                .replaceAll("[^ \\w]+", "")
-                .replaceAll("(\\p{InCombiningDiacriticalMarks})+", "")
-                .replaceAll("(\\p{Punct})+", " ")
-                .replaceAll("(\\d)+", " ")
-                .replaceAll("(\\n)+", " ")
-                .trim();
-    }
-
-    @Override
-    protected Collection<String> doApply(final Config conf, final String s) {
-
-        final List<String> res = Lists.newArrayList();
-
-        final boolean aggressive = (Boolean) (getParams().containsKey("aggressive") ? getParams().get("aggressive") : DEFAULT_AGGRESSIVE);
-
-        Person p = new Person(s, aggressive);
-
-        if (p.isAccurate()) {
-            String lastName = p.getNormalisedSurname().toLowerCase();
-            String firstInitial = p.getNormalisedFirstName().toLowerCase().substring(0,1);
-
-            res.add(firstInitial.concat(lastName));
-        }
-        else {  // is not accurate, meaning it has no defined name and surname
-            List<String> fullname = Arrays.asList(p.getNormalisedFullname().split(" "));
-            if (fullname.size() == 1) {
-                res.add(p.getNormalisedFullname().toLowerCase());
-            }
-            else if (fullname.size() == 2) {
-                res.add(fullname.get(0).substring(0,1).concat(fullname.get(1)).toLowerCase());
-                res.add(fullname.get(1).substring(0,1).concat(fullname.get(0)).toLowerCase());
-            }
-            else {
-                res.add(fullname.get(0).substring(0,1).concat(fullname.get(fullname.size()-1)).toLowerCase());
-                res.add(fullname.get(fullname.size()-1).substring(0,1).concat(fullname.get(0)).toLowerCase());
-            }
-        }
-
-        return res;
-    }
-}
--- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/PersonHash.java
+++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/PersonHash.java
@ -9,7 +9,7 @@ import com.google.common.collect.Lists;
 import eu.dnetlib.pace.config.Config;
 import eu.dnetlib.pace.model.Person;

-@ClusteringClass("personHash")
+@ClusteringClass("personhash")
 public class PersonHash extends AbstractClusteringFunction {

 	private boolean DEFAULT_AGGRESSIVE = false;
--- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/common/AbstractPaceFunctions.java
+++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/common/AbstractPaceFunctions.java
@ -3,8 +3,8 @@ package eu.dnetlib.pace.common;
 import com.google.common.base.Joiner;
 import com.google.common.base.Splitter;
 import com.google.common.collect.Iterables;
+import com.google.common.collect.Lists;
 import com.google.common.collect.Sets;
-import com.ibm.icu.text.Transliterator;
 import eu.dnetlib.pace.clustering.NGramUtils;
 import eu.dnetlib.pace.model.Field;
 import eu.dnetlib.pace.model.FieldList;
@ -14,7 +14,6 @@ import org.apache.commons.lang3.StringUtils;

 import java.io.IOException;
 import java.io.StringWriter;
-import java.nio.charset.StandardCharsets;
 import java.text.Normalizer;
 import java.util.*;
 import java.util.regex.Matcher;
@ -32,7 +31,6 @@ public abstract class AbstractPaceFunctions {
    private static Map<String, String> cityMap = AbstractPaceFunctions.loadMapFromClasspath("/eu/dnetlib/pace/config/city_map.csv");

    //list of stopwords in different languages
-    protected static Set<String> stopwords_gr = loadFromClasspath("/eu/dnetlib/pace/config/stopwords_gr.txt");
    protected static Set<String> stopwords_en = loadFromClasspath("/eu/dnetlib/pace/config/stopwords_en.txt");
    protected static Set<String> stopwords_de = loadFromClasspath("/eu/dnetlib/pace/config/stopwords_de.txt");
    protected static Set<String> stopwords_es = loadFromClasspath("/eu/dnetlib/pace/config/stopwords_es.txt");
@ -40,15 +38,9 @@ public abstract class AbstractPaceFunctions {
    protected static Set<String> stopwords_it = loadFromClasspath("/eu/dnetlib/pace/config/stopwords_it.txt");
    protected static Set<String> stopwords_pt = loadFromClasspath("/eu/dnetlib/pace/config/stopwords_pt.txt");

-    //transliterator
-    protected static Transliterator transliterator = Transliterator.getInstance("Any-Eng");
-
    //blacklist of ngrams: to avoid generic keys
    protected static Set<String> ngramBlacklist = loadFromClasspath("/eu/dnetlib/pace/config/ngram_blacklist.txt");

-    //html regex for normalization
-    public final String HTML_REGEX = "<[^>]*>";
-
    private static final String alpha = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789 ";
    private static final String aliases_from = "⁰¹²³⁴⁵⁶⁷⁸⁹⁺⁻⁼⁽⁾ⁿ₀₁₂₃₄₅₆₇₈₉₊₋₌₍₎àáâäæãåāèéêëēėęəîïíīįìôöòóœøōõûüùúūßśšłžźżçćčñń";
    private static final String aliases_to = "0123456789+-=()n0123456789+-=()aaaaaaaaeeeeeeeeiiiiiioooooooouuuuussslzzzcccnn";
@ -67,14 +59,14 @@ public abstract class AbstractPaceFunctions {
    }

    protected String cleanup(final String s) {
-
-        final String s1 = s.replaceAll(HTML_REGEX, "");
-        final String s2 = unicodeNormalization(s1.toLowerCase());
-        final String s3 = nfd(s2);
-        final String s4 = fixXML(s3);
-        final String s5 = s4.replaceAll("([0-9]+)", " $1 ");
-        final String s6 = transliterate(s5);
-        final String s7 = fixAliases(s6);
+        final String s0 = unicodeNormalization(s.toLowerCase());
+        final String s1 = fixAliases(s0);
+        final String s2 = nfd(s1);
+        final String s3 = s2.replaceAll("&ndash;", " ");
+        final String s4 = s3.replaceAll("&amp;", " ");
+        final String s5 = s4.replaceAll("&quot;", " ");
+        final String s6 = s5.replaceAll("&minus;", " ");
+        final String s7 = s6.replaceAll("([0-9]+)", " $1 ");
        final String s8 = s7.replaceAll("[^\\p{ASCII}]", "");
        final String s9 = s8.replaceAll("[\\p{Punct}]", " ");
        final String s10 = s9.replaceAll("\\n", " ");
@ -83,14 +75,6 @@ public abstract class AbstractPaceFunctions {
        return s12;
    }

-    protected String fixXML(final String a){
-
-        return a.replaceAll("&ndash;", " ")
-                .replaceAll("&amp;", " ")
-                .replaceAll("&quot;", " ")
-                .replaceAll("&minus;", " ");
-    }
-
    protected boolean checkNumbers(final String a, final String b) {
        final String numbersA = getNumbers(a);
        final String numbersB = getNumbers(b);
@ -128,31 +112,19 @@ public abstract class AbstractPaceFunctions {

    protected static String fixAliases(final String s) {
        final StringBuilder sb = new StringBuilder();
-
-        s.chars().forEach(ch -> {
+        for (final char ch : s.toCharArray()) { // primitive iteration: no per-character boxing
            final int i = StringUtils.indexOf(aliases_from, ch);
-            sb.append(i >= 0 ? aliases_to.charAt(i) : (char)ch);
-        });
-
+            sb.append(i >= 0 ? aliases_to.charAt(i) : ch);
+        }
        return sb.toString();
    }

-    protected static String transliterate(final String s) {
-        try {
-            return transliterator.transliterate(s);
-        }
-        catch(Exception e) {
-            return s;
-        }
-    }
-
    protected String removeSymbols(final String s) {
        final StringBuilder sb = new StringBuilder();

-        s.chars().forEach(ch -> {
-            sb.append(StringUtils.contains(alpha, ch) ? (char)ch : ' ');
-        });
-
+        for (final char ch : s.toCharArray()) { // primitive iteration: no per-character boxing
+            sb.append(StringUtils.contains(alpha, ch) ? ch : ' '); // char literal keeps the ternary a primitive char
+        }
        return sb.toString().replaceAll("\\s+", " ");
    }

@ -165,7 +137,7 @@ public abstract class AbstractPaceFunctions {
    }

    protected String normalize(final String s) {
-        return fixAliases(transliterate(nfd(unicodeNormalization(s))))
+        return nfd(unicodeNormalization(s))
                .toLowerCase()
                // do not compact the regexes in a single expression, would cause StackOverflowError in case of large input strings
                .replaceAll("[^ \\w]+", "")
@ -180,11 +152,6 @@ public abstract class AbstractPaceFunctions {
        return Normalizer.normalize(s, Normalizer.Form.NFD);
    }

-    public String utf8(final String s) {
-        byte[] bytes = s.getBytes(StandardCharsets.UTF_8);
-        return new String(bytes, StandardCharsets.UTF_8);
-    }
-
    public String unicodeNormalization(final String s) {

        Matcher m = hexUnicodePattern.matcher(s);
@ -218,7 +185,6 @@ public abstract class AbstractPaceFunctions {
        s = filterStopWords(s, stopwords_fr);
        s = filterStopWords(s, stopwords_pt);
        s = filterStopWords(s, stopwords_es);
-        s = filterStopWords(s, stopwords_gr);

        return s;
    }
@ -234,13 +200,10 @@ public abstract class AbstractPaceFunctions {
    }

    public static Set<String> loadFromClasspath(final String classpath) {
-
-        Transliterator transliterator = Transliterator.getInstance("Any-Eng");
-
        final Set<String> h = Sets.newHashSet();
        try {
-            for (final String s : IOUtils.readLines(NGramUtils.class.getResourceAsStream(classpath), StandardCharsets.UTF_8)) {
-                h.add(fixAliases(transliterator.transliterate(s))); //transliteration of the stopwords
+            for (final String s : IOUtils.readLines(NGramUtils.class.getResourceAsStream(classpath))) {
+                h.add(s);
            }
        } catch (final Throwable e) {
            return Sets.newHashSet();
@ -249,17 +212,14 @@ public abstract class AbstractPaceFunctions {
    }

    public static Map<String, String> loadMapFromClasspath(final String classpath) {
-
-        Transliterator transliterator = Transliterator.getInstance("Any-Eng");
-
        final Map<String, String> m = new HashMap<>();
        try {
-            for (final String s : IOUtils.readLines(AbstractPaceFunctions.class.getResourceAsStream(classpath), StandardCharsets.UTF_8)) {
+            for (final String s : IOUtils.readLines(AbstractPaceFunctions.class.getResourceAsStream(classpath))) {
                //string is like this: code;word1;word2;word3
                String[] line = s.split(";");
                String value = line[0];
                for (int i = 1; i < line.length; i++) {
-                    m.put(fixAliases(transliterator.transliterate(line[i].toLowerCase())), value);
+                    m.put(line[i].toLowerCase(), value);
                }
            }
        } catch (final Throwable e) {
@ -347,11 +307,10 @@ public abstract class AbstractPaceFunctions {
    public static <T> String readFromClasspath(final String filename, final Class<T> clazz) {
        final StringWriter sw = new StringWriter();
        try {
-            IOUtils.copy(clazz.getResourceAsStream(filename), sw, StandardCharsets.UTF_8);
+            IOUtils.copy(clazz.getResourceAsStream(filename), sw, java.nio.charset.StandardCharsets.UTF_8); // explicit charset instead of the platform default; fully qualified because this diff drops the StandardCharsets import
            return sw.toString();
        } catch (final IOException e) {
            throw new RuntimeException("cannot load resource from classpath: " + filename);
        }
    }
-
 }
--- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/config/Config.java
+++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/config/Config.java
@ -2,7 +2,6 @@ package eu.dnetlib.pace.config;

 import java.util.List;
 import java.util.Map;
-import java.util.regex.Pattern;

 import eu.dnetlib.pace.model.ClusteringDef;
 import eu.dnetlib.pace.model.FieldDef;
@ -48,7 +47,7 @@ public interface Config {
 	 *
 	 * @return the map
 	 */
-	public Map<String, List<Pattern>> blacklists();
+	public Map<String, List<String>> blacklists();


 	/**
--- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/config/DedupConfig.java
+++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/config/DedupConfig.java
@ -1,6 +1,5 @@
 package eu.dnetlib.pace.config;

-import com.fasterxml.jackson.annotation.JsonIgnore;
 import com.fasterxml.jackson.databind.ObjectMapper;
 import com.google.common.collect.Maps;
 import eu.dnetlib.pace.model.ClusteringDef;
@ -8,19 +7,15 @@ import eu.dnetlib.pace.model.FieldDef;
 import eu.dnetlib.pace.util.PaceException;
 import org.antlr.stringtemplate.StringTemplate;
 import org.apache.commons.io.IOUtils;
-import org.apache.commons.lang3.StringUtils;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;

 import java.io.IOException;
 import java.io.Serializable;
-import java.nio.charset.StandardCharsets;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
 import java.util.Map.Entry;
-import java.util.regex.Pattern;
-import java.util.stream.Collectors;


 import eu.dnetlib.pace.tree.support.TreeNodeDef;
@ -36,9 +31,6 @@ public class DedupConfig implements Config, Serializable {

 	private WfConfig wf;

-	@JsonIgnore
-	private Map<String, List<Pattern>> blacklists;
-
 	private static Map<String, String> defaults = Maps.newHashMap();

 	static {
@ -65,12 +57,6 @@ public class DedupConfig implements Config, Serializable {
 			config = new ObjectMapper().readValue(json, DedupConfig.class);
 			config.getPace().initModel();
 			config.getPace().initTranslationMap();
-
-			config.blacklists = config.getPace().getBlacklists().entrySet()
-					.stream()
-					.collect(Collectors.toMap(e -> e.getKey(),
-							e ->e.getValue().stream().filter(s -> !StringUtils.isBlank(s)).map(Pattern::compile).collect(Collectors.toList()) ));
-
 			return config;
 		} catch (IOException e) {
 			throw new PaceException("Error in parsing configuration json", e);
@ -102,7 +88,7 @@ public class DedupConfig implements Config, Serializable {
 	}

 	private String readFromClasspath(final String resource) throws IOException {
-		return IOUtils.toString(getClass().getResource(resource), StandardCharsets.UTF_8);
+		return IOUtils.toString(getClass().getResource(resource), java.nio.charset.StandardCharsets.UTF_8); // explicit charset instead of the platform default; fully qualified because this diff drops the StandardCharsets import
 	}

 	public PaceConfig getPace() {
@ -151,8 +137,8 @@ public class DedupConfig implements Config, Serializable {
 	}

 	@Override
-	public Map<String, List<Pattern>> blacklists() {
-		return blacklists;
+	public Map<String, List<String>> blacklists() {
+		return getPace().getBlacklists();
 	}

 	@Override
--- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/config/PaceConfig.java
+++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/config/PaceConfig.java
@ -3,7 +3,6 @@ package eu.dnetlib.pace.config;

 import com.fasterxml.jackson.annotation.JsonIgnore;
 import com.google.common.collect.Maps;
-import com.ibm.icu.text.Transliterator;
 import eu.dnetlib.pace.common.AbstractPaceFunctions;
 import eu.dnetlib.pace.model.ClusteringDef;
 import eu.dnetlib.pace.model.FieldDef;
@ -44,12 +43,10 @@ public class PaceConfig extends AbstractPaceFunctions implements Serializable {

 	public void initTranslationMap(){
 		translationMap = Maps.newHashMap();
-
-		Transliterator transliterator = Transliterator.getInstance("Any-Eng");
 		for (String key : synonyms.keySet()) {
 			for (String term : synonyms.get(key)){
 				translationMap.put(
-						fixAliases(transliterator.transliterate(term.toLowerCase())),
+						normalize(term.toLowerCase()),
 				key);
 			}
 		}
--- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/config/Type.java
+++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/config/Type.java
@ -1,5 +1,5 @@
 package eu.dnetlib.pace.config;

 public enum Type {
-	String, Int, List, JSON, URL, StringConcat, DoubleArray
+	String, Int, List, JSON, URL
 }
--- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/FieldValue.java
+++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/FieldValue.java
@ -20,6 +20,4 @@ public interface FieldValue extends Field {
 	 */
 	public void setValue(final Object value);

-	public double[] doubleArrayValue();
-
 }
--- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/FieldValueImpl.java
+++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/FieldValueImpl.java
@ -58,10 +58,8 @@ public class FieldValueImpl extends AbstractField implements FieldValue {
 					throw new RuntimeException(value.toString());
 				}
 			case URL:
-				String str = value.toString();
-				return StringUtils.isBlank(str) || !isValidURL(str);
-			case DoubleArray:
-				return doubleArrayValue().length==0;
+			String str = value.toString();
+			return StringUtils.isBlank(str) || !isValidURL(str);
 		default:
 			return true;
 		}
@ -118,10 +116,6 @@ public class FieldValueImpl extends AbstractField implements FieldValue {
 		// }
 	}

-	public double[] doubleArrayValue() {
-		return (double[])getValue();
-	}
-
 	/*
 	 * (non-Javadoc)
 	 * 
--- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/Person.java
+++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/Person.java
@ -43,7 +43,7 @@ public class Person {
 			// s = s.replaceAll("[\\W&&[^,-]]", "");
 		}

-		if (s.contains(",")) {	//if the name contains a comma it is easy derivable the name and the surname
+		if (s.contains(",")) {
 			final String[] arr = s.split(",");
 			if (arr.length == 1) {
 				fullname = splitTerms(arr[0]);
--- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/tree/AuthorsMatch.java
+++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/tree/AuthorsMatch.java
@ -1,154 +0,0 @@
-package eu.dnetlib.pace.tree;
-
-import com.google.common.collect.Iterables;
-import eu.dnetlib.pace.config.Config;
-import eu.dnetlib.pace.model.Field;
-import eu.dnetlib.pace.model.FieldList;
-import eu.dnetlib.pace.model.Person;
-import eu.dnetlib.pace.tree.support.AbstractComparator;
-import eu.dnetlib.pace.tree.support.ComparatorClass;
-import com.wcohen.ss.AbstractStringDistance;
-
-import java.util.Comparator;
-import java.util.List;
-import java.util.Map;
-import java.util.function.Function;
-import java.util.stream.Collectors;
-import java.util.stream.Stream;
-
-@ComparatorClass("authorsMatch")
-public class AuthorsMatch extends AbstractComparator {
-
-    Map<String, String> params;
-
-    private double SURNAME_THRESHOLD;
-    private double NAME_THRESHOLD;
-    private double FULLNAME_THRESHOLD;
-    private String MODE; //full or surname
-    private int SIZE_THRESHOLD;
-    private String TYPE; //count or percentage
-    private int common;
-
-    public AuthorsMatch(Map<String, String> params){
-        super(params, new com.wcohen.ss.JaroWinkler());
-        this.params = params;
-
-        MODE = params.getOrDefault("mode", "full");
-        SURNAME_THRESHOLD = Double.parseDouble(params.getOrDefault("surname_th", "0.95"));
-        NAME_THRESHOLD = Double.parseDouble(params.getOrDefault("name_th", "0.95"));
-        FULLNAME_THRESHOLD = Double.parseDouble(params.getOrDefault("fullname_th", "0.9"));
-        SIZE_THRESHOLD = Integer.parseInt(params.getOrDefault("size_th", "20"));
-        TYPE = params.getOrDefault("type", "percentage");
-        common = 0;
-    }
-
-    protected AuthorsMatch(double w, AbstractStringDistance ssalgo) {
-        super(w, ssalgo);
-    }
-
-    @Override
-    public double compare(final Field a, final Field b, final Config conf) {
-
-        if (a.isEmpty() || b.isEmpty())
-            return -1;
-
-        if (((FieldList) a).size() > SIZE_THRESHOLD || ((FieldList) b).size() > SIZE_THRESHOLD)
-            return 1.0;
-
-        List<Person> aList = ((FieldList) a).stringList().stream().map(author -> new Person(author, false)).collect(Collectors.toList());
-        List<Person> bList = ((FieldList) b).stringList().stream().map(author -> new Person(author, false)).collect(Collectors.toList());
-
-        common = 0;
-        //compare each element of List1 with each element of List2
-        for (Person p1 : aList)
-
-            for (Person p2 : bList) {
-
-                //both persons are inaccurate
-                if (!p1.isAccurate() && !p2.isAccurate()) {
-                    //compare just normalized fullnames
-                    String fullname1 = normalization(p1.getNormalisedFullname().isEmpty()? p1.getOriginal() : p1.getNormalisedFullname());
-                    String fullname2 = normalization(p2.getNormalisedFullname().isEmpty()? p2.getOriginal() : p2.getNormalisedFullname());
-
-                    if (ssalgo.score(fullname1, fullname2) > FULLNAME_THRESHOLD) {
-                        common += 1;
-                        break;
-                    }
-                }
-
-                //one person is inaccurate
-                if (p1.isAccurate() ^ p2.isAccurate()) {
-                    //prepare data
-                    //data for the accurate person
-                    String name = normalization(p1.isAccurate()? p1.getNormalisedFirstName() : p2.getNormalisedFirstName());
-                    String surname = normalization(p1.isAccurate()? p1.getNormalisedSurname() : p2.getNormalisedSurname());
-
-                    //data for the inaccurate person
-                    String fullname = normalization(
-                            p1.isAccurate() ? ((p2.getNormalisedFullname().isEmpty()) ? p2.getOriginal() : p2.getNormalisedFullname()) : (p1.getNormalisedFullname().isEmpty() ? p1.getOriginal() : p1.getNormalisedFullname())
-                    );
-
-                    if (fullname.contains(surname)) {
-                        if (MODE.equals("full")) {
-                            if (fullname.contains(name)) {
-                                common += 1;
-                                break;
-                            }
-                        }
-                        else { //MODE equals "surname"
-                            common += 1;
-                            break;
-                        }
-                    }
-                }
-
-                //both persons are accurate
-                if (p1.isAccurate() && p2.isAccurate()) {
-
-                    if (compareSurname(p1, p2)) {
-                        if (MODE.equals("full")) {
-                            if(compareFirstname(p1, p2)) {
-                                common += 1;
-                                break;
-                            }
-                        }
-                        else { //MODE equals "surname"
-                            common += 1;
-                            break;
-                        }
-                    }
-
-                }
-
-            }
-
-        //normalization factor to compute the score
-        int normFactor = aList.size() == bList.size() ? aList.size() : (aList.size() + bList.size() - common);
-
-        if(TYPE.equals("percentage")) {
-            return (double) common / normFactor;
-        }
-        else {
-            return (double) common;
-        }
-    }
-
-    public boolean compareSurname(Person p1, Person p2) {
-        return ssalgo.score(normalization(p1.getNormalisedSurname()), normalization(p2.getNormalisedSurname())) > SURNAME_THRESHOLD;
-    }
-
-    public boolean compareFirstname(Person p1, Person p2) {
-
-        if(p1.getNormalisedFirstName().length()<=2 || p2.getNormalisedFirstName().length()<=2) {
-            if (firstLC(p1.getNormalisedFirstName()).equals(firstLC(p2.getNormalisedFirstName())))
-                return true;
-        }
-
-        return ssalgo.score(normalization(p1.getNormalisedFirstName()), normalization(p2.getNormalisedFirstName())) > NAME_THRESHOLD;
-    }
-
-    public String normalization(String s) {
-        return normalize(utf8(cleanup(s)));
-    }
-
-}
--- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/tree/ContainsMatch.java
+++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/tree/ContainsMatch.java
@ -0,0 +1,72 @@
+package eu.dnetlib.pace.tree;
+
+import eu.dnetlib.pace.config.Config;
+import eu.dnetlib.pace.tree.support.AbstractComparator;
+import eu.dnetlib.pace.tree.support.ComparatorClass;
+
+import java.util.Map;
+
+/**
+ * Comparator that checks whether the two compared strings contain a configured substring.
+ *
+ * The substring is read from the "string" parameter, the way the two checks are combined
+ * (AND, OR, XOR) from the "bool" parameter; "caseSensitive" is optional and defaults to false.
+ *
+ * @author miconis
+ * */
+@ComparatorClass("containsMatch")
+public class ContainsMatch extends AbstractComparator {
+
+    private Map<String, String> params;
+
+    public ContainsMatch(Map<String, String> params) {
+        super(params);
+        this.params = params;
+    }
+
+    /**
+     * Returns 1.0 when the containment checks on a and b, combined with the configured
+     * operator, hold; 0.0 otherwise, including when the operator is missing or unknown
+     * or when no substring is configured.
+     */
+    @Override
+    public double distance(final String a, final String b, final Config conf) {
+
+        //read parameters
+        boolean caseSensitive = Boolean.parseBoolean(params.getOrDefault("caseSensitive", "false"));
+        String string = params.get("string");
+        String agg = params.get("bool");
+
+        //an incomplete configuration cannot match: same outcome as an unknown operator
+        if (string == null || agg == null) {
+            return 0.0;
+        }
+
+        String ca = a;
+        String cb = b;
+        if (!caseSensitive) {
+            ca = a.toLowerCase();
+            cb = b.toLowerCase();
+            //fold the searched string as well, otherwise one with uppercase letters never matches
+            string = string.toLowerCase();
+        }
+
+        switch(agg) {
+            case "AND":
+                if(ca.contains(string) && cb.contains(string))
+                    return 1.0;
+                break;
+            case "OR":
+                if(ca.contains(string) || cb.contains(string))
+                    return 1.0;
+                break;
+            case "XOR":
+                if(ca.contains(string) ^ cb.contains(string))
+                    return 1.0;
+                break;
+            default:
+                return 0.0;
+        }
+        return 0.0;
+    }
+}
--- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/tree/CosineSimilarity.java
+++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/tree/CosineSimilarity.java
@ -1,53 +0,0 @@
-package eu.dnetlib.pace.tree;
-
-import eu.dnetlib.pace.config.Config;
-import eu.dnetlib.pace.model.Field;
-import eu.dnetlib.pace.model.FieldList;
-import eu.dnetlib.pace.model.FieldValueImpl;
-import eu.dnetlib.pace.model.Person;
-import eu.dnetlib.pace.tree.support.AbstractComparator;
-import eu.dnetlib.pace.tree.support.ComparatorClass;
-
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-import java.util.stream.Collectors;
-
-@ComparatorClass("cosineSimilarity")
-public class CosineSimilarity extends AbstractComparator {
-
-    Map<String, String> params;
-
-    public CosineSimilarity(Map<String,String> params) {
-        super(params);
-    }
-
-    @Override
-    public double compare(final Field a, final Field b, final Config conf) {
-
-        if (a.isEmpty() || b.isEmpty())
-            return -1;
-
-        double[] aVector = ((FieldValueImpl) a).doubleArrayValue();
-        double[] bVector = ((FieldValueImpl) b).doubleArrayValue();
-
-        return cosineSimilarity(aVector, bVector);
-    }
-
-    double cosineSimilarity(double[] a, double[] b) {
-        double dotProduct = 0;
-        double normASum = 0;
-        double normBSum = 0;
-
-        for(int i = 0; i < a.length; i ++) {
-            dotProduct += a[i] * b[i];
-            normASum += a[i] * a[i];
-            normBSum += b[i] * b[i];
-        }
-
-        double eucledianDist = Math.sqrt(normASum) * Math.sqrt(normBSum);
-        return dotProduct / eucledianDist;
-    }
-
-
-}
--- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/tree/DomainExactMatch.java
+++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/tree/DomainExactMatch.java
@ -16,7 +16,6 @@ public class DomainExactMatch extends ExactMatchIgnoreCase {

    @Override
    protected String getValue(final Field f) {
-
        try {
            return asUrl(super.getValue(f)).getHost();
        } catch (MalformedURLException e) {
--- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/tree/InstanceTypeMatch.java
+++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/tree/InstanceTypeMatch.java
@ -1,84 +0,0 @@
-package eu.dnetlib.pace.tree;
-
-import com.google.common.collect.Sets;
-import eu.dnetlib.pace.config.Config;
-import eu.dnetlib.pace.model.Field;
-import eu.dnetlib.pace.model.FieldList;
-import eu.dnetlib.pace.tree.support.AbstractComparator;
-import eu.dnetlib.pace.tree.support.ComparatorClass;
-
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-import java.util.Set;
-import java.util.stream.Collectors;
-
-@ComparatorClass("instanceTypeMatch")
-public class InstanceTypeMatch extends AbstractComparator {
-
-    final Map<String, String> translationMap = new HashMap<>();
-
-    public InstanceTypeMatch(Map<String, String> params){
-        super(params);
-
-        //jolly types
-        translationMap.put("Conference object", "*");
-        translationMap.put("Other literature type", "*");
-        translationMap.put("Unknown", "*");
-
-        //article types
-        translationMap.put("Article", "Article");
-        translationMap.put("Data Paper", "Article");
-        translationMap.put("Software Paper", "Article");
-        translationMap.put("Preprint", "Article");
-
-        //thesis types
-        translationMap.put("Thesis", "Thesis");
-        translationMap.put("Master thesis", "Thesis");
-        translationMap.put("Bachelor thesis", "Thesis");
-        translationMap.put("Doctoral thesis", "Thesis");
-    }
-
-
-    @Override
-    public double compare(final Field a, final Field b, final Config conf) {
-
-        if (a == null || b == null) {
-            return -1;
-        }
-
-        final List<String> sa = ((FieldList) a).stringList();
-        final List<String> sb = ((FieldList) b).stringList();
-
-        if (sa.isEmpty() || sb.isEmpty()) {
-            return -1;
-        }
-
-        final Set<String> ca = sa.stream().map(this::translate).collect(Collectors.toSet());
-        final Set<String> cb = sb.stream().map(this::translate).collect(Collectors.toSet());
-
-        //if at least one is a jolly type, it must produce a match
-        if (ca.contains("*") || cb.contains("*"))
-            return 1.0;
-
-        int incommon = Sets.intersection(ca, cb).size();
-
-        //if at least one is in common, it must produce a match
-        return incommon >= 1 ? 1 : 0;
-    }
-
-    public String translate(String term){
-        return translationMap.getOrDefault(term, term);
-    }
-
-    @Override
-    public double getWeight() {
-        return super.weight;
-    }
-
-    @Override
-    protected double normalize(final double d) {
-        return d;
-    }
-
-}
--- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/tree/JaroWinklerNormalizedName.java
+++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/tree/JaroWinklerNormalizedName.java
@ -50,9 +50,6 @@ public class JaroWinklerNormalizedName extends AbstractComparator {
        cb = removeKeywords(cb, keywords2);
        cb = removeKeywords(cb, cities2);

-        ca = ca.replaceAll("[ ]{2,}", " ");
-        cb = cb.replaceAll("[ ]{2,}", " ");
-
        if (ca.isEmpty() && cb.isEmpty())
            return 1.0;
        else
--- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/tree/JsonListMatch.java
+++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/tree/JsonListMatch.java
@ -21,13 +21,9 @@ public class JsonListMatch extends AbstractComparator {
    private static final Log log = LogFactory.getLog(JsonListMatch.class);
    private Map<String, String> params;

-    private String MODE; //"percentage" or "count"
-
    public JsonListMatch(final Map<String, String> params) {
        super(params);
        this.params = params;
-
-        MODE = params.getOrDefault("mode", "percentage");
    }

    @Override
@ -50,10 +46,7 @@ public class JsonListMatch extends AbstractComparator {
            return 0.0;
        }

-        if (MODE.equals("percentage"))
-            return (double)incommon / (incommon + simDiff);
-        else
-            return incommon;
+        return (double)incommon / (incommon + simDiff);

    }

--- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/tree/ListContainsMatch.java
+++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/tree/ListContainsMatch.java
@ -1,74 +0,0 @@
-package eu.dnetlib.pace.tree;
-
-import com.google.common.collect.Sets;
-import eu.dnetlib.pace.config.Config;
-import eu.dnetlib.pace.model.Field;
-import eu.dnetlib.pace.model.FieldList;
-import eu.dnetlib.pace.tree.support.AbstractComparator;
-import eu.dnetlib.pace.tree.support.ComparatorClass;
-
-import java.util.List;
-import java.util.Map;
-import java.util.Set;
-import java.util.stream.Collectors;
-
-/**
- * The Class Contains match
- *
- * @author miconis
- * */
-@ComparatorClass("listContainsMatch")
-public class ListContainsMatch extends AbstractComparator {
-
-    private Map<String, String> params;
-    private boolean CASE_SENSITIVE;
-    private String STRING;
-    private String AGGREGATOR;
-
-    public ListContainsMatch(Map<String, String> params) {
-        super(params);
-        this.params = params;
-
-        //read parameters
-        CASE_SENSITIVE = Boolean.parseBoolean(params.getOrDefault("caseSensitive", "false"));
-        STRING = params.get("string");
-        AGGREGATOR = params.get("bool");
-    }
-
-    @Override
-    public double compare(final Field a, final Field b, final Config conf) {
-
-        List<String> sa = ((FieldList) a).stringList();
-        List<String> sb = ((FieldList) b).stringList();
-
-        if (sa.isEmpty() || sb.isEmpty()) {
-            return -1;
-        }
-
-        if (!CASE_SENSITIVE) {
-            sa = sa.stream().map(String::toLowerCase).collect(Collectors.toList());
-            sb = sb.stream().map(String::toLowerCase).collect(Collectors.toList());
-            STRING = STRING.toLowerCase();
-        }
-
-        switch(AGGREGATOR) {
-            case "AND":
-                if(sa.contains(STRING) && sb.contains(STRING))
-                    return 1.0;
-                break;
-            case "OR":
-                if(sa.contains(STRING) || sb.contains(STRING))
-                    return 1.0;
-                break;
-            case "XOR":
-                if(sa.contains(STRING) ^ sb.contains(STRING))
-                    return 1.0;
-                break;
-            default:
-                return 0.0;
-        }
-        return 0.0;
-
-    }
-}
-
--- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/tree/NumbersComparator.java
+++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/tree/NumbersComparator.java
@ -1,34 +0,0 @@
-package eu.dnetlib.pace.tree;
-
-import eu.dnetlib.pace.config.Config;
-import eu.dnetlib.pace.tree.support.AbstractComparator;
-import eu.dnetlib.pace.tree.support.ComparatorClass;
-
-import java.util.Map;
-
-@ComparatorClass("numbersComparator")
-public class NumbersComparator extends AbstractComparator {
-
-    Map<String, String> params;
-
-    public NumbersComparator(Map<String, String> params) {
-        super(params);
-        this.params = params;
-    }
-
-    @Override
-    public double distance(String a, String b, Config conf) {
-
-        //extracts numbers from the field
-        String numbers1 = getNumbers(nfd(a));
-        String numbers2 = getNumbers(nfd(b));
-
-        if (numbers1.isEmpty() || numbers2.isEmpty())
-            return -1.0;
-
-        int n1 = Integer.parseInt(numbers1);
-        int n2 = Integer.parseInt(numbers2);
-
-        return Math.abs(n1 - n2);
-    }
-}
--- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/tree/StringContainsMatch.java
+++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/tree/StringContainsMatch.java
@ -1,66 +0,0 @@
-package eu.dnetlib.pace.tree;
-
-import eu.dnetlib.pace.config.Config;
-import eu.dnetlib.pace.tree.support.AbstractComparator;
-import eu.dnetlib.pace.tree.support.ComparatorClass;
-
-import java.util.Map;
-
-/**
- * The Class Contains match
- *
- * @author miconis
- * */
-@ComparatorClass("stringContainsMatch")
-public class StringContainsMatch extends AbstractComparator {
-
-    private Map<String, String> params;
-
-    private boolean CASE_SENSITIVE;
-    private String STRING;
-    private String AGGREGATOR;
-
-    public StringContainsMatch(Map<String, String> params) {
-        super(params);
-        this.params = params;
-
-        //read parameters
-        CASE_SENSITIVE = Boolean.parseBoolean(params.getOrDefault("caseSensitive", "false"));
-        STRING = params.get("string");
-        AGGREGATOR = params.get("aggregator");
-
-    }
-
-    @Override
-    public double distance(final String a, final String b, final Config conf) {
-
-        String ca = a;
-        String cb = b;
-        if (!CASE_SENSITIVE) {
-            ca = a.toLowerCase();
-            cb = b.toLowerCase();
-            STRING = STRING.toLowerCase();
-        }
-
-        if (AGGREGATOR != null) {
-            switch (AGGREGATOR) {
-                case "AND":
-                    if (ca.contains(STRING) && cb.contains(STRING))
-                        return 1.0;
-                    break;
-                case "OR":
-                    if (ca.contains(STRING) || cb.contains(STRING))
-                        return 1.0;
-                    break;
-                case "XOR":
-                    if (ca.contains(STRING) ^ cb.contains(STRING))
-                        return 1.0;
-                    break;
-                default:
-                    return 0.0;
-            }
-        }
-
-        return 0.0;
-    }
-}
--- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/tree/StringListMatch.java
+++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/tree/StringListMatch.java
@ -19,13 +19,9 @@ public class StringListMatch extends AbstractComparator {
    private static final Log log = LogFactory.getLog(StringListMatch.class);
    private Map<String, String> params;

-    final private String TYPE; //percentage or count
-
    public StringListMatch(final Map<String, String> params) {
        super(params);
        this.params = params;
-
-        TYPE = params.getOrDefault("type", "percentage");
    }

    @Override
@ -35,7 +31,7 @@ public class StringListMatch extends AbstractComparator {
        final Set<String> pb = new HashSet<>(((FieldList) b).stringList());

        if (pa.isEmpty() || pb.isEmpty()) {
-            return -1;  //return undefined if one of the two lists is empty
+            return -1;  //return undefined if one of the two lists of pids is empty
        }

        int incommon = Sets.intersection(pa, pb).size();
@ -45,10 +41,7 @@ public class StringListMatch extends AbstractComparator {
            return 0.0;
        }

-        if(TYPE.equals("percentage"))
-            return (double)incommon / (incommon + simDiff);
-        else
-            return incommon;
+        return (double)incommon / (incommon + simDiff);

    }
 }
--- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/tree/support/TreeNodeDef.java
+++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/tree/support/TreeNodeDef.java
@ -1,6 +1,5 @@
 package eu.dnetlib.pace.tree.support;

-import com.fasterxml.jackson.core.JsonProcessingException;
 import com.fasterxml.jackson.databind.ObjectMapper;
 import eu.dnetlib.pace.config.Config;
 import eu.dnetlib.pace.config.PaceConfig;
@ -10,7 +9,6 @@ import org.apache.commons.lang3.StringUtils;

 import java.io.IOException;
 import java.io.Serializable;
-import java.io.StringWriter;
 import java.util.List;

 public class TreeNodeDef implements Serializable {
@ -59,9 +57,8 @@ public class TreeNodeDef implements Serializable {
                double result2 = comparator(fieldConf).compare(doc1.getFieldMap().get(crossField), doc2.getFieldMap().get(fieldConf.getField()), conf);
                result = Math.max(result1,result2);
            }
-            else {
+            else
                result = comparator(fieldConf).compare(doc1.getFieldMap().get(fieldConf.getField()), doc2.getFieldMap().get(fieldConf.getField()), conf);
-            }

            stats.addFieldStats(
                    fieldConf.getComparator() + " on " + fieldConf.getField() + " " + fields.indexOf(fieldConf),
--- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/util/BlockProcessorForTesting.java
+++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/util/BlockProcessorForTesting.java
@ -1,248 +0,0 @@
-package eu.dnetlib.pace.util;
-
-import com.google.common.collect.Lists;
-import eu.dnetlib.pace.clustering.NGramUtils;
-import eu.dnetlib.pace.config.DedupConfig;
-import eu.dnetlib.pace.config.WfConfig;
-import eu.dnetlib.pace.model.Field;
-import eu.dnetlib.pace.model.MapDocument;
-import eu.dnetlib.pace.model.MapDocumentComparator;
-import eu.dnetlib.pace.tree.*;
-import eu.dnetlib.pace.tree.support.TreeProcessor;
-import org.apache.commons.lang3.StringUtils;
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-
-import java.util.*;
-
-public class BlockProcessorForTesting {
-
-        public static final List<String> accumulators= new ArrayList<>();
-
-        private static final Log log = LogFactory.getLog(eu.dnetlib.pace.util.BlockProcessorForTesting.class);
-
-        private DedupConfig dedupConf;
-
-        public static void constructAccumulator( final DedupConfig dedupConf) {
-            accumulators.add(String.format("%s::%s",dedupConf.getWf().getEntityType(), "records per hash key = 1"));
-            accumulators.add(String.format("%s::%s",dedupConf.getWf().getEntityType(), "missing " + dedupConf.getWf().getOrderField()));
-            accumulators.add(String.format("%s::%s",dedupConf.getWf().getEntityType(), String.format("Skipped records for count(%s) >= %s", dedupConf.getWf().getOrderField(), dedupConf.getWf().getGroupMaxSize())));
-            accumulators.add(String.format("%s::%s",dedupConf.getWf().getEntityType(), "skip list"));
-            accumulators.add(String.format("%s::%s",dedupConf.getWf().getEntityType(), "dedupSimilarity (x2)"));
-            accumulators.add(String.format("%s::%s",dedupConf.getWf().getEntityType(), "d < " + dedupConf.getWf().getThreshold()));
-        }
-
-        public BlockProcessorForTesting(DedupConfig dedupConf) {
-            this.dedupConf = dedupConf;
-        }
-
-        public void processSortedBlock(final String key, final List<MapDocument> documents, final Reporter context, boolean useTree, boolean noMatch)  {
-            if (documents.size() > 1) {
-//            log.info("reducing key: '" + key + "' records: " + q.size());
-                process(prepare(documents), context, useTree, noMatch);
-
-            } else {
-                context.incrementCounter(dedupConf.getWf().getEntityType(), "records per hash key = 1", 1);
-            }
-        }
-
-        public void process(final String key, final Iterable<MapDocument> documents, final Reporter context, boolean useTree, boolean noMatch)  {
-
-            final Queue<MapDocument> q = prepare(documents);
-
-            if (q.size() > 1) {
-//            log.info("reducing key: '" + key + "' records: " + q.size());
-                process(simplifyQueue(q, key, context), context, useTree, noMatch);
-
-            } else {
-                context.incrementCounter(dedupConf.getWf().getEntityType(), "records per hash key = 1", 1);
-            }
-        }
-
-        private Queue<MapDocument> prepare(final Iterable<MapDocument> documents) {
-            final Queue<MapDocument> queue = new PriorityQueue<>(100, new MapDocumentComparator(dedupConf.getWf().getOrderField()));
-
-            final Set<String> seen = new HashSet<String>();
-            final int queueMaxSize = dedupConf.getWf().getQueueMaxSize();
-
-            documents.forEach(doc -> {
-                if (queue.size() <= queueMaxSize) {
-                    final String id = doc.getIdentifier();
-
-                    if (!seen.contains(id)) {
-                        seen.add(id);
-                        queue.add(doc);
-                    }
-                }
-            });
-
-            return queue;
-        }
-
-        private Queue<MapDocument> simplifyQueue(final Queue<MapDocument> queue, final String ngram, final Reporter context) {
-            final Queue<MapDocument> q = new LinkedList<>();
-
-            String fieldRef = "";
-            final List<MapDocument> tempResults = Lists.newArrayList();
-
-            while (!queue.isEmpty()) {
-                final MapDocument result = queue.remove();
-
-                final String orderFieldName = dedupConf.getWf().getOrderField();
-                final Field orderFieldValue = result.values(orderFieldName);
-                if (!orderFieldValue.isEmpty()) {
-                    final String field = NGramUtils.cleanupForOrdering(orderFieldValue.stringValue());
-                    if (field.equals(fieldRef)) {
-                        tempResults.add(result);
-                    } else {
-                        populateSimplifiedQueue(q, tempResults, context, fieldRef, ngram);
-                        tempResults.clear();
-                        tempResults.add(result);
-                        fieldRef = field;
-                    }
-                } else {
-                    context.incrementCounter(dedupConf.getWf().getEntityType(), "missing " + dedupConf.getWf().getOrderField(), 1);
-                }
-            }
-            populateSimplifiedQueue(q, tempResults, context, fieldRef, ngram);
-
-            return q;
-        }
-
-        private void populateSimplifiedQueue(final Queue<MapDocument> q,
-                                             final List<MapDocument> tempResults,
-                                             final Reporter context,
-                                             final String fieldRef,
-                                             final String ngram) {
-            WfConfig wf = dedupConf.getWf();
-            if (tempResults.size() < wf.getGroupMaxSize()) {
-                q.addAll(tempResults);
-            } else {
-                context.incrementCounter(wf.getEntityType(), String.format("Skipped records for count(%s) >= %s", wf.getOrderField(), wf.getGroupMaxSize()), tempResults.size());
-//            log.info("Skipped field: " + fieldRef + " - size: " + tempResults.size() + " - ngram: " + ngram);
-            }
-        }
-
-        private void process(final Queue<MapDocument> queue, final Reporter context, boolean useTree, boolean noMatch)  {
-
-            while (!queue.isEmpty()) {
-
-                final MapDocument pivot = queue.remove();
-                final String idPivot = pivot.getIdentifier();
-
-                WfConfig wf = dedupConf.getWf();
-                final Field fieldsPivot = pivot.values(wf.getOrderField());
-                final String fieldPivot = (fieldsPivot == null) || fieldsPivot.isEmpty() ? "" : fieldsPivot.stringValue();
-
-                if (fieldPivot != null) {
-                    int i = 0;
-                    for (final MapDocument curr : queue) {
-                        final String idCurr = curr.getIdentifier();
-
-                        if (mustSkip(idCurr)) {
-
-                            context.incrementCounter(wf.getEntityType(), "skip list", 1);
-
-                            break;
-                        }
-
-                        if (i > wf.getSlidingWindowSize()) {
-                            break;
-                        }
-
-                        final Field fieldsCurr = curr.values(wf.getOrderField());
-                        final String fieldCurr = (fieldsCurr == null) || fieldsCurr.isEmpty() ? null : fieldsCurr.stringValue();
-
-                        if (!idCurr.equals(idPivot) && (fieldCurr != null)) {
-
-                            //draws no match relations (test purpose)
-                            if (noMatch) {
-                                emitOutput(!new TreeProcessor(dedupConf).compare(pivot, curr), idPivot, idCurr, context);
-                            }
-                            else {
-                                //use the decision tree implementation or the "normal" implementation of the similarity score (valid only for publications)
-                                if (useTree)
-                                    emitOutput(new TreeProcessor(dedupConf).compare(pivot, curr), idPivot, idCurr, context);
-                                else
-                                    emitOutput(publicationCompare(pivot, curr, dedupConf), idPivot, idCurr, context);
-                            }
-//                            if(new TreeProcessor(dedupConf).compare(pivot, curr) != publicationCompare(pivot, curr, dedupConf)) {
-//                                emitOutput(true, idPivot, idCurr, context);
-//                            }
-
-                        }
-                    }
-                }
-            }
-        }
-
-    protected static boolean compareInstanceType(MapDocument a, MapDocument b, DedupConfig conf) {
-        Map<String, String> params = new HashMap<>();
-        InstanceTypeMatch instanceTypeMatch = new InstanceTypeMatch(params);
-        double compare = instanceTypeMatch.compare(a.getFieldMap().get("instance"), b.getFieldMap().get("instance"), conf);
-        return compare>=1.0;
-    }
-
-    private boolean publicationCompare(MapDocument a, MapDocument b, DedupConfig config) {
-        //if the score gives 1, the publications are equivalent
-        Map<String, String> params = new HashMap<>();
-        params.put("jpath_value", "$.value");
-        params.put("jpath_classid", "$.qualifier.classid");
-        params.put("mode", "count");
-
-        double score = 0.0;
-
-        //levenstein title
-        LevensteinTitle levensteinTitle = new LevensteinTitle(params);
-        if(levensteinTitle.compare(a.getFieldMap().get("title"), b.getFieldMap().get("title"), config) >= 0.9) {
-            score += 0.2;
-        }
-
-        //pid
-        JsonListMatch jsonListMatch = new JsonListMatch(params);
-        if (jsonListMatch.compare(a.getFieldMap().get("pid"), b.getFieldMap().get("pid"), config) >= 1.0) {
-            score += 0.5;
-        }
-
-        //title version
-        TitleVersionMatch titleVersionMatch = new TitleVersionMatch(params);
-        double result1 = titleVersionMatch.compare(a.getFieldMap().get("title"), b.getFieldMap().get("title"), config);
-        if(result1<0 || result1>=1.0) {
-            score += 0.1;
-        }
-
-        //authors match
-        params.remove("mode");
-        AuthorsMatch authorsMatch = new AuthorsMatch(params);
-        double result2 = authorsMatch.compare(a.getFieldMap().get("authors"), b.getFieldMap().get("authors"), config);
-        if(result2 <0|| result2>=0.6) {
-            score += 0.2;
-        }
-
-        return score>=0.5;
-    }
-
-        private void emitOutput(final boolean result, final String idPivot, final String idCurr, final Reporter context)  {
-
-            if (result) {
-                writeSimilarity(context, idPivot, idCurr);
-                context.incrementCounter(dedupConf.getWf().getEntityType(), "dedupSimilarity (x2)", 1);
-            } else {
-                context.incrementCounter(dedupConf.getWf().getEntityType(), "d < " + dedupConf.getWf().getThreshold(), 1);
-            }
-        }
-
-        private boolean mustSkip(final String idPivot) {
-            return dedupConf.getWf().getSkipList().contains(getNsPrefix(idPivot));
-        }
-
-        private String getNsPrefix(final String id) {
-            return StringUtils.substringBetween(id, "|", "::");
-        }
-
-        private void writeSimilarity(final Reporter context, final String from, final String to)  {
-            final String type = dedupConf.getWf().getEntityType();
-
-            context.emit(type, from, to);
-        }
-}
--- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/util/MapDocumentUtil.java
+++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/util/MapDocumentUtil.java
@ -7,13 +7,14 @@ import com.jayway.jsonpath.JsonPath;
 import com.jayway.jsonpath.Option;
 import eu.dnetlib.pace.config.DedupConfig;
 import eu.dnetlib.pace.config.Type;
-import eu.dnetlib.pace.model.*;
+import eu.dnetlib.pace.model.Field;
+import eu.dnetlib.pace.model.FieldListImpl;
+import eu.dnetlib.pace.model.FieldValueImpl;
+import eu.dnetlib.pace.model.MapDocument;
 import net.minidev.json.JSONArray;

-import java.math.BigDecimal;
 import java.util.*;
 import java.util.function.Predicate;
-import java.util.stream.Collectors;

 public class MapDocumentUtil {

@ -44,25 +45,6 @@ public class MapDocumentUtil {
                            .forEach(fi::add);
                    stringField.put(fdef.getName(), fi);
                    break;
-                case DoubleArray:
-                    stringField.put(
-                            fdef.getName(),
-                            new FieldValueImpl(Type.DoubleArray,
-                                    fdef.getName(),
-                                    getJPathArray(fdef.getPath(), json))
-                    );
-                    break;
-                case StringConcat:
-                    String[] jpaths = fdef.getPath().split("\\|\\|\\|");
-                    stringField.put(
-                            fdef.getName(),
-                            new FieldValueImpl(Type.String,
-                                    fdef.getName(),
-                                    truncateValue(Arrays.stream(jpaths).map(jpath -> getJPathString(jpath, json)).collect(Collectors.joining(" ")),
-                                            fdef.getLength())
-                            )
-                    );
-                    break;
            }
        });
        m.setFieldMap(stringField);
@ -121,30 +103,6 @@ public class MapDocumentUtil {
        }
    }

-    public static double[] getJPathArray(final String jsonPath, final String json) {
-        try {
-            Object o = JsonPath.read(json, jsonPath);
-            if (o instanceof double[])
-                return (double[]) o;
-            if (o instanceof JSONArray) {
-                Object[] objects = ((JSONArray) o).toArray();
-                double[] array = new double[objects.length];
-                for (int i = 0; i < objects.length; i++) {
-                    if (objects[i] instanceof BigDecimal)
-                        array[i] = ((BigDecimal)objects[i]).doubleValue();
-                    else
-                        array[i] = (double) objects[i];
-                }
-                return array;
-            }
-            return new double[0];
-        }
-        catch (Exception e) {
-            e.printStackTrace();
-            return new double[0];
-        }
-    }
-

    public static String truncateValue(String value, int length) {
        if (value == null)
--- a/dnet-pace-core/src/main/resources/eu/dnetlib/pace/config/city_map.csv
+++ b/dnet-pace-core/src/main/resources/eu/dnetlib/pace/config/city_map.csv
@ -909,6 +909,7 @@ city::2389086;Berberati;BBT;Berberati;Berbérati;Берберати;
 city::2389853;Bangui;BGF;Bangi;Bangis;Bangui;Mpan'nkoui;ban ji;bang-gi;bangi;bangwyy;Μπανγκουί;Банги;Бангі;בנגואי;بانگوئی;ባንጊ;バンギ;班基;방기;
 city::2255414;Pointe-Noire;PNR;Pointe-Noire;Puehnt-Nuar;Puent Nuaras;puaengteunualeu;Пуэнт-Нуар;푸앵트누아르;
 city::2258261;Dolisie;DIS;Dolisi;Dolisie;Dolisje;Dolizi;Dolosie;Loubomo;Lubomo;dolliji;dorishi;duo li xi;dwlysy;Долиси;Лубомо;دولیسی;ドリシー;多利西;돌리지;
+city::2259383;Kayes;Jacob;Kai;Kajes;Kaye;Kayes;Kaï;Кайес;
 city::2260535;Brazzaville;BZV;Braza;Brazavil;Brazavilis;Brazavilo;Brazzavil';Brazzaville;Maya-Maya;Mprazabil;N'Tamo;beulajabil;brazafyl;brazawyl;brzwwyl;bu la chai wei er;burazavu~iru;Μπραζαβίλ;Браззавиль;ברזוויל;برازافيل;برازاویل;ብራዛቪል;ブラザヴィル;布拉柴维尔;브라자빌;
 city::2657896;Zurich;Cirihe;Cirikh;Ciurichas;Cjurikh;Cjurikh khot;Cuerih;Curych;Cürih;Cīrihe;Gorad Cjurykh;Lungsod ng Zuerich;Lungsod ng Zürich;Su-la-sie;Suerix;Syurix;Sürix;Sŭ-là̤-sié;Tsuerix;Tsurique;Tsürix;Turicum;Turitg;ZRH;Zeurich;Zirich;Zirik;Zuerich;Zuerigh;Zuerih;Zuric;Zurich;Zuricu;Zurigh;Zurigo;Zuriko;Zurique;Zurych;Zurìcu;Zyriche;Zyrihu;Zúric;Zúrich;Zürich;Zürigh;Zürih;churihhi;chwilihi;curikku;jhyurika;jurikha;su li shi;su rik;suricc;tsiurikhi;tsyryk;zi'urikha;zwrykh;zyryk;zyurikha;zywrch;zywrh;zywrkh;Ζυρίχη;Горад Цюрых;Цирих;Цюрих;Цюрих хот;Ցյուրիխ;ציריך;زوريخ;زوریخ;زيورخ;زیورخ;زیورچ;سيۇرىخ;څوریخ;ܙܝܘܪܚ;ܬܣܝܪܝܟ;ज़्यूरिख़;झ्युरिक;জুরিখ;ਜ਼ਿਊਰਿਖ;சூரிக்கு;സൂറിച്ച്;ซูริก;ဇူးရစ်ချ်မြို့;ციურიხი;ዙሪክ;チューリッヒ;苏黎世;蘇黎世;취리히;
 city::2657970;Winterthur;Eulachstadt;Gorad Vintehrtur;Vintertour;Vintertur;Vintertura;Vinterturas;Vinterturi;Vinterturo;Vintertūra;Vintertūras;Vitudurum;Winterthour;Winterthur;ZLI;binteotueo;fyntrtwr;vu~intato~uru;wen te tu er;wntrtwr;Βίντερτουρ;Винтертур;Вінтертур;Горад Вінтэртур;فينترتور;ونترتور;ونٹرتھر;ვინტერთური;ヴィンタートゥール;温特图尔;빈터투어;
@ -2993,7 +2994,7 @@ city::262036;Glyfada;Aixone;Glifadha;Glifádha;Glyfada;Glyfáda;Γλυφάδα;
 city::262135;Galatsi;Galatsi;Galatsion;Galátsi;Galátsion;Γαλάτσι;Γαλάτσιον;
 city::263986;Agios Dimitrios;Agios Dimitrios;Ayios Dhimitrios;Brakhami;Brakhámi;Áyios Dhimítrios;Άγιος Δημήτριος;
 city::264194;Agia Paraskevi;Agia Paraskeue;Agia Paraskevi;Agía Paraskeví;Ayia Paraskevi;Ayía Paraskeví;Αγία Παρασκευή;
-city::264371;Athens;athenon;ATH;Afina;Afini;Afiny;An Aithin;Ateena;Atehny;Aten;Atena;Atenai;Atenas;Atenas - Athena;Atenas - Αθήνα;Αθηνών;Atene;Atenes;Ateni;Ateno;Atenoj;Ateny;Athen;Athena;Athenae;Athenai;Athene;Athenes;Athens;Atheny;Athina;Athinai;Athinia;Athènes;Athén;Athénes;Athény;Athína;Athínai;Atina;Atény;Atēnas;Atėnai;Aþena;Kota Athena;Lungsod ng Athina;Lungsod ng Athína;atene;atene si;ateni;athensa;athyna;atn;etens;xethens;ya dian;Αθήνα;Αθήναι;Αθηνα;Αθηναι;Атина;Атэны;Афины;Афіни;Аѳины;Աթենք;אתונה;آتن;أثينا;ئافېنا;ܐܬܝܢܐ;अथेन्स;ஏதென்ஸ்;เอเธนส์;ათენი;Ἀθῆναι;アテネ;雅典;아테네;아테네 시;
+city::264371;Athens;ATH;Afina;Afini;Afiny;An Aithin;Ateena;Atehny;Aten;Atena;Atenai;Atenas;Atenas - Athena;Atenas - Αθήνα;Atene;Atenes;Ateni;Ateno;Atenoj;Ateny;Athen;Athena;Athenae;Athenai;Athene;Athenes;Athens;Atheny;Athina;Athinai;Athinia;Athènes;Athén;Athénes;Athény;Athína;Athínai;Atina;Atény;Atēnas;Atėnai;Aþena;Kota Athena;Lungsod ng Athina;Lungsod ng Athína;atene;atene si;ateni;athensa;athyna;atn;etens;xethens;ya dian;Αθήνα;Αθήναι;Αθηνα;Αθηναι;Атина;Атэны;Афины;Афіни;Аѳины;Աթենք;אתונה;آتن;أثينا;ئافېنا;ܐܬܝܢܐ;अथेन्स;ஏதென்ஸ்;เอเธนส์;ათენი;Ἀθῆναι;アテネ;雅典;아테네;아테네 시;
 city::265243;Marousi;Amarousio;Amarousion;Amaroúsion;Marousi;Maroussi;Maroúsi;Αμαρούσιον;Μαρούσι;
 city::265488;Acharnes;Acharnae;Acharnai;Acharne;Acharnes;Akharnai;Akharnaí;Menidhi;Menidhion;Menidi;Menidion;Menioi;Menídhi;Menídhion;Meníoi;Αχαρνές;Αχαρναί;Μενίδι;Μενίδιον;
 city::265533;Aigaleo;Aegaleo;Aigaleo;Aigáleo;Egaleo;Αιγάλεω;
@ -5209,6 +5210,7 @@ city::2451478;Segou;Segi;Segou;Segu;Segú;Senkou;Ségou;sai gu;segu;sgw;syghw;Σ
 city::2453348;Mopti;MZI;Mopti;Moptis;mo pu ti;mobti;moputi;mwbty;mwpty;Μοπτί;Мопти;Мопті;موبتي;موپتی;موپٹی;モプティ;莫普提;몹티;
 city::2453662;Markala;Markala;
 city::2454268;Koutiala;KTX;Koutiala;Kutiala;ku jia la;Кутиала;庫佳拉;
+city::2455518;Kayes;Gorad Kaes;KYS;Kaes;Kagies;Kajes;Kajesas;Kayes;Kayi;ka yi;kai;kays;keseu;kyz;Καγιές;Горад Каес;Каес;Кайес;Каєс;كايس;کایس;کیز;კაესი;カイ;卡伊;케스;
 city::2457163;Gao;GAQ;Gao;Nkao;gao;gaw;gayw;jaw;jia ao;ka xo;Γκάο;Гао;Ґао;גאו;جاو;گائو;گاو;กาโอ;ガオ;加奥;가오;
 city::2460596;Bamako;BKO;Bamaco - Bamako;Bamakas;Bamako;Bamaku;Bamakó;Bamakɔ;Bammaco;Bammako;Mpamako;ba ma ke;bamako;bamakw;bmqw;Μπαμάκο;Бамако;Բամակո;במקו;باماكو;باماکو;ባማኮ;バマコ;巴馬科;바마코;
 city::1285173;Yenangyaung;Yaynangyoung;Yenangyaung;Yenangyoung;
@ -7472,6 +7474,7 @@ city::4177887;West Palm Beach;Litus Palmense Occidentale;Okcidenta Palm Beach;PB
 city::4178003;Weston;Uehston;Veston;Weston;vestana;wei si dun;wstwn;wstwn  flwryda;Вестон;Уэстон;وستون;وستون، فلوریدا;वेस्टन;韦斯顿;
 city::4179320;Albany;ABY;Albany;City of Opportunity;Olbani;albani;albany;albany  jarjya;albany  jwrjya;ao er ba ni;olbeoni;orubani;Олбани;Олбані;آلبانی، جورجیا;ألباني;البانی، جارجیا;अल्बानी;オールバニ;奧爾巴尼;올버니;
 city::4179574;Alpharetta;Al'faretta;Alfareta;Alpharetta;New Prospect Campground;alfarta  jwrjya;alfaryta;alfaryta  jarjya;alphareta;Алфарета;Альфаретта;آلفارتا، جورجیا;ألفاريتا;الفاریتا، جارجیا;अल्फारेटा;
+city::4180386;Athens;AHN;Atens;Atensas;Athens;Athens i Georgia;Athens-Clarke County;Atina;Atuns;Cedar Shoals;aeseonseu;asenzu;athensa;athyna;atn  jwrjya;atynz  jwrjya;ethensaklarka ka'unti;ya dian;Атенс;Атина;Атънс;אתנס;آتئنز، جورجیا;آتن، جورجیا;أثينا;ایتھنز، جارجیا;अथेन्स;एथेन्सक्लार्क काउन्टी;アセンズ;雅典;애선스;
 city::4180439;Atlanta;ATL;Atlant;Atlanta;Atlantae;Atlonta;Canebrake;Gorad Atlanta;Marthasdale;Marthasville;Standing Peachtree;Terminus;White Hall;Whitehall;aeteullaenta;arr‌lanra nagaram;atalanta;ateullaenta;ateullanta;atlanta;atoranta;atʼlantʼa;etalanta;etlanta;ya te lan da;Ατλάντα;Атлантæ;Атланта;Горад Атланта;Ատլանտա;אטלאנטא;אטלנטה;آتلانتا;أتلانتا;ئەتڵانتا;اٹلانٹا;اٹلانٹا، جارجیا;अटलांटा;अटलान्टा;एट्लान्टा;एत्लान्ता;আটলান্টা;એટલાન્ટા;அட்லான்டா;అట్లాంటా;ಅಟ್ಲಾಂಟಾ;അറ്റ്‌ലാന്റാ നഗരം;แอตแลนตา;ཨ་ཊི་ལཱན་ཊཱ།;အတ္တလန္တာမြို့;ატლანტა;አትላንታ;アトランタ;亚特兰大;亞特蘭大;아틀란타;아틀랜타;애틀랜타;
 city::4184530;Brookhaven;Brookhaven;Brookhaven Heights;Nort Atlanta;North Atlanta;brwk hawn  jwrjya;brwkhafn;nartha etlanta;Норт Атланта;بروكهافن;بروک هاون، جورجیا;بروک ہیون، جارجیا;नर्थ एट्लान्टा;
 city::4188985;Columbus;CSG;Columbus;Kolambus;Kolumbas;Kolumbus;Kulumbus;ge lun bu;klmbws  jwrjya;kolambasa;kolleombeoseu;kolumbus;koronbasu;kwlmbs  jarjya;kwlwmbws;qwlwmbws;Коламбус;Колумбус;Кълъмбъс;קולומבוס;كولومبوس;کلمبوس، جورجیا;کولمبس، جارجیا;कोलम्बस;コロンバス;哥伦布;콜럼버스;
@ -7950,7 +7953,7 @@ city::5258957;La Crosse;Gateway City;LSE;La Crosse;La Kros;La-Kross;Lac Rosse;La
 city::5261457;Madison;Gorad Madysan;MSN;Madison;Madisonas;Madisonia;Madisons;Madisun;Mantison;Medison;Medisona;Mehdison;madisan;madison;madisoni;madyswn;maediseun;mai di xun;maidisana;mdysn  wyskansyn;mdyswn;medisana;metican;Μάντισον;Горад Мадысан;Мадисон;Мадисън;Медисон;Медісон;Мэдисон;Մեդիսոն;מדיסון;ماديسون;مدیسن، ویسکانسین;میڈیسن;میڈیسون، وسکونسن;माडिसन्;मॅडिसन;मेडिसन;मैडिसन;மேடிசன்;მადისონი;マディソン;麦迪逊;매디슨;
 city::5263045;Milwaukee;Gorad Miluoki;Juneautown;Kilbourntown;MKE;Mahn-a-wau-kee Seepe;Mahn-a-wauk-ee See-pe;Mahn-a-waukee Seepe;Mahn-a-waukie;Mahn-ah-wauk Seepe;Mahnawauk;Man-a-wau-kee;Man-a-wauk-ee;Man-na-wah-kie;Mana'wa;Manawaki;Manawaukee;Manayaukee;Maunahwauke;Mee-lee-waug-ee;Meliki;Melleoki;Melwarik;Meneawkee;Meolaki;Mil-wah-kie;Milgouoki;Milioke;Millewacki;Millicki;Milo-aki;Milouagui;Milouakik;Milowages;Miluoki;Miluokʻi;Milvauchia;Milvoki;Milvokio;Milvokis;Milwacky;Milwahkie;Milwalka;Milwalky;Milwarck;Milwarik;Milwaucki;Milwaukee;Milwaukie;Minewaki;Miniaki;Minnawack;Winnipesaukee;mi er wo ji;mil wxki;mil-woki;mila'oyaki;milavoki;miluokʼi;milvaki;milvakki;milvauki;miruu~oki;mlwaky;mylwaky;mylwaky  wyskansyn;mylwwqy;Μιλγουόκι;Горад Мілуокі;Милвоки;Милуоки;Мілуокі;Միլուոքի;מילוואקי;מילווקי;ملواکی;ميلواكي;میلواکی، ویسکانسین;मिलवॉकी;मिल्वौकी;মিলওয়াকি;மில்வாக்கி;మిల్వాకీ;ಮಿಲ್ವಾಕೀ;มิลวอกี;მილუოკი;ミルウォーキー;密尔沃基;密爾沃基;밀워키;
 city::5264870;North La Crosse;;
-city::5265838;Oshkosh;Algoma;Brooklyn;OSH;Oshkosh;Oskosh;Sawdust City;ashkwsh  wyskansyn;ausakosa;awshkwsh;awshkwsh  wskwnsn;oshukoshu;Ошкош;אושקוש;أوشكوش;اشکوش، ویسکانسین;اوشکوش، وسکونسن;औशकोश;ოშკოში;オシュコシュ;
+city::5265838;Oshkosh;Algoma;Athens;Brooklyn;OSH;Oshkosh;Oskosh;Sawdust City;ashkwsh  wyskansyn;ausakosa;awshkwsh;awshkwsh  wskwnsn;oshukoshu;Ошкош;אושקוש;أوشكوش;اشکوش، ویسکانسین;اوشکوش، وسکونسن;औशकोश;ოშკოში;オシュコシュ;
 city::5268249;Racine;Kipikawi;Port Gilbert;RAC;Racine;Rasijn;Rasin;Rasinas;la xin;leosin;rashin;rasini;rasyn;rasyn  wskwnsn;rysyn  wyskansyn;Расийн;Расин;Расін;ראסין;راسين;راسین، وسکونسن;ریسین، ویسکانسین;რასინი;ラシーン;拉辛;러신;
 city::5278052;Waukesha;Prairieville;UES;Uokesho;Uokisha;Vokesha;Vokisha;Waukesha;u~okisho;wakysha  wyskansyn;wawkysha  wskwnsn;wkysha;Вокеша;Вокиша;Уокешо;Уокиша;واوکیشا، وسکونسن;واکیشا، ویسکانسین;وكيشا;უოკეშო;ウォキショー;
 city::5278420;West Allis;Vest Alis;alys ghrby  wyskansyn;wyst alys;Вест Алис;آلیس غربی، ویسکانسین;ويست أليس;ویسٹ الیس، وسکونسن;უესტ-ალისი;
--- a/dnet-pace-core/src/main/resources/eu/dnetlib/pace/config/stopwords_gr.txt
+++ b/dnet-pace-core/src/main/resources/eu/dnetlib/pace/config/stopwords_gr.txt
@ -1,847 +0,0 @@
-ένα
-έναν
-ένας
-αι
-ακομα
-ακομη
-ακριβως
-αληθεια
-αληθινα
-αλλα
-αλλαχου
-αλλες
-αλλη
-αλλην
-αλλης
-αλλιως
-αλλιωτικα
-αλλο
-αλλοι
-αλλοιως
-αλλοιωτικα
-αλλον
-αλλος
-αλλοτε
-αλλου
-αλλους
-αλλων
-αμα
-αμεσα
-αμεσως
-αν
-ανα
-αναμεσα
-αναμεταξυ
-ανευ
-αντι
-αντιπερα
-αντις
-ανω
-ανωτερω
-αξαφνα
-απ
-απεναντι
-απο
-αποψε
-από
-αρα
-αραγε
-αργα
-αργοτερο
-αριστερα
-αρκετα
-αρχικα
-ας
-αυριο
-αυτα
-αυτες
-αυτεσ
-αυτη
-αυτην
-αυτης
-αυτο
-αυτοι
-αυτον
-αυτος
-αυτοσ
-αυτου
-αυτους
-αυτουσ
-αυτων
-αφοτου
-αφου
-αἱ
-αἳ
-αἵ
-αὐτόσ
-αὐτὸς
-αὖ
-α∆ιακοπα
-βεβαια
-βεβαιοτατα
-γάρ
-γα
-γα^
-γε
-γι
-για
-γοῦν
-γρηγορα
-γυρω
-γὰρ
-δ'
-δέ
-δή
-δαί
-δαίσ
-δαὶ
-δαὶς
-δε
-δεν
-δι
-δι'
-διά
-δια
-διὰ
-δὲ
-δὴ
-δ’
-εαν
-εαυτο
-εαυτον
-εαυτου
-εαυτους
-εαυτων
-εγκαιρα
-εγκαιρως
-εγω
-ειθε
-ειμαι
-ειμαστε
-ειναι
-εις
-εισαι
-εισαστε
-ειστε
-ειτε
-ειχα
-ειχαμε
-ειχαν
-ειχατε
-ειχε
-ειχες
-ει∆εμη
-εκ
-εκαστα
-εκαστες
-εκαστη
-εκαστην
-εκαστης
-εκαστο
-εκαστοι
-εκαστον
-εκαστος
-εκαστου
-εκαστους
-εκαστων
-εκει
-εκεινα
-εκεινες
-εκεινεσ
-εκεινη
-εκεινην
-εκεινης
-εκεινο
-εκεινοι
-εκεινον
-εκεινος
-εκεινοσ
-εκεινου
-εκεινους
-εκεινουσ
-εκεινων
-εκτος
-εμας
-εμεις
-εμενα
-εμπρος
-εν
-ενα
-εναν
-ενας
-ενος
-εντελως
-εντος
-εντωμεταξυ
-ενω
-ενός
-εξ
-εξαφνα
-εξης
-εξισου
-εξω
-επ
-επί
-επανω
-επειτα
-επει∆η
-επι
-επισης
-επομενως
-εσας
-εσεις
-εσενα
-εστω
-εσυ
-ετερα
-ετεραι
-ετερας
-ετερες
-ετερη
-ετερης
-ετερο
-ετεροι
-ετερον
-ετερος
-ετερου
-ετερους
-ετερων
-ετουτα
-ετουτες
-ετουτη
-ετουτην
-ετουτης
-ετουτο
-ετουτοι
-ετουτον
-ετουτος
-ετουτου
-ετουτους
-ετουτων
-ετσι
-ευγε
-ευθυς
-ευτυχως
-εφεξης
-εχει
-εχεις
-εχετε
-εχθες
-εχομε
-εχουμε
-εχουν
-εχτες
-εχω
-εως
-εἰ
-εἰμί
-εἰμὶ
-εἰς
-εἰσ
-εἴ
-εἴμι
-εἴτε
-ε∆ω
-η
-ημασταν
-ημαστε
-ημουν
-ησασταν
-ησαστε
-ησουν
-ηταν
-ητανε
-ητοι
-ηττον
-η∆η
-θα
-ι
-ιι
-ιιι
-ισαμε
-ισια
-ισως
-ισωσ
-ι∆ια
-ι∆ιαν
-ι∆ιας
-ι∆ιες
-ι∆ιο
-ι∆ιοι
-ι∆ιον
-ι∆ιος
-ι∆ιου
-ι∆ιους
-ι∆ιων
-ι∆ιως
-κ
-καί
-καίτοι
-καθ
-καθε
-καθεμια
-καθεμιας
-καθενα
-καθενας
-καθενος
-καθετι
-καθολου
-καθως
-και
-κακα
-κακως
-καλα
-καλως
-καμια
-καμιαν
-καμιας
-καμποσα
-καμποσες
-καμποση
-καμποσην
-καμποσης
-καμποσο
-καμποσοι
-καμποσον
-καμποσος
-καμποσου
-καμποσους
-καμποσων
-κανεις
-κανεν
-κανενα
-κανεναν
-κανενας
-κανενος
-καποια
-καποιαν
-καποιας
-καποιες
-καποιο
-καποιοι
-καποιον
-καποιος
-καποιου
-καποιους
-καποιων
-καποτε
-καπου
-καπως
-κατ
-κατά
-κατα
-κατι
-κατιτι
-κατοπιν
-κατω
-κατὰ
-καὶ
-κι
-κιολας
-κλπ
-κοντα
-κτλ
-κυριως
-κἀν
-κἂν
-λιγακι
-λιγο
-λιγωτερο
-λογω
-λοιπα
-λοιπον
-μέν
-μέσα
-μή
-μήτε
-μία
-μα
-μαζι
-μακαρι
-μακρυα
-μαλιστα
-μαλλον
-μας
-με
-μεθ
-μεθαυριο
-μειον
-μελει
-μελλεται
-μεμιας
-μεν
-μερικα
-μερικες
-μερικοι
-μερικους
-μερικων
-μεσα
-μετ
-μετά
-μετα
-μεταξυ
-μετὰ
-μεχρι
-μη
-μην
-μηπως
-μητε
-μη∆ε
-μιά
-μια
-μιαν
-μιας
-μολις
-μολονοτι
-μοναχα
-μονες
-μονη
-μονην
-μονης
-μονο
-μονοι
-μονομιας
-μονος
-μονου
-μονους
-μονων
-μου
-μπορει
-μπορουν
-μπραβο
-μπρος
-μἐν
-μὲν
-μὴ
-μὴν
-να
-ναι
-νωρις
-ξανα
-ξαφνικα
-ο
-οι
-ολα
-ολες
-ολη
-ολην
-ολης
-ολο
-ολογυρα
-ολοι
-ολον
-ολονεν
-ολος
-ολοτελα
-ολου
-ολους
-ολων
-ολως
-ολως∆ιολου
-ομως
-ομωσ
-οποια
-οποιαν
-οποιαν∆ηποτε
-οποιας
-οποιας∆ηποτε
-οποια∆ηποτε
-οποιες
-οποιες∆ηποτε
-οποιο
-οποιοι
-οποιον
-οποιον∆ηποτε
-οποιος
-οποιος∆ηποτε
-οποιου
-οποιους
-οποιους∆ηποτε
-οποιου∆ηποτε
-οποιο∆ηποτε
-οποιων
-οποιων∆ηποτε
-οποι∆ηποτε
-οποτε
-οποτε∆ηποτε
-οπου
-οπου∆ηποτε
-οπως
-οπωσ
-ορισμενα
-ορισμενες
-ορισμενων
-ορισμενως
-οσα
-οσα∆ηποτε
-οσες
-οσες∆ηποτε
-οση
-οσην
-οσην∆ηποτε
-οσης
-οσης∆ηποτε
-οση∆ηποτε
-οσο
-οσοι
-οσοι∆ηποτε
-οσον
-οσον∆ηποτε
-οσος
-οσος∆ηποτε
-οσου
-οσους
-οσους∆ηποτε
-οσου∆ηποτε
-οσο∆ηποτε
-οσων
-οσων∆ηποτε
-οταν
-οτι
-οτι∆ηποτε
-οτου
-ου
-ουτε
-ου∆ε
-οχι
-οἱ
-οἳ
-οἷς
-οὐ
-οὐδ
-οὐδέ
-οὐδείσ
-οὐδεὶς
-οὐδὲ
-οὐδὲν
-οὐκ
-οὐχ
-οὐχὶ
-οὓς
-οὔτε
-οὕτω
-οὕτως
-οὕτωσ
-οὖν
-οὗ
-οὗτος
-οὗτοσ
-παλι
-παντοτε
-παντου
-παντως
-παρ
-παρά
-παρα
-παρὰ
-περί
-περα
-περι
-περιπου
-περισσοτερο
-περσι
-περυσι
-περὶ
-πια
-πιθανον
-πιο
-πισω
-πλαι
-πλεον
-πλην
-ποια
-ποιαν
-ποιας
-ποιες
-ποιεσ
-ποιο
-ποιοι
-ποιον
-ποιος
-ποιοσ
-ποιου
-ποιους
-ποιουσ
-ποιων
-πολυ
-ποσες
-ποση
-ποσην
-ποσης
-ποσοι
-ποσος
-ποσους
-ποτε
-που
-πουθε
-πουθενα
-ποῦ
-πρεπει
-πριν
-προ
-προκειμενου
-προκειται
-προπερσι
-προς
-προσ
-προτου
-προχθες
-προχτες
-πρωτυτερα
-πρόσ
-πρὸ
-πρὸς
-πως
-πωσ
-σαν
-σας
-σε
-σεις
-σημερα
-σιγα
-σου
-στα
-στη
-στην
-στης
-στις
-στο
-στον
-στου
-στους
-στων
-συγχρονως
-συν
-συναμα
-συνεπως
-συνηθως
-συχνα
-συχνας
-συχνες
-συχνη
-συχνην
-συχνης
-συχνο
-συχνοι
-συχνον
-συχνος
-συχνου
-συχνους
-συχνων
-συχνως
-σχε∆ον
-σωστα
-σόσ
-σύ
-σύν
-σὸς
-σὺ
-σὺν
-τά
-τήν
-τί
-τίς
-τίσ
-τα
-ταυτα
-ταυτες
-ταυτη
-ταυτην
-ταυτης
-ταυτο,ταυτον
-ταυτος
-ταυτου
-ταυτων
-ταχα
-ταχατε
-ταῖς
-τα∆ε
-τε
-τελικα
-τελικως
-τες
-τετοια
-τετοιαν
-τετοιας
-τετοιες
-τετοιο
-τετοιοι
-τετοιον
-τετοιος
-τετοιου
-τετοιους
-τετοιων
-τη
-την
-της
-τησ
-τι
-τινα
-τιποτα
-τιποτε
-τις
-τισ
-το
-τοί
-τοι
-τοιοῦτος
-τοιοῦτοσ
-τον
-τος
-τοσα
-τοσες
-τοση
-τοσην
-τοσης
-τοσο
-τοσοι
-τοσον
-τοσος
-τοσου
-τοσους
-τοσων
-τοτε
-του
-τουλαχιστο
-τουλαχιστον
-τους
-τουτα
-τουτες
-τουτη
-τουτην
-τουτης
-τουτο
-τουτοι
-τουτοις
-τουτον
-τουτος
-τουτου
-τουτους
-τουτων
-τούσ
-τοὺς
-τοῖς
-τοῦ
-τυχον
-των
-τωρα
-τό
-τόν
-τότε
-τὰ
-τὰς
-τὴν
-τὸ
-τὸν
-τῆς
-τῆσ
-τῇ
-τῶν
-τῷ
-υπ
-υπερ
-υπο
-υποψη
-υποψιν
-υπό
-υστερα
-φετος
-χαμηλα
-χθες
-χτες
-χωρις
-χωριστα
-ψηλα
-ω
-ωραια
-ως
-ωσ
-ωσαν
-ωσοτου
-ωσπου
-ωστε
-ωστοσο
-ωχ
-ἀλλ'
-ἀλλά
-ἀλλὰ
-ἀλλ’
-ἀπ
-ἀπό
-ἀπὸ
-ἀφ
-ἂν
-ἃ
-ἄλλος
-ἄλλοσ
-ἄν
-ἄρα
-ἅμα
-ἐάν
-ἐγώ
-ἐγὼ
-ἐκ
-ἐμόσ
-ἐμὸς
-ἐν
-ἐξ
-ἐπί
-ἐπεὶ
-ἐπὶ
-ἐστι
-ἐφ
-ἐὰν
-ἑαυτοῦ
-ἔτι
-ἡ
-ἢ
-ἣ
-ἤ
-ἥ
-ἧς
-ἵνα
-ὁ
-ὃ
-ὃν
-ὃς
-ὅ
-ὅδε
-ὅθεν
-ὅπερ
-ὅς
-ὅσ
-ὅστις
-ὅστισ
-ὅτε
-ὅτι
-ὑμόσ
-ὑπ
-ὑπέρ
-ὑπό
-ὑπὲρ
-ὑπὸ
-ὡς
-ὡσ
-ὥς
-ὥστε
-ὦ
-ᾧ
-∆α
-∆ε
-∆εινα
-∆εν
-∆εξια
-∆ηθεν
-∆ηλα∆η
-∆ι
-∆ια
-∆ιαρκως
-∆ικα
-∆ικο
-∆ικοι
-∆ικος
-∆ικου
-∆ικους
-∆ιολου
-∆ιπλα
-∆ιχως
--- a/dnet-pace-core/src/test/java/eu/dnetlib/pace/AbstractPaceTest.java
+++ b/dnet-pace-core/src/test/java/eu/dnetlib/pace/AbstractPaceTest.java
@ -9,7 +9,6 @@ import org.apache.commons.io.IOUtils;

 import java.io.IOException;
 import java.io.StringWriter;
-import java.nio.charset.StandardCharsets;
 import java.util.List;
 import java.util.stream.Collectors;

@ -18,7 +17,7 @@ public abstract class AbstractPaceTest extends AbstractPaceFunctions {
 	protected String readFromClasspath(final String filename) {
 		final StringWriter sw = new StringWriter();
 		try {
-			IOUtils.copy(getClass().getResourceAsStream(filename), sw, StandardCharsets.UTF_8);
+			IOUtils.copy(getClass().getResourceAsStream(filename), sw);
 			return sw.toString();
 		} catch (final IOException e) {
 			throw new RuntimeException("cannot load resource from classpath: " + filename);
@ -37,10 +36,6 @@ public abstract class AbstractPaceTest extends AbstractPaceFunctions {
 		return new FieldValueImpl(Type.URL, "url", s);
 	}

-	protected Field array(final double[] a) {
-		return new FieldValueImpl(Type.DoubleArray, "array", a);
-	}
-
 	protected Field createFieldList(List<String> strings, String fieldName){

 		List<FieldValueImpl> fieldValueStream = strings.stream().map(s -> new FieldValueImpl(Type.String, fieldName, s)).collect(Collectors.toList());
--- a/dnet-pace-core/src/test/java/eu/dnetlib/pace/clustering/ClusteringFunctionTest.java
+++ b/dnet-pace-core/src/test/java/eu/dnetlib/pace/clustering/ClusteringFunctionTest.java
@ -2,15 +2,12 @@ package eu.dnetlib.pace.clustering;

 import com.google.common.collect.Lists;
 import com.google.common.collect.Maps;
-import com.google.common.collect.Sets;
 import eu.dnetlib.pace.AbstractPaceTest;
 import eu.dnetlib.pace.common.AbstractPaceFunctions;
 import eu.dnetlib.pace.config.DedupConfig;
 import org.junit.jupiter.api.*;

 import java.util.Map;
-import java.util.Set;
-import java.util.stream.Collectors;

 public class ClusteringFunctionTest extends AbstractPaceTest {

@ -50,7 +47,7 @@ public class ClusteringFunctionTest extends AbstractPaceTest {
 	@Test
 	public void testNgramPairs() {
 		params.put("ngramLen", 3);
-		params.put("max", 2);
+		params.put("max", 1);

 		final ClusteringFunction np = new NgramPairs(params);

@ -62,7 +59,7 @@ public class ClusteringFunctionTest extends AbstractPaceTest {
 	@Test
 	public void testSortedNgramPairs() {
 		params.put("ngramLen", 3);
-		params.put("max", 2);
+		params.put("max", 1);

 		final ClusteringFunction np = new SortedNgramPairs(params);

@ -73,11 +70,6 @@ public class ClusteringFunctionTest extends AbstractPaceTest {
 		final String s2 = "Pisa University";
 		System.out.println(s2);
 		System.out.println(np.apply(conf, Lists.newArrayList(title(s2))));
-
-		final String s3 = "Parco Tecnologico Agroalimentare Umbria";
-		System.out.println(s3);
-		System.out.println(np.apply(conf, Lists.newArrayList(title(s3))));
-
 	}

 	@Test
@ -103,11 +95,6 @@ public class ClusteringFunctionTest extends AbstractPaceTest {
 		final String s = "Search for the Standard Model Higgs Boson";
 		System.out.println(s);
 		System.out.println(sp.apply(conf, Lists.newArrayList(title(s))));
-
-		params.put("len", 3);
-		params.put("max", 1);
-
-		System.out.println(sp.apply(conf, Lists.newArrayList(title("Framework for general-purpose deduplication"))));
 	}

 	@Test
@ -145,18 +132,6 @@ public class ClusteringFunctionTest extends AbstractPaceTest {
 		System.out.println(s);
 		System.out.println(sp.apply(conf, Lists.newArrayList(title(s))));

-		s = "JRC Open Power Plants Database (JRC-PPDB-OPEN)";
-		System.out.println(s);
-		System.out.println(sp.apply(conf, Lists.newArrayList(title(s))));
-
-		s = "JRC Open Power Plants Database";
-		System.out.println(s);
-		System.out.println(sp.apply(conf, Lists.newArrayList(title(s))));
-
-		s = "niivue/niivue: 0.21.1";
-		System.out.println(s);
-		System.out.println(sp.apply(conf, Lists.newArrayList(title(s))));
-
 	}

 	@Test
@ -199,51 +174,5 @@ public class ClusteringFunctionTest extends AbstractPaceTest {
 		System.out.println("s5 = " + s5);
 		System.out.println(cf.apply(conf, Lists.newArrayList(title(s5))));

-		final String s6 = "National and Kapodistrian University of Athens";
-		System.out.println("s6 = " + s6);
-		System.out.println(cf.apply(conf, Lists.newArrayList(title(s6))));
-
-		final String s7 = "Εθνικό και Καποδιστριακό Πανεπιστήμιο Αθηνών";
-		System.out.println("s7 = " + s7);
-		System.out.println(cf.apply(conf, Lists.newArrayList(title(s7))));
-
 	}
-
-	@Test
-	public void testPersonClustering(){
-
-		final ClusteringFunction cf = new PersonClustering(params);
-		final String s = "Abd-Alla, Abo-el-nour N.";
-		System.out.println("s = " + s);
-		System.out.println(cf.apply(conf, Lists.newArrayList(title(s))));
-
-		final String s1 = "Manghi, Paolo";
-		System.out.println("s1 = " + s1);
-		System.out.println(cf.apply(conf, Lists.newArrayList(title(s1))));
-
-	}
-
-	@Test
-	public void testPersonHash(){
-
-		final ClusteringFunction cf = new PersonHash(params);
-		final String s = "Manghi, Paolo";
-		System.out.println("s = " + s);
-		System.out.println(cf.apply(conf, Lists.newArrayList(title(s))));
-
-		final String s1 = "Manghi, P.";
-		System.out.println("s = " + s1);
-		System.out.println(cf.apply(conf, Lists.newArrayList(title(s1))));
-
-	}
-
-	@Test
-	public void testLastNameFirstInitial(){
-
-		final ClusteringFunction cf = new LastNameFirstInitial(params);
-		final String s = "LI Yonghong";
-		System.out.println("s = " + s);
-		System.out.println(cf.apply(conf, Lists.newArrayList(title(s))));
-	}
-
-}
+}
--- a/dnet-pace-core/src/test/java/eu/dnetlib/pace/comparators/ComparatorTest.java
+++ b/dnet-pace-core/src/test/java/eu/dnetlib/pace/comparators/ComparatorTest.java
@ -1,42 +1,32 @@
 package eu.dnetlib.pace.comparators;

-import eu.dnetlib.pace.AbstractPaceTest;
 import eu.dnetlib.pace.clustering.NGramUtils;
-import eu.dnetlib.pace.config.Type;
-import eu.dnetlib.pace.model.Field;
-import eu.dnetlib.pace.model.FieldValueImpl;
+import eu.dnetlib.pace.model.MapDocument;
 import eu.dnetlib.pace.tree.*;
 import eu.dnetlib.pace.config.DedupConfig;
+import eu.dnetlib.pace.util.MapDocumentUtil;

 import org.junit.jupiter.api.*;
 import static org.junit.jupiter.api.Assertions.assertEquals;

-import java.util.ArrayList;
-import java.util.Arrays;
+import eu.dnetlib.pace.common.AbstractPaceFunctions;
+
 import java.util.HashMap;
 import java.util.Map;

-@TestInstance(TestInstance.Lifecycle.PER_CLASS)
-public class ComparatorTest extends AbstractPaceTest {
+
+public class ComparatorTest extends AbstractPaceFunctions {

 	private Map<String, String> params;
 	private DedupConfig conf;

 	@BeforeAll
 	public void setup() {
-		conf = DedupConfig.load(readFromClasspath("/eu/dnetlib/pace/config/organization.current.conf.json", ComparatorTest.class));
-	}
-
-	@BeforeEach
-	public void beforeEachTest() {
 		params = new HashMap<>();
 		params.put("weight", "1.0");
-		params.put("surname_th", "0.99");
-		params.put("name_th", "0.95");
-		params.put("jpath_value", "$.value");
-		params.put("jpath_classid", "$.qualifier.classid");
-	}
+		conf = DedupConfig.load(readFromClasspath("/eu/dnetlib/pace/config/organization.current.conf.json", ComparatorTest.class));

+	}

 	@Test
 	public void testCleanForSorting() {
@ -64,10 +54,7 @@ public class ComparatorTest extends AbstractPaceTest {
 		//particular cases
 		assertEquals(1.0, cityMatch.distance("Free University of Bozen-Bolzano", "Università di Bolzano", conf));
 		assertEquals(1.0, cityMatch.distance("Politechniki Warszawskiej (Warsaw University of Technology)", "Warsaw University of Technology", conf));
-
-		// failing becasuse 'Allen' is a transliterrated greek stopword
-		// assertEquals(-1.0, cityMatch.distance("Allen (United States)", "United States Military Academy", conf));
-		assertEquals(-1.0, cityMatch.distance("Washington (United States)", "United States Military Academy", conf));
+		assertEquals(-1.0, cityMatch.distance("Allen (United States)", "United States Military Academy", conf));
 	}

 	@Test
@ -81,7 +68,7 @@ public class ComparatorTest extends AbstractPaceTest {
 		assertEquals(1.0, keywordMatch.distance("Polytechnic University of Turin", "POLITECNICO DI TORINO", conf));
 		assertEquals(1.0, keywordMatch.distance("Istanbul Commerce University", "İstanbul Ticarət Universiteti", conf));
 		assertEquals(1.0, keywordMatch.distance("Franklin College", "Concordia College", conf));
-		assertEquals(2.0/3.0, keywordMatch.distance("University of Georgia", "Georgia State University", conf));
+		assertEquals(0.5, keywordMatch.distance("University of Georgia", "Georgia State University", conf));
 		assertEquals(0.5, keywordMatch.distance("University College London", "University of London", conf));
 		assertEquals(0.5, keywordMatch.distance("Washington State University", "University of Washington", conf));
 		assertEquals(-1.0, keywordMatch.distance("Allen (United States)", "United States Military Academy", conf));
@ -89,46 +76,15 @@ public class ComparatorTest extends AbstractPaceTest {
 	}

 	@Test
-	public void listContainsMatchTest(){
+	public void containsMatchTest(){

-		Field a = createFieldList(Arrays.asList("Article", "Publication", "ORP"), "instanceType");
-		Field b = createFieldList(Arrays.asList("Publication", "Article", "ORP"), "instanceType");
-
-		params.put("string", "Article");
+		params.put("string", "openorgs");
 		params.put("bool", "XOR");
 		params.put("caseSensitive", "false");

-		ListContainsMatch listContainsMatch = new ListContainsMatch(params);
+		final ContainsMatch containsMatch = new ContainsMatch(params);

-		assertEquals(0.0, listContainsMatch.compare(a, b, conf));
-
-		params.put("string", "Article");
-		params.put("bool", "AND");
-		params.put("caseSensitive", "false");
-
-		listContainsMatch = new ListContainsMatch(params);
-
-		assertEquals(1.0, listContainsMatch.compare(a, b, conf));
-	}
-
-	@Test
-	public void stringContainsMatchTest(){
-
-		params.put("string", "openorgs");
-		params.put("aggregator", "XOR");
-		params.put("caseSensitive", "false");
-
-		StringContainsMatch stringContainsMatch = new StringContainsMatch(params);
-
-		assertEquals(0.0, stringContainsMatch.distance("openorgs", "openorgs", conf));
-
-		params.put("string", "openorgs");
-		params.put("aggregator", "AND");
-		params.put("caseSensitive", "false");
-
-		stringContainsMatch = new StringContainsMatch(params);
-
-		assertEquals(1.0, stringContainsMatch.distance("openorgs", "openorgs", conf));
+		assertEquals(0.0, containsMatch.distance("openorgs", "openorgs", conf));
 	}

 	@Test
@ -160,152 +116,12 @@ public class ComparatorTest extends AbstractPaceTest {
 		result = jaroWinklerNormalizedName.distance("NOAA - Servicio Meteorol\\u00f3gico Nacional", "NOAA - NWS", conf);
 		System.out.println("result = " + result);

-	}
-
-	@Test
-	public void jaroWinklerTest() {
-
-		final JaroWinkler jaroWinkler = new JaroWinkler(params);
-
-		double result = jaroWinkler.distance("Sofia", "Sofìa", conf);
-		System.out.println("result = " + result);
-
-		result = jaroWinkler.distance("University of Victoria Dataverse", "University of Windsor Dataverse", conf);
-		System.out.println("result = " + result);
-
-		result = jaroWinkler.distance("Victoria Dataverse", "Windsor Dataverse", conf);
-		System.out.println("result = " + result);

 	}

 	@Test
-	public void levensteinTitleTest() {
-
-		final LevensteinTitle levensteinTitle = new LevensteinTitle(params);
-
-		double result = levensteinTitle.distance("Degradation of lignin β‐aryl ether units in Arabidopsis thaliana expressing LigD, LigF and LigG from Sphingomonas paucimobilis SYK‐6", "Degradation of lignin β-aryl ether units in <i>Arabidopsis thaliana</i> expressing <i>LigD</i>, <i>LigF</i> and <i>LigG</i> from <i>Sphingomonas paucimobilis</i> SYK-6", conf);
-
-		System.out.println("result = " + result);
-	}
-
-	@Test
-	public void levensteinTest() {
-		final Levenstein levenstein = new Levenstein(params);
-
-		double result = levenstein.distance("la bruzzo", "la bruzzo", conf);
-		System.out.println("result = " + result);
-	}
-
-	@Test
-	public void instanceTypeMatchTest() {
-
-		final InstanceTypeMatch instanceTypeMatch = new InstanceTypeMatch(params);
-
-		Field a = createFieldList(Arrays.asList("Article", "Article", "Article"), "instanceType");
-		Field b = createFieldList(Arrays.asList("Article", "Article", "Article"), "instanceType");
-		double result = instanceTypeMatch.compare(a, b, conf);
-
-		assertEquals(1.0, result);
-
-		Field c = createFieldList(Arrays.asList("Conference object", "Conference object", "Conference object"), "instanceType");
-		result = instanceTypeMatch.compare(c, b, conf);
-
-		assertEquals(1.0, result);
-
-		Field d = createFieldList(Arrays.asList("Master thesis", "Master thesis", "Master thesis"), "instanceType");
-		Field e = createFieldList(Arrays.asList("Bachelor thesis", "Bachelor thesis", "Bachelor thesis"), "instanceType");
-		result = instanceTypeMatch.compare(d, e, conf);
-
-		assertEquals(1.0, result);
-
-		Field g = createFieldList(Arrays.asList("Software Paper", "Software Paper"), "instanceType");
-		result = instanceTypeMatch.compare(e, g, conf);
-
-		assertEquals(0.0, result);
-
-		Field h = createFieldList(Arrays.asList("Other literature type", "Article"), "instanceType");
-		result = instanceTypeMatch.compare(a, h, conf);
-
-		assertEquals(1.0, result);
-	}
-
-	@Test
-	public void authorsMatchTest() {
-
-		AuthorsMatch authorsMatch = new AuthorsMatch(params);
-
-		Field a = createFieldList(Arrays.asList("La Bruzzo, Sandro", "Atzori, Claudio", "De Bonis, Michele"), "authors");
-		Field b = createFieldList(Arrays.asList("Atzori, C.", "La Bruzzo, S.", "De Bonis, M."), "authors");
-		double result = authorsMatch.compare(a, b, conf);
-
-		assertEquals(1.0, result);
-
-		Field c = createFieldList(Arrays.asList("Manghi, Paolo"), "authors");
-		Field d = createFieldList(Arrays.asList("Manghi, Pasquale"), "authors");
-		result = authorsMatch.compare(c, d, conf);
-
-		assertEquals(0.0, result) ;
-
-		params.put("mode", "surname");
-		authorsMatch = new AuthorsMatch(params);
-		result = authorsMatch.compare(c, d, conf);
-
-		assertEquals(1.0, result);
-
-		Field e = createFieldList(Arrays.asList("Manghi, Paolo", "Atzori, Claudio"), "authors");
-		result = authorsMatch.compare(a, e, conf);
-
-		assertEquals(0.25, result);
-
-		Field f = createFieldList(new ArrayList<>(), "authors");
-		result = authorsMatch.compare(f,f, conf);
-		System.out.println("result = " + result);
+	public void jsonListMatchTest(){

 	}

-	@Test
-	public void jsonListMatch() {
-
-		JsonListMatch jsonListMatch = new JsonListMatch(params);
-
-		Field a = createFieldList(Arrays.asList("{\"datainfo\":{\"deletedbyinference\":false,\"inferenceprovenance\":null,\"inferred\":false,\"invisible\":false,\"provenanceaction\":{\"classid\":\"sysimport:actionset\",\"classname\":\"Harvested\",\"schemeid\":\"dnet:provenanceActions\",\"schemename\":\"dnet:provenanceActions\"},\"trust\":\"0.9\"},\"qualifier\":{\"classid\":\"doi\",\"classname\":\"Digital Object Identifier\",\"schemeid\":\"dnet:pid_types\",\"schemename\":\"dnet:pid_types\"},\"value\":\"10.1111/pbi.12655\"}"), "authors");
-		Field b = createFieldList(Arrays.asList("{\"datainfo\":{\"deletedbyinference\":false,\"inferenceprovenance\":\"\",\"inferred\":false,\"invisible\":false,\"provenanceaction\":{\"classid\":\"sysimport:crosswalk:repository\",\"classname\":\"Harvested\",\"schemeid\":\"dnet:provenanceActions\",\"schemename\":\"dnet:provenanceActions\"},\"trust\":\"0.9\"},\"qualifier\":{\"classid\":\"pmc\",\"classname\":\"PubMed Central ID\",\"schemeid\":\"dnet:pid_types\",\"schemename\":\"dnet:pid_types\"},\"value\":\"PMC5399005\"}","{\"datainfo\":{\"deletedbyinference\":false,\"inferenceprovenance\":\"\",\"inferred\":false,\"invisible\":false,\"provenanceaction\":{\"classid\":\"sysimport:crosswalk:repository\",\"classname\":\"Harvested\",\"schemeid\":\"dnet:provenanceActions\",\"schemename\":\"dnet:provenanceActions\"},\"trust\":\"0.9\"},\"qualifier\":{\"classid\":\"pmid\",\"classname\":\"PubMed ID\",\"schemeid\":\"dnet:pid_types\",\"schemename\":\"dnet:pid_types\"},\"value\":\"27775869\"}","{\"datainfo\":{\"deletedbyinference\":false,\"inferenceprovenance\":\"\",\"inferred\":false,\"invisible\":false,\"provenanceaction\":{\"classid\":\"user:claim\",\"classname\":\"Linked by user\",\"schemeid\":\"dnet:provenanceActions\",\"schemename\":\"dnet:provenanceActions\"},\"trust\":\"0.9\"},\"qualifier\":{\"classid\":\"doi\",\"classname\":\"Digital Object Identifier\",\"schemeid\":\"dnet:pid_types\",\"schemename\":\"dnet:pid_types\"},\"value\":\"10.1111/pbi.12655\"}","{\"datainfo\":{\"deletedbyinference\":false,\"inferenceprovenance\":\"\",\"inferred\":false,\"invisible\":false,\"provenanceaction\":{\"classid\":\"sysimport:crosswalk:repository\",\"classname\":\"Harvested\",\"schemeid\":\"dnet:provenanceActions\",\"schemename\":\"dnet:provenanceActions\"},\"trust\":\"0.9\"},\"qualifier\":{\"classid\":\"handle\",\"classname\":\"Handle\",\"schemeid\":\"dnet:pid_types\",\"schemename\":\"dnet:pid_types\"},\"value\":\"1854/LU-8523529\"}"), "authors");
-
-		double result = jsonListMatch.compare(a, b, conf);
-
-		assertEquals(0.25, result);
-
-		params.put("mode", "count");
-		jsonListMatch = new JsonListMatch(params);
-		result = jsonListMatch.compare(a, b, conf);
-
-		assertEquals(1.0, result);
-	}
-
-	@Test
-	public void domainExactMatch() {
-
-		DomainExactMatch domainExactMatch = new DomainExactMatch(params);
-		Field a = url("http://www.flowrepository.org");
-		Field b = url("http://flowrepository.org/");
-
-		double compare = domainExactMatch.compare(a, b, conf);
-		System.out.println("compare = " + compare);
-
-	}
-
-	@Test
-	public void cosineSimilarity() {
-
-		CosineSimilarity cosineSimilarity = new CosineSimilarity(params);
-
-		Field a = new FieldValueImpl(Type.DoubleArray, "array", new double[]{1,2,3});
-		Field b = new FieldValueImpl(Type.DoubleArray, "array", new double[]{1,2,3});
-
-		double compare = cosineSimilarity.compare(a, b, conf);
-
-		System.out.println("compare = " + compare);
-	}
-
-
 }
--- a/dnet-pace-core/src/test/java/eu/dnetlib/pace/config/ConfigTest.java
+++ b/dnet-pace-core/src/test/java/eu/dnetlib/pace/config/ConfigTest.java
@ -2,12 +2,10 @@ package eu.dnetlib.pace.config;


 import eu.dnetlib.pace.AbstractPaceTest;
-import eu.dnetlib.pace.clustering.BlacklistAwareClusteringCombiner;
 import eu.dnetlib.pace.clustering.ClusteringClass;
 import eu.dnetlib.pace.clustering.ClusteringCombiner;
 import eu.dnetlib.pace.model.Field;
 import eu.dnetlib.pace.model.FieldList;
-import eu.dnetlib.pace.model.FieldValue;
 import eu.dnetlib.pace.model.MapDocument;
 import eu.dnetlib.pace.tree.JsonListMatch;
 import eu.dnetlib.pace.tree.support.AggType;
@ -21,7 +19,10 @@ import static org.junit.jupiter.api.Assertions.assertEquals;
 import static org.junit.jupiter.api.Assertions.assertNotNull;
 import static org.junit.jupiter.api.Assertions.assertTrue;

-import java.util.*;
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
 import java.util.stream.Collectors;


@ -83,7 +84,7 @@ public class ConfigTest extends AbstractPaceTest {
 	}

 	@Test
-	public void asMapDocumentTest1() {
+	public void asMapDocumentTest() {

 		DedupConfig dedupConf = DedupConfig.load(readFromClasspath("publication.current.conf.json"));

@ -99,21 +100,9 @@ public class ConfigTest extends AbstractPaceTest {

 		System.out.println("mapDocument = " + mapDocument.getFieldMap().get("title").stringValue());

+
    }

-	@Test
-	public void authorAsMapDocument() {
-
-		DedupConfig dedupConf = DedupConfig.load(readFromClasspath("author.fdup.conf.json"));
-
-		final String json = readFromClasspath("author.json");
-
-		final MapDocument mapDocument = MapDocumentUtil.asMapDocumentWithJPath(dedupConf, json);
-
-		System.out.println("mapDocument = " + Arrays.toString(((FieldValue) mapDocument.getFieldMap().get("topics")).doubleArrayValue()));
-
-	}
-
    @Test
    public  void testJPath()  {
        final String json = readFromClasspath("organization.json");
@ -139,23 +128,6 @@ public class ConfigTest extends AbstractPaceTest {
 		assertEquals("doi", combine[2].split(":")[1]);
 	}

-	@Test
-	public void filterAndCombineTest() {
-
-		DedupConfig dedupConf = DedupConfig.load(readFromClasspath("pub.prod.conf.json"));
-
-		final String json = readFromClasspath("publication.example.json");
-
-		final MapDocument mapDocument = MapDocumentUtil.asMapDocumentWithJPath(dedupConf, json);
-
-		Collection<String> strings = BlacklistAwareClusteringCombiner.filterAndCombine(mapDocument, dedupConf);
-
-		for (String s: strings) {
-			System.out.println("s = " + s);
-		}
-
-	}
-
 	@Test
 	public void crossCompareTest() {

--- a/dnet-pace-core/src/test/java/eu/dnetlib/pace/util/UtilTest.java
+++ b/dnet-pace-core/src/test/java/eu/dnetlib/pace/util/UtilTest.java
@ -1,19 +1,17 @@
 package eu.dnetlib.pace.util;

-import eu.dnetlib.pace.model.Person;
 import org.junit.jupiter.api.*;

 import java.util.HashMap;
 import java.util.Map;
-import static org.junit.jupiter.api.Assertions.assertEquals;

 public class UtilTest {

-    static Map<String, String> params;
+    Map<String, String> params;

    @BeforeAll
-    public static void setUp(){
-        params = new HashMap<>();
+    public void setUp(){
+        params = new HashMap<String, String>();
    }

    @Test
@ -22,17 +20,4 @@ public class UtilTest {
        paceResolver.getComparator("keywordMatch", params);
    }

-    @Test
-    public void personTest() {
-        Person p = new Person("j. f. kennedy", false);
-
-        assertEquals("kennedy", p.getSurnameString());
-        assertEquals("j f", p.getNameString());
-
-        p = new Person("Guan-Hua Du", false);
-
-        System.out.println("surname = " + p.getSurnameString());
-        System.out.println("name = " + p.getNameString());
-    }
-
 }
--- a/dnet-pace-core/src/test/resources/eu/dnetlib/pace/config/author.fdup.conf.json
+++ b/dnet-pace-core/src/test/resources/eu/dnetlib/pace/config/author.fdup.conf.json
@ -1,134 +0,0 @@
-{
-  "wf": {
-    "threshold": "0.99",
-    "dedupRun": "001",
-    "entityType": "author",
-    "subEntityType": "author",
-    "subEntityValue": "author",
-    "orderField": "fullname",
-    "queueMaxSize": "200",
-    "groupMaxSize": "100",
-    "maxChildren": "100",
-    "slidingWindowSize": "50",
-    "rootBuilder": [
-      "result",
-      "resultProject_outcome_isProducedBy",
-      "resultResult_publicationDataset_isRelatedTo",
-      "resultResult_similarity_isAmongTopNSimilarDocuments",
-      "resultResult_similarity_hasAmongTopNSimilarDocuments",
-      "resultOrganization_affiliation_isAffiliatedWith",
-      "resultResult_part_hasPart",
-      "resultResult_part_isPartOf",
-      "resultResult_supplement_isSupplementTo",
-      "resultResult_supplement_isSupplementedBy",
-      "resultResult_version_isVersionOf"
-    ],
-    "includeChildren": "true",
-    "maxIterations": 20,
-    "idPath": "$.id"
-  },
-  "pace": {
-    "clustering" : [
-      { "name" : "lnfi", "fields" : [ "fullname" ], "params" : {} }
-    ],
-    "decisionTree": {
-      "start": {
-        "fields": [
-          {
-            "field": "orcid",
-            "comparator": "exactMatch",
-            "weight": 1.0,
-            "countIfUndefined": "true",
-            "params": {}
-          }
-        ],
-        "threshold": 1.0,
-        "aggregation": "MAX",
-        "positive": "MATCH",
-        "negative": "NO_MATCH",
-        "undefined": "orcids",
-        "ignoreUndefined": "true"
-      },
-      "orcids": {
-        "fields": [
-          {
-            "field": "orcids",
-            "comparator": "stringListMatch",
-            "weight": 1.0,
-            "countIfUndefined": "true",
-            "params": {"type": "count"}
-          }
-        ],
-        "threshold": 3.0,
-        "aggregation": "MAX",
-        "positive": "MATCH",
-        "negative": "coauthors",
-        "undefined": "coauthors",
-        "ignoreUndefined": "true"
-      },
-      "coauthors": {
-        "fields": [
-          {
-            "field": "coauthors",
-            "comparator": "authorsMatch",
-            "weight": 1.0,
-            "countIfUndefined": "true",
-            "params": {"type": "count"}
-          }
-        ],
-        "threshold": 1.0,
-        "aggregation": "MAX",
-        "positive": "topicsMatch",
-        "negative": "NO_MATCH",
-        "undefined": "topicsMatch",
-        "ignoreUndefined": "true"
-      },
-      "topicsMatch": {
-        "fields": [
-          {
-            "field": "topics",
-            "comparator": "cosineSimilarity",
-            "weight": 1.0,
-            "countIfUndefined": "true",
-            "params": {}
-          }
-        ],
-        "threshold": 1.0,
-        "aggregation": "MAX",
-        "positive": "MATCH",
-        "negative": "NO_MATCH",
-        "undefined": "NO_MATCH",
-        "ignoreUndefined": "false"
-      }
-    },
-    "model": [
-      {
-        "name": "topics",
-        "type": "DoubleArray",
-        "path": "$.topics"
-      },
-      {
-        "name": "fullname",
-        "type": "String",
-        "path": "$.fullname"
-      },
-      {
-        "name": "orcid",
-        "type": "String",
-        "path": "$.orcid"
-      },
-      {
-        "name": "coauthors",
-        "type": "List",
-        "path": "$.coAuthors[*].fullname"
-      },
-      {
-        "name": "orcids",
-        "type": "List",
-        "path": "$.coAuthors[*].orcid"
-      }
-    ],
-    "blacklists": {},
-    "synonyms": {}
-  }
-}
--- a/dnet-pace-core/src/test/resources/eu/dnetlib/pace/config/author.json
+++ b/dnet-pace-core/src/test/resources/eu/dnetlib/pace/config/author.json
@ -1 +0,0 @@
-{"fullname":"Zaragoza, Maria Cleofé","firstname":"Maria Cleofé","lastname":"Zaragoza","coAuthors":[{"fullname":"Cambras, Trinitat","lastname":"Cambras","firstname":"Trinitat","orcid":"0000-0002-9009-4690"},{"fullname":"Castro-Marrero, Jesús","lastname":"Castro-Marrero","firstname":"Jesús","orcid":""},{"fullname":"Díez-Noguera, Antoni","lastname":"Díez-Noguera","firstname":"Antoni","orcid":""},{"fullname":"Alegre, José","lastname":"Alegre","firstname":"José","orcid":"0000-0002-7582-7585"}],"topics":[0.9522090839562252,0.04779091604377485],"orcid":"0000-0002-9797-0219","id":"author::1a10826c83c7f9f0dcebe7df05e37a2a","pubId":"50|pmid________::db7fd19db5a620eafad40cfb97f9690d"}
--- a/dnet-pace-core/src/test/resources/eu/dnetlib/pace/config/pub.instancetype.tree.conf.json
+++ b/dnet-pace-core/src/test/resources/eu/dnetlib/pace/config/pub.instancetype.tree.conf.json
@ -1,442 +0,0 @@
-{
-  "wf": {
-    "threshold": "0.99",
-    "dedupRun": "001",
-    "entityType": "result",
-    "subEntityType": "resulttype",
-    "subEntityValue": "publication",
-    "orderField": "title",
-    "queueMaxSize": "200",
-    "groupMaxSize": "100",
-    "maxChildren": "100",
-    "slidingWindowSize": "50",
-    "rootBuilder": [
-      "result",
-      "resultProject_outcome_isProducedBy",
-      "resultResult_publicationDataset_isRelatedTo",
-      "resultResult_similarity_isAmongTopNSimilarDocuments",
-      "resultResult_similarity_hasAmongTopNSimilarDocuments",
-      "resultOrganization_affiliation_isAffiliatedWith",
-      "resultResult_part_hasPart",
-      "resultResult_part_isPartOf",
-      "resultResult_supplement_isSupplementTo",
-      "resultResult_supplement_isSupplementedBy",
-      "resultResult_version_isVersionOf"
-    ],
-    "includeChildren": "true",
-    "maxIterations": 20,
-    "idPath": "$.id"
-  },
-  "pace": {
-    "clustering": [
-      {
-        "name": "wordsStatsSuffixPrefixChain",
-        "fields": [
-          "title"
-        ],
-        "params": {
-          "mod": "10"
-        }
-      },
-      {
-        "name": "lowercase",
-        "fields": [
-          "doi",
-          "altdoi"
-        ],
-        "params": {
-          "collapseOn:pid": "0"
-        }
-      }
-    ],
-    "decisionTree": {
-      "start": {
-        "fields": [
-          {
-            "field": "instance",
-            "comparator": "instanceTypeMatch",
-            "weight": 1.0,
-            "countIfUndefined": "false",
-            "params": {}
-          }
-        ],
-        "threshold": 0.5,
-        "aggregation": "MAX",
-        "positive": "layer1",
-        "negative": "NO_MATCH",
-        "undefined": "layer1",
-        "ignoreUndefined": "true"
-      },
-      "layer1": {
-        "fields": [
-          {
-            "field": "pid",
-            "comparator": "jsonListMatch",
-            "weight": 1.0,
-            "countIfUndefined": "false",
-            "params": {
-              "jpath_value": "$.value",
-              "jpath_classid": "$.qualifier.classid"
-            }
-          },
-          {
-            "field": "pid",
-            "comparator": "jsonListMatch",
-            "weight": 1.0,
-            "countIfUndefined": "false",
-            "params": {
-              "jpath_value": "$.value",
-              "jpath_classid": "$.qualifier.classid",
-              "crossCompare": "alternateid"
-            }
-          }
-        ],
-        "threshold": 0.5,
-        "aggregation": "MAX",
-        "positive": "layer2",
-        "negative": "layer3",
-        "undefined": "layer3",
-        "ignoreUndefined": "true"
-      },
-      "layer2": {
-        "fields": [
-          {
-            "field": "title",
-            "comparator": "levensteinTitle",
-            "weight": 1.0,
-            "countIfUndefined": "true",
-            "params": {}
-          }
-        ],
-        "threshold": 0.9,
-        "aggregation": "AVG",
-        "positive": "MATCH",
-        "negative": "NO_MATCH",
-        "undefined": "NO_MATCH",
-        "ignoreUndefined": "true"
-      },
-      "layer3": {
-        "fields": [
-          {
-            "field": "title",
-            "comparator": "titleVersionMatch",
-            "weight": 1.0,
-            "countIfUndefined": "false",
-            "params": {}
-          },
-          {
-            "field": "authors",
-            "comparator": "sizeMatch",
-            "weight": 1.0,
-            "countIfUndefined": "false",
-            "params": {}
-          }
-        ],
-        "threshold": 1.0,
-        "aggregation": "AND",
-        "positive": "layer4",
-        "negative": "NO_MATCH",
-        "undefined": "layer4",
-        "ignoreUndefined": "false"
-      },
-      "layer4": {
-        "fields": [
-          {
-            "field": "title",
-            "comparator": "levensteinTitle",
-            "weight": 1.0,
-            "countIfUndefined": "true",
-            "params": {}
-          }
-        ],
-        "threshold": 0.99,
-        "aggregation": "AVG",
-        "positive": "MATCH",
-        "negative": "NO_MATCH",
-        "undefined": "NO_MATCH",
-        "ignoreUndefined": "true"
-      }
-    },
-    "model": [
-      {
-        "name": "doi",
-        "type": "String",
-        "path": "$.instance[*].pid[?(@.qualifier.classid == 'doi')].value"
-      },
-      {
-        "name": "altdoi",
-        "type": "String",
-        "path": "$.instance[*].alternateIdentifier[?(@.qualifier.classid == 'doi')].value"
-      },
-      {
-        "name": "pid",
-        "type": "JSON",
-        "path": "$.instance[*].pid[*]",
-        "overrideMatch": "true"
-      },
-      {
-        "name": "alternateid",
-        "type": "JSON",
-        "path": "$.instance[*].alternateIdentifier[*]",
-        "overrideMatch": "true"
-      },
-      {
-        "name": "title",
-        "type": "String",
-        "path": "$.title[?(@.qualifier.classid == 'main title')].value",
-        "length": 250,
-        "size": 5
-      },
-      {
-        "name": "authors",
-        "type": "List",
-        "path": "$.author[*].fullname",
-        "size": 200
-      },
-      {
-        "name": "resulttype",
-        "type": "String",
-        "path": "$.resulttype.classid"
-      },
-      {
-        "name": "instance",
-        "type": "List",
-        "path": "$.instance[*].instancetype.classname"
-      }
-    ],
-    "blacklists": {
-      "title": [
-        "(?i)^Data Management Plan",
-        "^Inside Front Cover$",
-        "(?i)^Poster presentations$",
-        "^THE ASSOCIATION AND THE GENERAL MEDICAL COUNCIL$",
-        "^Problems with perinatal pathology\\.?$",
-        "(?i)^Cases? of Puerperal Convulsions$",
-        "(?i)^Operative Gyna?ecology$",
-        "(?i)^Mind the gap\\!?\\:?$",
-        "^Chronic fatigue syndrome\\.?$",
-        "^Cartas? ao editor Letters? to the Editor$",
-        "^Note from the Editor$",
-        "^Anesthesia Abstract$",
-        "^Annual report$",
-        "(?i)^“?THE RADICAL PREVENTION OF VENEREAL DISEASE\\.?”?$",
-        "(?i)^Graph and Table of Infectious Diseases?$",
-        "^Presentation$",
-        "(?i)^Reviews and Information on Publications$",
-        "(?i)^PUBLIC HEALTH SERVICES?$",
-        "(?i)^COMBINED TEXT-?BOOK OF OBSTETRICS AND GYN(Æ|ae)COLOGY$",
-        "(?i)^Adrese autora$",
-        "(?i)^Systematic Part .*\\. Catalogus Fossilium Austriae, Band 2: Echinoidea neogenica$",
-        "(?i)^Acknowledgement to Referees$",
-        "(?i)^Behçet's disease\\.?$",
-        "(?i)^Isolation and identification of restriction endonuclease.*$",
-        "(?i)^CEREBROVASCULAR DISEASES?.?$",
-        "(?i)^Screening for abdominal aortic aneurysms?\\.?$",
-        "^Event management$",
-        "(?i)^Breakfast and Crohn's disease.*\\.?$",
-        "^Cálculo de concentraciones en disoluciones acuosas. Ejercicio interactivo\\..*\\.$",
-        "(?i)^Genetic and functional analyses of SHANK2 mutations suggest a multiple hit model of Autism spectrum disorders?\\.?$",
-        "^Gushi hakubutsugaku$",
-        "^Starobosanski nadpisi u Bosni i Hercegovini \\(.*\\)$",
-        "^Intestinal spirocha?etosis$",
-        "^Treatment of Rodent Ulcer$",
-        "(?i)^\\W*Cloud Computing\\W*$",
-        "^Compendio mathematico : en que se contienen todas las materias mas principales de las Ciencias que tratan de la cantidad$",
-        "^Free Communications, Poster Presentations: Session [A-F]$",
-        "^“The Historical Aspects? of Quackery\\.?”$",
-        "^A designated centre for people with disabilities operated by St John of God Community Services (Limited|Ltd), Louth$",
-        "^P(er|re)-Mile Premiums for Auto Insurance\\.?$",
-        "(?i)^Case Report$",
-        "^Boletín Informativo$",
-        "(?i)^Glioblastoma Multiforme$",
-        "(?i)^Nuevos táxones animales descritos en la península Ibérica y Macaronesia desde 1994 \\(.*\\)$",
-        "^Zaměstnanecké výhody$",
-        "(?i)^The Economics of Terrorism and Counter-Terrorism: A Survey \\(Part .*\\)$",
-        "(?i)^Carotid body tumours?\\.?$",
-        "(?i)^\\[Españoles en Francia : La condición Emigrante.*\\]$",
-        "^Avant-propos$",
-        "(?i)^St\\. Patrick's Cathedral, Dublin, County Dublin - Head(s)? and Capital(s)?$",
-        "(?i)^St\\. Patrick's Cathedral, Dublin, County Dublin - Bases?$",
-        "(?i)^PUBLIC HEALTH VERSUS THE STATE$",
-        "^Viñetas de Cortázar$",
-        "(?i)^Search for heavy neutrinos and W(\\[|_|\\(|_\\{|-)?R(\\]|\\)|\\})? bosons with right-handed couplings in a left-right symmetric model in pp collisions at.*TeV(\\.)?$",
-        "(?i)^Measurement of the pseudorapidity and centrality dependence of the transverse energy density in Pb(-?)Pb collisions at.*tev(\\.?)$",
-        "(?i)^Search for resonances decaying into top-quark pairs using fully hadronic decays in pp collisions with ATLAS at.*TeV$",
-        "(?i)^Search for neutral minimal supersymmetric standard model Higgs bosons decaying to tau pairs in pp collisions at.*tev$",
-        "(?i)^Relatório de Estágio (de|em) Angiologia e Cirurgia Vascular$",
-        "^Aus der AGMB$",
-        "^Znanstveno-stručni prilozi$",
-        "(?i)^Zhodnocení finanční situace podniku a návrhy na zlepšení$",
-        "(?i)^Evaluation of the Financial Situation in the Firm and Proposals to its Improvement$",
-        "(?i)^Hodnocení finanční situace podniku a návrhy na její zlepšení$",
-        "^Finanční analýza podniku$",
-        "^Financial analysis( of business)?$",
-        "(?i)^Textbook of Gyn(a)?(Æ)?(e)?cology$",
-        "^Jikken nihon shūshinsho$",
-        "(?i)^CORONER('|s)(s|') INQUESTS$",
-        "(?i)^(Μελέτη παραγόντων )?risk management( για ανάπτυξη και εφαρμογή ενός πληροφοριακού συστήματος| και ανάπτυξη συστήματος)?$",
-        "(?i)^Consultants' contract(s)?$",
-        "(?i)^Upute autorima$",
-        "(?i)^Bijdrage tot de Kennis van den Godsdienst der Dajaks van Lan(d|f)ak en Tajan$",
-        "^Joshi shin kokubun$",
-        "^Kōtō shōgaku dokuhon nōson'yō$",
-        "^Jinjō shōgaku shōka$",
-        "^Shōgaku shūjichō$",
-        "^Nihon joshi dokuhon$",
-        "^Joshi shin dokuhon$",
-        "^Chūtō kanbun dokuhon$",
-        "^Wabun dokuhon$",
-        "(?i)^(Analysis of economy selected village or town|Rozbor hospodaření vybrané obce či města)$",
-        "(?i)^cardiac rehabilitation$",
-        "(?i)^Analytical summary$",
-        "^Thesaurus resolutionum Sacrae Congregationis Concilii$",
-        "(?i)^Sumario analítico(\\s{1})?(Analitic summary)?$",
-        "^Prikazi i osvrti$",
-        "^Rodinný dům s provozovnou$",
-        "^Family house with an establishment$",
-        "^Shinsei chūtō shin kokugun$",
-        "^Pulmonary alveolar proteinosis(\\.?)$",
-        "^Shinshū kanbun$",
-        "^Viñeta(s?) de Rodríguez$",
-        "(?i)^RUBRIKA UREDNIKA$",
-        "^A Matching Model of the Academic Publication Market$",
-        "^Yōgaku kōyō$",
-        "^Internetový marketing$",
-        "^Internet marketing$",
-        "^Chūtō kokugo dokuhon$",
-        "^Kokugo dokuhon$",
-        "^Antibiotic Cover for Dental Extraction(s?)$",
-        "^Strategie podniku$",
-        "^Strategy of an Enterprise$",
-        "(?i)^respiratory disease(s?)(\\.?)$",
-        "^Award(s?) for Gallantry in Civil Defence$",
-        "^Podniková kultura$",
-        "^Corporate Culture$",
-        "^Severe hyponatraemia in hospital inpatient(s?)(\\.?)$",
-        "^Pracovní motivace$",
-        "^Work Motivation$",
-        "^Kaitei kōtō jogaku dokuhon$",
-        "^Konsolidovaná účetní závěrka$",
-        "^Consolidated Financial Statements$",
-        "(?i)^intracranial tumour(s?)$",
-        "^Climate Change Mitigation Options and Directed Technical Change: A Decentralized Equilibrium Analysis$",
-        "^\\[CERVECERIAS MAHOU(\\.|\\:) INTERIOR\\] \\[Material gráfico\\]$",
-        "^Housing Market Dynamics(\\:|\\.) On the Contribution of Income Shocks and Credit Constraint(s?)$",
-        "^\\[Funciones auxiliares de la música en Radio París,.*\\]$",
-        "^Úroveň motivačního procesu jako způsobu vedení lidí$",
-        "^The level of motivation process as a leadership$",
-        "^Pay-beds in N(\\.?)H(\\.?)S(\\.?) Hospitals$",
-        "(?i)^news and events$",
-        "(?i)^NOVOSTI I DOGAĐAJI$",
-        "^Sansū no gakushū$",
-        "^Posouzení informačního systému firmy a návrh změn$",
-        "^Information System Assessment and Proposal for ICT Modification$",
-        "^Stresové zatížení pracovníků ve vybrané profesi$",
-        "^Stress load in a specific job$",
-        "^Sunday: Poster Sessions, Pt.*$",
-        "^Monday: Poster Sessions, Pt.*$",
-        "^Wednesday: Poster Sessions, Pt.*",
-        "^Tuesday: Poster Sessions, Pt.*$",
-        "^Analýza reklamy$",
-        "^Analysis of advertising$",
-        "^Shōgaku shūshinsho$",
-        "^Shōgaku sansū$",
-        "^Shintei joshi kokubun$",
-        "^Taishō joshi kokubun dokuhon$",
-        "^Joshi kokubun$",
-        "^Účetní uzávěrka a účetní závěrka v ČR$",
-        "(?i)^The \"?Causes\"? of Cancer$",
-        "^Normas para la publicación de artículos$",
-        "^Editor('|s)(s|') [Rr]eply$",
-        "^Editor(’|s)(s|’) letter$",
-        "^Redaktoriaus žodis$",
-        "^DISCUSSION ON THE PRECEDING PAPER$",
-        "^Kōtō shōgaku shūshinsho jidōyō$",
-        "^Shōgaku nihon rekishi$",
-        "^(Theory of the flow of action currents in isolated myelinated nerve fibers).*$",
-        "^Préface$",
-        "^Occupational [Hh]ealth [Ss]ervices.$",
-        "^In Memoriam Professor Toshiyuki TAKESHIMA$",
-        "^Účetní závěrka ve vybraném podniku.*$",
-        "^Financial statements in selected company$",
-        "^Abdominal [Aa]ortic [Aa]neurysms.*$",
-        "^Pseudomyxoma peritonei$",
-        "^Kazalo autora$",
-        "(?i)^uvodna riječ$",
-        "^Motivace jako způsob vedení lidí$",
-        "^Motivation as a leadership$",
-        "^Polyfunkční dům$",
-        "^Multi\\-funkcional building$",
-        "^Podnikatelský plán$",
-        "(?i)^Podnikatelský záměr$",
-        "(?i)^Business Plan$",
-        "^Oceňování nemovitostí$",
-        "^Marketingová komunikace$",
-        "^Marketing communication$",
-        "^Sumario Analítico$",
-        "^Riječ uredništva$",
-        "^Savjetovanja i priredbe$",
-        "^Índice$",
-        "^(Starobosanski nadpisi).*$",
-        "^Vzdělávání pracovníků v organizaci$",
-        "^Staff training in organization$",
-        "^(Life Histories of North American Geometridae).*$",
-        "^Strategická analýza podniku$",
-        "^Strategic Analysis of an Enterprise$",
-        "^Sadržaj$",
-        "^Upute suradnicima$",
-        "^Rodinný dům$",
-        "(?i)^Fami(l)?ly house$",
-        "^Upute autorima$",
-        "^Strategic Analysis$",
-        "^Finanční analýza vybraného podniku$",
-        "^Finanční analýza$",
-        "^Riječ urednika$",
-        "(?i)^Content(s?)$",
-        "(?i)^Inhalt$",
-        "^Jinjō shōgaku shūshinsho jidōyō$",
-        "(?i)^Index$",
-        "^Chūgaku kokubun kyōkasho$",
-        "^Retrato de una mujer$",
-        "^Retrato de un hombre$",
-        "^Kōtō shōgaku dokuhon$",
-        "^Shotōka kokugo$",
-        "^Shōgaku dokuhon$",
-        "^Jinjō shōgaku kokugo dokuhon$",
-        "^Shinsei kokugo dokuhon$",
-        "^Teikoku dokuhon$",
-        "^Instructions to Authors$",
-        "^KİTAP TAHLİLİ$",
-        "^PRZEGLĄD PIŚMIENNICTWA$",
-        "(?i)^Presentación$",
-        "^İçindekiler$",
-        "(?i)^Tabl?e of contents$",
-        "^(CODICE DEL BEATO DE LOS REYES FERNANDO I Y SANCHA).*$",
-        "^(\\[MADRID\\. BIBL\\. NAC\\. N.*KING FERDINAND I.*FROM SAN ISIDORO DE LEON\\. FACUNDUS SCRIPSIT DATED.*\\]).*",
-        "^Editorial( Board)?$",
-        "(?i)^Editorial \\(English\\)$",
-        "^Editörden$",
-        "^(Corpus Oral Dialectal \\(COD\\)\\.).*$",
-        "^(Kiri Karl Morgensternile).*$",
-        "^(\\[Eksliibris Aleksandr).*\\]$",
-        "^(\\[Eksliibris Aleksandr).*$",
-        "^(Eksliibris Aleksandr).*$",
-        "^(Kiri A\\. de Vignolles).*$",
-        "^(2 kirja Karl Morgensternile).*$",
-        "^(Pirita kloostri idaosa arheoloogilised).*$",
-        "^(Kiri tundmatule).*$",
-        "^(Kiri Jenaer Allgemeine Literaturzeitung toimetusele).*$",
-        "^(Eksliibris Nikolai Birukovile).*$",
-        "^(Eksliibris Nikolai Issakovile).*$",
-        "^(WHP Cruise Summary Information of section).*$",
-        "^(Measurement of the top quark\\-pair production cross section with ATLAS in pp collisions at).*$",
-        "^(Measurement of the spin\\-dependent structure function).*",
-        "(?i)^.*authors['’′]? reply\\.?$",
-        "(?i)^.*authors['’′]? response\\.?$",
-        "^Data [mM]anagement [sS]ervices\\.$",
-        "Research and Advanced Technology for Digital Libraries"
-      ]
-    },
-    "synonyms": {}
-  }
-}
--- a/dnet-pace-core/src/test/resources/eu/dnetlib/pace/config/pub.new.tree.conf.json
+++ b/dnet-pace-core/src/test/resources/eu/dnetlib/pace/config/pub.new.tree.conf.json
@ -1,465 +0,0 @@
-{
-  "wf": {
-    "threshold": "0.99",
-    "dedupRun": "001",
-    "entityType": "result",
-    "subEntityType": "resulttype",
-    "subEntityValue": "publication",
-    "orderField": "title",
-    "queueMaxSize": "200",
-    "groupMaxSize": "100",
-    "maxChildren": "100",
-    "slidingWindowSize": "50",
-    "rootBuilder": [
-      "result",
-      "resultProject_outcome_isProducedBy",
-      "resultResult_publicationDataset_isRelatedTo",
-      "resultResult_similarity_isAmongTopNSimilarDocuments",
-      "resultResult_similarity_hasAmongTopNSimilarDocuments",
-      "resultOrganization_affiliation_isAffiliatedWith",
-      "resultResult_part_hasPart",
-      "resultResult_part_isPartOf",
-      "resultResult_supplement_isSupplementTo",
-      "resultResult_supplement_isSupplementedBy",
-      "resultResult_version_isVersionOf"
-    ],
-    "includeChildren": "true",
-    "maxIterations": 20,
-    "idPath": "$.id"
-  },
-  "pace": {
-    "clustering": [
-      {
-        "name": "wordsStatsSuffixPrefixChain",
-        "fields": [
-          "title"
-        ],
-        "params": {
-          "mod": "10"
-        }
-      },
-      {
-        "name": "lowercase",
-        "fields": [
-          "doi",
-          "altdoi"
-        ],
-        "params": {
-          "collapseOn:pid": "0"
-        }
-      }
-    ],
-    "decisionTree": {
-      "start": {
-        "fields": [
-          {
-            "field": "pid",
-            "comparator": "jsonListMatch",
-            "weight": 1.0,
-            "countIfUndefined": "false",
-            "params": {
-              "jpath_value": "$.value",
-              "jpath_classid": "$.qualifier.classid",
-              "mode": "count"
-            }
-          }
-        ],
-        "threshold": 1.0,
-        "aggregation": "MAX",
-        "positive": "MATCH",
-        "negative": "NO_MATCH",
-        "undefined": "instanceTypeCheck",
-        "ignoreUndefined": "false"
-      },
-      "instanceTypeCheck": {
-        "fields": [
-          {
-            "field": "instance",
-            "comparator": "instanceTypeMatch",
-            "weight": 1.0,
-            "countIfUndefined": "false",
-            "params": {}
-          }
-        ],
-        "threshold": 0.5,
-        "aggregation": "MAX",
-        "positive": "pidVSaltid",
-        "negative": "NO_MATCH",
-        "undefined": "pidVSaltid",
-        "ignoreUndefined": "true"
-      },
-      "pidVSaltid": {
-        "fields": [
-          {
-            "field": "pid",
-            "comparator": "jsonListMatch",
-            "weight": 1.0,
-            "countIfUndefined": "false",
-            "params": {
-              "jpath_value": "$.value",
-              "jpath_classid": "$.qualifier.classid",
-              "crossCompare": "alternateid",
-              "mode": "count"
-            }
-          }
-        ],
-        "threshold": 1.0,
-        "aggregation": "MAX",
-        "positive": "softCheck",
-        "negative": "earlyExits",
-        "undefined": "earlyExits",
-        "ignoreUndefined": "true"
-      },
-      "softCheck": {
-        "fields": [
-          {
-            "field": "title",
-            "comparator": "levensteinTitle",
-            "weight": 1.0,
-            "countIfUndefined": "true",
-            "params": {}
-          }
-        ],
-        "threshold": 0.9,
-        "aggregation": "AVG",
-        "positive": "MATCH",
-        "negative": "NO_MATCH",
-        "undefined": "NO_MATCH",
-        "ignoreUndefined": "true"
-      },
-      "earlyExits": {
-        "fields": [
-          {
-            "field": "title",
-            "comparator": "titleVersionMatch",
-            "weight": 1.0,
-            "countIfUndefined": "false",
-            "params": {}
-          },
-          {
-            "field": "authors",
-            "comparator": "sizeMatch",
-            "weight": 1.0,
-            "countIfUndefined": "false",
-            "params": {}
-          },
-          {
-            "field": "authors",
-            "comparator": "authorsMatch",
-            "weight": 1.0,
-            "countIfUndefined": "false",
-            "params": {
-              "surname_th": 0.99,
-              "fullname_th": 0.95,
-              "mode": "surname"
-            }
-          }
-        ],
-        "threshold": 1.0,
-        "aggregation": "AND",
-        "positive": "strongCheck",
-        "negative": "NO_MATCH",
-        "undefined": "strongCheck",
-        "ignoreUndefined": "false"
-      },
-      "strongCheck": {
-        "fields": [
-          {
-            "field": "title",
-            "comparator": "levensteinTitle",
-            "weight": 1.0,
-            "countIfUndefined": "true",
-            "params": {}
-          }
-        ],
-        "threshold": 0.99,
-        "aggregation": "AVG",
-        "positive": "MATCH",
-        "negative": "NO_MATCH",
-        "undefined": "NO_MATCH",
-        "ignoreUndefined": "true"
-      }
-    },
-    "model": [
-      {
-        "name": "doi",
-        "type": "String",
-        "path": "$.instance[*].pid[?(@.qualifier.classid == 'doi')].value"
-      },
-      {
-        "name": "altdoi",
-        "type": "String",
-        "path": "$.instance[*].alternateIdentifier[?(@.qualifier.classid == 'doi')].value"
-      },
-      {
-        "name": "pid",
-        "type": "JSON",
-        "path": "$.instance[*].pid[*]",
-        "overrideMatch": "true"
-      },
-      {
-        "name": "alternateid",
-        "type": "JSON",
-        "path": "$.instance[*].alternateIdentifier[*]",
-        "overrideMatch": "true"
-      },
-      {
-        "name": "title",
-        "type": "String",
-        "path": "$.title[?(@.qualifier.classid == 'main title')].value",
-        "length": 250,
-        "size": 5
-      },
-      {
-        "name": "authors",
-        "type": "List",
-        "path": "$.author[*].fullname",
-        "size": 200
-      },
-      {
-        "name": "resulttype",
-        "type": "String",
-        "path": "$.resulttype.classid"
-      },
-      {
-        "name": "instance",
-        "type": "List",
-        "path": "$.instance[*].instancetype.classname"
-      }
-    ],
-    "blacklists": {
-      "title": [
-        "(?i)^Data Management Plan",
-        "^Inside Front Cover$",
-        "(?i)^Poster presentations$",
-        "^THE ASSOCIATION AND THE GENERAL MEDICAL COUNCIL$",
-        "^Problems with perinatal pathology\\.?$",
-        "(?i)^Cases? of Puerperal Convulsions$",
-        "(?i)^Operative Gyna?ecology$",
-        "(?i)^Mind the gap\\!?\\:?$",
-        "^Chronic fatigue syndrome\\.?$",
-        "^Cartas? ao editor Letters? to the Editor$",
-        "^Note from the Editor$",
-        "^Anesthesia Abstract$",
-        "^Annual report$",
-        "(?i)^“?THE RADICAL PREVENTION OF VENEREAL DISEASE\\.?”?$",
-        "(?i)^Graph and Table of Infectious Diseases?$",
-        "^Presentation$",
-        "(?i)^Reviews and Information on Publications$",
-        "(?i)^PUBLIC HEALTH SERVICES?$",
-        "(?i)^COMBINED TEXT-?BOOK OF OBSTETRICS AND GYN(Æ|ae)COLOGY$",
-        "(?i)^Adrese autora$",
-        "(?i)^Systematic Part .*\\. Catalogus Fossilium Austriae, Band 2: Echinoidea neogenica$",
-        "(?i)^Acknowledgement to Referees$",
-        "(?i)^Behçet's disease\\.?$",
-        "(?i)^Isolation and identification of restriction endonuclease.*$",
-        "(?i)^CEREBROVASCULAR DISEASES?.?$",
-        "(?i)^Screening for abdominal aortic aneurysms?\\.?$",
-        "^Event management$",
-        "(?i)^Breakfast and Crohn's disease.*\\.?$",
-        "^Cálculo de concentraciones en disoluciones acuosas. Ejercicio interactivo\\..*\\.$",
-        "(?i)^Genetic and functional analyses of SHANK2 mutations suggest a multiple hit model of Autism spectrum disorders?\\.?$",
-        "^Gushi hakubutsugaku$",
-        "^Starobosanski nadpisi u Bosni i Hercegovini \\(.*\\)$",
-        "^Intestinal spirocha?etosis$",
-        "^Treatment of Rodent Ulcer$",
-        "(?i)^\\W*Cloud Computing\\W*$",
-        "^Compendio mathematico : en que se contienen todas las materias mas principales de las Ciencias que tratan de la cantidad$",
-        "^Free Communications, Poster Presentations: Session [A-F]$",
-        "^“The Historical Aspects? of Quackery\\.?”$",
-        "^A designated centre for people with disabilities operated by St John of God Community Services (Limited|Ltd), Louth$",
-        "^P(er|re)-Mile Premiums for Auto Insurance\\.?$",
-        "(?i)^Case Report$",
-        "^Boletín Informativo$",
-        "(?i)^Glioblastoma Multiforme$",
-        "(?i)^Nuevos táxones animales descritos en la península Ibérica y Macaronesia desde 1994 \\(.*\\)$",
-        "^Zaměstnanecké výhody$",
-        "(?i)^The Economics of Terrorism and Counter-Terrorism: A Survey \\(Part .*\\)$",
-        "(?i)^Carotid body tumours?\\.?$",
-        "(?i)^\\[Españoles en Francia : La condición Emigrante.*\\]$",
-        "^Avant-propos$",
-        "(?i)^St\\. Patrick's Cathedral, Dublin, County Dublin - Head(s)? and Capital(s)?$",
-        "(?i)^St\\. Patrick's Cathedral, Dublin, County Dublin - Bases?$",
-        "(?i)^PUBLIC HEALTH VERSUS THE STATE$",
-        "^Viñetas de Cortázar$",
-        "(?i)^Search for heavy neutrinos and W(\\[|_|\\(|_\\{|-)?R(\\]|\\)|\\})? bosons with right-handed couplings in a left-right symmetric model in pp collisions at.*TeV(\\.)?$",
-        "(?i)^Measurement of the pseudorapidity and centrality dependence of the transverse energy density in Pb(-?)Pb collisions at.*tev(\\.?)$",
-        "(?i)^Search for resonances decaying into top-quark pairs using fully hadronic decays in pp collisions with ATLAS at.*TeV$",
-        "(?i)^Search for neutral minimal supersymmetric standard model Higgs bosons decaying to tau pairs in pp collisions at.*tev$",
-        "(?i)^Relatório de Estágio (de|em) Angiologia e Cirurgia Vascular$",
-        "^Aus der AGMB$",
-        "^Znanstveno-stručni prilozi$",
-        "(?i)^Zhodnocení finanční situace podniku a návrhy na zlepšení$",
-        "(?i)^Evaluation of the Financial Situation in the Firm and Proposals to its Improvement$",
-        "(?i)^Hodnocení finanční situace podniku a návrhy na její zlepšení$",
-        "^Finanční analýza podniku$",
-        "^Financial analysis( of business)?$",
-        "(?i)^Textbook of Gyn(a)?(Æ)?(e)?cology$",
-        "^Jikken nihon shūshinsho$",
-        "(?i)^CORONER('|s)(s|') INQUESTS$",
-        "(?i)^(Μελέτη παραγόντων )?risk management( για ανάπτυξη και εφαρμογή ενός πληροφοριακού συστήματος| και ανάπτυξη συστήματος)?$",
-        "(?i)^Consultants' contract(s)?$",
-        "(?i)^Upute autorima$",
-        "(?i)^Bijdrage tot de Kennis van den Godsdienst der Dajaks van Lan(d|f)ak en Tajan$",
-        "^Joshi shin kokubun$",
-        "^Kōtō shōgaku dokuhon nōson'yō$",
-        "^Jinjō shōgaku shōka$",
-        "^Shōgaku shūjichō$",
-        "^Nihon joshi dokuhon$",
-        "^Joshi shin dokuhon$",
-        "^Chūtō kanbun dokuhon$",
-        "^Wabun dokuhon$",
-        "(?i)^(Analysis of economy selected village or town|Rozbor hospodaření vybrané obce či města)$",
-        "(?i)^cardiac rehabilitation$",
-        "(?i)^Analytical summary$",
-        "^Thesaurus resolutionum Sacrae Congregationis Concilii$",
-        "(?i)^Sumario analítico(\\s{1})?(Analitic summary)?$",
-        "^Prikazi i osvrti$",
-        "^Rodinný dům s provozovnou$",
-        "^Family house with an establishment$",
-        "^Shinsei chūtō shin kokugun$",
-        "^Pulmonary alveolar proteinosis(\\.?)$",
-        "^Shinshū kanbun$",
-        "^Viñeta(s?) de Rodríguez$",
-        "(?i)^RUBRIKA UREDNIKA$",
-        "^A Matching Model of the Academic Publication Market$",
-        "^Yōgaku kōyō$",
-        "^Internetový marketing$",
-        "^Internet marketing$",
-        "^Chūtō kokugo dokuhon$",
-        "^Kokugo dokuhon$",
-        "^Antibiotic Cover for Dental Extraction(s?)$",
-        "^Strategie podniku$",
-        "^Strategy of an Enterprise$",
-        "(?i)^respiratory disease(s?)(\\.?)$",
-        "^Award(s?) for Gallantry in Civil Defence$",
-        "^Podniková kultura$",
-        "^Corporate Culture$",
-        "^Severe hyponatraemia in hospital inpatient(s?)(\\.?)$",
-        "^Pracovní motivace$",
-        "^Work Motivation$",
-        "^Kaitei kōtō jogaku dokuhon$",
-        "^Konsolidovaná účetní závěrka$",
-        "^Consolidated Financial Statements$",
-        "(?i)^intracranial tumour(s?)$",
-        "^Climate Change Mitigation Options and Directed Technical Change: A Decentralized Equilibrium Analysis$",
-        "^\\[CERVECERIAS MAHOU(\\.|\\:) INTERIOR\\] \\[Material gráfico\\]$",
-        "^Housing Market Dynamics(\\:|\\.) On the Contribution of Income Shocks and Credit Constraint(s?)$",
-        "^\\[Funciones auxiliares de la música en Radio París,.*\\]$",
-        "^Úroveň motivačního procesu jako způsobu vedení lidí$",
-        "^The level of motivation process as a leadership$",
-        "^Pay-beds in N(\\.?)H(\\.?)S(\\.?) Hospitals$",
-        "(?i)^news and events$",
-        "(?i)^NOVOSTI I DOGAĐAJI$",
-        "^Sansū no gakushū$",
-        "^Posouzení informačního systému firmy a návrh změn$",
-        "^Information System Assessment and Proposal for ICT Modification$",
-        "^Stresové zatížení pracovníků ve vybrané profesi$",
-        "^Stress load in a specific job$",
-        "^Sunday: Poster Sessions, Pt.*$",
-        "^Monday: Poster Sessions, Pt.*$",
-        "^Wednesday: Poster Sessions, Pt.*",
-        "^Tuesday: Poster Sessions, Pt.*$",
-        "^Analýza reklamy$",
-        "^Analysis of advertising$",
-        "^Shōgaku shūshinsho$",
-        "^Shōgaku sansū$",
-        "^Shintei joshi kokubun$",
-        "^Taishō joshi kokubun dokuhon$",
-        "^Joshi kokubun$",
-        "^Účetní uzávěrka a účetní závěrka v ČR$",
-        "(?i)^The \"?Causes\"? of Cancer$",
-        "^Normas para la publicación de artículos$",
-        "^Editor('|s)(s|') [Rr]eply$",
-        "^Editor(’|s)(s|’) letter$",
-        "^Redaktoriaus žodis$",
-        "^DISCUSSION ON THE PRECEDING PAPER$",
-        "^Kōtō shōgaku shūshinsho jidōyō$",
-        "^Shōgaku nihon rekishi$",
-        "^(Theory of the flow of action currents in isolated myelinated nerve fibers).*$",
-        "^Préface$",
-        "^Occupational [Hh]ealth [Ss]ervices.$",
-        "^In Memoriam Professor Toshiyuki TAKESHIMA$",
-        "^Účetní závěrka ve vybraném podniku.*$",
-        "^Financial statements in selected company$",
-        "^Abdominal [Aa]ortic [Aa]neurysms.*$",
-        "^Pseudomyxoma peritonei$",
-        "^Kazalo autora$",
-        "(?i)^uvodna riječ$",
-        "^Motivace jako způsob vedení lidí$",
-        "^Motivation as a leadership$",
-        "^Polyfunkční dům$",
-        "^Multi\\-funkcional building$",
-        "^Podnikatelský plán$",
-        "(?i)^Podnikatelský záměr$",
-        "(?i)^Business Plan$",
-        "^Oceňování nemovitostí$",
-        "^Marketingová komunikace$",
-        "^Marketing communication$",
-        "^Sumario Analítico$",
-        "^Riječ uredništva$",
-        "^Savjetovanja i priredbe$",
-        "^Índice$",
-        "^(Starobosanski nadpisi).*$",
-        "^Vzdělávání pracovníků v organizaci$",
-        "^Staff training in organization$",
-        "^(Life Histories of North American Geometridae).*$",
-        "^Strategická analýza podniku$",
-        "^Strategic Analysis of an Enterprise$",
-        "^Sadržaj$",
-        "^Upute suradnicima$",
-        "^Rodinný dům$",
-        "(?i)^Fami(l)?ly house$",
-        "^Upute autorima$",
-        "^Strategic Analysis$",
-        "^Finanční analýza vybraného podniku$",
-        "^Finanční analýza$",
-        "^Riječ urednika$",
-        "(?i)^Content(s?)$",
-        "(?i)^Inhalt$",
-        "^Jinjō shōgaku shūshinsho jidōyō$",
-        "(?i)^Index$",
-        "^Chūgaku kokubun kyōkasho$",
-        "^Retrato de una mujer$",
-        "^Retrato de un hombre$",
-        "^Kōtō shōgaku dokuhon$",
-        "^Shotōka kokugo$",
-        "^Shōgaku dokuhon$",
-        "^Jinjō shōgaku kokugo dokuhon$",
-        "^Shinsei kokugo dokuhon$",
-        "^Teikoku dokuhon$",
-        "^Instructions to Authors$",
-        "^KİTAP TAHLİLİ$",
-        "^PRZEGLĄD PIŚMIENNICTWA$",
-        "(?i)^Presentación$",
-        "^İçindekiler$",
-        "(?i)^Tabl?e of contents$",
-        "^(CODICE DEL BEATO DE LOS REYES FERNANDO I Y SANCHA).*$",
-        "^(\\[MADRID\\. BIBL\\. NAC\\. N.*KING FERDINAND I.*FROM SAN ISIDORO DE LEON\\. FACUNDUS SCRIPSIT DATED.*\\]).*",
-        "^Editorial( Board)?$",
-        "(?i)^Editorial \\(English\\)$",
-        "^Editörden$",
-        "^(Corpus Oral Dialectal \\(COD\\)\\.).*$",
-        "^(Kiri Karl Morgensternile).*$",
-        "^(\\[Eksliibris Aleksandr).*\\]$",
-        "^(\\[Eksliibris Aleksandr).*$",
-        "^(Eksliibris Aleksandr).*$",
-        "^(Kiri A\\. de Vignolles).*$",
-        "^(2 kirja Karl Morgensternile).*$",
-        "^(Pirita kloostri idaosa arheoloogilised).*$",
-        "^(Kiri tundmatule).*$",
-        "^(Kiri Jenaer Allgemeine Literaturzeitung toimetusele).*$",
-        "^(Eksliibris Nikolai Birukovile).*$",
-        "^(Eksliibris Nikolai Issakovile).*$",
-        "^(WHP Cruise Summary Information of section).*$",
-        "^(Measurement of the top quark\\-pair production cross section with ATLAS in pp collisions at).*$",
-        "^(Measurement of the spin\\-dependent structure function).*",
-        "(?i)^.*authors['’′]? reply\\.?$",
-        "(?i)^.*authors['’′]? response\\.?$",
-        "^Data [mM]anagement [sS]ervices\\.$",
-        "Research and Advanced Technology for Digital Libraries"
-      ]
-    },
-    "synonyms": {}
-  }
-}
--- a/dnet-pace-core/src/test/resources/eu/dnetlib/pace/config/pub.prod.conf.json
+++ b/dnet-pace-core/src/test/resources/eu/dnetlib/pace/config/pub.prod.conf.json
@ -1,402 +0,0 @@
-
-{
-  "wf": {
-    "threshold": "0.99",
-    "dedupRun": "001",
-    "entityType": "result",
-    "subEntityType": "resulttype",
-    "subEntityValue": "publication",
-    "orderField": "title",
-    "queueMaxSize": "200",
-    "groupMaxSize": "100",
-    "maxChildren": "100",
-    "slidingWindowSize": "50",
-    "rootBuilder": [
-      "result",
-      "resultProject_outcome_isProducedBy",
-      "resultResult_publicationDataset_isRelatedTo",
-      "resultResult_similarity_isAmongTopNSimilarDocuments",
-      "resultResult_similarity_hasAmongTopNSimilarDocuments",
-      "resultOrganization_affiliation_isAffiliatedWith",
-      "resultResult_part_hasPart",
-      "resultResult_part_isPartOf",
-      "resultResult_supplement_isSupplementTo",
-      "resultResult_supplement_isSupplementedBy",
-      "resultResult_version_isVersionOf"
-    ],
-    "includeChildren": "true",
-    "maxIterations": 20,
-    "idPath": "$.id"
-  },
-  "pace": {
-    "clustering" : [
-      { "name" : "wordsStatsSuffixPrefixChain", "fields" : [ "title" ], "params" : { "mod" : "10" } },
-      { "name" : "lowercase", "fields" : [ "doi", "altdoi" ], "params" : { "collapseOn:pid": "0"} }
-    ],
-    "decisionTree": {
-      "start": {
-        "fields": [
-          {
-            "field": "pid",
-            "comparator": "jsonListMatch",
-            "weight": 1.0,
-            "countIfUndefined": "false",
-            "params": {
-              "jpath_value": "$.value",
-              "jpath_classid": "$.qualifier.classid"
-            }
-          },
-          {
-            "field": "pid",
-            "comparator": "jsonListMatch",
-            "weight": 1.0,
-            "countIfUndefined": "false",
-            "params": {
-              "jpath_value": "$.value",
-              "jpath_classid": "$.qualifier.classid",
-              "crossCompare": "alternateid"
-            }
-          }
-        ],
-        "threshold": 0.5,
-        "aggregation": "MAX",
-        "positive": "layer1",
-        "negative": "layer2",
-        "undefined": "layer2",
-        "ignoreUndefined": "true"
-      },
-      "layer1": {
-        "fields": [
-          {
-            "field": "title",
-            "comparator": "levensteinTitle",
-            "weight": 1.0,
-            "countIfUndefined": "true",
-            "params": {}
-          }
-        ],
-        "threshold": 0.9,
-        "aggregation": "AVG",
-        "positive": "MATCH",
-        "negative": "NO_MATCH",
-        "undefined": "NO_MATCH",
-        "ignoreUndefined": "true"
-      },
-      "layer2": {
-        "fields": [
-          {
-            "field": "title",
-            "comparator": "titleVersionMatch",
-            "weight": 1.0,
-            "countIfUndefined": "false",
-            "params": {}
-          },
-          {
-            "field": "authors",
-            "comparator": "sizeMatch",
-            "weight": 1.0,
-            "countIfUndefined": "false",
-            "params": {}
-          }
-        ],
-        "threshold": 1.0,
-        "aggregation": "AND",
-        "positive": "layer3",
-        "negative": "NO_MATCH",
-        "undefined": "layer3",
-        "ignoreUndefined": "false"
-      },
-      "layer3": {
-        "fields": [
-          {
-            "field": "title",
-            "comparator": "levensteinTitle",
-            "weight": 1.0,
-            "countIfUndefined": "true",
-            "params": {}
-          }
-        ],
-        "threshold": 0.99,
-        "aggregation": "AVG",
-        "positive": "MATCH",
-        "negative": "NO_MATCH",
-        "undefined": "NO_MATCH",
-        "ignoreUndefined": "true"
-      }
-    },
-    "model": [
-      {
-        "name": "doi",
-        "type": "String",
-        "path": "$.instance.pid[?(@.qualifier.classid == 'doi')].value"
-      },
-      {
-        "name": "altdoi",
-        "type": "String",
-        "path": "$.instance.alternateIdentifier[?(@.qualifier.classid == 'doi')].value"
-      },
-      {
-        "name": "pid",
-        "type": "JSON",
-        "path": "$.instance.pid",
-        "overrideMatch": "true"
-      },
-      {
-        "name": "alternateid",
-        "type": "JSON",
-        "path": "$.instance.alternateIdentifier",
-        "overrideMatch": "true"
-      },
-      {
-        "name": "title",
-        "type": "String",
-        "path": "$.title[?(@.qualifier.classid == 'main title')].value",
-        "length": 250,
-        "size": 5
-      },
-      {
-        "name": "authors",
-        "type": "List",
-        "path": "$.author[*].fullname",
-        "size": 200
-      },
-      {
-        "name": "resulttype",
-        "type": "String",
-        "path": "$.resulttype.classid"
-      }
-    ],
-    "blacklists": {
-      "title": [
-        "(?i)^Data Management Plan",
-        "^Inside Front Cover$",
-        "(?i)^Poster presentations$",
-        "^THE ASSOCIATION AND THE GENERAL MEDICAL COUNCIL$",
-        "^Problems with perinatal pathology\\.?$",
-        "(?i)^Cases? of Puerperal Convulsions$",
-        "(?i)^Operative Gyna?ecology$",
-        "(?i)^Mind the gap\\!?\\:?$",
-        "^Chronic fatigue syndrome\\.?$",
-        "^Cartas? ao editor Letters? to the Editor$",
-        "^Note from the Editor$",
-        "^Anesthesia Abstract$",
-        "^Annual report$",
-        "(?i)^“?THE RADICAL PREVENTION OF VENEREAL DISEASE\\.?”?$",
-        "(?i)^Graph and Table of Infectious Diseases?$",
-        "^Presentation$",
-        "(?i)^Reviews and Information on Publications$",
-        "(?i)^PUBLIC HEALTH SERVICES?$",
-        "(?i)^COMBINED TEXT-?BOOK OF OBSTETRICS AND GYN(Æ|ae)COLOGY$",
-        "(?i)^Adrese autora$",
-        "(?i)^Systematic Part .*\\. Catalogus Fossilium Austriae, Band 2: Echinoidea neogenica$",
-        "(?i)^Acknowledgement to Referees$",
-        "(?i)^Behçet's disease\\.?$",
-        "(?i)^Isolation and identification of restriction endonuclease.*$",
-        "(?i)^CEREBROVASCULAR DISEASES?.?$",
-        "(?i)^Screening for abdominal aortic aneurysms?\\.?$",
-        "^Event management$",
-        "(?i)^Breakfast and Crohn's disease.*\\.?$",
-        "^Cálculo de concentraciones en disoluciones acuosas. Ejercicio interactivo\\..*\\.$",
-        "(?i)^Genetic and functional analyses of SHANK2 mutations suggest a multiple hit model of Autism spectrum disorders?\\.?$",
-        "^Gushi hakubutsugaku$",
-        "^Starobosanski nadpisi u Bosni i Hercegovini \\(.*\\)$",
-        "^Intestinal spirocha?etosis$",
-        "^Treatment of Rodent Ulcer$",
-        "(?i)^\\W*Cloud Computing\\W*$",
-        "^Compendio mathematico : en que se contienen todas las materias mas principales de las Ciencias que tratan de la cantidad$",
-        "^Free Communications, Poster Presentations: Session [A-F]$",
-        "^“The Historical Aspects? of Quackery\\.?”$",
-        "^A designated centre for people with disabilities operated by St John of God Community Services (Limited|Ltd), Louth$",
-        "^P(er|re)-Mile Premiums for Auto Insurance\\.?$",
-        "(?i)^Case Report$",
-        "^Boletín Informativo$",
-        "(?i)^Glioblastoma Multiforme$",
-        "(?i)^Nuevos táxones animales descritos en la península Ibérica y Macaronesia desde 1994 \\(.*\\)$",
-        "^Zaměstnanecké výhody$",
-        "(?i)^The Economics of Terrorism and Counter-Terrorism: A Survey \\(Part .*\\)$",
-        "(?i)^Carotid body tumours?\\.?$",
-        "(?i)^\\[Españoles en Francia : La condición Emigrante.*\\]$",
-        "^Avant-propos$",
-        "(?i)^St\\. Patrick's Cathedral, Dublin, County Dublin - Head(s)? and Capital(s)?$",
-        "(?i)^St\\. Patrick's Cathedral, Dublin, County Dublin - Bases?$",
-        "(?i)^PUBLIC HEALTH VERSUS THE STATE$",
-        "^Viñetas de Cortázar$",
-        "(?i)^Search for heavy neutrinos and W(\\[|_|\\(|_\\{|-)?R(\\]|\\)|\\})? bosons with right-handed couplings in a left-right symmetric model in pp collisions at.*TeV(\\.)?$",
-        "(?i)^Measurement of the pseudorapidity and centrality dependence of the transverse energy density in Pb(-?)Pb collisions at.*tev(\\.?)$",
-        "(?i)^Search for resonances decaying into top-quark pairs using fully hadronic decays in pp collisions with ATLAS at.*TeV$",
-        "(?i)^Search for neutral minimal supersymmetric standard model Higgs bosons decaying to tau pairs in pp collisions at.*tev$",
-        "(?i)^Relatório de Estágio (de|em) Angiologia e Cirurgia Vascular$",
-        "^Aus der AGMB$",
-        "^Znanstveno-stručni prilozi$",
-        "(?i)^Zhodnocení finanční situace podniku a návrhy na zlepšení$",
-        "(?i)^Evaluation of the Financial Situation in the Firm and Proposals to its Improvement$",
-        "(?i)^Hodnocení finanční situace podniku a návrhy na její zlepšení$",
-        "^Finanční analýza podniku$",
-        "^Financial analysis( of business)?$",
-        "(?i)^Textbook of Gyn(a)?(Æ)?(e)?cology$",
-        "^Jikken nihon shūshinsho$",
-        "(?i)^CORONER('|s)(s|') INQUESTS$",
-        "(?i)^(Μελέτη παραγόντων )?risk management( για ανάπτυξη και εφαρμογή ενός πληροφοριακού συστήματος| και ανάπτυξη συστήματος)?$",
-        "(?i)^Consultants' contract(s)?$",
-        "(?i)^Upute autorima$",
-        "(?i)^Bijdrage tot de Kennis van den Godsdienst der Dajaks van Lan(d|f)ak en Tajan$",
-        "^Joshi shin kokubun$",
-        "^Kōtō shōgaku dokuhon nōson'yō$",
-        "^Jinjō shōgaku shōka$",
-        "^Shōgaku shūjichō$",
-        "^Nihon joshi dokuhon$",
-        "^Joshi shin dokuhon$",
-        "^Chūtō kanbun dokuhon$",
-        "^Wabun dokuhon$",
-        "(?i)^(Analysis of economy selected village or town|Rozbor hospodaření vybrané obce či města)$",
-        "(?i)^cardiac rehabilitation$",
-        "(?i)^Analytical summary$",
-        "^Thesaurus resolutionum Sacrae Congregationis Concilii$",
-        "(?i)^Sumario analítico(\\s{1})?(Analitic summary)?$",
-        "^Prikazi i osvrti$",
-        "^Rodinný dům s provozovnou$",
-        "^Family house with an establishment$",
-        "^Shinsei chūtō shin kokugun$",
-        "^Pulmonary alveolar proteinosis(\\.?)$",
-        "^Shinshū kanbun$",
-        "^Viñeta(s?) de Rodríguez$",
-        "(?i)^RUBRIKA UREDNIKA$",
-        "^A Matching Model of the Academic Publication Market$",
-        "^Yōgaku kōyō$",
-        "^Internetový marketing$",
-        "^Internet marketing$",
-        "^Chūtō kokugo dokuhon$",
-        "^Kokugo dokuhon$",
-        "^Antibiotic Cover for Dental Extraction(s?)$",
-        "^Strategie podniku$",
-        "^Strategy of an Enterprise$",
-        "(?i)^respiratory disease(s?)(\\.?)$",
-        "^Award(s?) for Gallantry in Civil Defence$",
-        "^Podniková kultura$",
-        "^Corporate Culture$",
-        "^Severe hyponatraemia in hospital inpatient(s?)(\\.?)$",
-        "^Pracovní motivace$",
-        "^Work Motivation$",
-        "^Kaitei kōtō jogaku dokuhon$",
-        "^Konsolidovaná účetní závěrka$",
-        "^Consolidated Financial Statements$",
-        "(?i)^intracranial tumour(s?)$",
-        "^Climate Change Mitigation Options and Directed Technical Change: A Decentralized Equilibrium Analysis$",
-        "^\\[CERVECERIAS MAHOU(\\.|\\:) INTERIOR\\] \\[Material gráfico\\]$",
-        "^Housing Market Dynamics(\\:|\\.) On the Contribution of Income Shocks and Credit Constraint(s?)$",
-        "^\\[Funciones auxiliares de la música en Radio París,.*\\]$",
-        "^Úroveň motivačního procesu jako způsobu vedení lidí$",
-        "^The level of motivation process as a leadership$",
-        "^Pay-beds in N(\\.?)H(\\.?)S(\\.?) Hospitals$",
-        "(?i)^news and events$",
-        "(?i)^NOVOSTI I DOGAĐAJI$",
-        "^Sansū no gakushū$",
-        "^Posouzení informačního systému firmy a návrh změn$",
-        "^Information System Assessment and Proposal for ICT Modification$",
-        "^Stresové zatížení pracovníků ve vybrané profesi$",
-        "^Stress load in a specific job$",
-        "^Sunday: Poster Sessions, Pt.*$",
-        "^Monday: Poster Sessions, Pt.*$",
-        "^Wednesday: Poster Sessions, Pt.*",
-        "^Tuesday: Poster Sessions, Pt.*$",
-        "^Analýza reklamy$",
-        "^Analysis of advertising$",
-        "^Shōgaku shūshinsho$",
-        "^Shōgaku sansū$",
-        "^Shintei joshi kokubun$",
-        "^Taishō joshi kokubun dokuhon$",
-        "^Joshi kokubun$",
-        "^Účetní uzávěrka a účetní závěrka v ČR$",
-        "(?i)^The \"?Causes\"? of Cancer$",
-        "^Normas para la publicación de artículos$",
-        "^Editor('|s)(s|') [Rr]eply$",
-        "^Editor(’|s)(s|’) letter$",
-        "^Redaktoriaus žodis$",
-        "^DISCUSSION ON THE PRECEDING PAPER$",
-        "^Kōtō shōgaku shūshinsho jidōyō$",
-        "^Shōgaku nihon rekishi$",
-        "^(Theory of the flow of action currents in isolated myelinated nerve fibers).*$",
-        "^Préface$",
-        "^Occupational [Hh]ealth [Ss]ervices.$",
-        "^In Memoriam Professor Toshiyuki TAKESHIMA$",
-        "^Účetní závěrka ve vybraném podniku.*$",
-        "^Financial statements in selected company$",
-        "^Abdominal [Aa]ortic [Aa]neurysms.*$",
-        "^Pseudomyxoma peritonei$",
-        "^Kazalo autora$",
-        "(?i)^uvodna riječ$",
-        "^Motivace jako způsob vedení lidí$",
-        "^Motivation as a leadership$",
-        "^Polyfunkční dům$",
-        "^Multi\\-funkcional building$",
-        "^Podnikatelský plán$",
-        "(?i)^Podnikatelský záměr$",
-        "(?i)^Business Plan$",
-        "^Oceňování nemovitostí$",
-        "^Marketingová komunikace$",
-        "^Marketing communication$",
-        "^Sumario Analítico$",
-        "^Riječ uredništva$",
-        "^Savjetovanja i priredbe$",
-        "^Índice$",
-        "^(Starobosanski nadpisi).*$",
-        "^Vzdělávání pracovníků v organizaci$",
-        "^Staff training in organization$",
-        "^(Life Histories of North American Geometridae).*$",
-        "^Strategická analýza podniku$",
-        "^Strategic Analysis of an Enterprise$",
-        "^Sadržaj$",
-        "^Upute suradnicima$",
-        "^Rodinný dům$",
-        "(?i)^Fami(l)?ly house$",
-        "^Upute autorima$",
-        "^Strategic Analysis$",
-        "^Finanční analýza vybraného podniku$",
-        "^Finanční analýza$",
-        "^Riječ urednika$",
-        "(?i)^Content(s?)$",
-        "(?i)^Inhalt$",
-        "^Jinjō shōgaku shūshinsho jidōyō$",
-        "(?i)^Index$",
-        "^Chūgaku kokubun kyōkasho$",
-        "^Retrato de una mujer$",
-        "^Retrato de un hombre$",
-        "^Kōtō shōgaku dokuhon$",
-        "^Shotōka kokugo$",
-        "^Shōgaku dokuhon$",
-        "^Jinjō shōgaku kokugo dokuhon$",
-        "^Shinsei kokugo dokuhon$",
-        "^Teikoku dokuhon$",
-        "^Instructions to Authors$",
-        "^KİTAP TAHLİLİ$",
-        "^PRZEGLĄD PIŚMIENNICTWA$",
-        "(?i)^Presentación$",
-        "^İçindekiler$",
-        "(?i)^Tabl?e of contents$",
-        "^(CODICE DEL BEATO DE LOS REYES FERNANDO I Y SANCHA).*$",
-        "^(\\[MADRID\\. BIBL\\. NAC\\. N.*KING FERDINAND I.*FROM SAN ISIDORO DE LEON\\. FACUNDUS SCRIPSIT DATED.*\\]).*",
-        "^Editorial( Board)?$",
-        "(?i)^Editorial \\(English\\)$",
-        "^Editörden$",
-        "^(Corpus Oral Dialectal \\(COD\\)\\.).*$",
-        "^(Kiri Karl Morgensternile).*$",
-        "^(\\[Eksliibris Aleksandr).*\\]$",
-        "^(\\[Eksliibris Aleksandr).*$",
-        "^(Eksliibris Aleksandr).*$",
-        "^(Kiri A\\. de Vignolles).*$",
-        "^(2 kirja Karl Morgensternile).*$",
-        "^(Pirita kloostri idaosa arheoloogilised).*$",
-        "^(Kiri tundmatule).*$",
-        "^(Kiri Jenaer Allgemeine Literaturzeitung toimetusele).*$",
-        "^(Eksliibris Nikolai Birukovile).*$",
-        "^(Eksliibris Nikolai Issakovile).*$",
-        "^(WHP Cruise Summary Information of section).*$",
-        "^(Measurement of the top quark\\-pair production cross section with ATLAS in pp collisions at).*$",
-        "^(Measurement of the spin\\-dependent structure function).*",
-        "(?i)^.*authors['’′]? reply\\.?$",
-        "(?i)^.*authors['’′]? response\\.?$"
-      ]
-    },
-    "synonyms": {}
-  }
-}
--- a/dnet-pace-core/src/test/resources/eu/dnetlib/pace/config/publication.example.json
+++ b/dnet-pace-core/src/test/resources/eu/dnetlib/pace/config/publication.example.json
@ -1 +0,0 @@
-{"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "sysimport:crosswalk:repository", "classname": "sysimport:crosswalk:repository", "schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": "0.9"}, "pid": [], "oaiprovenance": {"originDescription": {"metadataNamespace": "", "harvestDate": "2021-06-10T10:03:36.091Z", "baseURL": "file%3A%2F%2F%2Fvar%2Flib%2Fdnet%2Fdata%2Fsygma%2Fnew_ingestion%2Fcrossref", "datestamp": "", "altered": true, "identifier": ""}}, "relevantdate": [], "contributor": [], "id": "50|sygma_______::3bbb03e6ec8df0d219b2d2165ea1d446", "subject": [], "lastupdatetimestamp": 1628684944004, "author": [{"surname": "Pan", "fullname": "Pan, Mengwu", "pid": [], "name": "Mengwu", "rank": 1}, {"surname": "Blattner", "fullname": "Blattner, Christine", "pid": [{"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "sysimport:crosswalk:repository", "classname": "sysimport:crosswalk:repository", "schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": "0.9"}, "qualifier": {"classid": "orcid_pending", "classname": "Open Researcher and Contributor ID", "schemename": "dnet:pid_types", "schemeid": "dnet:pid_types"}, "value": "0000-0002-7250-5273"}], "name": "Christine", "rank": 2}], "collectedfrom": [{"value": "Sygma", "key": "10|openaire____::a8db6f6b2ce4fe72e8b2314a9a93e7d9"}], "instance": [{"refereed": {"classid": "UNKNOWN", "classname": "Unknown", "schemename": "dnet:review_levels", "schemeid": "dnet:review_levels"}, "hostedby": {"value": "Cancers", "key": "10|issn__online::69ba871b903253074dcf4054e619afff"}, "license": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "sysimport:crosswalk:repository", "classname": "sysimport:crosswalk:repository", "schemename": "dnet:provenanceActions", "schemeid": 
"dnet:provenanceActions"}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": "0.9"}, "value": "https://creativecommons.org/licenses/by/4.0/"}, "url": ["http://dx.doi.org/10.3390/cancers13040745"], "pid": [], "distributionlocation": "", "alternateIdentifier": [{"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "sysimport:crosswalk:repository", "classname": "sysimport:crosswalk:repository", "schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": "0.9"}, "qualifier": {"classid": "doi", "classname": "Digital Object Identifier", "schemename": "dnet:pid_types", "schemeid": "dnet:pid_types"}, "value": "10.3390/cancers13040745"}], "collectedfrom": {"value": "Sygma", "key": "10|openaire____::a8db6f6b2ce4fe72e8b2314a9a93e7d9"}, "accessright": {"classid": "OPEN", "classname": "Open Access", "schemename": "dnet:access_modes", "schemeid": "dnet:access_modes"}, "instancetype": {"classid": "0001", "classname": "Article", "schemename": "dnet:publication_resource", "schemeid": "dnet:publication_resource"}}], "resulttype": {"classid": "publication", "classname": "publication", "schemename": "dnet:result_typologies", "schemeid": "dnet:result_typologies"}, "dateofcollection": "2021-06-10T10:03:36.091Z", "fulltext": [], "dateoftransformation": "2021-07-20T16:59:21.682Z", "description": [], "format": [], "journal": {"issnPrinted": "", "vol": "13", "dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "sysimport:crosswalk:repository", "classname": "sysimport:crosswalk:repository", "schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": "0.9"}, "name": "Cancers", "iss": "4", "sp": "745", "edition": "", "issnOnline": "2072-6694", "ep": "", "issnLinking": ""}, "coverage": [], "externalReference": [], "language": {"classid": "eng", 
"classname": "English", "schemename": "dnet:languages", "schemeid": "dnet:languages"}, "bestaccessright": {"classid": "OPEN", "classname": "Open Access", "schemename": "dnet:access_modes", "schemeid": "dnet:access_modes"}, "country": [], "extraInfo": [], "originalId": ["10.3390/cancers13040745", "50|sygma_______::3bbb03e6ec8df0d219b2d2165ea1d446"], "source": [], "context": [], "title": [{"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "sysimport:crosswalk:repository", "classname": "sysimport:crosswalk:repository", "schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": "0.9"}, "qualifier": {"classid": "main title", "classname": "main title", "schemename": "dnet:dataCite_title", "schemeid": "dnet:dataCite_title"}, "value": "Regulation of p53 by E3s"}]}
--- a/pom.xml
+++ b/pom.xml
@ -5,7 +5,7 @@

    <groupId>eu.dnetlib</groupId>
    <artifactId>dnet-dedup</artifactId>
-    <version>4.1.13-SNAPSHOT</version>
+    <version>4.1.7</version>

    <packaging>pom</packaging>

@ -22,7 +22,7 @@

    <scm>
        <developerConnection>scm:git:https://code-repo.d4science.org/D-Net/dnet-dedup.git</developerConnection>
-        <tag>dnet-dedup-4.0.3</tag>
+        <tag>dnet-dedup-4.1.7</tag>
    </scm>

    <modules>
@ -144,7 +144,14 @@
                <plugin>
                    <groupId>org.apache.maven.plugins</groupId>
                    <artifactId>maven-surefire-plugin</artifactId>
-                    <version>2.22.0</version>
+                    <version>2.19.1</version>
+                    <dependencies>
+                        <dependency>
+                            <groupId>org.junit.jupiter</groupId>
+                            <artifactId>junit-jupiter</artifactId>
+                            <version>${junit-jupiter.version}</version>
+                        </dependency>
+                    </dependencies>
                    <configuration>
                        <redirectTestOutputToFile>false</redirectTestOutputToFile>
                    </configuration>
@ -221,8 +228,7 @@
        <google.guava.version>15.0</google.guava.version>

        <spark.version>2.2.0</spark.version>
-        <!--<jackson.version>2.9.6</jackson.version>-->
-        <jackson.version>2.6.5</jackson.version>
+        <jackson.version>2.9.6</jackson.version>
        <mockito-core.version>3.3.3</mockito-core.version>

        <commons.lang.version>3.5</commons.lang.version>
@ -254,7 +260,7 @@
        <oozie.use.system.libpath>true</oozie.use.system.libpath>
        <properties.maven.plugin.version>2.0.1</properties.maven.plugin.version>
        <junit-jupiter.version>5.6.1</junit-jupiter.version>
-        <maven.dependency.eu.dnetlib.dhp.dhp-build-assembly-resources.jar.path>../dhp-build/dhp-build-assembly-resources/target/dhp-build-assembly-resources-${project.version}.jar</maven.dependency.eu.dnetlib.dhp.dhp-build-assembly-resources.jar.path>
+        <maven.dependency.eu.dnetlib.dhp.dhp-build-assembly-resources.jar.path>../dhp-build/dhp-build-assembly-resources/target/dhp-build-assembly-resources-4.0.6-SNAPSHOT.jar</maven.dependency.eu.dnetlib.dhp.dhp-build-assembly-resources.jar.path>

    </properties>

@ -403,11 +409,7 @@
                <version>2.4.0</version>
            </dependency>

-            <dependency>
-                <groupId>com.ibm.icu</groupId>
-                <artifactId>icu4j</artifactId>
-                <version>70.1</version>
-            </dependency>
+

        </dependencies>

--- a/release.properties
+++ b/release.properties
@ -0,0 +1,11 @@
+#release configuration
+#Tue Sep 29 12:04:49 CEST 2020
+scm.tagNameFormat=@{project.artifactId}-@{project.version}
+pushChanges=true
+scm.url=scm\:git\:https\://code-repo.d4science.org/D-Net/dnet-dedup.git
+preparationGoals=clean verify
+projectVersionPolicyId=default
+remoteTagging=true
+scm.commentPrefix=[maven-release-plugin] 
+exec.snapshotReleasePluginAllowed=false
+completedPhase=check-poms
				`@ -1 +0,0 @@`
				{"fullname":"Zaragoza, Maria Cleofé","firstname":"Maria Cleofé","lastname":"Zaragoza","coAuthors":[{"fullname":"Cambras, Trinitat","lastname":"Cambras","firstname":"Trinitat","orcid":"0000-0002-9009-4690"},{"fullname":"Castro-Marrero, Jesús","lastname":"Castro-Marrero","firstname":"Jesús","orcid":""},{"fullname":"Díez-Noguera, Antoni","lastname":"Díez-Noguera","firstname":"Antoni","orcid":""},{"fullname":"Alegre, José","lastname":"Alegre","firstname":"José","orcid":"0000-0002-7582-7585"}],"topics":[0.9522090839562252,0.04779091604377485],"orcid":"0000-0002-9797-0219","id":"author::1a10826c83c7f9f0dcebe7df05e37a2a","pubId":"50\|pmid________::db7fd19db5a620eafad40cfb97f9690d"}
				`@ -1 +0,0 @@`
				{"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "sysimport:crosswalk:repository", "classname": "sysimport:crosswalk:repository", "schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": "0.9"}, "pid": [], "oaiprovenance": {"originDescription": {"metadataNamespace": "", "harvestDate": "2021-06-10T10:03:36.091Z", "baseURL": "file%3A%2F%2F%2Fvar%2Flib%2Fdnet%2Fdata%2Fsygma%2Fnew_ingestion%2Fcrossref", "datestamp": "", "altered": true, "identifier": ""}}, "relevantdate": [], "contributor": [], "id": "50\|sygma_______::3bbb03e6ec8df0d219b2d2165ea1d446", "subject": [], "lastupdatetimestamp": 1628684944004, "author": [{"surname": "Pan", "fullname": "Pan, Mengwu", "pid": [], "name": "Mengwu", "rank": 1}, {"surname": "Blattner", "fullname": "Blattner, Christine", "pid": [{"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "sysimport:crosswalk:repository", "classname": "sysimport:crosswalk:repository", "schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": "0.9"}, "qualifier": {"classid": "orcid_pending", "classname": "Open Researcher and Contributor ID", "schemename": "dnet:pid_types", "schemeid": "dnet:pid_types"}, "value": "0000-0002-7250-5273"}], "name": "Christine", "rank": 2}], "collectedfrom": [{"value": "Sygma", "key": "10\|openaire____::a8db6f6b2ce4fe72e8b2314a9a93e7d9"}], "instance": [{"refereed": {"classid": "UNKNOWN", "classname": "Unknown", "schemename": "dnet:review_levels", "schemeid": "dnet:review_levels"}, "hostedby": {"value": "Cancers", "key": "10\|issn__online::69ba871b903253074dcf4054e619afff"}, "license": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "sysimport:crosswalk:repository", "classname": "sysimport:crosswalk:repository", "schemename": "dnet:provenanceActions", 
"schemeid": "dnet:provenanceActions"}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": "0.9"}, "value": "https://creativecommons.org/licenses/by/4.0/"}, "url": ["http://dx.doi.org/10.3390/cancers13040745"], "pid": [], "distributionlocation": "", "alternateIdentifier": [{"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "sysimport:crosswalk:repository", "classname": "sysimport:crosswalk:repository", "schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": "0.9"}, "qualifier": {"classid": "doi", "classname": "Digital Object Identifier", "schemename": "dnet:pid_types", "schemeid": "dnet:pid_types"}, "value": "10.3390/cancers13040745"}], "collectedfrom": {"value": "Sygma", "key": "10\|openaire____::a8db6f6b2ce4fe72e8b2314a9a93e7d9"}, "accessright": {"classid": "OPEN", "classname": "Open Access", "schemename": "dnet:access_modes", "schemeid": "dnet:access_modes"}, "instancetype": {"classid": "0001", "classname": "Article", "schemename": "dnet:publication_resource", "schemeid": "dnet:publication_resource"}}], "resulttype": {"classid": "publication", "classname": "publication", "schemename": "dnet:result_typologies", "schemeid": "dnet:result_typologies"}, "dateofcollection": "2021-06-10T10:03:36.091Z", "fulltext": [], "dateoftransformation": "2021-07-20T16:59:21.682Z", "description": [], "format": [], "journal": {"issnPrinted": "", "vol": "13", "dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "sysimport:crosswalk:repository", "classname": "sysimport:crosswalk:repository", "schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": "0.9"}, "name": "Cancers", "iss": "4", "sp": "745", "edition": "", "issnOnline": "2072-6694", "ep": "", "issnLinking": ""}, "coverage": [], "externalReference": [], "language": 
{"classid": "eng", "classname": "English", "schemename": "dnet:languages", "schemeid": "dnet:languages"}, "bestaccessright": {"classid": "OPEN", "classname": "Open Access", "schemename": "dnet:access_modes", "schemeid": "dnet:access_modes"}, "country": [], "extraInfo": [], "originalId": ["10.3390/cancers13040745", "50\|sygma_______::3bbb03e6ec8df0d219b2d2165ea1d446"], "source": [], "context": [], "title": [{"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "sysimport:crosswalk:repository", "classname": "sysimport:crosswalk:repository", "schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": "0.9"}, "qualifier": {"classid": "main title", "classname": "main title", "schemename": "dnet:dataCite_title", "schemeid": "dnet:dataCite_title"}, "value": "Regulation of p53 by E3s"}]}