diff --git a/.DS_Store b/.DS_Store
index f316c9219..6e3f1637d 100644
Binary files a/.DS_Store and b/.DS_Store differ
diff --git a/dependencies.txt b/dependencies.txt
new file mode 100644
index 000000000..29c11c16d
--- /dev/null
+++ b/dependencies.txt
@@ -0,0 +1,252 @@
+[INFO] Scanning for projects...
+[INFO] ------------------------------------------------------------------------
+[INFO] Reactor Build Order:
+[INFO]
+[INFO] dnet-dedup [pom]
+[INFO] dnet-pace-core [jar]
+[INFO] dnet-dedup-test [jar]
+[INFO]
+[INFO] -----------------------< eu.dnetlib:dnet-dedup >------------------------
+[INFO] Building dnet-dedup 3.0.3-SNAPSHOT [1/3]
+[INFO] --------------------------------[ pom ]---------------------------------
+[INFO]
+[INFO] --- maven-dependency-plugin:3.0.0:tree (default-cli) @ dnet-dedup ---
+[INFO] eu.dnetlib:dnet-dedup:pom:3.0.3-SNAPSHOT
+[INFO]
+[INFO] ---------------------< eu.dnetlib:dnet-pace-core >----------------------
+[INFO] Building dnet-pace-core 3.0.3-SNAPSHOT [2/3]
+[INFO] --------------------------------[ jar ]---------------------------------
+[INFO]
+[INFO] --- maven-dependency-plugin:3.0.0:tree (default-cli) @ dnet-pace-core ---
+[INFO] eu.dnetlib:dnet-pace-core:jar:3.0.3-SNAPSHOT
+[INFO] +- edu.cmu:secondstring:jar:1.0.0:compile
+[INFO] +- com.google.guava:guava:jar:15.0:compile
+[INFO] +- com.google.code.gson:gson:jar:2.2.2:compile
+[INFO] +- commons-lang:commons-lang:jar:2.6:compile
+[INFO] +- commons-io:commons-io:jar:2.4:compile
+[INFO] +- commons-collections:commons-collections:jar:3.2.1:compile
+[INFO] +- com.googlecode.protobuf-java-format:protobuf-java-format:jar:1.2:compile
+[INFO] +- org.antlr:stringtemplate:jar:3.2:compile
+[INFO] | \- org.antlr:antlr:jar:2.7.7:compile
+[INFO] +- commons-logging:commons-logging:jar:1.1.3:compile
+[INFO] +- junit:junit:jar:4.9:test
+[INFO] | \- org.hamcrest:hamcrest-core:jar:1.1:test
+[INFO] +- org.reflections:reflections:jar:0.9.10:compile
+[INFO] | +- org.javassist:javassist:jar:3.19.0-GA:compile
+[INFO] | \- com.google.code.findbugs:annotations:jar:2.0.1:compile
+[INFO] +- com.fasterxml.jackson.core:jackson-databind:jar:2.6.6:compile
+[INFO] | +- com.fasterxml.jackson.core:jackson-annotations:jar:2.6.0:compile
+[INFO] | \- com.fasterxml.jackson.core:jackson-core:jar:2.6.6:compile
+[INFO] +- org.codehaus.jackson:jackson-mapper-asl:jar:1.9.13:compile
+[INFO] | \- org.codehaus.jackson:jackson-core-asl:jar:1.9.13:compile
+[INFO] \- org.apache.commons:commons-math3:jar:3.6.1:compile
+[INFO]
+[INFO] ---------------------< eu.dnetlib:dnet-dedup-test >---------------------
+[INFO] Building dnet-dedup-test 3.0.3-SNAPSHOT [3/3]
+[INFO] --------------------------------[ jar ]---------------------------------
+[INFO]
+[INFO] --- maven-dependency-plugin:3.0.0:tree (default-cli) @ dnet-dedup-test ---
+[INFO] eu.dnetlib:dnet-dedup-test:jar:3.0.3-SNAPSHOT
+[INFO] +- eu.dnetlib:dnet-pace-core:jar:3.0.3-SNAPSHOT:compile
+[INFO] | +- edu.cmu:secondstring:jar:1.0.0:compile
+[INFO] | +- com.google.guava:guava:jar:15.0:compile
+[INFO] | +- com.google.code.gson:gson:jar:2.2.2:compile
+[INFO] | +- commons-lang:commons-lang:jar:2.6:compile
+[INFO] | +- commons-io:commons-io:jar:2.4:compile
+[INFO] | +- commons-collections:commons-collections:jar:3.2.1:compile
+[INFO] | +- com.googlecode.protobuf-java-format:protobuf-java-format:jar:1.2:compile
+[INFO] | +- org.antlr:stringtemplate:jar:3.2:compile
+[INFO] | | \- org.antlr:antlr:jar:2.7.7:compile
+[INFO] | +- commons-logging:commons-logging:jar:1.1.3:compile
+[INFO] | +- org.reflections:reflections:jar:0.9.10:compile
+[INFO] | | +- org.javassist:javassist:jar:3.19.0-GA:compile
+[INFO] | | \- com.google.code.findbugs:annotations:jar:2.0.1:compile
+[INFO] | +- com.fasterxml.jackson.core:jackson-databind:jar:2.6.6:compile
+[INFO] | | +- com.fasterxml.jackson.core:jackson-annotations:jar:2.6.0:compile
+[INFO] | | \- com.fasterxml.jackson.core:jackson-core:jar:2.6.6:compile
+[INFO] | +- org.codehaus.jackson:jackson-mapper-asl:jar:1.9.13:compile
+[INFO] | | \- org.codehaus.jackson:jackson-core-asl:jar:1.9.13:compile
+[INFO] | \- org.apache.commons:commons-math3:jar:3.6.1:compile
+[INFO] +- eu.dnetlib:dnet-openaire-data-protos:jar:3.9.3-proto250:compile
+[INFO] | +- com.google.protobuf:protobuf-java:jar:2.5.0:compile
+[INFO] | \- log4j:log4j:jar:1.2.17:compile (version selected from constraint [1.2.17,1.2.17])
+[INFO] +- org.apache.spark:spark-core_2.11:jar:2.2.0:provided
+[INFO] | +- org.apache.avro:avro:jar:1.7.7:provided
+[INFO] | | +- com.thoughtworks.paranamer:paranamer:jar:2.3:provided
+[INFO] | | \- org.apache.commons:commons-compress:jar:1.4.1:provided
+[INFO] | | \- org.tukaani:xz:jar:1.0:provided
+[INFO] | +- org.apache.avro:avro-mapred:jar:hadoop2:1.7.7:provided
+[INFO] | | +- org.apache.avro:avro-ipc:jar:1.7.7:provided
+[INFO] | | \- org.apache.avro:avro-ipc:jar:tests:1.7.7:provided
+[INFO] | +- com.twitter:chill_2.11:jar:0.8.0:provided
+[INFO] | | \- com.esotericsoftware:kryo-shaded:jar:3.0.3:provided
+[INFO] | | +- com.esotericsoftware:minlog:jar:1.3.0:provided
+[INFO] | | \- org.objenesis:objenesis:jar:2.1:provided
+[INFO] | +- com.twitter:chill-java:jar:0.8.0:provided
+[INFO] | +- org.apache.xbean:xbean-asm5-shaded:jar:4.4:provided
+[INFO] | +- org.apache.hadoop:hadoop-client:jar:2.6.5:provided
+[INFO] | | +- org.apache.hadoop:hadoop-common:jar:2.6.5:provided
+[INFO] | | | +- commons-cli:commons-cli:jar:1.2:provided
+[INFO] | | | +- xmlenc:xmlenc:jar:0.52:provided
+[INFO] | | | +- commons-httpclient:commons-httpclient:jar:3.1:provided
+[INFO] | | | +- commons-configuration:commons-configuration:jar:1.6:provided
+[INFO] | | | | +- commons-digester:commons-digester:jar:1.8:provided
+[INFO] | | | | | \- commons-beanutils:commons-beanutils:jar:1.7.0:provided
+[INFO] | | | | \- commons-beanutils:commons-beanutils-core:jar:1.8.0:provided
+[INFO] | | | +- org.apache.hadoop:hadoop-auth:jar:2.6.5:provided
+[INFO] | | | | \- org.apache.directory.server:apacheds-kerberos-codec:jar:2.0.0-M15:provided
+[INFO] | | | | +- org.apache.directory.server:apacheds-i18n:jar:2.0.0-M15:provided
+[INFO] | | | | +- org.apache.directory.api:api-asn1-api:jar:1.0.0-M20:provided
+[INFO] | | | | \- org.apache.directory.api:api-util:jar:1.0.0-M20:provided
+[INFO] | | | +- org.apache.curator:curator-client:jar:2.6.0:provided
+[INFO] | | | \- org.htrace:htrace-core:jar:3.0.4:provided
+[INFO] | | +- org.apache.hadoop:hadoop-hdfs:jar:2.6.5:provided
+[INFO] | | | +- org.mortbay.jetty:jetty-util:jar:6.1.26:provided
+[INFO] | | | \- xerces:xercesImpl:jar:2.9.1:provided
+[INFO] | | | \- xml-apis:xml-apis:jar:1.3.04:provided
+[INFO] | | +- org.apache.hadoop:hadoop-mapreduce-client-app:jar:2.6.5:provided
+[INFO] | | | +- org.apache.hadoop:hadoop-mapreduce-client-common:jar:2.6.5:provided
+[INFO] | | | | +- org.apache.hadoop:hadoop-yarn-client:jar:2.6.5:provided
+[INFO] | | | | \- org.apache.hadoop:hadoop-yarn-server-common:jar:2.6.5:provided
+[INFO] | | | \- org.apache.hadoop:hadoop-mapreduce-client-shuffle:jar:2.6.5:provided
+[INFO] | | +- org.apache.hadoop:hadoop-yarn-api:jar:2.6.5:provided
+[INFO] | | +- org.apache.hadoop:hadoop-mapreduce-client-core:jar:2.6.5:provided
+[INFO] | | | \- org.apache.hadoop:hadoop-yarn-common:jar:2.6.5:provided
+[INFO] | | | +- javax.xml.bind:jaxb-api:jar:2.2.2:provided
+[INFO] | | | | \- javax.xml.stream:stax-api:jar:1.0-2:provided
+[INFO] | | | +- org.codehaus.jackson:jackson-jaxrs:jar:1.9.13:provided
+[INFO] | | | \- org.codehaus.jackson:jackson-xc:jar:1.9.13:provided
+[INFO] | | +- org.apache.hadoop:hadoop-mapreduce-client-jobclient:jar:2.6.5:provided
+[INFO] | | \- org.apache.hadoop:hadoop-annotations:jar:2.6.5:provided
+[INFO] | +- org.apache.spark:spark-launcher_2.11:jar:2.2.0:provided
+[INFO] | +- org.apache.spark:spark-network-common_2.11:jar:2.2.0:provided
+[INFO] | | \- org.fusesource.leveldbjni:leveldbjni-all:jar:1.8:provided
+[INFO] | +- org.apache.spark:spark-network-shuffle_2.11:jar:2.2.0:provided
+[INFO] | +- org.apache.spark:spark-unsafe_2.11:jar:2.2.0:provided
+[INFO] | +- net.java.dev.jets3t:jets3t:jar:0.9.3:provided
+[INFO] | | +- org.apache.httpcomponents:httpcore:jar:4.3.3:provided
+[INFO] | | +- org.apache.httpcomponents:httpclient:jar:4.3.6:provided
+[INFO] | | +- javax.activation:activation:jar:1.1.1:provided
+[INFO] | | +- mx4j:mx4j:jar:3.0.2:provided
+[INFO] | | +- javax.mail:mail:jar:1.4.7:provided
+[INFO] | | +- org.bouncycastle:bcprov-jdk15on:jar:1.51:provided
+[INFO] | | \- com.jamesmurty.utils:java-xmlbuilder:jar:1.0:provided
+[INFO] | | \- net.iharder:base64:jar:2.3.8:provided
+[INFO] | +- org.apache.curator:curator-recipes:jar:2.6.0:provided
+[INFO] | | +- org.apache.curator:curator-framework:jar:2.6.0:provided
+[INFO] | | \- org.apache.zookeeper:zookeeper:jar:3.4.6:provided
+[INFO] | +- javax.servlet:javax.servlet-api:jar:3.1.0:provided
+[INFO] | +- org.apache.commons:commons-lang3:jar:3.5:provided
+[INFO] | +- com.google.code.findbugs:jsr305:jar:1.3.9:provided
+[INFO] | +- org.slf4j:slf4j-api:jar:1.7.16:provided
+[INFO] | +- org.slf4j:jul-to-slf4j:jar:1.7.16:provided
+[INFO] | +- org.slf4j:jcl-over-slf4j:jar:1.7.16:provided
+[INFO] | +- org.slf4j:slf4j-log4j12:jar:1.7.16:provided
+[INFO] | +- com.ning:compress-lzf:jar:1.0.3:provided
+[INFO] | +- org.xerial.snappy:snappy-java:jar:1.1.2.6:provided
+[INFO] | +- net.jpountz.lz4:lz4:jar:1.3.0:provided
+[INFO] | +- org.roaringbitmap:RoaringBitmap:jar:0.5.11:provided
+[INFO] | +- commons-net:commons-net:jar:2.2:provided
+[INFO] | +- org.scala-lang:scala-library:jar:2.11.8:provided
+[INFO] | +- org.json4s:json4s-jackson_2.11:jar:3.2.11:provided
+[INFO] | | \- org.json4s:json4s-core_2.11:jar:3.2.11:provided
+[INFO] | | +- org.json4s:json4s-ast_2.11:jar:3.2.11:provided
+[INFO] | | \- org.scala-lang:scalap:jar:2.11.0:provided
+[INFO] | | \- org.scala-lang:scala-compiler:jar:2.11.0:provided
+[INFO] | | +- org.scala-lang.modules:scala-xml_2.11:jar:1.0.1:provided
+[INFO] | | \- org.scala-lang.modules:scala-parser-combinators_2.11:jar:1.0.1:provided
+[INFO] | +- org.glassfish.jersey.core:jersey-client:jar:2.22.2:provided
+[INFO] | | +- javax.ws.rs:javax.ws.rs-api:jar:2.0.1:provided
+[INFO] | | +- org.glassfish.hk2:hk2-api:jar:2.4.0-b34:provided
+[INFO] | | | +- org.glassfish.hk2:hk2-utils:jar:2.4.0-b34:provided
+[INFO] | | | \- org.glassfish.hk2.external:aopalliance-repackaged:jar:2.4.0-b34:provided
+[INFO] | | +- org.glassfish.hk2.external:javax.inject:jar:2.4.0-b34:provided
+[INFO] | | \- org.glassfish.hk2:hk2-locator:jar:2.4.0-b34:provided
+[INFO] | +- org.glassfish.jersey.core:jersey-common:jar:2.22.2:provided
+[INFO] | | +- javax.annotation:javax.annotation-api:jar:1.2:provided
+[INFO] | | +- org.glassfish.jersey.bundles.repackaged:jersey-guava:jar:2.22.2:provided
+[INFO] | | \- org.glassfish.hk2:osgi-resource-locator:jar:1.0.1:provided
+[INFO] | +- org.glassfish.jersey.core:jersey-server:jar:2.22.2:provided
+[INFO] | | +- org.glassfish.jersey.media:jersey-media-jaxb:jar:2.22.2:provided
+[INFO] | | \- javax.validation:validation-api:jar:1.1.0.Final:provided
+[INFO] | +- org.glassfish.jersey.containers:jersey-container-servlet:jar:2.22.2:provided
+[INFO] | +- org.glassfish.jersey.containers:jersey-container-servlet-core:jar:2.22.2:provided
+[INFO] | +- io.netty:netty-all:jar:4.0.43.Final:provided
+[INFO] | +- io.netty:netty:jar:3.9.9.Final:provided
+[INFO] | +- com.clearspring.analytics:stream:jar:2.7.0:provided
+[INFO] | +- io.dropwizard.metrics:metrics-core:jar:3.1.2:provided
+[INFO] | +- io.dropwizard.metrics:metrics-jvm:jar:3.1.2:provided
+[INFO] | +- io.dropwizard.metrics:metrics-json:jar:3.1.2:provided
+[INFO] | +- io.dropwizard.metrics:metrics-graphite:jar:3.1.2:provided
+[INFO] | +- com.fasterxml.jackson.module:jackson-module-scala_2.11:jar:2.6.5:provided
+[INFO] | | +- org.scala-lang:scala-reflect:jar:2.11.7:provided
+[INFO] | | \- com.fasterxml.jackson.module:jackson-module-paranamer:jar:2.6.5:provided
+[INFO] | +- org.apache.ivy:ivy:jar:2.4.0:provided
+[INFO] | +- oro:oro:jar:2.0.8:provided
+[INFO] | +- net.razorvine:pyrolite:jar:4.13:provided
+[INFO] | +- net.sf.py4j:py4j:jar:0.10.4:provided
+[INFO] | +- org.apache.spark:spark-tags_2.11:jar:2.2.0:provided
+[INFO] | +- org.apache.commons:commons-crypto:jar:1.0.0:provided
+[INFO] | \- org.spark-project.spark:unused:jar:1.0.0:provided
+[INFO] +- org.apache.spark:spark-graphx_2.11:jar:2.2.0:provided
+[INFO] | +- org.apache.spark:spark-mllib-local_2.11:jar:2.2.0:provided
+[INFO] | | \- org.scalanlp:breeze_2.11:jar:0.13.1:provided
+[INFO] | | +- org.scalanlp:breeze-macros_2.11:jar:0.13.1:provided
+[INFO] | | +- net.sf.opencsv:opencsv:jar:2.3:provided
+[INFO] | | +- com.github.rwl:jtransforms:jar:2.4.0:provided
+[INFO] | | +- org.spire-math:spire_2.11:jar:0.13.0:provided
+[INFO] | | | +- org.spire-math:spire-macros_2.11:jar:0.13.0:provided
+[INFO] | | | \- org.typelevel:machinist_2.11:jar:0.6.1:provided
+[INFO] | | \- com.chuusai:shapeless_2.11:jar:2.3.2:provided
+[INFO] | | \- org.typelevel:macro-compat_2.11:jar:1.1.1:provided
+[INFO] | +- com.github.fommil.netlib:core:jar:1.1.2:provided
+[INFO] | \- net.sourceforge.f2j:arpack_combined_all:jar:0.1:provided
+[INFO] +- org.apache.spark:spark-sql_2.11:jar:2.2.0:provided
+[INFO] | +- com.univocity:univocity-parsers:jar:2.2.1:provided
+[INFO] | +- org.apache.spark:spark-sketch_2.11:jar:2.2.0:provided
+[INFO] | +- org.apache.spark:spark-catalyst_2.11:jar:2.2.0:provided
+[INFO] | | +- org.codehaus.janino:janino:jar:3.0.0:provided
+[INFO] | | +- org.codehaus.janino:commons-compiler:jar:3.0.0:provided
+[INFO] | | \- org.antlr:antlr4-runtime:jar:4.5.3:provided
+[INFO] | +- org.apache.parquet:parquet-column:jar:1.8.2:provided
+[INFO] | | +- org.apache.parquet:parquet-common:jar:1.8.2:provided
+[INFO] | | \- org.apache.parquet:parquet-encoding:jar:1.8.2:provided
+[INFO] | \- org.apache.parquet:parquet-hadoop:jar:1.8.2:provided
+[INFO] | +- org.apache.parquet:parquet-format:jar:2.3.1:provided
+[INFO] | \- org.apache.parquet:parquet-jackson:jar:1.8.2:provided
+[INFO] +- eu.dnetlib:dnet-openaireplus-mapping-utils:jar:6.2.18:test
+[INFO] | +- com.ximpleware:vtd-xml:jar:2.13.4:test (version selected from constraint [2.12,3.0.0))
+[INFO] | +- commons-codec:commons-codec:jar:1.9:provided
+[INFO] | +- dom4j:dom4j:jar:1.6.1:test (version selected from constraint [1.6.1,1.6.1])
+[INFO] | +- net.sf.supercsv:super-csv:jar:2.4.0:test
+[INFO] | +- eu.dnetlib:cnr-misc-utils:jar:1.0.6-SNAPSHOT:test (version selected from constraint [1.0.0,2.0.0))
+[INFO] | | +- jaxen:jaxen:jar:1.1.6:test
+[INFO] | | +- saxonica:saxon:jar:9.1.0.8:test
+[INFO] | | +- saxonica:saxon-dom:jar:9.1.0.8:test
+[INFO] | | +- jgrapht:jgrapht:jar:0.7.2:test
+[INFO] | | +- net.sf.ehcache:ehcache:jar:2.8.0:test
+[INFO] | | \- org.springframework:spring-test:jar:4.2.5.RELEASE:test (version selected from constraint [4.2.5.RELEASE,4.2.5.RELEASE])
+[INFO] | | \- org.springframework:spring-core:jar:4.2.5.RELEASE:test
+[INFO] | +- eu.dnetlib:dnet-hadoop-commons:jar:2.0.2-SNAPSHOT:test (version selected from constraint [2.0.0,3.0.0))
+[INFO] | | +- org.apache.hadoop:hadoop-core:jar:2.0.0-mr1-cdh4.7.0:test
+[INFO] | | | +- commons-el:commons-el:jar:1.0:test
+[INFO] | | | \- hsqldb:hsqldb:jar:1.8.0.10:test
+[INFO] | | \- org.springframework:spring-beans:jar:4.2.5.RELEASE:test (version selected from constraint [4.2.5.RELEASE,4.2.5.RELEASE])
+[INFO] | \- eu.dnetlib:dnet-index-solr-common:jar:1.3.1:test (version selected from constraint [1.0.0,1.3.1])
+[INFO] | \- org.apache.solr:solr-solrj:jar:4.9.0:test
+[INFO] | +- org.apache.httpcomponents:httpmime:jar:4.3.1:test
+[INFO] | \- org.noggit:noggit:jar:0.5:test
+[INFO] \- junit:junit:jar:4.9:test
+[INFO] \- org.hamcrest:hamcrest-core:jar:1.1:test
+[INFO] ------------------------------------------------------------------------
+[INFO] Reactor Summary:
+[INFO]
+[INFO] dnet-dedup 3.0.3-SNAPSHOT .......................... SUCCESS [ 1.152 s]
+[INFO] dnet-pace-core ..................................... SUCCESS [ 0.117 s]
+[INFO] dnet-dedup-test 3.0.3-SNAPSHOT ..................... SUCCESS [ 1.407 s]
+[INFO] ------------------------------------------------------------------------
+[INFO] BUILD SUCCESS
+[INFO] ------------------------------------------------------------------------
+[INFO] Total time: 3.216 s
+[INFO] Finished at: 2019-03-29T15:02:42+01:00
+[INFO] ------------------------------------------------------------------------
diff --git a/dnet-openaire-data-protos/.DS_Store b/dnet-openaire-data-protos/.DS_Store
new file mode 100644
index 000000000..638b0d702
Binary files /dev/null and b/dnet-openaire-data-protos/.DS_Store differ
diff --git a/dnet-openaire-data-protos/pom.xml b/dnet-openaire-data-protos/pom.xml
index 47ed2ea8c..f247374c9 100644
--- a/dnet-openaire-data-protos/pom.xml
+++ b/dnet-openaire-data-protos/pom.xml
@@ -10,7 +10,7 @@
eu.dnetlib
dnet-openaire-data-protos
jar
- 3.9.4-CUSTOM
+ 3.9.4-proto250
diff --git a/dnet-pace-core/pom.xml b/dnet-pace-core/pom.xml
index d88dea4e0..925a613db 100644
--- a/dnet-pace-core/pom.xml
+++ b/dnet-pace-core/pom.xml
@@ -64,6 +64,7 @@
com.fasterxml.jackson.core
jackson-databind
+
org.codehaus.jackson
jackson-mapper-asl
diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/FieldDef.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/FieldDef.java
index b954df7d6..6b85cf49f 100644
--- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/FieldDef.java
+++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/FieldDef.java
@@ -1,19 +1,16 @@
package eu.dnetlib.pace.model;
-import java.io.Serializable;
-import java.lang.reflect.InvocationTargetException;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-
import com.google.common.base.Splitter;
import com.google.common.collect.Lists;
import com.google.gson.Gson;
import eu.dnetlib.pace.config.PaceConfig;
import eu.dnetlib.pace.config.Type;
-import eu.dnetlib.pace.distance.*;
-import eu.dnetlib.pace.distance.algo.*;
-import eu.dnetlib.pace.util.PaceException;
+import eu.dnetlib.pace.distance.DistanceAlgo;
+
+import java.io.Serializable;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
/**
* The schema is composed by field definitions (FieldDef). Each field has a type, a name, and an associated distance algorithm.
@@ -60,6 +57,18 @@ public class FieldDef implements Serializable {
return name;
}
+ public void setName(String name) { // Plain setter: overwrites this field definition's name; no validation is performed.
+ this.name = name;
+ }
+
+ public void setPath(String path) { // Plain setter: overwrites the path stored for this field; the value is not checked here.
+ this.path = path;
+ }
+
+ public void setIgnoreMissing(boolean ignoreMissing) { // Plain setter for the ignoreMissing flag. NOTE(review): presumably added for bean-style config loading - confirm.
+ this.ignoreMissing = ignoreMissing;
+ }
+
public String getPath() {
return path;
}
diff --git a/dnet-pace-core/src/test/java/eu/dnetlib/pace/config/ConfigTest.java b/dnet-pace-core/src/test/java/eu/dnetlib/pace/config/ConfigTest.java
index d47768b72..575b3c786 100644
--- a/dnet-pace-core/src/test/java/eu/dnetlib/pace/config/ConfigTest.java
+++ b/dnet-pace-core/src/test/java/eu/dnetlib/pace/config/ConfigTest.java
@@ -32,7 +32,7 @@ public class ConfigTest extends AbstractPaceTest {
@Test
public void dedupConfigTest() {
- DedupConfig load = DedupConfig.load(readFromClasspath("result.pace.conf.json"));
+ DedupConfig load = DedupConfig.load(readFromClasspath("org.curr.conf"));
System.out.println(load.toString());
}
diff --git a/dnet-pace-core/src/test/resources/eu/dnetlib/pace/config/org.curr.conf b/dnet-pace-core/src/test/resources/eu/dnetlib/pace/config/org.curr.conf
new file mode 100644
index 000000000..fd4fbbe79
--- /dev/null
+++ b/dnet-pace-core/src/test/resources/eu/dnetlib/pace/config/org.curr.conf
@@ -0,0 +1,36 @@
+{
+ "wf" : {
+ "threshold" : "0.9",
+ "dedupRun" : "001",
+ "entityType" : "organization",
+ "orderField" : "legalname",
+ "queueMaxSize" : "2000",
+ "groupMaxSize" : "10",
+ "slidingWindowSize" : "200",
+ "rootBuilder" : [ "organization", "projectOrganization_participation_isParticipant", "datasourceOrganization_provision_isProvidedBy" ],
+ "includeChildren" : "true"
+ },
+ "pace" : {
+ "clustering" : [
+ { "name" : "sortedngrampairs", "fields" : [ "legalname" ], "params" : { "max" : 2, "ngramLen" : 3 } },
+ { "name" : "suffixprefix", "fields" : [ "legalname" ], "params" : { "max" : 1, "len" : 3 } },
+ { "name" : "urlclustering", "fields" : [ "websiteurl" ], "params" : { } }
+ ],
+ "strictConditions" : [
+ { "name" : "exactMatch", "fields" : [ "gridid" ] }
+ ],
+ "conditions" : [
+ { "name" : "exactMatch", "fields" : [ "country" ] },
+ { "name" : "DomainExactMatch", "fields" : [ "websiteurl" ] }
+ ],
+ "model" : [
+ { "name" : "legalname", "algo" : "Null", "type" : "String", "weight" : "0", "ignoreMissing" : "false", "path" : "organization/metadata/legalname/value" },
+ { "name" : "country", "algo" : "Null", "type" : "String", "weight" : "0", "ignoreMissing" : "true", "path" : "organization/metadata/country/classid" },
+ { "name" : "legalshortname", "algo" : "JaroWinklerNormalizedName", "type" : "String", "weight" : "0.1", "ignoreMissing" : "true", "path" : "organization/metadata/legalshortname/value" },
+ { "name" : "legalname", "algo" : "JaroWinklerNormalizedName", "type" : "String", "weight" : "0.9", "ignoreMissing" : "false", "path" : "organization/metadata/legalname/value", "params" : {"windowSize" : 4, "threshold" : 0.5} },
+ { "name" : "websiteurl", "algo" : "Null", "type" : "URL", "weight" : "0", "ignoreMissing" : "true", "path" : "organization/metadata/websiteurl/value", "params" : { "host" : 0.5, "path" : 0.5 } },
+ { "name" : "gridid", "algo" : "Null", "type" : "String", "weight" : "0.0", "ignoreMissing" : "true", "path" : "pid[qualifier#classid = {grid}]/value" }
+ ],
+ "blacklists" : { }
+ }
+}
\ No newline at end of file