Add a SparkTester test, implement two test classes (SparkLocalTest and Utility) in the dnet-dedup-test module, add new terms to the vocabulary, and change the implementation of the JaroWinklerNormalizedName comparator

This commit is contained in:
miconis 2019-04-03 09:40:14 +02:00
parent e9894ed089
commit f738c2b641
19 changed files with 1249 additions and 65 deletions

BIN
.DS_Store vendored

Binary file not shown.

252
dependencies.txt Normal file
View File

@ -0,0 +1,252 @@
[INFO] Scanning for projects...
[INFO] ------------------------------------------------------------------------
[INFO] Reactor Build Order:
[INFO]
[INFO] dnet-dedup [pom]
[INFO] dnet-pace-core [jar]
[INFO] dnet-dedup-test [jar]
[INFO]
[INFO] -----------------------< eu.dnetlib:dnet-dedup >------------------------
[INFO] Building dnet-dedup 3.0.3-SNAPSHOT [1/3]
[INFO] --------------------------------[ pom ]---------------------------------
[INFO]
[INFO] --- maven-dependency-plugin:3.0.0:tree (default-cli) @ dnet-dedup ---
[INFO] eu.dnetlib:dnet-dedup:pom:3.0.3-SNAPSHOT
[INFO]
[INFO] ---------------------< eu.dnetlib:dnet-pace-core >----------------------
[INFO] Building dnet-pace-core 3.0.3-SNAPSHOT [2/3]
[INFO] --------------------------------[ jar ]---------------------------------
[INFO]
[INFO] --- maven-dependency-plugin:3.0.0:tree (default-cli) @ dnet-pace-core ---
[INFO] eu.dnetlib:dnet-pace-core:jar:3.0.3-SNAPSHOT
[INFO] +- edu.cmu:secondstring:jar:1.0.0:compile
[INFO] +- com.google.guava:guava:jar:15.0:compile
[INFO] +- com.google.code.gson:gson:jar:2.2.2:compile
[INFO] +- commons-lang:commons-lang:jar:2.6:compile
[INFO] +- commons-io:commons-io:jar:2.4:compile
[INFO] +- commons-collections:commons-collections:jar:3.2.1:compile
[INFO] +- com.googlecode.protobuf-java-format:protobuf-java-format:jar:1.2:compile
[INFO] +- org.antlr:stringtemplate:jar:3.2:compile
[INFO] | \- org.antlr:antlr:jar:2.7.7:compile
[INFO] +- commons-logging:commons-logging:jar:1.1.3:compile
[INFO] +- junit:junit:jar:4.9:test
[INFO] | \- org.hamcrest:hamcrest-core:jar:1.1:test
[INFO] +- org.reflections:reflections:jar:0.9.10:compile
[INFO] | +- org.javassist:javassist:jar:3.19.0-GA:compile
[INFO] | \- com.google.code.findbugs:annotations:jar:2.0.1:compile
[INFO] +- com.fasterxml.jackson.core:jackson-databind:jar:2.6.6:compile
[INFO] | +- com.fasterxml.jackson.core:jackson-annotations:jar:2.6.0:compile
[INFO] | \- com.fasterxml.jackson.core:jackson-core:jar:2.6.6:compile
[INFO] +- org.codehaus.jackson:jackson-mapper-asl:jar:1.9.13:compile
[INFO] | \- org.codehaus.jackson:jackson-core-asl:jar:1.9.13:compile
[INFO] \- org.apache.commons:commons-math3:jar:3.6.1:compile
[INFO]
[INFO] ---------------------< eu.dnetlib:dnet-dedup-test >---------------------
[INFO] Building dnet-dedup-test 3.0.3-SNAPSHOT [3/3]
[INFO] --------------------------------[ jar ]---------------------------------
[INFO]
[INFO] --- maven-dependency-plugin:3.0.0:tree (default-cli) @ dnet-dedup-test ---
[INFO] eu.dnetlib:dnet-dedup-test:jar:3.0.3-SNAPSHOT
[INFO] +- eu.dnetlib:dnet-pace-core:jar:3.0.3-SNAPSHOT:compile
[INFO] | +- edu.cmu:secondstring:jar:1.0.0:compile
[INFO] | +- com.google.guava:guava:jar:15.0:compile
[INFO] | +- com.google.code.gson:gson:jar:2.2.2:compile
[INFO] | +- commons-lang:commons-lang:jar:2.6:compile
[INFO] | +- commons-io:commons-io:jar:2.4:compile
[INFO] | +- commons-collections:commons-collections:jar:3.2.1:compile
[INFO] | +- com.googlecode.protobuf-java-format:protobuf-java-format:jar:1.2:compile
[INFO] | +- org.antlr:stringtemplate:jar:3.2:compile
[INFO] | | \- org.antlr:antlr:jar:2.7.7:compile
[INFO] | +- commons-logging:commons-logging:jar:1.1.3:compile
[INFO] | +- org.reflections:reflections:jar:0.9.10:compile
[INFO] | | +- org.javassist:javassist:jar:3.19.0-GA:compile
[INFO] | | \- com.google.code.findbugs:annotations:jar:2.0.1:compile
[INFO] | +- com.fasterxml.jackson.core:jackson-databind:jar:2.6.6:compile
[INFO] | | +- com.fasterxml.jackson.core:jackson-annotations:jar:2.6.0:compile
[INFO] | | \- com.fasterxml.jackson.core:jackson-core:jar:2.6.6:compile
[INFO] | +- org.codehaus.jackson:jackson-mapper-asl:jar:1.9.13:compile
[INFO] | | \- org.codehaus.jackson:jackson-core-asl:jar:1.9.13:compile
[INFO] | \- org.apache.commons:commons-math3:jar:3.6.1:compile
[INFO] +- eu.dnetlib:dnet-openaire-data-protos:jar:3.9.3-proto250:compile
[INFO] | +- com.google.protobuf:protobuf-java:jar:2.5.0:compile
[INFO] | \- log4j:log4j:jar:1.2.17:compile (version selected from constraint [1.2.17,1.2.17])
[INFO] +- org.apache.spark:spark-core_2.11:jar:2.2.0:provided
[INFO] | +- org.apache.avro:avro:jar:1.7.7:provided
[INFO] | | +- com.thoughtworks.paranamer:paranamer:jar:2.3:provided
[INFO] | | \- org.apache.commons:commons-compress:jar:1.4.1:provided
[INFO] | | \- org.tukaani:xz:jar:1.0:provided
[INFO] | +- org.apache.avro:avro-mapred:jar:hadoop2:1.7.7:provided
[INFO] | | +- org.apache.avro:avro-ipc:jar:1.7.7:provided
[INFO] | | \- org.apache.avro:avro-ipc:jar:tests:1.7.7:provided
[INFO] | +- com.twitter:chill_2.11:jar:0.8.0:provided
[INFO] | | \- com.esotericsoftware:kryo-shaded:jar:3.0.3:provided
[INFO] | | +- com.esotericsoftware:minlog:jar:1.3.0:provided
[INFO] | | \- org.objenesis:objenesis:jar:2.1:provided
[INFO] | +- com.twitter:chill-java:jar:0.8.0:provided
[INFO] | +- org.apache.xbean:xbean-asm5-shaded:jar:4.4:provided
[INFO] | +- org.apache.hadoop:hadoop-client:jar:2.6.5:provided
[INFO] | | +- org.apache.hadoop:hadoop-common:jar:2.6.5:provided
[INFO] | | | +- commons-cli:commons-cli:jar:1.2:provided
[INFO] | | | +- xmlenc:xmlenc:jar:0.52:provided
[INFO] | | | +- commons-httpclient:commons-httpclient:jar:3.1:provided
[INFO] | | | +- commons-configuration:commons-configuration:jar:1.6:provided
[INFO] | | | | +- commons-digester:commons-digester:jar:1.8:provided
[INFO] | | | | | \- commons-beanutils:commons-beanutils:jar:1.7.0:provided
[INFO] | | | | \- commons-beanutils:commons-beanutils-core:jar:1.8.0:provided
[INFO] | | | +- org.apache.hadoop:hadoop-auth:jar:2.6.5:provided
[INFO] | | | | \- org.apache.directory.server:apacheds-kerberos-codec:jar:2.0.0-M15:provided
[INFO] | | | | +- org.apache.directory.server:apacheds-i18n:jar:2.0.0-M15:provided
[INFO] | | | | +- org.apache.directory.api:api-asn1-api:jar:1.0.0-M20:provided
[INFO] | | | | \- org.apache.directory.api:api-util:jar:1.0.0-M20:provided
[INFO] | | | +- org.apache.curator:curator-client:jar:2.6.0:provided
[INFO] | | | \- org.htrace:htrace-core:jar:3.0.4:provided
[INFO] | | +- org.apache.hadoop:hadoop-hdfs:jar:2.6.5:provided
[INFO] | | | +- org.mortbay.jetty:jetty-util:jar:6.1.26:provided
[INFO] | | | \- xerces:xercesImpl:jar:2.9.1:provided
[INFO] | | | \- xml-apis:xml-apis:jar:1.3.04:provided
[INFO] | | +- org.apache.hadoop:hadoop-mapreduce-client-app:jar:2.6.5:provided
[INFO] | | | +- org.apache.hadoop:hadoop-mapreduce-client-common:jar:2.6.5:provided
[INFO] | | | | +- org.apache.hadoop:hadoop-yarn-client:jar:2.6.5:provided
[INFO] | | | | \- org.apache.hadoop:hadoop-yarn-server-common:jar:2.6.5:provided
[INFO] | | | \- org.apache.hadoop:hadoop-mapreduce-client-shuffle:jar:2.6.5:provided
[INFO] | | +- org.apache.hadoop:hadoop-yarn-api:jar:2.6.5:provided
[INFO] | | +- org.apache.hadoop:hadoop-mapreduce-client-core:jar:2.6.5:provided
[INFO] | | | \- org.apache.hadoop:hadoop-yarn-common:jar:2.6.5:provided
[INFO] | | | +- javax.xml.bind:jaxb-api:jar:2.2.2:provided
[INFO] | | | | \- javax.xml.stream:stax-api:jar:1.0-2:provided
[INFO] | | | +- org.codehaus.jackson:jackson-jaxrs:jar:1.9.13:provided
[INFO] | | | \- org.codehaus.jackson:jackson-xc:jar:1.9.13:provided
[INFO] | | +- org.apache.hadoop:hadoop-mapreduce-client-jobclient:jar:2.6.5:provided
[INFO] | | \- org.apache.hadoop:hadoop-annotations:jar:2.6.5:provided
[INFO] | +- org.apache.spark:spark-launcher_2.11:jar:2.2.0:provided
[INFO] | +- org.apache.spark:spark-network-common_2.11:jar:2.2.0:provided
[INFO] | | \- org.fusesource.leveldbjni:leveldbjni-all:jar:1.8:provided
[INFO] | +- org.apache.spark:spark-network-shuffle_2.11:jar:2.2.0:provided
[INFO] | +- org.apache.spark:spark-unsafe_2.11:jar:2.2.0:provided
[INFO] | +- net.java.dev.jets3t:jets3t:jar:0.9.3:provided
[INFO] | | +- org.apache.httpcomponents:httpcore:jar:4.3.3:provided
[INFO] | | +- org.apache.httpcomponents:httpclient:jar:4.3.6:provided
[INFO] | | +- javax.activation:activation:jar:1.1.1:provided
[INFO] | | +- mx4j:mx4j:jar:3.0.2:provided
[INFO] | | +- javax.mail:mail:jar:1.4.7:provided
[INFO] | | +- org.bouncycastle:bcprov-jdk15on:jar:1.51:provided
[INFO] | | \- com.jamesmurty.utils:java-xmlbuilder:jar:1.0:provided
[INFO] | | \- net.iharder:base64:jar:2.3.8:provided
[INFO] | +- org.apache.curator:curator-recipes:jar:2.6.0:provided
[INFO] | | +- org.apache.curator:curator-framework:jar:2.6.0:provided
[INFO] | | \- org.apache.zookeeper:zookeeper:jar:3.4.6:provided
[INFO] | +- javax.servlet:javax.servlet-api:jar:3.1.0:provided
[INFO] | +- org.apache.commons:commons-lang3:jar:3.5:provided
[INFO] | +- com.google.code.findbugs:jsr305:jar:1.3.9:provided
[INFO] | +- org.slf4j:slf4j-api:jar:1.7.16:provided
[INFO] | +- org.slf4j:jul-to-slf4j:jar:1.7.16:provided
[INFO] | +- org.slf4j:jcl-over-slf4j:jar:1.7.16:provided
[INFO] | +- org.slf4j:slf4j-log4j12:jar:1.7.16:provided
[INFO] | +- com.ning:compress-lzf:jar:1.0.3:provided
[INFO] | +- org.xerial.snappy:snappy-java:jar:1.1.2.6:provided
[INFO] | +- net.jpountz.lz4:lz4:jar:1.3.0:provided
[INFO] | +- org.roaringbitmap:RoaringBitmap:jar:0.5.11:provided
[INFO] | +- commons-net:commons-net:jar:2.2:provided
[INFO] | +- org.scala-lang:scala-library:jar:2.11.8:provided
[INFO] | +- org.json4s:json4s-jackson_2.11:jar:3.2.11:provided
[INFO] | | \- org.json4s:json4s-core_2.11:jar:3.2.11:provided
[INFO] | | +- org.json4s:json4s-ast_2.11:jar:3.2.11:provided
[INFO] | | \- org.scala-lang:scalap:jar:2.11.0:provided
[INFO] | | \- org.scala-lang:scala-compiler:jar:2.11.0:provided
[INFO] | | +- org.scala-lang.modules:scala-xml_2.11:jar:1.0.1:provided
[INFO] | | \- org.scala-lang.modules:scala-parser-combinators_2.11:jar:1.0.1:provided
[INFO] | +- org.glassfish.jersey.core:jersey-client:jar:2.22.2:provided
[INFO] | | +- javax.ws.rs:javax.ws.rs-api:jar:2.0.1:provided
[INFO] | | +- org.glassfish.hk2:hk2-api:jar:2.4.0-b34:provided
[INFO] | | | +- org.glassfish.hk2:hk2-utils:jar:2.4.0-b34:provided
[INFO] | | | \- org.glassfish.hk2.external:aopalliance-repackaged:jar:2.4.0-b34:provided
[INFO] | | +- org.glassfish.hk2.external:javax.inject:jar:2.4.0-b34:provided
[INFO] | | \- org.glassfish.hk2:hk2-locator:jar:2.4.0-b34:provided
[INFO] | +- org.glassfish.jersey.core:jersey-common:jar:2.22.2:provided
[INFO] | | +- javax.annotation:javax.annotation-api:jar:1.2:provided
[INFO] | | +- org.glassfish.jersey.bundles.repackaged:jersey-guava:jar:2.22.2:provided
[INFO] | | \- org.glassfish.hk2:osgi-resource-locator:jar:1.0.1:provided
[INFO] | +- org.glassfish.jersey.core:jersey-server:jar:2.22.2:provided
[INFO] | | +- org.glassfish.jersey.media:jersey-media-jaxb:jar:2.22.2:provided
[INFO] | | \- javax.validation:validation-api:jar:1.1.0.Final:provided
[INFO] | +- org.glassfish.jersey.containers:jersey-container-servlet:jar:2.22.2:provided
[INFO] | +- org.glassfish.jersey.containers:jersey-container-servlet-core:jar:2.22.2:provided
[INFO] | +- io.netty:netty-all:jar:4.0.43.Final:provided
[INFO] | +- io.netty:netty:jar:3.9.9.Final:provided
[INFO] | +- com.clearspring.analytics:stream:jar:2.7.0:provided
[INFO] | +- io.dropwizard.metrics:metrics-core:jar:3.1.2:provided
[INFO] | +- io.dropwizard.metrics:metrics-jvm:jar:3.1.2:provided
[INFO] | +- io.dropwizard.metrics:metrics-json:jar:3.1.2:provided
[INFO] | +- io.dropwizard.metrics:metrics-graphite:jar:3.1.2:provided
[INFO] | +- com.fasterxml.jackson.module:jackson-module-scala_2.11:jar:2.6.5:provided
[INFO] | | +- org.scala-lang:scala-reflect:jar:2.11.7:provided
[INFO] | | \- com.fasterxml.jackson.module:jackson-module-paranamer:jar:2.6.5:provided
[INFO] | +- org.apache.ivy:ivy:jar:2.4.0:provided
[INFO] | +- oro:oro:jar:2.0.8:provided
[INFO] | +- net.razorvine:pyrolite:jar:4.13:provided
[INFO] | +- net.sf.py4j:py4j:jar:0.10.4:provided
[INFO] | +- org.apache.spark:spark-tags_2.11:jar:2.2.0:provided
[INFO] | +- org.apache.commons:commons-crypto:jar:1.0.0:provided
[INFO] | \- org.spark-project.spark:unused:jar:1.0.0:provided
[INFO] +- org.apache.spark:spark-graphx_2.11:jar:2.2.0:provided
[INFO] | +- org.apache.spark:spark-mllib-local_2.11:jar:2.2.0:provided
[INFO] | | \- org.scalanlp:breeze_2.11:jar:0.13.1:provided
[INFO] | | +- org.scalanlp:breeze-macros_2.11:jar:0.13.1:provided
[INFO] | | +- net.sf.opencsv:opencsv:jar:2.3:provided
[INFO] | | +- com.github.rwl:jtransforms:jar:2.4.0:provided
[INFO] | | +- org.spire-math:spire_2.11:jar:0.13.0:provided
[INFO] | | | +- org.spire-math:spire-macros_2.11:jar:0.13.0:provided
[INFO] | | | \- org.typelevel:machinist_2.11:jar:0.6.1:provided
[INFO] | | \- com.chuusai:shapeless_2.11:jar:2.3.2:provided
[INFO] | | \- org.typelevel:macro-compat_2.11:jar:1.1.1:provided
[INFO] | +- com.github.fommil.netlib:core:jar:1.1.2:provided
[INFO] | \- net.sourceforge.f2j:arpack_combined_all:jar:0.1:provided
[INFO] +- org.apache.spark:spark-sql_2.11:jar:2.2.0:provided
[INFO] | +- com.univocity:univocity-parsers:jar:2.2.1:provided
[INFO] | +- org.apache.spark:spark-sketch_2.11:jar:2.2.0:provided
[INFO] | +- org.apache.spark:spark-catalyst_2.11:jar:2.2.0:provided
[INFO] | | +- org.codehaus.janino:janino:jar:3.0.0:provided
[INFO] | | +- org.codehaus.janino:commons-compiler:jar:3.0.0:provided
[INFO] | | \- org.antlr:antlr4-runtime:jar:4.5.3:provided
[INFO] | +- org.apache.parquet:parquet-column:jar:1.8.2:provided
[INFO] | | +- org.apache.parquet:parquet-common:jar:1.8.2:provided
[INFO] | | \- org.apache.parquet:parquet-encoding:jar:1.8.2:provided
[INFO] | \- org.apache.parquet:parquet-hadoop:jar:1.8.2:provided
[INFO] | +- org.apache.parquet:parquet-format:jar:2.3.1:provided
[INFO] | \- org.apache.parquet:parquet-jackson:jar:1.8.2:provided
[INFO] +- eu.dnetlib:dnet-openaireplus-mapping-utils:jar:6.2.18:test
[INFO] | +- com.ximpleware:vtd-xml:jar:2.13.4:test (version selected from constraint [2.12,3.0.0))
[INFO] | +- commons-codec:commons-codec:jar:1.9:provided
[INFO] | +- dom4j:dom4j:jar:1.6.1:test (version selected from constraint [1.6.1,1.6.1])
[INFO] | +- net.sf.supercsv:super-csv:jar:2.4.0:test
[INFO] | +- eu.dnetlib:cnr-misc-utils:jar:1.0.6-SNAPSHOT:test (version selected from constraint [1.0.0,2.0.0))
[INFO] | | +- jaxen:jaxen:jar:1.1.6:test
[INFO] | | +- saxonica:saxon:jar:9.1.0.8:test
[INFO] | | +- saxonica:saxon-dom:jar:9.1.0.8:test
[INFO] | | +- jgrapht:jgrapht:jar:0.7.2:test
[INFO] | | +- net.sf.ehcache:ehcache:jar:2.8.0:test
[INFO] | | \- org.springframework:spring-test:jar:4.2.5.RELEASE:test (version selected from constraint [4.2.5.RELEASE,4.2.5.RELEASE])
[INFO] | | \- org.springframework:spring-core:jar:4.2.5.RELEASE:test
[INFO] | +- eu.dnetlib:dnet-hadoop-commons:jar:2.0.2-SNAPSHOT:test (version selected from constraint [2.0.0,3.0.0))
[INFO] | | +- org.apache.hadoop:hadoop-core:jar:2.0.0-mr1-cdh4.7.0:test
[INFO] | | | +- commons-el:commons-el:jar:1.0:test
[INFO] | | | \- hsqldb:hsqldb:jar:1.8.0.10:test
[INFO] | | \- org.springframework:spring-beans:jar:4.2.5.RELEASE:test (version selected from constraint [4.2.5.RELEASE,4.2.5.RELEASE])
[INFO] | \- eu.dnetlib:dnet-index-solr-common:jar:1.3.1:test (version selected from constraint [1.0.0,1.3.1])
[INFO] | \- org.apache.solr:solr-solrj:jar:4.9.0:test
[INFO] | +- org.apache.httpcomponents:httpmime:jar:4.3.1:test
[INFO] | \- org.noggit:noggit:jar:0.5:test
[INFO] \- junit:junit:jar:4.9:test
[INFO] \- org.hamcrest:hamcrest-core:jar:1.1:test
[INFO] ------------------------------------------------------------------------
[INFO] Reactor Summary:
[INFO]
[INFO] dnet-dedup 3.0.3-SNAPSHOT .......................... SUCCESS [ 1.152 s]
[INFO] dnet-pace-core ..................................... SUCCESS [ 0.117 s]
[INFO] dnet-dedup-test 3.0.3-SNAPSHOT ..................... SUCCESS [ 1.407 s]
[INFO] ------------------------------------------------------------------------
[INFO] BUILD SUCCESS
[INFO] ------------------------------------------------------------------------
[INFO] Total time: 3.216 s
[INFO] Finished at: 2019-03-29T15:02:42+01:00
[INFO] ------------------------------------------------------------------------

Binary file not shown.

View File

@ -14,16 +14,17 @@
<packaging>jar</packaging>
<build>
<sourceDirectory>src/main/java</sourceDirectory>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-deploy-plugin</artifactId>
<version>2.7</version>
<!--<configuration>-->
<!--<skip>true</skip>-->
<!--</configuration>-->
<configuration>
<skip>true</skip>
</configuration>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
@ -31,11 +32,50 @@
<source>1.8</source>
<target>1.8</target>
<includes>
<include>src/main/java/**/*.java</include>
<include>src/main/java/**/*.scala</include>
<include>**/*.java</include>
</includes>
<!--<includes>-->
<!--<include>src/main/java/**/*.java</include>-->
<!--<include>src/main/java/**/*.scala</include>-->
<!--</includes>-->
</configuration>
</plugin>
<plugin>
<groupId>net.alchim31.maven</groupId>
<artifactId>scala-maven-plugin</artifactId>
<version>4.0.1</version>
<!--<executions>-->
<!--<execution>-->
<!--<goals>-->
<!--<goal>compile</goal>-->
<!--<goal>testCompile</goal>-->
<!--</goals>-->
<!--</execution>-->
<!--</executions>-->
<executions>
<execution>
<id>scala-compile-first</id>
<phase>initialize</phase>
<goals>
<goal>add-source</goal>
<goal>compile</goal>
</goals>
</execution>
<execution>
<id>scala-test-compile</id>
<phase>process-test-resources</phase>
<goals>
<goal>testCompile</goal>
</goals>
</execution>
</executions>
<configuration>
<scalaVersion>${scala.version}</scalaVersion>
</configuration>
</plugin>
</plugins>
</build>
@ -78,12 +118,22 @@
<scope>test</scope>
</dependency>
<dependency>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-databind</artifactId>
</dependency>
<dependency>
<groupId>org.apache.oozie</groupId>
<artifactId>oozie-client</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.scala-lang</groupId>
<artifactId>scala-library</artifactId>
</dependency>
</dependencies>
</project>

Binary file not shown.

View File

@ -0,0 +1,128 @@
package eu.dnetlib;
import com.google.common.collect.Sets;
import eu.dnetlib.data.proto.DedupProtos;
import eu.dnetlib.graph.GraphProcessor;
import eu.dnetlib.pace.clustering.BlacklistAwareClusteringCombiner;
import eu.dnetlib.pace.config.DedupConfig;
import eu.dnetlib.pace.model.MapDocument;
import eu.dnetlib.pace.util.BlockProcessor;
import eu.dnetlib.pace.utils.PaceUtils;
import eu.dnetlib.reporter.SparkCounter;
import eu.dnetlib.reporter.SparkReporter;
import org.apache.commons.io.IOUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.graphx.Edge;
import org.apache.spark.rdd.RDD;
import org.apache.spark.sql.SparkSession;
import scala.Tuple2;
import java.io.IOException;
import java.io.StringWriter;
import java.net.URI;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.util.Set;
import java.util.stream.Collectors;
/**
 * Runs the deduplication workflow on a Spark session with master {@code local[*]},
 * reading both the dataset and the dedup configuration from the classpath.
 *
 * The steps performed by {@link #main(String[])} are:
 * <ol>
 *   <li>parse every input line into a {@code MapDocument};</li>
 *   <li>group the documents into blocks by their clustering keys;</li>
 *   <li>compare the documents inside each block to produce "similarTo" relations;</li>
 *   <li>compute the connected components of the resulting graph and print them.</li>
 * </ol>
 */
public class SparkLocalTest {

    /** Counter facade used by the Spark closures below; assigned in {@link #main(String[])}. */
    public static SparkCounter counter ;

    /**
     * Entry point of the local run.
     *
     * @param args ignored: all the inputs are fixed classpath resources
     * @throws IllegalStateException if the dataset resource is not on the classpath
     */
    public static void main(String[] args) {

        // Resolve the dataset first, so that a missing resource fails fast with a clear
        // message (instead of a bare NullPointerException) and before Spark is started.
        final String datasetResource = "/eu/dnetlib/pace/organization.to.fix.json";
        final URL dataset = SparkLocalTest.class.getResource(datasetResource);
        if (dataset == null) {
            throw new IllegalStateException("cannot find dataset on the classpath: " + datasetResource);
        }

        final SparkSession spark = SparkSession
                .builder()
                .appName("Deduplication")
                .master("local[*]")
                .getOrCreate();

        try {
            final JavaSparkContext context = new JavaSparkContext(spark.sparkContext());

            final JavaRDD<String> dataRDD = context.textFile(dataset.getPath());

            counter = new SparkCounter(context);

            //read the configuration from the classpath
            final DedupConfig config = DedupConfig.load(Utility.readFromClasspath("/eu/dnetlib/pace/org.curr.conf"));

            // Every accumulator name has the form "group::name"; each one is touched with a
            // zero increment (presumably to register it before the job runs - TODO confirm).
            BlockProcessor.constructAccumulator(config);
            BlockProcessor.accumulators.forEach(acc -> {
                final String[] values = acc.split("::");
                counter.incrementCounter(values[0], values[1], 0);
            });

            //create vertexes of the graph: <ID, MapDocument>
            JavaPairRDD<String, MapDocument> mapDocs = dataRDD.mapToPair(it -> {
                MapDocument mapDocument = PaceUtils.asMapDocument(config, it);
                return new Tuple2<>(mapDocument.getIdentifier(), mapDocument);
            });

            // The vertex id is the hashCode of the document identifier, widened to long.
            RDD<Tuple2<Object, MapDocument>> vertexes = mapDocs
                    .mapToPair(t -> new Tuple2<Object, MapDocument>((long) t._1().hashCode(), t._2()))
                    .rdd();

            //create relations between documents
            JavaPairRDD<String, Iterable<MapDocument>> blocks = mapDocs
                    //the reduce only ensures that there are no two documents with the same id
                    .reduceByKey((a, b) -> a)
                    //Clustering: from <id, doc> to List<groupkey, doc>
                    .flatMapToPair(a -> {
                        final MapDocument currentDocument = a._2();
                        return Utility.getGroupingKeys(config, currentDocument).stream()
                                .map(it -> new Tuple2<>(it, currentDocument))
                                .collect(Collectors.toList())
                                .iterator();
                    })
                    //group the documents by clustering key
                    .groupByKey();

            //print blocks
            blocks.foreach(b -> {
                // StringBuilder instead of repeated String concatenation; the output is unchanged
                // (every identifier is followed by a single space).
                final StringBuilder print = new StringBuilder(b._1()).append(": ");
                for (MapDocument doc : b._2()) {
                    print.append(doc.getIdentifier()).append(" ");
                }
                System.out.println(print.toString());
            });

            //create relations by comparing only elements in the same group
            final JavaPairRDD<String, String> relationRDD = blocks.flatMapToPair(it -> {
                final SparkReporter reporter = new SparkReporter(counter);
                new BlockProcessor(config).process(it._1(), it._2(), reporter);
                return reporter.getReport().iterator();
            });

            // Edge endpoints use the same hashCode-based ids as the vertexes above.
            final RDD<Edge<String>> edgeRdd = relationRDD
                    .map(it -> new Edge<>(it._1().hashCode(), it._2().hashCode(), "similarTo"))
                    .rdd();

            // NOTE(review): 20 is presumably the maximum number of iterations - confirm in GraphProcessor.
            JavaRDD<ConnectedComponent> ccs = GraphProcessor.findCCs(vertexes, edgeRdd, 20).toJavaRDD();

            // Components with more than one document are duplicates, singletons are not.
            final JavaRDD<ConnectedComponent> connectedComponents = ccs.filter(cc -> cc.getDocs().size()>1);
            final JavaRDD<ConnectedComponent> nonDeduplicated = ccs.filter(cc -> cc.getDocs().size()==1);

            System.out.println("Non duplicates: " + nonDeduplicated.count());
            System.out.println("Duplicates: " + connectedComponents.flatMap(cc -> cc.getDocs().iterator()).count());
            System.out.println("Connected Components: " + connectedComponents.count());

            counter.getAccumulators().values().forEach(it-> System.out.println(it.getGroup()+" "+it.getName()+" -->"+it.value()));

            //print deduped
            connectedComponents.foreach(cc -> {
                System.out.println("cc = " + cc.getId());
                for (MapDocument doc: cc.getDocs()) {
                    System.out.println(doc.getIdentifier() + "; ln: " + doc.getFieldMap().get("legalname").stringValue() + "; sn: " + doc.getFieldMap().get("legalshortname").stringValue());
                }
            });

            //print nondeduped
            nonDeduplicated.foreach(cc -> {
                System.out.println("nd = " + cc.getId());
                // the component holds exactly one document (see the filter above)
                final MapDocument single = cc.getDocs().iterator().next();
                System.out.println(single.getFieldMap().get("legalname").stringValue() + "; sn: " + single.getFieldMap().get("legalshortname").stringValue());
            });

            //print ids
            //// ccs.foreach(cc -> System.out.println(cc.getId()));
            //// connectedComponents.saveAsTextFile("file:///Users/miconis/Downloads/dumps/organizations_dedup");
        } finally {
            // Release the local Spark context even when the job fails.
            spark.stop();
        }
    }
}

View File

@ -1,42 +1,41 @@
package eu.dnetlib;
import com.google.common.collect.Sets;
import eu.dnetlib.graph.GraphProcessor;
import eu.dnetlib.pace.clustering.BlacklistAwareClusteringCombiner;
import eu.dnetlib.pace.config.DedupConfig;
import eu.dnetlib.pace.model.MapDocument;
import eu.dnetlib.pace.util.BlockProcessor;
import eu.dnetlib.pace.utils.PaceUtils;
import eu.dnetlib.reporter.SparkCounter;
import eu.dnetlib.reporter.SparkReporter;
import org.apache.commons.io.IOUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.graphx.Edge;
import org.apache.spark.rdd.RDD;
import org.apache.spark.sql.SparkSession;
import scala.Tuple2;
import java.io.IOException;
import java.io.StringWriter;
import java.net.URL;
import java.util.Set;
import java.util.stream.Collectors;
public class SparkTest {
public static SparkCounter counter ;
public static void main(String[] args) {
final JavaSparkContext context = new JavaSparkContext(new SparkConf().setAppName("Deduplication").setMaster("yarn"));
public static void main(String[] args) throws IOException {
final URL dataset = SparkTest.class.getResource(args[1]);
final JavaRDD<String> dataRDD = context.textFile(dataset.getPath());
final SparkSession spark = SparkSession
.builder()
.appName("Deduplication")
.master("yarn")
.getOrCreate();
final JavaSparkContext context = new JavaSparkContext(spark.sparkContext());
final JavaRDD<String> dataRDD = Utility.loadDataFromHDFS(args[0], context);
counter = new SparkCounter(context);
//read the configuration from the classpath
final DedupConfig config = DedupConfig.load(readFromClasspath(args[0]));
final DedupConfig config = Utility.loadConfigFromHDFS(args[1]);
BlockProcessor.constructAccumulator(config);
BlockProcessor.accumulators.forEach(acc -> {
@ -59,7 +58,7 @@ public class SparkTest {
.flatMapToPair(a -> {
final MapDocument currentDocument = a._2();
return getGroupingKeys(config, currentDocument).stream()
return Utility.getGroupingKeys(config, currentDocument).stream()
.map(it -> new Tuple2<>(it, currentDocument)).collect(Collectors.toList()).iterator();
}).groupByKey();//group documents basing on the key
@ -105,24 +104,10 @@ public class SparkTest {
System.out.println(cc.getDocs().iterator().next().getFieldMap().get("legalname").stringValue() + "; sn: " + cc.getDocs().iterator().next().getFieldMap().get("legalshortname").stringValue());
});
//print ids
//// ccs.foreach(cc -> System.out.println(cc.getId()));
//// connectedComponents.saveAsTextFile("file:///Users/miconis/Downloads/dumps/organizations_dedup");
// print ids
// ccs.foreach(cc -> System.out.println(cc.getId()));
// connectedComponents.saveAsTextFile("file:///Users/miconis/Downloads/dumps/organizations_dedup");
}
static String readFromClasspath(final String filename) {
final StringWriter sw = new StringWriter();
try {
IOUtils.copy(SparkTest.class.getResourceAsStream(filename), sw);
return sw.toString();
} catch (final IOException e) {
throw new RuntimeException("cannot load resource from classpath: " + filename);
}
}
static Set<String> getGroupingKeys(DedupConfig conf, MapDocument doc) {
return Sets.newHashSet(BlacklistAwareClusteringCombiner.filterAndCombine(doc, conf));
}
}

View File

@ -0,0 +1,50 @@
package eu.dnetlib;
import com.google.common.collect.Sets;
import eu.dnetlib.pace.clustering.BlacklistAwareClusteringCombiner;
import eu.dnetlib.pace.config.DedupConfig;
import eu.dnetlib.pace.model.MapDocument;
import org.apache.commons.io.IOUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import java.io.IOException;
import java.io.StringWriter;
import java.nio.charset.StandardCharsets;
import java.util.Set;
/**
 * Helpers shared by the Spark deduplication entry points: loading the input data and the
 * dedup configuration, and computing the clustering keys of a document.
 */
public class Utility {

    /**
     * Loads a text file as an RDD with one element per line.
     *
     * @param path    path handed to {@code JavaSparkContext.textFile}
     * @param context Spark context used to read the file
     * @return the lines of the file
     */
    public static JavaRDD<String> loadDataFromHDFS(String path, JavaSparkContext context) {
        return context.textFile(path);
    }

    /**
     * Reads the dedup configuration from the file system resolved by a default Hadoop
     * {@code Configuration} and parses it.
     *
     * @param path path of the configuration file
     * @return the parsed configuration
     * @throws IOException if the file cannot be opened or read
     */
    public static DedupConfig loadConfigFromHDFS(String path) throws IOException {

        Configuration conf = new Configuration();
        // conf.set("fs.defaultFS", "");
        FileSystem fileSystem = FileSystem.get(conf);

        // FileSystem.open already returns an FSDataInputStream, so no extra wrapping is needed;
        // try-with-resources closes the stream once the content has been read.
        // The FileSystem itself is deliberately not closed here.
        try (FSDataInputStream inputStream = fileSystem.open(new Path(path))) {
            return DedupConfig.load(IOUtils.toString(inputStream, StandardCharsets.UTF_8.name()));
        }
    }

    /**
     * Reads a classpath resource into a string, decoding it as UTF-8 (the same charset used
     * by {@link #loadConfigFromHDFS(String)}).
     *
     * @param filename resource name, resolved with {@code Class.getResourceAsStream}
     * @return the content of the resource
     * @throws RuntimeException if the resource does not exist or cannot be read
     */
    static String readFromClasspath(final String filename) {
        try (final java.io.InputStream resource = Utility.class.getResourceAsStream(filename)) {
            if (resource == null) {
                // getResourceAsStream returns null for a missing resource: report it explicitly
                // instead of failing later with a NullPointerException.
                throw new RuntimeException("cannot load resource from classpath: " + filename);
            }
            return IOUtils.toString(resource, StandardCharsets.UTF_8);
        } catch (final IOException e) {
            // keep the original cause so that the failure can be diagnosed
            throw new RuntimeException("cannot load resource from classpath: " + filename, e);
        }
    }

    /**
     * Computes the distinct clustering keys of a document.
     *
     * @param conf dedup configuration passed to the clustering combiner
     * @param doc  document to be clustered
     * @return the keys produced by {@code BlacklistAwareClusteringCombiner.filterAndCombine}, without duplicates
     */
    static Set<String> getGroupingKeys(DedupConfig conf, MapDocument doc) {
        return Sets.newHashSet(BlacklistAwareClusteringCombiner.filterAndCombine(doc, conf));
    }
}

View File

@ -6,8 +6,7 @@ import org.apache.oozie.client.OozieClientException;
import org.apache.oozie.client.WorkflowJob;
import org.junit.Test;
import java.io.IOException;
import java.io.StringWriter;
import java.io.*;
import java.util.Properties;
import static junit.framework.Assert.assertEquals;

View File

@ -0,0 +1,17 @@
package eu.dnetlib.pace;
import eu.dnetlib.SparkLocalTest;
import org.junit.Test;
import java.io.IOException;
/**
 * JUnit wrapper that launches the local Spark deduplication run.
 */
public class SparkTester {

    /**
     * Calls {@code SparkLocalTest.main} with an empty argument array; the test succeeds
     * when the run completes without throwing.
     */
    @Test
    public void sparkLocalTest() throws IOException {
        final String[] noArguments = new String[0];
        SparkLocalTest.main(noArguments);
    }
}

109
dnet-dedup.ipr Normal file
View File

@ -0,0 +1,109 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
-->
<project version="4" relativePaths="false">
<component name="ProjectRootManager" version="2" assert-keyword="true" project-jdk-name="1.8" jdk-15="true"/>
<component name="CodeStyleManager">
<option name="USE_DEFAULT_CODE_STYLE_SCHEME" value="true"/>
<option name="CODE_STYLE_SCHEME" value=""/>
</component>
<component name="libraryTable"/>
<component name="CompilerConfiguration">
<option name="DEFAULT_COMPILER" value="Javac"/>
<option name="CLEAR_OUTPUT_DIRECTORY" value="false"/>
<!--
<wildcardResourcePatterns>
<entry name="${wildcardResourcePattern}"/>
</wildcardResourcePatterns>
-->
<wildcardResourcePatterns>
<entry name="!?*.java"/>
</wildcardResourcePatterns>
</component>
<component name="JavacSettings">
<option name="DEBUGGING_INFO" value="true"/>
<option name="GENERATE_NO_WARNINGS" value="false"/>
<option name="DEPRECATION" value="true"/>
<option name="ADDITIONAL_OPTIONS_STRING" value=""/>
<option name="MAXIMUM_HEAP_SIZE" value="128"/>
<option name="USE_GENERICS_COMPILER" value="false"/>
</component>
<component name="JikesSettings">
<option name="DEBUGGING_INFO" value="true"/>
<option name="DEPRECATION" value="true"/>
<option name="GENERATE_NO_WARNINGS" value="false"/>
<option name="GENERATE_MAKE_FILE_DEPENDENCIES" value="false"/>
<option name="DO_FULL_DEPENDENCE_CHECK" value="false"/>
<option name="IS_INCREMENTAL_MODE" value="false"/>
<option name="IS_EMACS_ERRORS_MODE" value="true"/>
<option name="ADDITIONAL_OPTIONS_STRING" value=""/>
<option name="MAXIMUM_HEAP_SIZE" value="128"/>
</component>
<component name="AntConfiguration">
<option name="IS_AUTOSCROLL_TO_SOURCE" value="false"/>
<option name="FILTER_TARGETS" value="false"/>
</component>
<component name="JavadocGenerationManager">
<option name="OUTPUT_DIRECTORY"/>
<option name="OPTION_SCOPE" value="protected"/>
<option name="OPTION_HIERARCHY" value="false"/>
<option name="OPTION_NAVIGATOR" value="false"/>
<option name="OPTION_INDEX" value="false"/>
<option name="OPTION_SEPARATE_INDEX" value="false"/>
<option name="OPTION_USE_1_1" value="false"/>
<option name="OPTION_DOCUMENT_TAG_USE" value="false"/>
<option name="OPTION_DOCUMENT_TAG_AUTHOR" value="false"/>
<option name="OPTION_DOCUMENT_TAG_VERSION" value="false"/>
<option name="OPTION_DOCUMENT_TAG_DEPRECATED" value="false"/>
<option name="OPTION_DEPRECATED_LIST" value="false"/>
<option name="OTHER_OPTIONS"/>
<option name="HEAP_SIZE"/>
<option name="OPEN_IN_BROWSER" value="false"/>
</component>
<component name="JUnitProjectSettings">
<option name="TEST_RUNNER" value="UI"/>
</component>
<component name="EntryPointsManager">
<entry_points/>
</component>
<component name="DataSourceManager"/>
<component name="ExportToHTMLSettings">
<option name="PRINT_LINE_NUMBERS" value="false"/>
<option name="OPEN_IN_BROWSER" value="false"/>
<option name="OUTPUT_DIRECTORY"/>
</component>
<component name="ImportConfiguration">
<option name="VENDOR"/>
<option name="RELEASE_TAG"/>
<option name="LOG_MESSAGE"/>
<option name="CHECKOUT_AFTER_IMPORT" value="true"/>
</component>
<component name="ProjectModuleManager">
<modules>
<!-- module filepath="$$PROJECT_DIR$$/${pom.artifactId}.iml"/ -->
<module filepath="$PROJECT_DIR$/dnet-dedup.iml"/>
<module filepath="$PROJECT_DIR$/dnet-pace-core/dnet-pace-core.iml"/>
<module filepath="$PROJECT_DIR$/dnet-dedup-test/dnet-dedup-test.iml"/>
</modules>
</component>
<UsedPathMacros>
<!--<macro name="cargo"></macro>-->
</UsedPathMacros>
</project>

418
dnet-dedup.iws Normal file
View File

@ -0,0 +1,418 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
-->
<project version="4" relativePaths="false">
<component name="LvcsProjectConfiguration">
<option name="ADD_LABEL_ON_PROJECT_OPEN" value="true"/>
<option name="ADD_LABEL_ON_PROJECT_COMPILATION" value="true"/>
<option name="ADD_LABEL_ON_FILE_PACKAGE_COMPILATION" value="true"/>
<option name="ADD_LABEL_ON_PROJECT_MAKE" value="true"/>
<option name="ADD_LABEL_ON_RUNNING" value="true"/>
<option name="ADD_LABEL_ON_DEBUGGING" value="true"/>
<option name="ADD_LABEL_ON_UNIT_TEST_PASSED" value="true"/>
<option name="ADD_LABEL_ON_UNIT_TEST_FAILED" value="true"/>
</component>
<component name="PropertiesComponent">
<property name="MemberChooser.copyJavadoc" value="false"/>
<property name="GoToClass.includeLibraries" value="false"/>
<property name="MemberChooser.showClasses" value="true"/>
<property name="MemberChooser.sorted" value="false"/>
<property name="GoToFile.includeJavaFiles" value="false"/>
<property name="GoToClass.toSaveIncludeLibraries" value="false"/>
</component>
<component name="ToolWindowManager">
<frame x="-4" y="-4" width="1032" height="746" extended-state="6"/>
<editor active="false"/>
<layout>
<window_info id="CVS" active="false" anchor="bottom" auto_hide="false" internal_type="docked" type="docked" visible="false" weight="0.33" order="-1"/>
<window_info id="TODO" active="false" anchor="bottom" auto_hide="false" internal_type="docked" type="docked" visible="false" weight="0.33" order="7"/>
<window_info id="Project" active="false" anchor="left" auto_hide="false" internal_type="docked" type="docked" visible="false" weight="0.25" order="0"/>
<window_info id="Find" active="false" anchor="bottom" auto_hide="false" internal_type="docked" type="docked" visible="false" weight="0.33" order="1"/>
<window_info id="Structure" active="false" anchor="left" auto_hide="false" internal_type="docked" type="docked" visible="false" weight="0.25" order="1"/>
<window_info id="Messages" active="false" anchor="bottom" auto_hide="false" internal_type="docked" type="docked" visible="false" weight="0.33" order="-1"/>
<window_info id="Inspection" active="false" anchor="bottom" auto_hide="false" internal_type="docked" type="docked" visible="false" weight="0.4" order="6"/>
<window_info id="Aspects" active="false" anchor="right" auto_hide="false" internal_type="docked" type="docked" visible="false" weight="0.33" order="-1"/>
<window_info id="Ant Build" active="false" anchor="right" auto_hide="false" internal_type="docked" type="docked" visible="false" weight="0.25" order="1"/>
<window_info id="Run" active="false" anchor="bottom" auto_hide="false" internal_type="docked" type="docked" visible="false" weight="0.33" order="2"/>
<window_info id="Hierarchy" active="false" anchor="right" auto_hide="false" internal_type="docked" type="docked" visible="false" weight="0.25" order="2"/>
<window_info id="Debug" active="false" anchor="bottom" auto_hide="false" internal_type="docked" type="docked" visible="false" weight="0.4" order="4"/>
<window_info id="Commander" active="false" anchor="right" auto_hide="false" internal_type="sliding" type="sliding" visible="false" weight="0.4" order="0"/>
<window_info id="Web" active="false" anchor="left" auto_hide="false" internal_type="docked" type="docked" visible="false" weight="0.25" order="2"/>
<window_info id="Message" active="false" anchor="bottom" auto_hide="false" internal_type="docked" type="docked" visible="false" weight="0.33" order="0"/>
<window_info id="EJB" active="false" anchor="bottom" auto_hide="false" internal_type="docked" type="docked" visible="false" weight="0.25" order="3"/>
<window_info id="Cvs" active="false" anchor="bottom" auto_hide="false" internal_type="docked" type="docked" visible="false" weight="0.25" order="5"/>
</layout>
</component>
<component name="ErrorTreeViewConfiguration">
<option name="IS_AUTOSCROLL_TO_SOURCE" value="false"/>
<option name="HIDE_WARNINGS" value="false"/>
</component>
<component name="StructureViewFactory">
<option name="SORT_MODE" value="0"/>
<option name="GROUP_INHERITED" value="true"/>
<option name="AUTOSCROLL_MODE" value="true"/>
<option name="SHOW_FIELDS" value="true"/>
<option name="AUTOSCROLL_FROM_SOURCE" value="false"/>
<option name="GROUP_GETTERS_AND_SETTERS" value="true"/>
<option name="SHOW_INHERITED" value="false"/>
<option name="HIDE_NOT_PUBLIC" value="false"/>
</component>
<component name="ProjectViewSettings">
<navigator currentView="ProjectPane" flattenPackages="false" showMembers="false" showStructure="false" autoscrollToSource="false" splitterProportion="0.5"/>
<view id="ProjectPane">
<expanded_node type="directory" url="file://$PROJECT_DIR$"/>
</view>
<view id="SourcepathPane"/>
<view id="ClasspathPane"/>
</component>
<component name="Commander">
<leftPanel view="Project"/>
<rightPanel view="Project"/>
<splitter proportion="0.5"/>
</component>
<component name="AspectsView"/>
<component name="SelectInManager"/>
<component name="HierarchyBrowserManager">
<option name="SHOW_PACKAGES" value="false"/>
<option name="IS_AUTOSCROLL_TO_SOURCE" value="false"/>
<option name="SORT_ALPHABETICALLY" value="false"/>
</component>
<component name="TodoView" selected-index="0">
<todo-panel id="selected-file">
<are-packages-shown value="false"/>
<flatten-packages value="false"/>
<is-autoscroll-to-source value="true"/>
</todo-panel>
<todo-panel id="all">
<are-packages-shown value="true"/>
<flatten-packages value="false"/>
<is-autoscroll-to-source value="true"/>
</todo-panel>
</component>
<component name="editorManager"/>
<component name="editorHistoryManager"/>
<component name="DaemonCodeAnalyzer">
<disable_hints/>
</component>
<component name="InspectionManager">
<option name="AUTOSCROLL_TO_SOURCE" value="false"/>
<option name="SPLITTER_PROPORTION" value="0.5"/>
<profile name="Default"/>
</component>
<component name="BookmarkManager"/>
<component name="DebuggerManager">
<line_breakpoints/>
<exception_breakpoints>
<breakpoint_any>
<option name="NOTIFY_CAUGHT" value="true"/>
<option name="NOTIFY_UNCAUGHT" value="true"/>
<option name="ENABLED" value="false"/>
<option name="SUSPEND_VM" value="true"/>
<option name="COUNT_FILTER_ENABLED" value="false"/>
<option name="COUNT_FILTER" value="0"/>
<option name="CONDITION_ENABLED" value="false"/>
<option name="CONDITION"/>
<option name="LOG_ENABLED" value="false"/>
<option name="LOG_EXPRESSION_ENABLED" value="false"/>
<option name="LOG_MESSAGE"/>
<option name="CLASS_FILTERS_ENABLED" value="false"/>
<option name="INVERSE_CLASS_FILLTERS" value="false"/>
<option name="SUSPEND_POLICY" value="SuspendAll"/>
</breakpoint_any>
</exception_breakpoints>
<field_breakpoints/>
<method_breakpoints/>
</component>
<component name="DebuggerSettings">
<option name="TRACING_FILTERS_ENABLED" value="true"/>
<option name="TOSTRING_CLASSES_ENABLED" value="false"/>
<option name="VALUE_LOOKUP_DELAY" value="700"/>
<option name="DEBUGGER_TRANSPORT" value="0"/>
<option name="FORCE_CLASSIC_VM" value="true"/>
<option name="HIDE_DEBUGGER_ON_PROCESS_TERMINATION" value="false"/>
<option name="SKIP_SYNTHETIC_METHODS" value="true"/>
<option name="SKIP_CONSTRUCTORS" value="false"/>
<option name="STEP_THREAD_SUSPEND_POLICY" value="SuspendThread"/>
<default_breakpoint_settings>
<option name="NOTIFY_CAUGHT" value="true"/>
<option name="NOTIFY_UNCAUGHT" value="true"/>
<option name="WATCH_MODIFICATION" value="true"/>
<option name="WATCH_ACCESS" value="true"/>
<option name="WATCH_ENTRY" value="true"/>
<option name="WATCH_EXIT" value="true"/>
<option name="ENABLED" value="true"/>
<option name="SUSPEND_VM" value="true"/>
<option name="COUNT_FILTER_ENABLED" value="false"/>
<option name="COUNT_FILTER" value="0"/>
<option name="CONDITION_ENABLED" value="false"/>
<option name="CONDITION"/>
<option name="LOG_ENABLED" value="false"/>
<option name="LOG_EXPRESSION_ENABLED" value="false"/>
<option name="LOG_MESSAGE"/>
<option name="CLASS_FILTERS_ENABLED" value="false"/>
<option name="INVERSE_CLASS_FILLTERS" value="false"/>
<option name="SUSPEND_POLICY" value="SuspendAll"/>
</default_breakpoint_settings>
<filter>
<option name="PATTERN" value="com.sun.*"/>
<option name="ENABLED" value="true"/>
</filter>
<filter>
<option name="PATTERN" value="java.*"/>
<option name="ENABLED" value="true"/>
</filter>
<filter>
<option name="PATTERN" value="javax.*"/>
<option name="ENABLED" value="true"/>
</filter>
<filter>
<option name="PATTERN" value="org.omg.*"/>
<option name="ENABLED" value="true"/>
</filter>
<filter>
<option name="PATTERN" value="sun.*"/>
<option name="ENABLED" value="true"/>
</filter>
<filter>
<option name="PATTERN" value="junit.*"/>
<option name="ENABLED" value="true"/>
</filter>
</component>
<component name="CompilerWorkspaceConfiguration">
<option name="COMPILE_IN_BACKGROUND" value="false"/>
<option name="AUTO_SHOW_ERRORS_IN_EDITOR" value="true"/>
</component>
<component name="RunManager">
<activeType name="Application"/>
<configuration selected="false" default="true" type="Applet" factoryName="Applet">
<module name=""/>
<option name="MAIN_CLASS_NAME"/>
<option name="HTML_FILE_NAME"/>
<option name="HTML_USED" value="false"/>
<option name="WIDTH" value="400"/>
<option name="HEIGHT" value="300"/>
<option name="POLICY_FILE" value="$APPLICATION_HOME_DIR$/bin/appletviewer.policy"/>
<option name="VM_PARAMETERS"/>
</configuration>
<configuration selected="false" default="true" type="Remote" factoryName="Remote">
<option name="USE_SOCKET_TRANSPORT" value="true"/>
<option name="SERVER_MODE" value="false"/>
<option name="SHMEM_ADDRESS" value="javadebug"/>
<option name="HOST" value="localhost"/>
<option name="PORT" value="5005"/>
</configuration>
<configuration selected="false" default="true" type="Application" factoryName="Application">
<option name="MAIN_CLASS_NAME"/>
<option name="VM_PARAMETERS"/>
<option name="PROGRAM_PARAMETERS"/>
<option name="WORKING_DIRECTORY" value="$PROJECT_DIR$"/>
<module name=""/>
</configuration>
<configuration selected="false" default="true" type="JUnit" factoryName="JUnit">
<module name=""/>
<option name="PACKAGE_NAME"/>
<option name="MAIN_CLASS_NAME"/>
<option name="METHOD_NAME"/>
<option name="TEST_OBJECT" value="class"/>
<option name="VM_PARAMETERS"/>
<option name="PARAMETERS"/>
<option name="WORKING_DIRECTORY" value="$PROJECT_DIR$"/>
<option name="ADDITIONAL_CLASS_PATH"/>
<option name="TEST_SEARCH_SCOPE">
<value defaultName="wholeProject"/>
</option>
</configuration>
</component>
<component name="VcsManagerConfiguration">
<option name="ACTIVE_VCS_NAME" value="git"/>
<option name="STATE" value="0"/>
</component>
<component name="VssConfiguration">
<CheckoutOptions>
<option name="COMMENT" value=""/>
<option name="DO_NOT_GET_LATEST_VERSION" value="false"/>
<option name="REPLACE_WRITABLE" value="false"/>
<option name="RECURSIVE" value="false"/>
</CheckoutOptions>
<CheckinOptions>
<option name="COMMENT" value=""/>
<option name="KEEP_CHECKED_OUT" value="false"/>
<option name="RECURSIVE" value="false"/>
</CheckinOptions>
<AddOptions>
<option name="COMMENT" value=""/>
<option name="STORE_ONLY_LATEST_VERSION" value="false"/>
<option name="CHECK_OUT_IMMEDIATELY" value="false"/>
<option name="FILE_TYPE" value="0"/>
</AddOptions>
<UndocheckoutOptions>
<option name="MAKE_WRITABLE" value="false"/>
<option name="REPLACE_LOCAL_COPY" value="0"/>
<option name="RECURSIVE" value="false"/>
</UndocheckoutOptions>
<DiffOptions>
<option name="IGNORE_WHITE_SPACE" value="false"/>
<option name="IGNORE_CASE" value="false"/>
</DiffOptions>
<GetOptions>
<option name="REPLACE_WRITABLE" value="0"/>
<option name="MAKE_WRITABLE" value="false"/>
<option name="RECURSIVE" value="false"/>
</GetOptions>
<option name="CLIENT_PATH" value=""/>
<option name="SRCSAFEINI_PATH" value=""/>
<option name="USER_NAME" value=""/>
<option name="PWD" value=""/>
<option name="SHOW_CHECKOUT_OPTIONS" value="true"/>
<option name="SHOW_ADD_OPTIONS" value="true"/>
<option name="SHOW_UNDOCHECKOUT_OPTIONS" value="true"/>
<option name="SHOW_DIFF_OPTIONS" value="true"/>
<option name="SHOW_GET_OPTIONS" value="true"/>
<option name="USE_EXTERNAL_DIFF" value="false"/>
<option name="EXTERNAL_DIFF_PATH" value=""/>
<option name="REUSE_LAST_COMMENT" value="false"/>
<option name="PUT_FOCUS_INTO_COMMENT" value="false"/>
<option name="SHOW_CHECKIN_OPTIONS" value="true"/>
<option name="LAST_COMMIT_MESSAGE" value=""/>
<option name="CHECKIN_DIALOG_SPLITTER_PROPORTION" value="0.8"/>
</component>
<component name="CheckinPanelState"/>
<component name="WebViewSettings">
<webview flattenPackages="false" showMembers="false" autoscrollToSource="false"/>
</component>
<component name="EjbViewSettings">
<EjbView showMembers="false" autoscrollToSource="false"/>
</component>
<component name="AppServerRunManager"/>
<component name="StarteamConfiguration">
<option name="SERVER" value=""/>
<option name="PORT" value="49201"/>
<option name="USER" value=""/>
<option name="PASSWORD" value=""/>
<option name="PROJECT" value=""/>
<option name="VIEW" value=""/>
<option name="ALTERNATIVE_WORKING_PATH" value=""/>
<option name="PUT_FOCUS_INTO_COMMENT" value="false"/>
<option name="SHOW_CHECKIN_OPTIONS" value="true"/>
<option name="LAST_COMMIT_MESSAGE" value=""/>
<option name="CHECKIN_DIALOG_SPLITTER_PROPORTION" value="0.8"/>
</component>
<component name="Cvs2Configuration">
<option name="ON_FILE_ADDING" value="0"/>
<option name="ON_FILE_REMOVING" value="0"/>
<option name="PRUNE_EMPTY_DIRECTORIES" value="true"/>
<option name="SHOW_UPDATE_OPTIONS" value="true"/>
<option name="SHOW_ADD_OPTIONS" value="true"/>
<option name="SHOW_REMOVE_OPTIONS" value="true"/>
<option name="MERGING_MODE" value="0"/>
<option name="MERGE_WITH_BRANCH1_NAME" value="HEAD"/>
<option name="MERGE_WITH_BRANCH2_NAME" value="HEAD"/>
<option name="RESET_STICKY" value="false"/>
<option name="CREATE_NEW_DIRECTORIES" value="true"/>
<option name="DEFAULT_TEXT_FILE_SUBSTITUTION" value="kv"/>
<option name="PROCESS_UNKNOWN_FILES" value="false"/>
<option name="PROCESS_DELETED_FILES" value="false"/>
<option name="SHOW_EDIT_DIALOG" value="true"/>
<option name="RESERVED_EDIT" value="false"/>
<option name="FILE_HISTORY_SPLITTER_PROPORTION" value="0.6"/>
<option name="SHOW_CHECKOUT_OPTIONS" value="true"/>
<option name="CHECKOUT_DATE_OR_REVISION_SETTINGS">
<value>
<option name="BRANCH" value=""/>
<option name="DATE" value=""/>
<option name="USE_BRANCH" value="false"/>
<option name="USE_DATE" value="false"/>
</value>
</option>
<option name="UPDATE_DATE_OR_REVISION_SETTINGS">
<value>
<option name="BRANCH" value=""/>
<option name="DATE" value=""/>
<option name="USE_BRANCH" value="false"/>
<option name="USE_DATE" value="false"/>
</value>
</option>
<option name="SHOW_CHANGES_REVISION_SETTINGS">
<value>
<option name="BRANCH" value=""/>
<option name="DATE" value=""/>
<option name="USE_BRANCH" value="false"/>
<option name="USE_DATE" value="false"/>
</value>
</option>
<option name="SHOW_OUTPUT" value="false"/>
<option name="SHOW_FILE_HISTORY_AS_TREE" value="false"/>
<option name="UPDATE_GROUP_BY_PACKAGES" value="false"/>
<option name="ADD_WATCH_INDEX" value="0"/>
<option name="REMOVE_WATCH_INDEX" value="0"/>
<option name="UPDATE_KEYWORD_SUBSTITUTION"/>
<option name="MAKE_NEW_FILES_READONLY" value="false"/>
<option name="SHOW_CORRUPTED_PROJECT_FILES" value="0"/>
<option name="TAG_AFTER_FILE_COMMIT" value="false"/>
<option name="TAG_AFTER_FILE_COMMIT_NAME" value=""/>
<option name="TAG_AFTER_PROJECT_COMMIT" value="false"/>
<option name="TAG_AFTER_PROJECT_COMMIT_NAME" value=""/>
<option name="PUT_FOCUS_INTO_COMMENT" value="false"/>
<option name="SHOW_CHECKIN_OPTIONS" value="true"/>
<option name="FORCE_NON_EMPTY_COMMENT" value="false"/>
<option name="LAST_COMMIT_MESSAGE" value=""/>
<option name="SAVE_LAST_COMMIT_MESSAGE" value="true"/>
<option name="CHECKIN_DIALOG_SPLITTER_PROPORTION" value="0.8"/>
<option name="OPTIMIZE_IMPORTS_BEFORE_PROJECT_COMMIT" value="false"/>
<option name="OPTIMIZE_IMPORTS_BEFORE_FILE_COMMIT" value="false"/>
<option name="REFORMAT_BEFORE_PROJECT_COMMIT" value="false"/>
<option name="REFORMAT_BEFORE_FILE_COMMIT" value="false"/>
<option name="FILE_HISTORY_DIALOG_COMMENTS_SPLITTER_PROPORTION" value="0.8"/>
<option name="FILE_HISTORY_DIALOG_SPLITTER_PROPORTION" value="0.5"/>
</component>
<component name="CvsTabbedWindow"/>
<component name="SvnConfiguration">
<option name="USER" value=""/>
<option name="PASSWORD" value=""/>
<option name="AUTO_ADD_FILES" value="0"/>
<option name="AUTO_DEL_FILES" value="0"/>
</component>
<component name="PerforceConfiguration">
<option name="PORT" value="magic:1666"/>
<option name="USER" value=""/>
<option name="PASSWORD" value=""/>
<option name="CLIENT" value=""/>
<option name="TRACE" value="false"/>
<option name="PERFORCE_STATUS" value="true"/>
<option name="CHANGELIST_OPTION" value="false"/>
<option name="SYSTEMROOT" value=""/>
<option name="P4_EXECUTABLE" value="p4"/>
<option name="SHOW_BRANCH_HISTORY" value="false"/>
<option name="GENERATE_COMMENT" value="false"/>
<option name="SYNC_OPTION" value="Sync"/>
<option name="PUT_FOCUS_INTO_COMMENT" value="false"/>
<option name="SHOW_CHECKIN_OPTIONS" value="true"/>
<option name="FORCE_NON_EMPTY_COMMENT" value="true"/>
<option name="LAST_COMMIT_MESSAGE" value=""/>
<option name="SAVE_LAST_COMMIT_MESSAGE" value="true"/>
<option name="CHECKIN_DIALOG_SPLITTER_PROPORTION" value="0.8"/>
<option name="OPTIMIZE_IMPORTS_BEFORE_PROJECT_COMMIT" value="false"/>
<option name="OPTIMIZE_IMPORTS_BEFORE_FILE_COMMIT" value="false"/>
<option name="REFORMAT_BEFORE_PROJECT_COMMIT" value="false"/>
<option name="REFORMAT_BEFORE_FILE_COMMIT" value="false"/>
<option name="FILE_HISTORY_DIALOG_COMMENTS_SPLITTER_PROPORTION" value="0.8"/>
<option name="FILE_HISTORY_DIALOG_SPLITTER_PROPORTION" value="0.5"/>
</component>
</project>

BIN
dnet-openaire-data-protos/.DS_Store vendored Normal file

Binary file not shown.

View File

@ -10,7 +10,7 @@
<groupId>eu.dnetlib</groupId>
<artifactId>dnet-openaire-data-protos</artifactId>
<packaging>jar</packaging>
<version>3.9.4-CUSTOM</version>
<version>3.9.4-proto250</version>
<properties>
<!-- defined also in dnet-parent, here in case we need to override -->

View File

@ -64,6 +64,7 @@
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-databind</artifactId>
</dependency>
<dependency>
<groupId>org.codehaus.jackson</groupId>
<artifactId>jackson-mapper-asl</artifactId>

View File

@ -1,19 +1,16 @@
package eu.dnetlib.pace.model;
import java.io.Serializable;
import java.lang.reflect.InvocationTargetException;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import com.google.common.base.Splitter;
import com.google.common.collect.Lists;
import com.google.gson.Gson;
import eu.dnetlib.pace.config.PaceConfig;
import eu.dnetlib.pace.config.Type;
import eu.dnetlib.pace.distance.*;
import eu.dnetlib.pace.distance.algo.*;
import eu.dnetlib.pace.util.PaceException;
import eu.dnetlib.pace.distance.DistanceAlgo;
import java.io.Serializable;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
* The schema is composed of field definitions (FieldDef). Each field has a type, a name, and an associated distance algorithm.
@ -60,6 +57,18 @@ public class FieldDef implements Serializable {
return name;
}
/**
 * Sets the name of this field definition.
 *
 * @param name the new field name
 */
public void setName(String name) {
this.name = name;
}
/**
 * Sets the path of this field definition.
 *
 * @param path the new path; NOTE(review): presumably the expression used to locate
 *             the field's value inside a record - confirm against the code that reads it
 */
public void setPath(String path) {
this.path = path;
}
/**
 * Sets the ignoreMissing flag of this field definition.
 *
 * @param ignoreMissing the new flag value; NOTE(review): presumably tells the comparison
 *                      to skip this field when its value is absent - confirm against the
 *                      code that reads the flag
 */
public void setIgnoreMissing(boolean ignoreMissing) {
this.ignoreMissing = ignoreMissing;
}
/**
 * Returns the path configured for this field definition.
 *
 * @return the current value of the path field
 */
public String getPath() {
return path;
}

View File

@ -32,7 +32,7 @@ public class ConfigTest extends AbstractPaceTest {
/**
 * Smoke test: loads a dedup configuration read from the classpath and prints its
 * string form to standard output.
 *
 * No assertion is made on the loaded configuration.
 * NOTE(review): presumably the test can only fail if reading or loading throws -
 * consider asserting on the parsed content.
 */
@Test
public void dedupConfigTest() {
DedupConfig load = DedupConfig.load(readFromClasspath("result.pace.conf.json"));
DedupConfig load = DedupConfig.load(readFromClasspath("org.curr.conf"));
System.out.println(load.toString());
}

View File

@ -0,0 +1,36 @@
{
"wf" : {
"threshold" : "0.9",
"dedupRun" : "001",
"entityType" : "organization",
"orderField" : "legalname",
"queueMaxSize" : "2000",
"groupMaxSize" : "10",
"slidingWindowSize" : "200",
"rootBuilder" : [ "organization", "projectOrganization_participation_isParticipant", "datasourceOrganization_provision_isProvidedBy" ],
"includeChildren" : "true"
},
"pace" : {
"clustering" : [
{ "name" : "sortedngrampairs", "fields" : [ "legalname" ], "params" : { "max" : 2, "ngramLen" : "3"} },
{ "name" : "suffixprefix", "fields" : [ "legalname" ], "params" : { "max" : 1, "len" : "3" } },
{ "name" : "urlclustering", "fields" : [ "websiteurl" ], "params" : { } }
],
"strictConditions" : [
{ "name" : "exactMatch", "fields" : [ "gridid" ] }
],
"conditions" : [
{ "name" : "exactMatch", "fields" : [ "country" ] },
{ "name" : "DomainExactMatch", "fields" : [ "websiteurl" ] }
],
"model" : [
{ "name" : "legalname", "algo" : "Null", "type" : "String", "weight" : "0", "ignoreMissing" : "false", "path" : "organization/metadata/legalname/value" },
{ "name" : "country", "algo" : "Null", "type" : "String", "weight" : "0", "ignoreMissing" : "true", "path" : "organization/metadata/country/classid" },
{ "name" : "legalshortname", "algo" : "JaroWinklerNormalizedName", "type" : "String", "weight" : "0.1", "ignoreMissing" : "true", "path" : "organization/metadata/legalshortname/value" },
{ "name" : "legalname", "algo" : "JaroWinklerNormalizedName", "type" : "String", "weight" : "0.9", "ignoreMissing" : "false", "path" : "organization/metadata/legalname/value", "params" : {"windowSize" : 4, "threshold" : 0.5} },
{ "name" : "websiteurl", "algo" : "Null", "type" : "URL", "weight" : "0", "ignoreMissing" : "true", "path" : "organization/metadata/websiteurl/value", "params" : { "host" : 0.5, "path" : 0.5 } },
{ "name" : "gridid", "algo" : "Null", "type" : "String", "weight" : "0.0", "ignoreMissing" : "true", "path" : "pid[qualifier#classid = {grid}]/value" }
],
"blacklists" : { }
}
}

160
pom.xml
View File

@ -35,6 +35,8 @@
<url>https://issue.openaire.research-infrastructures.eu/projects/openaire</url>
</issueManagement>
<distributionManagement>
<repository>
<id>dnet45-releases</id>
@ -70,6 +72,18 @@
</snapshots>
</repository>
<repository>
<id>cloudera</id>
<name>Cloudera Repository</name>
<url>https://repository.cloudera.com/artifactory/cloudera-repos</url>
<releases>
<enabled>true</enabled>
</releases>
<snapshots>
<enabled>false</enabled>
</snapshots>
</repository>
</repositories>
<build>
@ -77,22 +91,125 @@
<outputDirectory>target/classes</outputDirectory>
<finalName>${project.artifactId}-${project.version}</finalName>
<testOutputDirectory>target/test-classes</testOutputDirectory>
<!--*************************************************-->
<pluginManagement>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<version>3.6.0</version>
<configuration>
<source>1.8</source>
<target>1.8</target>
<encoding>${project.build.sourceEncoding}</encoding>
</configuration>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-jar-plugin</artifactId>
<version>3.0.2</version>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-source-plugin</artifactId>
<version>3.0.1</version>
<executions>
<execution>
<id>attach-sources</id>
<phase>verify</phase>
<goals>
<goal>jar-no-fork</goal>
</goals>
</execution>
</executions>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-surefire-plugin</artifactId>
<version>2.19.1</version>
<configuration>
<redirectTestOutputToFile>true</redirectTestOutputToFile>
</configuration>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-javadoc-plugin</artifactId>
<version>2.10.4</version>
<configuration>
<detectLinks>true</detectLinks>
</configuration>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-dependency-plugin</artifactId>
<version>3.0.0</version>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-failsafe-plugin</artifactId>
<version>2.13</version>
<executions>
<execution>
<id>integration-test</id>
<goals>
<goal>integration-test</goal>
</goals>
</execution>
<execution>
<id>verify</id>
<goals>
<goal>verify</goal>
</goals>
</execution>
</executions>
</plugin>
</plugins>
</pluginManagement>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<version>3.6.0</version>
<configuration>
<source>1.8</source>
<target>1.8</target>
<encoding>${project.build.sourceEncoding}</encoding>
</configuration>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-dependency-plugin</artifactId>
<version>3.0.0</version>
<artifactId>maven-release-plugin</artifactId>
<version>2.5.3</version>
</plugin>
</plugins>
<!--***********************************************************************-->
<!--<plugins>-->
<!--<plugin>-->
<!--<groupId>org.apache.maven.plugins</groupId>-->
<!--<artifactId>maven-compiler-plugin</artifactId>-->
<!--<version>3.6.0</version>-->
<!--<configuration>-->
<!--<source>1.8</source>-->
<!--<target>1.8</target>-->
<!--<encoding>${project.build.sourceEncoding}</encoding>-->
<!--</configuration>-->
<!--</plugin>-->
<!--<plugin>-->
<!--<groupId>org.apache.maven.plugins</groupId>-->
<!--<artifactId>maven-dependency-plugin</artifactId>-->
<!--<version>3.0.0</version>-->
<!--</plugin>-->
<!--<plugin>-->
<!--<groupId>org.apache.maven.plugins</groupId>-->
<!--<artifactId>maven-failsafe-plugin</artifactId>-->
@ -113,7 +230,7 @@
<!--</executions>-->
<!--</plugin>-->
</plugins>
<!--</plugins>-->
</build>
@ -137,7 +254,7 @@
<dependency>
<!-- NOTE(review): the dnet-openaire-data-protos pom changed in this same commit declares
     version 3.9.4-proto250; confirm that 3.9.3-proto250 is really the artifact intended here. -->
<groupId>eu.dnetlib</groupId>
<artifactId>dnet-openaire-data-protos</artifactId>
<version>3.9.4-CUSTOM</version>
<version>3.9.3-proto250</version>
</dependency>
<dependency>
<groupId>eu.dnetlib</groupId>
@ -148,8 +265,9 @@
<dependency>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-databind</artifactId>
<version>2.6.6</version>
<version>${jackson.version}</version>
</dependency>
<dependency>
<groupId>org.codehaus.jackson</groupId>
<artifactId>jackson-mapper-asl</artifactId>
@ -196,16 +314,19 @@
<groupId>org.apache.spark</groupId>
<artifactId>spark-core_2.11</artifactId>
<version>${spark.version}</version>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-graphx_2.11</artifactId>
<version>${spark.version}</version>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-sql_2.11</artifactId>
<version>${spark.version}</version>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>junit</groupId>
@ -219,6 +340,12 @@
<version>0.9.10</version>
</dependency>
<dependency>
<groupId>org.scala-lang</groupId>
<artifactId>scala-library</artifactId>
<version>${scala.version}</version>
</dependency>
<dependency>
<groupId>org.apache.oozie</groupId>
<artifactId>oozie-client</artifactId>
@ -227,6 +354,7 @@
</dependencies>
</dependencyManagement>
<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
@ -236,6 +364,7 @@
<google.guava.version>15.0</google.guava.version>
<spark.version>2.2.0</spark.version>
<jackson.version>2.6.6</jackson.version>
<commons.lang.version>2.6</commons.lang.version>
<commons.io.version>2.4</commons.io.version>
@ -243,6 +372,7 @@
<commons.logging.version>1.1.3</commons.logging.version>
<junit.version>4.9</junit.version>
<scala.version>2.11.8</scala.version>
<maven.javadoc.failOnError>false</maven.javadoc.failOnError>
</properties>