addition of a sparktester test, implementation of 2 different classes for testing in dnet-dedup-test module, addition of new terms in the vocabulary and change in the implementation of the JaroWinklerNormalizedName comparator
This commit is contained in:
parent
e9894ed089
commit
f738c2b641
|
@ -0,0 +1,252 @@
|
|||
[INFO] Scanning for projects...
|
||||
[INFO] ------------------------------------------------------------------------
|
||||
[INFO] Reactor Build Order:
|
||||
[INFO]
|
||||
[INFO] dnet-dedup [pom]
|
||||
[INFO] dnet-pace-core [jar]
|
||||
[INFO] dnet-dedup-test [jar]
|
||||
[INFO]
|
||||
[INFO] -----------------------< eu.dnetlib:dnet-dedup >------------------------
|
||||
[INFO] Building dnet-dedup 3.0.3-SNAPSHOT [1/3]
|
||||
[INFO] --------------------------------[ pom ]---------------------------------
|
||||
[INFO]
|
||||
[INFO] --- maven-dependency-plugin:3.0.0:tree (default-cli) @ dnet-dedup ---
|
||||
[INFO] eu.dnetlib:dnet-dedup:pom:3.0.3-SNAPSHOT
|
||||
[INFO]
|
||||
[INFO] ---------------------< eu.dnetlib:dnet-pace-core >----------------------
|
||||
[INFO] Building dnet-pace-core 3.0.3-SNAPSHOT [2/3]
|
||||
[INFO] --------------------------------[ jar ]---------------------------------
|
||||
[INFO]
|
||||
[INFO] --- maven-dependency-plugin:3.0.0:tree (default-cli) @ dnet-pace-core ---
|
||||
[INFO] eu.dnetlib:dnet-pace-core:jar:3.0.3-SNAPSHOT
|
||||
[INFO] +- edu.cmu:secondstring:jar:1.0.0:compile
|
||||
[INFO] +- com.google.guava:guava:jar:15.0:compile
|
||||
[INFO] +- com.google.code.gson:gson:jar:2.2.2:compile
|
||||
[INFO] +- commons-lang:commons-lang:jar:2.6:compile
|
||||
[INFO] +- commons-io:commons-io:jar:2.4:compile
|
||||
[INFO] +- commons-collections:commons-collections:jar:3.2.1:compile
|
||||
[INFO] +- com.googlecode.protobuf-java-format:protobuf-java-format:jar:1.2:compile
|
||||
[INFO] +- org.antlr:stringtemplate:jar:3.2:compile
|
||||
[INFO] | \- org.antlr:antlr:jar:2.7.7:compile
|
||||
[INFO] +- commons-logging:commons-logging:jar:1.1.3:compile
|
||||
[INFO] +- junit:junit:jar:4.9:test
|
||||
[INFO] | \- org.hamcrest:hamcrest-core:jar:1.1:test
|
||||
[INFO] +- org.reflections:reflections:jar:0.9.10:compile
|
||||
[INFO] | +- org.javassist:javassist:jar:3.19.0-GA:compile
|
||||
[INFO] | \- com.google.code.findbugs:annotations:jar:2.0.1:compile
|
||||
[INFO] +- com.fasterxml.jackson.core:jackson-databind:jar:2.6.6:compile
|
||||
[INFO] | +- com.fasterxml.jackson.core:jackson-annotations:jar:2.6.0:compile
|
||||
[INFO] | \- com.fasterxml.jackson.core:jackson-core:jar:2.6.6:compile
|
||||
[INFO] +- org.codehaus.jackson:jackson-mapper-asl:jar:1.9.13:compile
|
||||
[INFO] | \- org.codehaus.jackson:jackson-core-asl:jar:1.9.13:compile
|
||||
[INFO] \- org.apache.commons:commons-math3:jar:3.6.1:compile
|
||||
[INFO]
|
||||
[INFO] ---------------------< eu.dnetlib:dnet-dedup-test >---------------------
|
||||
[INFO] Building dnet-dedup-test 3.0.3-SNAPSHOT [3/3]
|
||||
[INFO] --------------------------------[ jar ]---------------------------------
|
||||
[INFO]
|
||||
[INFO] --- maven-dependency-plugin:3.0.0:tree (default-cli) @ dnet-dedup-test ---
|
||||
[INFO] eu.dnetlib:dnet-dedup-test:jar:3.0.3-SNAPSHOT
|
||||
[INFO] +- eu.dnetlib:dnet-pace-core:jar:3.0.3-SNAPSHOT:compile
|
||||
[INFO] | +- edu.cmu:secondstring:jar:1.0.0:compile
|
||||
[INFO] | +- com.google.guava:guava:jar:15.0:compile
|
||||
[INFO] | +- com.google.code.gson:gson:jar:2.2.2:compile
|
||||
[INFO] | +- commons-lang:commons-lang:jar:2.6:compile
|
||||
[INFO] | +- commons-io:commons-io:jar:2.4:compile
|
||||
[INFO] | +- commons-collections:commons-collections:jar:3.2.1:compile
|
||||
[INFO] | +- com.googlecode.protobuf-java-format:protobuf-java-format:jar:1.2:compile
|
||||
[INFO] | +- org.antlr:stringtemplate:jar:3.2:compile
|
||||
[INFO] | | \- org.antlr:antlr:jar:2.7.7:compile
|
||||
[INFO] | +- commons-logging:commons-logging:jar:1.1.3:compile
|
||||
[INFO] | +- org.reflections:reflections:jar:0.9.10:compile
|
||||
[INFO] | | +- org.javassist:javassist:jar:3.19.0-GA:compile
|
||||
[INFO] | | \- com.google.code.findbugs:annotations:jar:2.0.1:compile
|
||||
[INFO] | +- com.fasterxml.jackson.core:jackson-databind:jar:2.6.6:compile
|
||||
[INFO] | | +- com.fasterxml.jackson.core:jackson-annotations:jar:2.6.0:compile
|
||||
[INFO] | | \- com.fasterxml.jackson.core:jackson-core:jar:2.6.6:compile
|
||||
[INFO] | +- org.codehaus.jackson:jackson-mapper-asl:jar:1.9.13:compile
|
||||
[INFO] | | \- org.codehaus.jackson:jackson-core-asl:jar:1.9.13:compile
|
||||
[INFO] | \- org.apache.commons:commons-math3:jar:3.6.1:compile
|
||||
[INFO] +- eu.dnetlib:dnet-openaire-data-protos:jar:3.9.3-proto250:compile
|
||||
[INFO] | +- com.google.protobuf:protobuf-java:jar:2.5.0:compile
|
||||
[INFO] | \- log4j:log4j:jar:1.2.17:compile (version selected from constraint [1.2.17,1.2.17])
|
||||
[INFO] +- org.apache.spark:spark-core_2.11:jar:2.2.0:provided
|
||||
[INFO] | +- org.apache.avro:avro:jar:1.7.7:provided
|
||||
[INFO] | | +- com.thoughtworks.paranamer:paranamer:jar:2.3:provided
|
||||
[INFO] | | \- org.apache.commons:commons-compress:jar:1.4.1:provided
|
||||
[INFO] | | \- org.tukaani:xz:jar:1.0:provided
|
||||
[INFO] | +- org.apache.avro:avro-mapred:jar:hadoop2:1.7.7:provided
|
||||
[INFO] | | +- org.apache.avro:avro-ipc:jar:1.7.7:provided
|
||||
[INFO] | | \- org.apache.avro:avro-ipc:jar:tests:1.7.7:provided
|
||||
[INFO] | +- com.twitter:chill_2.11:jar:0.8.0:provided
|
||||
[INFO] | | \- com.esotericsoftware:kryo-shaded:jar:3.0.3:provided
|
||||
[INFO] | | +- com.esotericsoftware:minlog:jar:1.3.0:provided
|
||||
[INFO] | | \- org.objenesis:objenesis:jar:2.1:provided
|
||||
[INFO] | +- com.twitter:chill-java:jar:0.8.0:provided
|
||||
[INFO] | +- org.apache.xbean:xbean-asm5-shaded:jar:4.4:provided
|
||||
[INFO] | +- org.apache.hadoop:hadoop-client:jar:2.6.5:provided
|
||||
[INFO] | | +- org.apache.hadoop:hadoop-common:jar:2.6.5:provided
|
||||
[INFO] | | | +- commons-cli:commons-cli:jar:1.2:provided
|
||||
[INFO] | | | +- xmlenc:xmlenc:jar:0.52:provided
|
||||
[INFO] | | | +- commons-httpclient:commons-httpclient:jar:3.1:provided
|
||||
[INFO] | | | +- commons-configuration:commons-configuration:jar:1.6:provided
|
||||
[INFO] | | | | +- commons-digester:commons-digester:jar:1.8:provided
|
||||
[INFO] | | | | | \- commons-beanutils:commons-beanutils:jar:1.7.0:provided
|
||||
[INFO] | | | | \- commons-beanutils:commons-beanutils-core:jar:1.8.0:provided
|
||||
[INFO] | | | +- org.apache.hadoop:hadoop-auth:jar:2.6.5:provided
|
||||
[INFO] | | | | \- org.apache.directory.server:apacheds-kerberos-codec:jar:2.0.0-M15:provided
|
||||
[INFO] | | | | +- org.apache.directory.server:apacheds-i18n:jar:2.0.0-M15:provided
|
||||
[INFO] | | | | +- org.apache.directory.api:api-asn1-api:jar:1.0.0-M20:provided
|
||||
[INFO] | | | | \- org.apache.directory.api:api-util:jar:1.0.0-M20:provided
|
||||
[INFO] | | | +- org.apache.curator:curator-client:jar:2.6.0:provided
|
||||
[INFO] | | | \- org.htrace:htrace-core:jar:3.0.4:provided
|
||||
[INFO] | | +- org.apache.hadoop:hadoop-hdfs:jar:2.6.5:provided
|
||||
[INFO] | | | +- org.mortbay.jetty:jetty-util:jar:6.1.26:provided
|
||||
[INFO] | | | \- xerces:xercesImpl:jar:2.9.1:provided
|
||||
[INFO] | | | \- xml-apis:xml-apis:jar:1.3.04:provided
|
||||
[INFO] | | +- org.apache.hadoop:hadoop-mapreduce-client-app:jar:2.6.5:provided
|
||||
[INFO] | | | +- org.apache.hadoop:hadoop-mapreduce-client-common:jar:2.6.5:provided
|
||||
[INFO] | | | | +- org.apache.hadoop:hadoop-yarn-client:jar:2.6.5:provided
|
||||
[INFO] | | | | \- org.apache.hadoop:hadoop-yarn-server-common:jar:2.6.5:provided
|
||||
[INFO] | | | \- org.apache.hadoop:hadoop-mapreduce-client-shuffle:jar:2.6.5:provided
|
||||
[INFO] | | +- org.apache.hadoop:hadoop-yarn-api:jar:2.6.5:provided
|
||||
[INFO] | | +- org.apache.hadoop:hadoop-mapreduce-client-core:jar:2.6.5:provided
|
||||
[INFO] | | | \- org.apache.hadoop:hadoop-yarn-common:jar:2.6.5:provided
|
||||
[INFO] | | | +- javax.xml.bind:jaxb-api:jar:2.2.2:provided
|
||||
[INFO] | | | | \- javax.xml.stream:stax-api:jar:1.0-2:provided
|
||||
[INFO] | | | +- org.codehaus.jackson:jackson-jaxrs:jar:1.9.13:provided
|
||||
[INFO] | | | \- org.codehaus.jackson:jackson-xc:jar:1.9.13:provided
|
||||
[INFO] | | +- org.apache.hadoop:hadoop-mapreduce-client-jobclient:jar:2.6.5:provided
|
||||
[INFO] | | \- org.apache.hadoop:hadoop-annotations:jar:2.6.5:provided
|
||||
[INFO] | +- org.apache.spark:spark-launcher_2.11:jar:2.2.0:provided
|
||||
[INFO] | +- org.apache.spark:spark-network-common_2.11:jar:2.2.0:provided
|
||||
[INFO] | | \- org.fusesource.leveldbjni:leveldbjni-all:jar:1.8:provided
|
||||
[INFO] | +- org.apache.spark:spark-network-shuffle_2.11:jar:2.2.0:provided
|
||||
[INFO] | +- org.apache.spark:spark-unsafe_2.11:jar:2.2.0:provided
|
||||
[INFO] | +- net.java.dev.jets3t:jets3t:jar:0.9.3:provided
|
||||
[INFO] | | +- org.apache.httpcomponents:httpcore:jar:4.3.3:provided
|
||||
[INFO] | | +- org.apache.httpcomponents:httpclient:jar:4.3.6:provided
|
||||
[INFO] | | +- javax.activation:activation:jar:1.1.1:provided
|
||||
[INFO] | | +- mx4j:mx4j:jar:3.0.2:provided
|
||||
[INFO] | | +- javax.mail:mail:jar:1.4.7:provided
|
||||
[INFO] | | +- org.bouncycastle:bcprov-jdk15on:jar:1.51:provided
|
||||
[INFO] | | \- com.jamesmurty.utils:java-xmlbuilder:jar:1.0:provided
|
||||
[INFO] | | \- net.iharder:base64:jar:2.3.8:provided
|
||||
[INFO] | +- org.apache.curator:curator-recipes:jar:2.6.0:provided
|
||||
[INFO] | | +- org.apache.curator:curator-framework:jar:2.6.0:provided
|
||||
[INFO] | | \- org.apache.zookeeper:zookeeper:jar:3.4.6:provided
|
||||
[INFO] | +- javax.servlet:javax.servlet-api:jar:3.1.0:provided
|
||||
[INFO] | +- org.apache.commons:commons-lang3:jar:3.5:provided
|
||||
[INFO] | +- com.google.code.findbugs:jsr305:jar:1.3.9:provided
|
||||
[INFO] | +- org.slf4j:slf4j-api:jar:1.7.16:provided
|
||||
[INFO] | +- org.slf4j:jul-to-slf4j:jar:1.7.16:provided
|
||||
[INFO] | +- org.slf4j:jcl-over-slf4j:jar:1.7.16:provided
|
||||
[INFO] | +- org.slf4j:slf4j-log4j12:jar:1.7.16:provided
|
||||
[INFO] | +- com.ning:compress-lzf:jar:1.0.3:provided
|
||||
[INFO] | +- org.xerial.snappy:snappy-java:jar:1.1.2.6:provided
|
||||
[INFO] | +- net.jpountz.lz4:lz4:jar:1.3.0:provided
|
||||
[INFO] | +- org.roaringbitmap:RoaringBitmap:jar:0.5.11:provided
|
||||
[INFO] | +- commons-net:commons-net:jar:2.2:provided
|
||||
[INFO] | +- org.scala-lang:scala-library:jar:2.11.8:provided
|
||||
[INFO] | +- org.json4s:json4s-jackson_2.11:jar:3.2.11:provided
|
||||
[INFO] | | \- org.json4s:json4s-core_2.11:jar:3.2.11:provided
|
||||
[INFO] | | +- org.json4s:json4s-ast_2.11:jar:3.2.11:provided
|
||||
[INFO] | | \- org.scala-lang:scalap:jar:2.11.0:provided
|
||||
[INFO] | | \- org.scala-lang:scala-compiler:jar:2.11.0:provided
|
||||
[INFO] | | +- org.scala-lang.modules:scala-xml_2.11:jar:1.0.1:provided
|
||||
[INFO] | | \- org.scala-lang.modules:scala-parser-combinators_2.11:jar:1.0.1:provided
|
||||
[INFO] | +- org.glassfish.jersey.core:jersey-client:jar:2.22.2:provided
|
||||
[INFO] | | +- javax.ws.rs:javax.ws.rs-api:jar:2.0.1:provided
|
||||
[INFO] | | +- org.glassfish.hk2:hk2-api:jar:2.4.0-b34:provided
|
||||
[INFO] | | | +- org.glassfish.hk2:hk2-utils:jar:2.4.0-b34:provided
|
||||
[INFO] | | | \- org.glassfish.hk2.external:aopalliance-repackaged:jar:2.4.0-b34:provided
|
||||
[INFO] | | +- org.glassfish.hk2.external:javax.inject:jar:2.4.0-b34:provided
|
||||
[INFO] | | \- org.glassfish.hk2:hk2-locator:jar:2.4.0-b34:provided
|
||||
[INFO] | +- org.glassfish.jersey.core:jersey-common:jar:2.22.2:provided
|
||||
[INFO] | | +- javax.annotation:javax.annotation-api:jar:1.2:provided
|
||||
[INFO] | | +- org.glassfish.jersey.bundles.repackaged:jersey-guava:jar:2.22.2:provided
|
||||
[INFO] | | \- org.glassfish.hk2:osgi-resource-locator:jar:1.0.1:provided
|
||||
[INFO] | +- org.glassfish.jersey.core:jersey-server:jar:2.22.2:provided
|
||||
[INFO] | | +- org.glassfish.jersey.media:jersey-media-jaxb:jar:2.22.2:provided
|
||||
[INFO] | | \- javax.validation:validation-api:jar:1.1.0.Final:provided
|
||||
[INFO] | +- org.glassfish.jersey.containers:jersey-container-servlet:jar:2.22.2:provided
|
||||
[INFO] | +- org.glassfish.jersey.containers:jersey-container-servlet-core:jar:2.22.2:provided
|
||||
[INFO] | +- io.netty:netty-all:jar:4.0.43.Final:provided
|
||||
[INFO] | +- io.netty:netty:jar:3.9.9.Final:provided
|
||||
[INFO] | +- com.clearspring.analytics:stream:jar:2.7.0:provided
|
||||
[INFO] | +- io.dropwizard.metrics:metrics-core:jar:3.1.2:provided
|
||||
[INFO] | +- io.dropwizard.metrics:metrics-jvm:jar:3.1.2:provided
|
||||
[INFO] | +- io.dropwizard.metrics:metrics-json:jar:3.1.2:provided
|
||||
[INFO] | +- io.dropwizard.metrics:metrics-graphite:jar:3.1.2:provided
|
||||
[INFO] | +- com.fasterxml.jackson.module:jackson-module-scala_2.11:jar:2.6.5:provided
|
||||
[INFO] | | +- org.scala-lang:scala-reflect:jar:2.11.7:provided
|
||||
[INFO] | | \- com.fasterxml.jackson.module:jackson-module-paranamer:jar:2.6.5:provided
|
||||
[INFO] | +- org.apache.ivy:ivy:jar:2.4.0:provided
|
||||
[INFO] | +- oro:oro:jar:2.0.8:provided
|
||||
[INFO] | +- net.razorvine:pyrolite:jar:4.13:provided
|
||||
[INFO] | +- net.sf.py4j:py4j:jar:0.10.4:provided
|
||||
[INFO] | +- org.apache.spark:spark-tags_2.11:jar:2.2.0:provided
|
||||
[INFO] | +- org.apache.commons:commons-crypto:jar:1.0.0:provided
|
||||
[INFO] | \- org.spark-project.spark:unused:jar:1.0.0:provided
|
||||
[INFO] +- org.apache.spark:spark-graphx_2.11:jar:2.2.0:provided
|
||||
[INFO] | +- org.apache.spark:spark-mllib-local_2.11:jar:2.2.0:provided
|
||||
[INFO] | | \- org.scalanlp:breeze_2.11:jar:0.13.1:provided
|
||||
[INFO] | | +- org.scalanlp:breeze-macros_2.11:jar:0.13.1:provided
|
||||
[INFO] | | +- net.sf.opencsv:opencsv:jar:2.3:provided
|
||||
[INFO] | | +- com.github.rwl:jtransforms:jar:2.4.0:provided
|
||||
[INFO] | | +- org.spire-math:spire_2.11:jar:0.13.0:provided
|
||||
[INFO] | | | +- org.spire-math:spire-macros_2.11:jar:0.13.0:provided
|
||||
[INFO] | | | \- org.typelevel:machinist_2.11:jar:0.6.1:provided
|
||||
[INFO] | | \- com.chuusai:shapeless_2.11:jar:2.3.2:provided
|
||||
[INFO] | | \- org.typelevel:macro-compat_2.11:jar:1.1.1:provided
|
||||
[INFO] | +- com.github.fommil.netlib:core:jar:1.1.2:provided
|
||||
[INFO] | \- net.sourceforge.f2j:arpack_combined_all:jar:0.1:provided
|
||||
[INFO] +- org.apache.spark:spark-sql_2.11:jar:2.2.0:provided
|
||||
[INFO] | +- com.univocity:univocity-parsers:jar:2.2.1:provided
|
||||
[INFO] | +- org.apache.spark:spark-sketch_2.11:jar:2.2.0:provided
|
||||
[INFO] | +- org.apache.spark:spark-catalyst_2.11:jar:2.2.0:provided
|
||||
[INFO] | | +- org.codehaus.janino:janino:jar:3.0.0:provided
|
||||
[INFO] | | +- org.codehaus.janino:commons-compiler:jar:3.0.0:provided
|
||||
[INFO] | | \- org.antlr:antlr4-runtime:jar:4.5.3:provided
|
||||
[INFO] | +- org.apache.parquet:parquet-column:jar:1.8.2:provided
|
||||
[INFO] | | +- org.apache.parquet:parquet-common:jar:1.8.2:provided
|
||||
[INFO] | | \- org.apache.parquet:parquet-encoding:jar:1.8.2:provided
|
||||
[INFO] | \- org.apache.parquet:parquet-hadoop:jar:1.8.2:provided
|
||||
[INFO] | +- org.apache.parquet:parquet-format:jar:2.3.1:provided
|
||||
[INFO] | \- org.apache.parquet:parquet-jackson:jar:1.8.2:provided
|
||||
[INFO] +- eu.dnetlib:dnet-openaireplus-mapping-utils:jar:6.2.18:test
|
||||
[INFO] | +- com.ximpleware:vtd-xml:jar:2.13.4:test (version selected from constraint [2.12,3.0.0))
|
||||
[INFO] | +- commons-codec:commons-codec:jar:1.9:provided
|
||||
[INFO] | +- dom4j:dom4j:jar:1.6.1:test (version selected from constraint [1.6.1,1.6.1])
|
||||
[INFO] | +- net.sf.supercsv:super-csv:jar:2.4.0:test
|
||||
[INFO] | +- eu.dnetlib:cnr-misc-utils:jar:1.0.6-SNAPSHOT:test (version selected from constraint [1.0.0,2.0.0))
|
||||
[INFO] | | +- jaxen:jaxen:jar:1.1.6:test
|
||||
[INFO] | | +- saxonica:saxon:jar:9.1.0.8:test
|
||||
[INFO] | | +- saxonica:saxon-dom:jar:9.1.0.8:test
|
||||
[INFO] | | +- jgrapht:jgrapht:jar:0.7.2:test
|
||||
[INFO] | | +- net.sf.ehcache:ehcache:jar:2.8.0:test
|
||||
[INFO] | | \- org.springframework:spring-test:jar:4.2.5.RELEASE:test (version selected from constraint [4.2.5.RELEASE,4.2.5.RELEASE])
|
||||
[INFO] | | \- org.springframework:spring-core:jar:4.2.5.RELEASE:test
|
||||
[INFO] | +- eu.dnetlib:dnet-hadoop-commons:jar:2.0.2-SNAPSHOT:test (version selected from constraint [2.0.0,3.0.0))
|
||||
[INFO] | | +- org.apache.hadoop:hadoop-core:jar:2.0.0-mr1-cdh4.7.0:test
|
||||
[INFO] | | | +- commons-el:commons-el:jar:1.0:test
|
||||
[INFO] | | | \- hsqldb:hsqldb:jar:1.8.0.10:test
|
||||
[INFO] | | \- org.springframework:spring-beans:jar:4.2.5.RELEASE:test (version selected from constraint [4.2.5.RELEASE,4.2.5.RELEASE])
|
||||
[INFO] | \- eu.dnetlib:dnet-index-solr-common:jar:1.3.1:test (version selected from constraint [1.0.0,1.3.1])
|
||||
[INFO] | \- org.apache.solr:solr-solrj:jar:4.9.0:test
|
||||
[INFO] | +- org.apache.httpcomponents:httpmime:jar:4.3.1:test
|
||||
[INFO] | \- org.noggit:noggit:jar:0.5:test
|
||||
[INFO] \- junit:junit:jar:4.9:test
|
||||
[INFO] \- org.hamcrest:hamcrest-core:jar:1.1:test
|
||||
[INFO] ------------------------------------------------------------------------
|
||||
[INFO] Reactor Summary:
|
||||
[INFO]
|
||||
[INFO] dnet-dedup 3.0.3-SNAPSHOT .......................... SUCCESS [ 1.152 s]
|
||||
[INFO] dnet-pace-core ..................................... SUCCESS [ 0.117 s]
|
||||
[INFO] dnet-dedup-test 3.0.3-SNAPSHOT ..................... SUCCESS [ 1.407 s]
|
||||
[INFO] ------------------------------------------------------------------------
|
||||
[INFO] BUILD SUCCESS
|
||||
[INFO] ------------------------------------------------------------------------
|
||||
[INFO] Total time: 3.216 s
|
||||
[INFO] Finished at: 2019-03-29T15:02:42+01:00
|
||||
[INFO] ------------------------------------------------------------------------
|
Binary file not shown.
|
@ -14,16 +14,17 @@
|
|||
<packaging>jar</packaging>
|
||||
|
||||
<build>
|
||||
<sourceDirectory>src/main/java</sourceDirectory>
|
||||
<plugins>
|
||||
<plugin>
|
||||
<groupId>org.apache.maven.plugins</groupId>
|
||||
<artifactId>maven-deploy-plugin</artifactId>
|
||||
<version>2.7</version>
|
||||
<!--<configuration>-->
|
||||
<!--<skip>true</skip>-->
|
||||
<!--</configuration>-->
|
||||
<configuration>
|
||||
<skip>true</skip>
|
||||
</configuration>
|
||||
</plugin>
|
||||
|
||||
|
||||
<plugin>
|
||||
<groupId>org.apache.maven.plugins</groupId>
|
||||
<artifactId>maven-compiler-plugin</artifactId>
|
||||
|
@ -31,11 +32,50 @@
|
|||
<source>1.8</source>
|
||||
<target>1.8</target>
|
||||
<includes>
|
||||
<include>src/main/java/**/*.java</include>
|
||||
<include>src/main/java/**/*.scala</include>
|
||||
<include>**/*.java</include>
|
||||
</includes>
|
||||
<!--<includes>-->
|
||||
<!--<include>src/main/java/**/*.java</include>-->
|
||||
<!--<include>src/main/java/**/*.scala</include>-->
|
||||
<!--</includes>-->
|
||||
</configuration>
|
||||
</plugin>
|
||||
|
||||
<plugin>
|
||||
<groupId>net.alchim31.maven</groupId>
|
||||
<artifactId>scala-maven-plugin</artifactId>
|
||||
<version>4.0.1</version>
|
||||
<!--<executions>-->
|
||||
<!--<execution>-->
|
||||
<!--<goals>-->
|
||||
<!--<goal>compile</goal>-->
|
||||
<!--<goal>testCompile</goal>-->
|
||||
<!--</goals>-->
|
||||
<!--</execution>-->
|
||||
<!--</executions>-->
|
||||
<executions>
|
||||
<execution>
|
||||
<id>scala-compile-first</id>
|
||||
<phase>initialize</phase>
|
||||
<goals>
|
||||
<goal>add-source</goal>
|
||||
<goal>compile</goal>
|
||||
</goals>
|
||||
</execution>
|
||||
<execution>
|
||||
<id>scala-test-compile</id>
|
||||
<phase>process-test-resources</phase>
|
||||
<goals>
|
||||
<goal>testCompile</goal>
|
||||
</goals>
|
||||
</execution>
|
||||
</executions>
|
||||
<configuration>
|
||||
<scalaVersion>${scala.version}</scalaVersion>
|
||||
</configuration>
|
||||
</plugin>
|
||||
|
||||
|
||||
</plugins>
|
||||
</build>
|
||||
|
||||
|
@ -78,12 +118,22 @@
|
|||
<scope>test</scope>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>com.fasterxml.jackson.core</groupId>
|
||||
<artifactId>jackson-databind</artifactId>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.apache.oozie</groupId>
|
||||
<artifactId>oozie-client</artifactId>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.scala-lang</groupId>
|
||||
<artifactId>scala-library</artifactId>
|
||||
</dependency>
|
||||
|
||||
</dependencies>
|
||||
|
||||
</project>
|
Binary file not shown.
|
@ -0,0 +1,128 @@
|
|||
package eu.dnetlib;
|
||||
|
||||
import com.google.common.collect.Sets;
|
||||
import eu.dnetlib.data.proto.DedupProtos;
|
||||
import eu.dnetlib.graph.GraphProcessor;
|
||||
import eu.dnetlib.pace.clustering.BlacklistAwareClusteringCombiner;
|
||||
import eu.dnetlib.pace.config.DedupConfig;
|
||||
import eu.dnetlib.pace.model.MapDocument;
|
||||
import eu.dnetlib.pace.util.BlockProcessor;
|
||||
import eu.dnetlib.pace.utils.PaceUtils;
|
||||
import eu.dnetlib.reporter.SparkCounter;
|
||||
import eu.dnetlib.reporter.SparkReporter;
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.fs.FSDataInputStream;
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.spark.api.java.JavaPairRDD;
|
||||
import org.apache.spark.api.java.JavaRDD;
|
||||
import org.apache.spark.api.java.JavaSparkContext;
|
||||
import org.apache.spark.graphx.Edge;
|
||||
import org.apache.spark.rdd.RDD;
|
||||
import org.apache.spark.sql.SparkSession;
|
||||
import scala.Tuple2;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.StringWriter;
|
||||
import java.net.URI;
|
||||
import java.net.URL;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.util.Set;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
public class SparkLocalTest {
|
||||
public static SparkCounter counter ;
|
||||
|
||||
public static void main(String[] args) {
|
||||
|
||||
final SparkSession spark = SparkSession
|
||||
.builder()
|
||||
.appName("Deduplication")
|
||||
.master("local[*]")
|
||||
.getOrCreate();
|
||||
|
||||
final JavaSparkContext context = new JavaSparkContext(spark.sparkContext());
|
||||
|
||||
final URL dataset = SparkTest.class.getResource("/eu/dnetlib/pace/organization.to.fix.json");
|
||||
final JavaRDD<String> dataRDD = context.textFile(dataset.getPath());
|
||||
|
||||
counter = new SparkCounter(context);
|
||||
|
||||
//read the configuration from the classpath
|
||||
final DedupConfig config = DedupConfig.load(Utility.readFromClasspath("/eu/dnetlib/pace/org.curr.conf"));
|
||||
|
||||
BlockProcessor.constructAccumulator(config);
|
||||
BlockProcessor.accumulators.forEach(acc -> {
|
||||
|
||||
final String[] values = acc.split("::");
|
||||
counter.incrementCounter(values[0], values[1], 0);
|
||||
|
||||
});
|
||||
|
||||
//create vertexes of the graph: <ID, MapDocument>
|
||||
JavaPairRDD<String, MapDocument> mapDocs = dataRDD.mapToPair(it -> {
|
||||
MapDocument mapDocument = PaceUtils.asMapDocument(config, it);
|
||||
return new Tuple2<>(mapDocument.getIdentifier(), mapDocument);
|
||||
});
|
||||
RDD<Tuple2<Object, MapDocument>> vertexes = mapDocs.mapToPair(t -> new Tuple2<Object, MapDocument>( (long) t._1().hashCode(), t._2())).rdd();
|
||||
|
||||
//create relations between documents
|
||||
JavaPairRDD<String, Iterable<MapDocument>> blocks = mapDocs.reduceByKey((a, b) -> a) //the reduce is just to be sure that we haven't document with same id
|
||||
//Clustering: from <id, doc> to List<groupkey,doc>
|
||||
.flatMapToPair(a -> {
|
||||
final MapDocument currentDocument = a._2();
|
||||
|
||||
return Utility.getGroupingKeys(config, currentDocument).stream()
|
||||
.map(it -> new Tuple2<>(it, currentDocument)).collect(Collectors.toList()).iterator();
|
||||
}).groupByKey();//group documents basing on the key
|
||||
|
||||
//print blocks
|
||||
blocks.foreach(b -> {
|
||||
String print = b._1() + ": ";
|
||||
for (MapDocument doc : b._2()) {
|
||||
print += doc.getIdentifier() + " ";
|
||||
}
|
||||
System.out.println(print);
|
||||
});
|
||||
|
||||
//create relations by comparing only elements in the same group
|
||||
final JavaPairRDD<String, String> relationRDD = blocks.flatMapToPair(it -> {
|
||||
final SparkReporter reporter = new SparkReporter(counter);
|
||||
new BlockProcessor(config).process(it._1(), it._2(), reporter);
|
||||
return reporter.getReport().iterator();
|
||||
});
|
||||
|
||||
final RDD<Edge<String>> edgeRdd = relationRDD.map(it -> new Edge<>(it._1().hashCode(),it._2().hashCode(), "similarTo")).rdd();
|
||||
|
||||
JavaRDD<ConnectedComponent> ccs = GraphProcessor.findCCs(vertexes, edgeRdd, 20).toJavaRDD();
|
||||
|
||||
final JavaRDD<ConnectedComponent> connectedComponents = ccs.filter(cc -> cc.getDocs().size()>1);
|
||||
final JavaRDD<ConnectedComponent> nonDeduplicated = ccs.filter(cc -> cc.getDocs().size()==1);
|
||||
|
||||
System.out.println("Non duplicates: " + nonDeduplicated.count());
|
||||
System.out.println("Duplicates: " + connectedComponents.flatMap(cc -> cc.getDocs().iterator()).count());
|
||||
System.out.println("Connected Components: " + connectedComponents.count());
|
||||
|
||||
counter.getAccumulators().values().forEach(it-> System.out.println(it.getGroup()+" "+it.getName()+" -->"+it.value()));
|
||||
|
||||
//print deduped
|
||||
connectedComponents.foreach(cc -> {
|
||||
System.out.println("cc = " + cc.getId());
|
||||
for (MapDocument doc: cc.getDocs()) {
|
||||
System.out.println(doc.getIdentifier() + "; ln: " + doc.getFieldMap().get("legalname").stringValue() + "; sn: " + doc.getFieldMap().get("legalshortname").stringValue());
|
||||
}
|
||||
});
|
||||
//print nondeduped
|
||||
nonDeduplicated.foreach(cc -> {
|
||||
System.out.println("nd = " + cc.getId());
|
||||
System.out.println(cc.getDocs().iterator().next().getFieldMap().get("legalname").stringValue() + "; sn: " + cc.getDocs().iterator().next().getFieldMap().get("legalshortname").stringValue());
|
||||
});
|
||||
|
||||
//print ids
|
||||
//// ccs.foreach(cc -> System.out.println(cc.getId()));
|
||||
//// connectedComponents.saveAsTextFile("file:///Users/miconis/Downloads/dumps/organizations_dedup");
|
||||
|
||||
}
|
||||
|
||||
}
|
|
@ -1,42 +1,41 @@
|
|||
package eu.dnetlib;
|
||||
|
||||
import com.google.common.collect.Sets;
|
||||
import eu.dnetlib.graph.GraphProcessor;
|
||||
import eu.dnetlib.pace.clustering.BlacklistAwareClusteringCombiner;
|
||||
import eu.dnetlib.pace.config.DedupConfig;
|
||||
import eu.dnetlib.pace.model.MapDocument;
|
||||
import eu.dnetlib.pace.util.BlockProcessor;
|
||||
import eu.dnetlib.pace.utils.PaceUtils;
|
||||
import eu.dnetlib.reporter.SparkCounter;
|
||||
import eu.dnetlib.reporter.SparkReporter;
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.apache.spark.SparkConf;
|
||||
import org.apache.spark.api.java.JavaPairRDD;
|
||||
import org.apache.spark.api.java.JavaRDD;
|
||||
import org.apache.spark.api.java.JavaSparkContext;
|
||||
import org.apache.spark.graphx.Edge;
|
||||
import org.apache.spark.rdd.RDD;
|
||||
import org.apache.spark.sql.SparkSession;
|
||||
import scala.Tuple2;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.StringWriter;
|
||||
import java.net.URL;
|
||||
import java.util.Set;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
public class SparkTest {
|
||||
public static SparkCounter counter ;
|
||||
|
||||
public static void main(String[] args) {
|
||||
final JavaSparkContext context = new JavaSparkContext(new SparkConf().setAppName("Deduplication").setMaster("yarn"));
|
||||
public static void main(String[] args) throws IOException {
|
||||
|
||||
final URL dataset = SparkTest.class.getResource(args[1]);
|
||||
final JavaRDD<String> dataRDD = context.textFile(dataset.getPath());
|
||||
final SparkSession spark = SparkSession
|
||||
.builder()
|
||||
.appName("Deduplication")
|
||||
.master("yarn")
|
||||
.getOrCreate();
|
||||
|
||||
final JavaSparkContext context = new JavaSparkContext(spark.sparkContext());
|
||||
|
||||
final JavaRDD<String> dataRDD = Utility.loadDataFromHDFS(args[0], context);
|
||||
|
||||
counter = new SparkCounter(context);
|
||||
|
||||
//read the configuration from the classpath
|
||||
final DedupConfig config = DedupConfig.load(readFromClasspath(args[0]));
|
||||
final DedupConfig config = Utility.loadConfigFromHDFS(args[1]);
|
||||
|
||||
BlockProcessor.constructAccumulator(config);
|
||||
BlockProcessor.accumulators.forEach(acc -> {
|
||||
|
@ -59,7 +58,7 @@ public class SparkTest {
|
|||
.flatMapToPair(a -> {
|
||||
final MapDocument currentDocument = a._2();
|
||||
|
||||
return getGroupingKeys(config, currentDocument).stream()
|
||||
return Utility.getGroupingKeys(config, currentDocument).stream()
|
||||
.map(it -> new Tuple2<>(it, currentDocument)).collect(Collectors.toList()).iterator();
|
||||
}).groupByKey();//group documents basing on the key
|
||||
|
||||
|
@ -105,24 +104,10 @@ public class SparkTest {
|
|||
System.out.println(cc.getDocs().iterator().next().getFieldMap().get("legalname").stringValue() + "; sn: " + cc.getDocs().iterator().next().getFieldMap().get("legalshortname").stringValue());
|
||||
});
|
||||
|
||||
//print ids
|
||||
//// ccs.foreach(cc -> System.out.println(cc.getId()));
|
||||
//// connectedComponents.saveAsTextFile("file:///Users/miconis/Downloads/dumps/organizations_dedup");
|
||||
// print ids
|
||||
// ccs.foreach(cc -> System.out.println(cc.getId()));
|
||||
// connectedComponents.saveAsTextFile("file:///Users/miconis/Downloads/dumps/organizations_dedup");
|
||||
|
||||
}
|
||||
|
||||
static String readFromClasspath(final String filename) {
|
||||
final StringWriter sw = new StringWriter();
|
||||
try {
|
||||
IOUtils.copy(SparkTest.class.getResourceAsStream(filename), sw);
|
||||
return sw.toString();
|
||||
} catch (final IOException e) {
|
||||
throw new RuntimeException("cannot load resource from classpath: " + filename);
|
||||
}
|
||||
}
|
||||
|
||||
static Set<String> getGroupingKeys(DedupConfig conf, MapDocument doc) {
|
||||
return Sets.newHashSet(BlacklistAwareClusteringCombiner.filterAndCombine(doc, conf));
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,50 @@
|
|||
package eu.dnetlib;
|
||||
|
||||
import com.google.common.collect.Sets;
|
||||
import eu.dnetlib.pace.clustering.BlacklistAwareClusteringCombiner;
|
||||
import eu.dnetlib.pace.config.DedupConfig;
|
||||
import eu.dnetlib.pace.model.MapDocument;
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.fs.FSDataInputStream;
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.spark.api.java.JavaRDD;
|
||||
import org.apache.spark.api.java.JavaSparkContext;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.StringWriter;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.util.Set;
|
||||
|
||||
public class Utility {
|
||||
|
||||
public static JavaRDD<String> loadDataFromHDFS(String path, JavaSparkContext context) {
|
||||
return context.textFile(path);
|
||||
}
|
||||
|
||||
public static DedupConfig loadConfigFromHDFS(String path) throws IOException {
|
||||
|
||||
Configuration conf = new Configuration();
|
||||
// conf.set("fs.defaultFS", "");
|
||||
FileSystem fileSystem = FileSystem.get(conf);
|
||||
FSDataInputStream inputStream = new FSDataInputStream(fileSystem.open(new Path(path)));
|
||||
|
||||
return DedupConfig.load(IOUtils.toString(inputStream, StandardCharsets.UTF_8.name()));
|
||||
|
||||
}
|
||||
|
||||
static String readFromClasspath(final String filename) {
|
||||
final StringWriter sw = new StringWriter();
|
||||
try {
|
||||
IOUtils.copy(SparkTest.class.getResourceAsStream(filename), sw);
|
||||
return sw.toString();
|
||||
} catch (final IOException e) {
|
||||
throw new RuntimeException("cannot load resource from classpath: " + filename);
|
||||
}
|
||||
}
|
||||
|
||||
static Set<String> getGroupingKeys(DedupConfig conf, MapDocument doc) {
|
||||
return Sets.newHashSet(BlacklistAwareClusteringCombiner.filterAndCombine(doc, conf));
|
||||
}
|
||||
}
|
|
@ -6,8 +6,7 @@ import org.apache.oozie.client.OozieClientException;
|
|||
import org.apache.oozie.client.WorkflowJob;
|
||||
import org.junit.Test;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.StringWriter;
|
||||
import java.io.*;
|
||||
import java.util.Properties;
|
||||
|
||||
import static junit.framework.Assert.assertEquals;
|
||||
|
|
|
@ -0,0 +1,17 @@
|
|||
package eu.dnetlib.pace;
|
||||
|
||||
import eu.dnetlib.SparkLocalTest;
|
||||
import org.junit.Test;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
public class SparkTester {
|
||||
|
||||
@Test
|
||||
public void sparkLocalTest() throws IOException {
|
||||
|
||||
SparkLocalTest.main(new String[]{});
|
||||
}
|
||||
|
||||
|
||||
}
|
|
@ -0,0 +1,109 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
|
||||
<!--
|
||||
Licensed to the Apache Software Foundation (ASF) under one
|
||||
or more contributor license agreements. See the NOTICE file
|
||||
distributed with this work for additional information
|
||||
regarding copyright ownership. The ASF licenses this file
|
||||
to you under the Apache License, Version 2.0 (the
|
||||
"License"); you may not use this file except in compliance
|
||||
with the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing,
|
||||
software distributed under the License is distributed on an
|
||||
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
KIND, either express or implied. See the License for the
|
||||
specific language governing permissions and limitations
|
||||
under the License.
|
||||
-->
|
||||
<project version="4" relativePaths="false">
|
||||
<component name="ProjectRootManager" version="2" assert-keyword="true" project-jdk-name="1.8" jdk-15="true"/>
|
||||
<component name="CodeStyleManager">
|
||||
<option name="USE_DEFAULT_CODE_STYLE_SCHEME" value="true"/>
|
||||
<option name="CODE_STYLE_SCHEME" value=""/>
|
||||
</component>
|
||||
<component name="libraryTable"/>
|
||||
<component name="CompilerConfiguration">
|
||||
<option name="DEFAULT_COMPILER" value="Javac"/>
|
||||
<option name="CLEAR_OUTPUT_DIRECTORY" value="false"/>
|
||||
<!--
|
||||
<wildcardResourcePatterns>
|
||||
<entry name="${wildcardResourcePattern}"/>
|
||||
</wildcardResourcePatterns>
|
||||
-->
|
||||
<wildcardResourcePatterns>
|
||||
<entry name="!?*.java"/>
|
||||
</wildcardResourcePatterns>
|
||||
</component>
|
||||
<component name="JavacSettings">
|
||||
<option name="DEBUGGING_INFO" value="true"/>
|
||||
<option name="GENERATE_NO_WARNINGS" value="false"/>
|
||||
<option name="DEPRECATION" value="true"/>
|
||||
<option name="ADDITIONAL_OPTIONS_STRING" value=""/>
|
||||
<option name="MAXIMUM_HEAP_SIZE" value="128"/>
|
||||
<option name="USE_GENERICS_COMPILER" value="false"/>
|
||||
</component>
|
||||
<component name="JikesSettings">
|
||||
<option name="DEBUGGING_INFO" value="true"/>
|
||||
<option name="DEPRECATION" value="true"/>
|
||||
<option name="GENERATE_NO_WARNINGS" value="false"/>
|
||||
<option name="GENERATE_MAKE_FILE_DEPENDENCIES" value="false"/>
|
||||
<option name="DO_FULL_DEPENDENCE_CHECK" value="false"/>
|
||||
<option name="IS_INCREMENTAL_MODE" value="false"/>
|
||||
<option name="IS_EMACS_ERRORS_MODE" value="true"/>
|
||||
<option name="ADDITIONAL_OPTIONS_STRING" value=""/>
|
||||
<option name="MAXIMUM_HEAP_SIZE" value="128"/>
|
||||
</component>
|
||||
<component name="AntConfiguration">
|
||||
<option name="IS_AUTOSCROLL_TO_SOURCE" value="false"/>
|
||||
<option name="FILTER_TARGETS" value="false"/>
|
||||
</component>
|
||||
<component name="JavadocGenerationManager">
|
||||
<option name="OUTPUT_DIRECTORY"/>
|
||||
<option name="OPTION_SCOPE" value="protected"/>
|
||||
<option name="OPTION_HIERARCHY" value="false"/>
|
||||
<option name="OPTION_NAVIGATOR" value="false"/>
|
||||
<option name="OPTION_INDEX" value="false"/>
|
||||
<option name="OPTION_SEPARATE_INDEX" value="false"/>
|
||||
<option name="OPTION_USE_1_1" value="false"/>
|
||||
<option name="OPTION_DOCUMENT_TAG_USE" value="false"/>
|
||||
<option name="OPTION_DOCUMENT_TAG_AUTHOR" value="false"/>
|
||||
<option name="OPTION_DOCUMENT_TAG_VERSION" value="false"/>
|
||||
<option name="OPTION_DOCUMENT_TAG_DEPRECATED" value="false"/>
|
||||
<option name="OPTION_DEPRECATED_LIST" value="false"/>
|
||||
<option name="OTHER_OPTIONS"/>
|
||||
<option name="HEAP_SIZE"/>
|
||||
<option name="OPEN_IN_BROWSER" value="false"/>
|
||||
</component>
|
||||
<component name="JUnitProjectSettings">
|
||||
<option name="TEST_RUNNER" value="UI"/>
|
||||
</component>
|
||||
<component name="EntryPointsManager">
|
||||
<entry_points/>
|
||||
</component>
|
||||
<component name="DataSourceManager"/>
|
||||
<component name="ExportToHTMLSettings">
|
||||
<option name="PRINT_LINE_NUMBERS" value="false"/>
|
||||
<option name="OPEN_IN_BROWSER" value="false"/>
|
||||
<option name="OUTPUT_DIRECTORY"/>
|
||||
</component>
|
||||
<component name="ImportConfiguration">
|
||||
<option name="VENDOR"/>
|
||||
<option name="RELEASE_TAG"/>
|
||||
<option name="LOG_MESSAGE"/>
|
||||
<option name="CHECKOUT_AFTER_IMPORT" value="true"/>
|
||||
</component>
|
||||
<component name="ProjectModuleManager">
|
||||
<modules>
|
||||
<!-- module filepath="$$PROJECT_DIR$$/${pom.artifactId}.iml"/ -->
|
||||
<module filepath="$PROJECT_DIR$/dnet-dedup.iml"/>
|
||||
<module filepath="$PROJECT_DIR$/dnet-pace-core/dnet-pace-core.iml"/>
|
||||
<module filepath="$PROJECT_DIR$/dnet-dedup-test/dnet-dedup-test.iml"/>
|
||||
</modules>
|
||||
</component>
|
||||
<UsedPathMacros>
|
||||
<!--<macro name="cargo"></macro>-->
|
||||
</UsedPathMacros>
|
||||
</project>
|
|
@ -0,0 +1,418 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
|
||||
<!--
|
||||
Licensed to the Apache Software Foundation (ASF) under one
|
||||
or more contributor license agreements. See the NOTICE file
|
||||
distributed with this work for additional information
|
||||
regarding copyright ownership. The ASF licenses this file
|
||||
to you under the Apache License, Version 2.0 (the
|
||||
"License"); you may not use this file except in compliance
|
||||
with the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing,
|
||||
software distributed under the License is distributed on an
|
||||
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
KIND, either express or implied. See the License for the
|
||||
specific language governing permissions and limitations
|
||||
under the License.
|
||||
-->
|
||||
<project version="4" relativePaths="false">
|
||||
<component name="LvcsProjectConfiguration">
|
||||
<option name="ADD_LABEL_ON_PROJECT_OPEN" value="true"/>
|
||||
<option name="ADD_LABEL_ON_PROJECT_COMPILATION" value="true"/>
|
||||
<option name="ADD_LABEL_ON_FILE_PACKAGE_COMPILATION" value="true"/>
|
||||
<option name="ADD_LABEL_ON_PROJECT_MAKE" value="true"/>
|
||||
<option name="ADD_LABEL_ON_RUNNING" value="true"/>
|
||||
<option name="ADD_LABEL_ON_DEBUGGING" value="true"/>
|
||||
<option name="ADD_LABEL_ON_UNIT_TEST_PASSED" value="true"/>
|
||||
<option name="ADD_LABEL_ON_UNIT_TEST_FAILED" value="true"/>
|
||||
</component>
|
||||
<component name="PropertiesComponent">
|
||||
<property name="MemberChooser.copyJavadoc" value="false"/>
|
||||
<property name="GoToClass.includeLibraries" value="false"/>
|
||||
<property name="MemberChooser.showClasses" value="true"/>
|
||||
<property name="MemberChooser.sorted" value="false"/>
|
||||
<property name="GoToFile.includeJavaFiles" value="false"/>
|
||||
<property name="GoToClass.toSaveIncludeLibraries" value="false"/>
|
||||
</component>
|
||||
<component name="ToolWindowManager">
|
||||
<frame x="-4" y="-4" width="1032" height="746" extended-state="6"/>
|
||||
<editor active="false"/>
|
||||
<layout>
|
||||
<window_info id="CVS" active="false" anchor="bottom" auto_hide="false" internal_type="docked" type="docked" visible="false" weight="0.33" order="-1"/>
|
||||
<window_info id="TODO" active="false" anchor="bottom" auto_hide="false" internal_type="docked" type="docked" visible="false" weight="0.33" order="7"/>
|
||||
<window_info id="Project" active="false" anchor="left" auto_hide="false" internal_type="docked" type="docked" visible="false" weight="0.25" order="0"/>
|
||||
<window_info id="Find" active="false" anchor="bottom" auto_hide="false" internal_type="docked" type="docked" visible="false" weight="0.33" order="1"/>
|
||||
<window_info id="Structure" active="false" anchor="left" auto_hide="false" internal_type="docked" type="docked" visible="false" weight="0.25" order="1"/>
|
||||
<window_info id="Messages" active="false" anchor="bottom" auto_hide="false" internal_type="docked" type="docked" visible="false" weight="0.33" order="-1"/>
|
||||
<window_info id="Inspection" active="false" anchor="bottom" auto_hide="false" internal_type="docked" type="docked" visible="false" weight="0.4" order="6"/>
|
||||
<window_info id="Aspects" active="false" anchor="right" auto_hide="false" internal_type="docked" type="docked" visible="false" weight="0.33" order="-1"/>
|
||||
<window_info id="Ant Build" active="false" anchor="right" auto_hide="false" internal_type="docked" type="docked" visible="false" weight="0.25" order="1"/>
|
||||
<window_info id="Run" active="false" anchor="bottom" auto_hide="false" internal_type="docked" type="docked" visible="false" weight="0.33" order="2"/>
|
||||
<window_info id="Hierarchy" active="false" anchor="right" auto_hide="false" internal_type="docked" type="docked" visible="false" weight="0.25" order="2"/>
|
||||
<window_info id="Debug" active="false" anchor="bottom" auto_hide="false" internal_type="docked" type="docked" visible="false" weight="0.4" order="4"/>
|
||||
<window_info id="Commander" active="false" anchor="right" auto_hide="false" internal_type="sliding" type="sliding" visible="false" weight="0.4" order="0"/>
|
||||
<window_info id="Web" active="false" anchor="left" auto_hide="false" internal_type="docked" type="docked" visible="false" weight="0.25" order="2"/>
|
||||
<window_info id="Message" active="false" anchor="bottom" auto_hide="false" internal_type="docked" type="docked" visible="false" weight="0.33" order="0"/>
|
||||
<window_info id="EJB" active="false" anchor="bottom" auto_hide="false" internal_type="docked" type="docked" visible="false" weight="0.25" order="3"/>
|
||||
<window_info id="Cvs" active="false" anchor="bottom" auto_hide="false" internal_type="docked" type="docked" visible="false" weight="0.25" order="5"/>
|
||||
</layout>
|
||||
</component>
|
||||
<component name="ErrorTreeViewConfiguration">
|
||||
<option name="IS_AUTOSCROLL_TO_SOURCE" value="false"/>
|
||||
<option name="HIDE_WARNINGS" value="false"/>
|
||||
</component>
|
||||
<component name="StructureViewFactory">
|
||||
<option name="SORT_MODE" value="0"/>
|
||||
<option name="GROUP_INHERITED" value="true"/>
|
||||
<option name="AUTOSCROLL_MODE" value="true"/>
|
||||
<option name="SHOW_FIELDS" value="true"/>
|
||||
<option name="AUTOSCROLL_FROM_SOURCE" value="false"/>
|
||||
<option name="GROUP_GETTERS_AND_SETTERS" value="true"/>
|
||||
<option name="SHOW_INHERITED" value="false"/>
|
||||
<option name="HIDE_NOT_PUBLIC" value="false"/>
|
||||
</component>
|
||||
<component name="ProjectViewSettings">
|
||||
<navigator currentView="ProjectPane" flattenPackages="false" showMembers="false" showStructure="false" autoscrollToSource="false" splitterProportion="0.5"/>
|
||||
<view id="ProjectPane">
|
||||
<expanded_node type="directory" url="file://$PROJECT_DIR$"/>
|
||||
</view>
|
||||
<view id="SourcepathPane"/>
|
||||
<view id="ClasspathPane"/>
|
||||
</component>
|
||||
<component name="Commander">
|
||||
<leftPanel view="Project"/>
|
||||
<rightPanel view="Project"/>
|
||||
<splitter proportion="0.5"/>
|
||||
</component>
|
||||
<component name="AspectsView"/>
|
||||
<component name="SelectInManager"/>
|
||||
<component name="HierarchyBrowserManager">
|
||||
<option name="SHOW_PACKAGES" value="false"/>
|
||||
<option name="IS_AUTOSCROLL_TO_SOURCE" value="false"/>
|
||||
<option name="SORT_ALPHABETICALLY" value="false"/>
|
||||
</component>
|
||||
<component name="TodoView" selected-index="0">
|
||||
<todo-panel id="selected-file">
|
||||
<are-packages-shown value="false"/>
|
||||
<flatten-packages value="false"/>
|
||||
<is-autoscroll-to-source value="true"/>
|
||||
</todo-panel>
|
||||
<todo-panel id="all">
|
||||
<are-packages-shown value="true"/>
|
||||
<flatten-packages value="false"/>
|
||||
<is-autoscroll-to-source value="true"/>
|
||||
</todo-panel>
|
||||
</component>
|
||||
<component name="editorManager"/>
|
||||
<component name="editorHistoryManager"/>
|
||||
<component name="DaemonCodeAnalyzer">
|
||||
<disable_hints/>
|
||||
</component>
|
||||
<component name="InspectionManager">
|
||||
<option name="AUTOSCROLL_TO_SOURCE" value="false"/>
|
||||
<option name="SPLITTER_PROPORTION" value="0.5"/>
|
||||
<profile name="Default"/>
|
||||
</component>
|
||||
<component name="BookmarkManager"/>
|
||||
<component name="DebuggerManager">
|
||||
<line_breakpoints/>
|
||||
<exception_breakpoints>
|
||||
<breakpoint_any>
|
||||
<option name="NOTIFY_CAUGHT" value="true"/>
|
||||
<option name="NOTIFY_UNCAUGHT" value="true"/>
|
||||
<option name="ENABLED" value="false"/>
|
||||
<option name="SUSPEND_VM" value="true"/>
|
||||
<option name="COUNT_FILTER_ENABLED" value="false"/>
|
||||
<option name="COUNT_FILTER" value="0"/>
|
||||
<option name="CONDITION_ENABLED" value="false"/>
|
||||
<option name="CONDITION"/>
|
||||
<option name="LOG_ENABLED" value="false"/>
|
||||
<option name="LOG_EXPRESSION_ENABLED" value="false"/>
|
||||
<option name="LOG_MESSAGE"/>
|
||||
<option name="CLASS_FILTERS_ENABLED" value="false"/>
|
||||
<option name="INVERSE_CLASS_FILLTERS" value="false"/>
|
||||
<option name="SUSPEND_POLICY" value="SuspendAll"/>
|
||||
</breakpoint_any>
|
||||
</exception_breakpoints>
|
||||
<field_breakpoints/>
|
||||
<method_breakpoints/>
|
||||
</component>
|
||||
<component name="DebuggerSettings">
|
||||
<option name="TRACING_FILTERS_ENABLED" value="true"/>
|
||||
<option name="TOSTRING_CLASSES_ENABLED" value="false"/>
|
||||
<option name="VALUE_LOOKUP_DELAY" value="700"/>
|
||||
<option name="DEBUGGER_TRANSPORT" value="0"/>
|
||||
<option name="FORCE_CLASSIC_VM" value="true"/>
|
||||
<option name="HIDE_DEBUGGER_ON_PROCESS_TERMINATION" value="false"/>
|
||||
<option name="SKIP_SYNTHETIC_METHODS" value="true"/>
|
||||
<option name="SKIP_CONSTRUCTORS" value="false"/>
|
||||
<option name="STEP_THREAD_SUSPEND_POLICY" value="SuspendThread"/>
|
||||
<default_breakpoint_settings>
|
||||
<option name="NOTIFY_CAUGHT" value="true"/>
|
||||
<option name="NOTIFY_UNCAUGHT" value="true"/>
|
||||
<option name="WATCH_MODIFICATION" value="true"/>
|
||||
<option name="WATCH_ACCESS" value="true"/>
|
||||
<option name="WATCH_ENTRY" value="true"/>
|
||||
<option name="WATCH_EXIT" value="true"/>
|
||||
<option name="ENABLED" value="true"/>
|
||||
<option name="SUSPEND_VM" value="true"/>
|
||||
<option name="COUNT_FILTER_ENABLED" value="false"/>
|
||||
<option name="COUNT_FILTER" value="0"/>
|
||||
<option name="CONDITION_ENABLED" value="false"/>
|
||||
<option name="CONDITION"/>
|
||||
<option name="LOG_ENABLED" value="false"/>
|
||||
<option name="LOG_EXPRESSION_ENABLED" value="false"/>
|
||||
<option name="LOG_MESSAGE"/>
|
||||
<option name="CLASS_FILTERS_ENABLED" value="false"/>
|
||||
<option name="INVERSE_CLASS_FILLTERS" value="false"/>
|
||||
<option name="SUSPEND_POLICY" value="SuspendAll"/>
|
||||
</default_breakpoint_settings>
|
||||
<filter>
|
||||
<option name="PATTERN" value="com.sun.*"/>
|
||||
<option name="ENABLED" value="true"/>
|
||||
</filter>
|
||||
<filter>
|
||||
<option name="PATTERN" value="java.*"/>
|
||||
<option name="ENABLED" value="true"/>
|
||||
</filter>
|
||||
<filter>
|
||||
<option name="PATTERN" value="javax.*"/>
|
||||
<option name="ENABLED" value="true"/>
|
||||
</filter>
|
||||
<filter>
|
||||
<option name="PATTERN" value="org.omg.*"/>
|
||||
<option name="ENABLED" value="true"/>
|
||||
</filter>
|
||||
<filter>
|
||||
<option name="PATTERN" value="sun.*"/>
|
||||
<option name="ENABLED" value="true"/>
|
||||
</filter>
|
||||
<filter>
|
||||
<option name="PATTERN" value="junit.*"/>
|
||||
<option name="ENABLED" value="true"/>
|
||||
</filter>
|
||||
</component>
|
||||
<component name="CompilerWorkspaceConfiguration">
|
||||
<option name="COMPILE_IN_BACKGROUND" value="false"/>
|
||||
<option name="AUTO_SHOW_ERRORS_IN_EDITOR" value="true"/>
|
||||
</component>
|
||||
<component name="RunManager">
|
||||
<activeType name="Application"/>
|
||||
<configuration selected="false" default="true" type="Applet" factoryName="Applet">
|
||||
<module name=""/>
|
||||
<option name="MAIN_CLASS_NAME"/>
|
||||
<option name="HTML_FILE_NAME"/>
|
||||
<option name="HTML_USED" value="false"/>
|
||||
<option name="WIDTH" value="400"/>
|
||||
<option name="HEIGHT" value="300"/>
|
||||
<option name="POLICY_FILE" value="$APPLICATION_HOME_DIR$/bin/appletviewer.policy"/>
|
||||
<option name="VM_PARAMETERS"/>
|
||||
</configuration>
|
||||
<configuration selected="false" default="true" type="Remote" factoryName="Remote">
|
||||
<option name="USE_SOCKET_TRANSPORT" value="true"/>
|
||||
<option name="SERVER_MODE" value="false"/>
|
||||
<option name="SHMEM_ADDRESS" value="javadebug"/>
|
||||
<option name="HOST" value="localhost"/>
|
||||
<option name="PORT" value="5005"/>
|
||||
</configuration>
|
||||
<configuration selected="false" default="true" type="Application" factoryName="Application">
|
||||
<option name="MAIN_CLASS_NAME"/>
|
||||
<option name="VM_PARAMETERS"/>
|
||||
<option name="PROGRAM_PARAMETERS"/>
|
||||
<option name="WORKING_DIRECTORY" value="$PROJECT_DIR$"/>
|
||||
<module name=""/>
|
||||
</configuration>
|
||||
<configuration selected="false" default="true" type="JUnit" factoryName="JUnit">
|
||||
<module name=""/>
|
||||
<option name="PACKAGE_NAME"/>
|
||||
<option name="MAIN_CLASS_NAME"/>
|
||||
<option name="METHOD_NAME"/>
|
||||
<option name="TEST_OBJECT" value="class"/>
|
||||
<option name="VM_PARAMETERS"/>
|
||||
<option name="PARAMETERS"/>
|
||||
<option name="WORKING_DIRECTORY" value="$PROJECT_DIR$"/>
|
||||
<option name="ADDITIONAL_CLASS_PATH"/>
|
||||
<option name="TEST_SEARCH_SCOPE">
|
||||
<value defaultName="wholeProject"/>
|
||||
</option>
|
||||
</configuration>
|
||||
</component>
|
||||
<component name="VcsManagerConfiguration">
|
||||
<option name="ACTIVE_VCS_NAME" value="git"/>
|
||||
<option name="STATE" value="0"/>
|
||||
</component>
|
||||
<component name="VssConfiguration">
|
||||
<CheckoutOptions>
|
||||
<option name="COMMENT" value=""/>
|
||||
<option name="DO_NOT_GET_LATEST_VERSION" value="false"/>
|
||||
<option name="REPLACE_WRITABLE" value="false"/>
|
||||
<option name="RECURSIVE" value="false"/>
|
||||
</CheckoutOptions>
|
||||
<CheckinOptions>
|
||||
<option name="COMMENT" value=""/>
|
||||
<option name="KEEP_CHECKED_OUT" value="false"/>
|
||||
<option name="RECURSIVE" value="false"/>
|
||||
</CheckinOptions>
|
||||
<AddOptions>
|
||||
<option name="COMMENT" value=""/>
|
||||
<option name="STORE_ONLY_LATEST_VERSION" value="false"/>
|
||||
<option name="CHECK_OUT_IMMEDIATELY" value="false"/>
|
||||
<option name="FILE_TYPE" value="0"/>
|
||||
</AddOptions>
|
||||
<UndocheckoutOptions>
|
||||
<option name="MAKE_WRITABLE" value="false"/>
|
||||
<option name="REPLACE_LOCAL_COPY" value="0"/>
|
||||
<option name="RECURSIVE" value="false"/>
|
||||
</UndocheckoutOptions>
|
||||
<DiffOptions>
|
||||
<option name="IGNORE_WHITE_SPACE" value="false"/>
|
||||
<option name="IGNORE_CASE" value="false"/>
|
||||
</DiffOptions>
|
||||
<GetOptions>
|
||||
<option name="REPLACE_WRITABLE" value="0"/>
|
||||
<option name="MAKE_WRITABLE" value="false"/>
|
||||
<option name="RECURSIVE" value="false"/>
|
||||
</GetOptions>
|
||||
<option name="CLIENT_PATH" value=""/>
|
||||
<option name="SRCSAFEINI_PATH" value=""/>
|
||||
<option name="USER_NAME" value=""/>
|
||||
<option name="PWD" value=""/>
|
||||
<option name="SHOW_CHECKOUT_OPTIONS" value="true"/>
|
||||
<option name="SHOW_ADD_OPTIONS" value="true"/>
|
||||
<option name="SHOW_UNDOCHECKOUT_OPTIONS" value="true"/>
|
||||
<option name="SHOW_DIFF_OPTIONS" value="true"/>
|
||||
<option name="SHOW_GET_OPTIONS" value="true"/>
|
||||
<option name="USE_EXTERNAL_DIFF" value="false"/>
|
||||
<option name="EXTERNAL_DIFF_PATH" value=""/>
|
||||
<option name="REUSE_LAST_COMMENT" value="false"/>
|
||||
<option name="PUT_FOCUS_INTO_COMMENT" value="false"/>
|
||||
<option name="SHOW_CHECKIN_OPTIONS" value="true"/>
|
||||
<option name="LAST_COMMIT_MESSAGE" value=""/>
|
||||
<option name="CHECKIN_DIALOG_SPLITTER_PROPORTION" value="0.8"/>
|
||||
</component>
|
||||
<component name="CheckinPanelState"/>
|
||||
<component name="WebViewSettings">
|
||||
<webview flattenPackages="false" showMembers="false" autoscrollToSource="false"/>
|
||||
</component>
|
||||
<component name="EjbViewSettings">
|
||||
<EjbView showMembers="false" autoscrollToSource="false"/>
|
||||
</component>
|
||||
<component name="AppServerRunManager"/>
|
||||
<component name="StarteamConfiguration">
|
||||
<option name="SERVER" value=""/>
|
||||
<option name="PORT" value="49201"/>
|
||||
<option name="USER" value=""/>
|
||||
<option name="PASSWORD" value=""/>
|
||||
<option name="PROJECT" value=""/>
|
||||
<option name="VIEW" value=""/>
|
||||
<option name="ALTERNATIVE_WORKING_PATH" value=""/>
|
||||
<option name="PUT_FOCUS_INTO_COMMENT" value="false"/>
|
||||
<option name="SHOW_CHECKIN_OPTIONS" value="true"/>
|
||||
<option name="LAST_COMMIT_MESSAGE" value=""/>
|
||||
<option name="CHECKIN_DIALOG_SPLITTER_PROPORTION" value="0.8"/>
|
||||
</component>
|
||||
<component name="Cvs2Configuration">
|
||||
<option name="ON_FILE_ADDING" value="0"/>
|
||||
<option name="ON_FILE_REMOVING" value="0"/>
|
||||
<option name="PRUNE_EMPTY_DIRECTORIES" value="true"/>
|
||||
<option name="SHOW_UPDATE_OPTIONS" value="true"/>
|
||||
<option name="SHOW_ADD_OPTIONS" value="true"/>
|
||||
<option name="SHOW_REMOVE_OPTIONS" value="true"/>
|
||||
<option name="MERGING_MODE" value="0"/>
|
||||
<option name="MERGE_WITH_BRANCH1_NAME" value="HEAD"/>
|
||||
<option name="MERGE_WITH_BRANCH2_NAME" value="HEAD"/>
|
||||
<option name="RESET_STICKY" value="false"/>
|
||||
<option name="CREATE_NEW_DIRECTORIES" value="true"/>
|
||||
<option name="DEFAULT_TEXT_FILE_SUBSTITUTION" value="kv"/>
|
||||
<option name="PROCESS_UNKNOWN_FILES" value="false"/>
|
||||
<option name="PROCESS_DELETED_FILES" value="false"/>
|
||||
<option name="SHOW_EDIT_DIALOG" value="true"/>
|
||||
<option name="RESERVED_EDIT" value="false"/>
|
||||
<option name="FILE_HISTORY_SPLITTER_PROPORTION" value="0.6"/>
|
||||
<option name="SHOW_CHECKOUT_OPTIONS" value="true"/>
|
||||
<option name="CHECKOUT_DATE_OR_REVISION_SETTINGS">
|
||||
<value>
|
||||
<option name="BRANCH" value=""/>
|
||||
<option name="DATE" value=""/>
|
||||
<option name="USE_BRANCH" value="false"/>
|
||||
<option name="USE_DATE" value="false"/>
|
||||
</value>
|
||||
</option>
|
||||
<option name="UPDATE_DATE_OR_REVISION_SETTINGS">
|
||||
<value>
|
||||
<option name="BRANCH" value=""/>
|
||||
<option name="DATE" value=""/>
|
||||
<option name="USE_BRANCH" value="false"/>
|
||||
<option name="USE_DATE" value="false"/>
|
||||
</value>
|
||||
</option>
|
||||
<option name="SHOW_CHANGES_REVISION_SETTINGS">
|
||||
<value>
|
||||
<option name="BRANCH" value=""/>
|
||||
<option name="DATE" value=""/>
|
||||
<option name="USE_BRANCH" value="false"/>
|
||||
<option name="USE_DATE" value="false"/>
|
||||
</value>
|
||||
</option>
|
||||
<option name="SHOW_OUTPUT" value="false"/>
|
||||
<option name="SHOW_FILE_HISTORY_AS_TREE" value="false"/>
|
||||
<option name="UPDATE_GROUP_BY_PACKAGES" value="false"/>
|
||||
<option name="ADD_WATCH_INDEX" value="0"/>
|
||||
<option name="REMOVE_WATCH_INDEX" value="0"/>
|
||||
<option name="UPDATE_KEYWORD_SUBSTITUTION"/>
|
||||
<option name="MAKE_NEW_FILES_READONLY" value="false"/>
|
||||
<option name="SHOW_CORRUPTED_PROJECT_FILES" value="0"/>
|
||||
<option name="TAG_AFTER_FILE_COMMIT" value="false"/>
|
||||
<option name="TAG_AFTER_FILE_COMMIT_NAME" value=""/>
|
||||
<option name="TAG_AFTER_PROJECT_COMMIT" value="false"/>
|
||||
<option name="TAG_AFTER_PROJECT_COMMIT_NAME" value=""/>
|
||||
<option name="PUT_FOCUS_INTO_COMMENT" value="false"/>
|
||||
<option name="SHOW_CHECKIN_OPTIONS" value="true"/>
|
||||
<option name="FORCE_NON_EMPTY_COMMENT" value="false"/>
|
||||
<option name="LAST_COMMIT_MESSAGE" value=""/>
|
||||
<option name="SAVE_LAST_COMMIT_MESSAGE" value="true"/>
|
||||
<option name="CHECKIN_DIALOG_SPLITTER_PROPORTION" value="0.8"/>
|
||||
<option name="OPTIMIZE_IMPORTS_BEFORE_PROJECT_COMMIT" value="false"/>
|
||||
<option name="OPTIMIZE_IMPORTS_BEFORE_FILE_COMMIT" value="false"/>
|
||||
<option name="REFORMAT_BEFORE_PROJECT_COMMIT" value="false"/>
|
||||
<option name="REFORMAT_BEFORE_FILE_COMMIT" value="false"/>
|
||||
<option name="FILE_HISTORY_DIALOG_COMMENTS_SPLITTER_PROPORTION" value="0.8"/>
|
||||
<option name="FILE_HISTORY_DIALOG_SPLITTER_PROPORTION" value="0.5"/>
|
||||
</component>
|
||||
<component name="CvsTabbedWindow"/>
|
||||
<component name="SvnConfiguration">
|
||||
<option name="USER" value=""/>
|
||||
<option name="PASSWORD" value=""/>
|
||||
<option name="AUTO_ADD_FILES" value="0"/>
|
||||
<option name="AUTO_DEL_FILES" value="0"/>
|
||||
</component>
|
||||
<component name="PerforceConfiguration">
|
||||
<option name="PORT" value="magic:1666"/>
|
||||
<option name="USER" value=""/>
|
||||
<option name="PASSWORD" value=""/>
|
||||
<option name="CLIENT" value=""/>
|
||||
<option name="TRACE" value="false"/>
|
||||
<option name="PERFORCE_STATUS" value="true"/>
|
||||
<option name="CHANGELIST_OPTION" value="false"/>
|
||||
<option name="SYSTEMROOT" value=""/>
|
||||
<option name="P4_EXECUTABLE" value="p4"/>
|
||||
<option name="SHOW_BRANCH_HISTORY" value="false"/>
|
||||
<option name="GENERATE_COMMENT" value="false"/>
|
||||
<option name="SYNC_OPTION" value="Sync"/>
|
||||
<option name="PUT_FOCUS_INTO_COMMENT" value="false"/>
|
||||
<option name="SHOW_CHECKIN_OPTIONS" value="true"/>
|
||||
<option name="FORCE_NON_EMPTY_COMMENT" value="true"/>
|
||||
<option name="LAST_COMMIT_MESSAGE" value=""/>
|
||||
<option name="SAVE_LAST_COMMIT_MESSAGE" value="true"/>
|
||||
<option name="CHECKIN_DIALOG_SPLITTER_PROPORTION" value="0.8"/>
|
||||
<option name="OPTIMIZE_IMPORTS_BEFORE_PROJECT_COMMIT" value="false"/>
|
||||
<option name="OPTIMIZE_IMPORTS_BEFORE_FILE_COMMIT" value="false"/>
|
||||
<option name="REFORMAT_BEFORE_PROJECT_COMMIT" value="false"/>
|
||||
<option name="REFORMAT_BEFORE_FILE_COMMIT" value="false"/>
|
||||
<option name="FILE_HISTORY_DIALOG_COMMENTS_SPLITTER_PROPORTION" value="0.8"/>
|
||||
<option name="FILE_HISTORY_DIALOG_SPLITTER_PROPORTION" value="0.5"/>
|
||||
</component>
|
||||
</project>
|
Binary file not shown.
|
@ -10,7 +10,7 @@
|
|||
<groupId>eu.dnetlib</groupId>
|
||||
<artifactId>dnet-openaire-data-protos</artifactId>
|
||||
<packaging>jar</packaging>
|
||||
<version>3.9.4-CUSTOM</version>
|
||||
<version>3.9.4-proto250</version>
|
||||
|
||||
<properties>
|
||||
<!-- defined also in dnet-parent, here in case we need to override -->
|
||||
|
|
|
@ -64,6 +64,7 @@
|
|||
<groupId>com.fasterxml.jackson.core</groupId>
|
||||
<artifactId>jackson-databind</artifactId>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.codehaus.jackson</groupId>
|
||||
<artifactId>jackson-mapper-asl</artifactId>
|
||||
|
|
|
@ -1,19 +1,16 @@
|
|||
package eu.dnetlib.pace.model;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.lang.reflect.InvocationTargetException;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
import com.google.common.base.Splitter;
|
||||
import com.google.common.collect.Lists;
|
||||
import com.google.gson.Gson;
|
||||
import eu.dnetlib.pace.config.PaceConfig;
|
||||
import eu.dnetlib.pace.config.Type;
|
||||
import eu.dnetlib.pace.distance.*;
|
||||
import eu.dnetlib.pace.distance.algo.*;
|
||||
import eu.dnetlib.pace.util.PaceException;
|
||||
import eu.dnetlib.pace.distance.DistanceAlgo;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* The schema is composed of field definitions (FieldDef). Each field has a type, a name, and an associated distance algorithm.
|
||||
|
@ -60,6 +57,18 @@ public class FieldDef implements Serializable {
|
|||
return name;
|
||||
}
|
||||
|
||||
/**
 * Sets the name of this field definition.
 *
 * @param name value stored in {@code this.name}
 */
public void setName(String name) {
|
||||
this.name = name;
|
||||
}
|
||||
|
||||
/**
 * Sets the path of this field definition.
 *
 * @param path value stored in {@code this.path}
 */
public void setPath(String path) {
|
||||
this.path = path;
|
||||
}
|
||||
|
||||
/**
 * Sets the ignoreMissing flag of this field definition.
 *
 * @param ignoreMissing value stored in {@code this.ignoreMissing}
 */
public void setIgnoreMissing(boolean ignoreMissing) {
|
||||
this.ignoreMissing = ignoreMissing;
|
||||
}
|
||||
|
||||
public String getPath() {
|
||||
return path;
|
||||
}
|
||||
|
|
|
@ -32,7 +32,7 @@ public class ConfigTest extends AbstractPaceTest {
|
|||
@Test
|
||||
public void dedupConfigTest() {
|
||||
|
||||
DedupConfig load = DedupConfig.load(readFromClasspath("result.pace.conf.json"));
|
||||
DedupConfig load = DedupConfig.load(readFromClasspath("org.curr.conf"));
|
||||
|
||||
System.out.println(load.toString());
|
||||
}
|
||||
|
|
|
@ -0,0 +1,36 @@
|
|||
{
|
||||
"wf" : {
|
||||
"threshold" : "0.9",
|
||||
"dedupRun" : "001",
|
||||
"entityType" : "organization",
|
||||
"orderField" : "legalname",
|
||||
"queueMaxSize" : "2000",
|
||||
"groupMaxSize" : "10",
|
||||
"slidingWindowSize" : "200",
|
||||
"rootBuilder" : [ "organization", "projectOrganization_participation_isParticipant", "datasourceOrganization_provision_isProvidedBy" ],
|
||||
"includeChildren" : "true"
|
||||
},
|
||||
"pace" : {
|
||||
"clustering" : [
|
||||
{ "name" : "sortedngrampairs", "fields" : [ "legalname" ], "params" : { "max" : 2, "ngramLen" : "3"} },
|
||||
{ "name" : "suffixprefix", "fields" : [ "legalname" ], "params" : { "max" : 1, "len" : "3" } },
|
||||
{ "name" : "urlclustering", "fields" : [ "websiteurl" ], "params" : { } }
|
||||
],
|
||||
"strictConditions" : [
|
||||
{ "name" : "exactMatch", "fields" : [ "gridid" ] }
|
||||
],
|
||||
"conditions" : [
|
||||
{ "name" : "exactMatch", "fields" : [ "country" ] },
|
||||
{ "name" : "DomainExactMatch", "fields" : [ "websiteurl" ] }
|
||||
],
|
||||
"model" : [
|
||||
{ "name" : "legalname", "algo" : "Null", "type" : "String", "weight" : "0", "ignoreMissing" : "false", "path" : "organization/metadata/legalname/value" },
|
||||
{ "name" : "country", "algo" : "Null", "type" : "String", "weight" : "0", "ignoreMissing" : "true", "path" : "organization/metadata/country/classid" },
|
||||
{ "name" : "legalshortname", "algo" : "JaroWinklerNormalizedName", "type" : "String", "weight" : "0.1", "ignoreMissing" : "true", "path" : "organization/metadata/legalshortname/value" },
|
||||
{ "name" : "legalname", "algo" : "JaroWinklerNormalizedName", "type" : "String", "weight" : "0.9", "ignoreMissing" : "false", "path" : "organization/metadata/legalname/value", "params" : {"windowSize" : 4, "threshold" : 0.5} },
|
||||
{ "name" : "websiteurl", "algo" : "Null", "type" : "URL", "weight" : "0", "ignoreMissing" : "true", "path" : "organization/metadata/websiteurl/value", "params" : { "host" : 0.5, "path" : 0.5 } },
|
||||
{ "name" : "gridid", "algo" : "Null", "type" : "String", "weight" : "0.0", "ignoreMissing" : "true", "path" : "pid[qualifier#classid = {grid}]/value" }
|
||||
],
|
||||
"blacklists" : { }
|
||||
}
|
||||
}
|
160
pom.xml
160
pom.xml
|
@ -35,6 +35,8 @@
|
|||
<url>https://issue.openaire.research-infrastructures.eu/projects/openaire</url>
|
||||
</issueManagement>
|
||||
|
||||
|
||||
|
||||
<distributionManagement>
|
||||
<repository>
|
||||
<id>dnet45-releases</id>
|
||||
|
@ -70,6 +72,18 @@
|
|||
</snapshots>
|
||||
</repository>
|
||||
|
||||
<repository>
|
||||
<id>cloudera</id>
|
||||
<name>Cloudera Repository</name>
|
||||
<url>https://repository.cloudera.com/artifactory/cloudera-repos</url>
|
||||
<releases>
|
||||
<enabled>true</enabled>
|
||||
</releases>
|
||||
<snapshots>
|
||||
<enabled>false</enabled>
|
||||
</snapshots>
|
||||
</repository>
|
||||
|
||||
</repositories>
|
||||
|
||||
<build>
|
||||
|
@ -77,22 +91,125 @@
|
|||
<outputDirectory>target/classes</outputDirectory>
|
||||
<finalName>${project.artifactId}-${project.version}</finalName>
|
||||
<testOutputDirectory>target/test-classes</testOutputDirectory>
|
||||
|
||||
|
||||
|
||||
|
||||
<!--*************************************************-->
|
||||
|
||||
<pluginManagement>
|
||||
<plugins>
|
||||
|
||||
<plugin>
|
||||
<groupId>org.apache.maven.plugins</groupId>
|
||||
<artifactId>maven-compiler-plugin</artifactId>
|
||||
<version>3.6.0</version>
|
||||
<configuration>
|
||||
<source>1.8</source>
|
||||
<target>1.8</target>
|
||||
<encoding>${project.build.sourceEncoding}</encoding>
|
||||
</configuration>
|
||||
</plugin>
|
||||
|
||||
<plugin>
|
||||
<groupId>org.apache.maven.plugins</groupId>
|
||||
<artifactId>maven-jar-plugin</artifactId>
|
||||
<version>3.0.2</version>
|
||||
</plugin>
|
||||
|
||||
<plugin>
|
||||
<groupId>org.apache.maven.plugins</groupId>
|
||||
<artifactId>maven-source-plugin</artifactId>
|
||||
<version>3.0.1</version>
|
||||
<executions>
|
||||
<execution>
|
||||
<id>attach-sources</id>
|
||||
<phase>verify</phase>
|
||||
<goals>
|
||||
<goal>jar-no-fork</goal>
|
||||
</goals>
|
||||
</execution>
|
||||
</executions>
|
||||
</plugin>
|
||||
|
||||
<plugin>
|
||||
<groupId>org.apache.maven.plugins</groupId>
|
||||
<artifactId>maven-surefire-plugin</artifactId>
|
||||
<version>2.19.1</version>
|
||||
<configuration>
|
||||
<redirectTestOutputToFile>true</redirectTestOutputToFile>
|
||||
</configuration>
|
||||
</plugin>
|
||||
|
||||
<plugin>
|
||||
<groupId>org.apache.maven.plugins</groupId>
|
||||
<artifactId>maven-javadoc-plugin</artifactId>
|
||||
<version>2.10.4</version>
|
||||
<configuration>
|
||||
<detectLinks>true</detectLinks>
|
||||
</configuration>
|
||||
</plugin>
|
||||
|
||||
<plugin>
|
||||
<groupId>org.apache.maven.plugins</groupId>
|
||||
<artifactId>maven-dependency-plugin</artifactId>
|
||||
<version>3.0.0</version>
|
||||
</plugin>
|
||||
|
||||
<plugin>
|
||||
<groupId>org.apache.maven.plugins</groupId>
|
||||
<artifactId>maven-failsafe-plugin</artifactId>
|
||||
<version>2.13</version>
|
||||
<executions>
|
||||
<execution>
|
||||
<id>integration-test</id>
|
||||
<goals>
|
||||
<goal>integration-test</goal>
|
||||
</goals>
|
||||
</execution>
|
||||
<execution>
|
||||
<id>verify</id>
|
||||
<goals>
|
||||
<goal>verify</goal>
|
||||
</goals>
|
||||
</execution>
|
||||
</executions>
|
||||
</plugin>
|
||||
|
||||
</plugins>
|
||||
</pluginManagement>
|
||||
|
||||
<plugins>
|
||||
|
||||
<plugin>
|
||||
<groupId>org.apache.maven.plugins</groupId>
|
||||
<artifactId>maven-compiler-plugin</artifactId>
|
||||
<version>3.6.0</version>
|
||||
<configuration>
|
||||
<source>1.8</source>
|
||||
<target>1.8</target>
|
||||
<encoding>${project.build.sourceEncoding}</encoding>
|
||||
</configuration>
|
||||
</plugin>
|
||||
<plugin>
|
||||
<groupId>org.apache.maven.plugins</groupId>
|
||||
<artifactId>maven-dependency-plugin</artifactId>
|
||||
<version>3.0.0</version>
|
||||
<artifactId>maven-release-plugin</artifactId>
|
||||
<version>2.5.3</version>
|
||||
</plugin>
|
||||
|
||||
</plugins>
|
||||
|
||||
<!--***********************************************************************-->
|
||||
|
||||
|
||||
|
||||
|
||||
<!--<plugins>-->
|
||||
<!--<plugin>-->
|
||||
<!--<groupId>org.apache.maven.plugins</groupId>-->
|
||||
<!--<artifactId>maven-compiler-plugin</artifactId>-->
|
||||
<!--<version>3.6.0</version>-->
|
||||
<!--<configuration>-->
|
||||
<!--<source>1.8</source>-->
|
||||
<!--<target>1.8</target>-->
|
||||
<!--<encoding>${project.build.sourceEncoding}</encoding>-->
|
||||
<!--</configuration>-->
|
||||
<!--</plugin>-->
|
||||
<!--<plugin>-->
|
||||
<!--<groupId>org.apache.maven.plugins</groupId>-->
|
||||
<!--<artifactId>maven-dependency-plugin</artifactId>-->
|
||||
<!--<version>3.0.0</version>-->
|
||||
<!--</plugin>-->
|
||||
<!--<plugin>-->
|
||||
<!--<groupId>org.apache.maven.plugins</groupId>-->
|
||||
<!--<artifactId>maven-failsafe-plugin</artifactId>-->
|
||||
|
@ -113,7 +230,7 @@
|
|||
<!--</executions>-->
|
||||
<!--</plugin>-->
|
||||
|
||||
</plugins>
|
||||
<!--</plugins>-->
|
||||
|
||||
</build>
|
||||
|
||||
|
@ -137,7 +254,7 @@
|
|||
<dependency>
|
||||
<groupId>eu.dnetlib</groupId>
|
||||
<artifactId>dnet-openaire-data-protos</artifactId>
|
||||
<version>3.9.4-CUSTOM</version>
|
||||
<version>3.9.3-proto250</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>eu.dnetlib</groupId>
|
||||
|
@ -148,8 +265,9 @@
|
|||
<dependency>
|
||||
<groupId>com.fasterxml.jackson.core</groupId>
|
||||
<artifactId>jackson-databind</artifactId>
|
||||
<version>2.6.6</version>
|
||||
<version>${jackson.version}</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.codehaus.jackson</groupId>
|
||||
<artifactId>jackson-mapper-asl</artifactId>
|
||||
|
@ -196,16 +314,19 @@
|
|||
<groupId>org.apache.spark</groupId>
|
||||
<artifactId>spark-core_2.11</artifactId>
|
||||
<version>${spark.version}</version>
|
||||
<scope>compile</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.spark</groupId>
|
||||
<artifactId>spark-graphx_2.11</artifactId>
|
||||
<version>${spark.version}</version>
|
||||
<scope>compile</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.spark</groupId>
|
||||
<artifactId>spark-sql_2.11</artifactId>
|
||||
<version>${spark.version}</version>
|
||||
<scope>compile</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>junit</groupId>
|
||||
|
@ -219,6 +340,12 @@
|
|||
<version>0.9.10</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.scala-lang</groupId>
|
||||
<artifactId>scala-library</artifactId>
|
||||
<version>${scala.version}</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.apache.oozie</groupId>
|
||||
<artifactId>oozie-client</artifactId>
|
||||
|
@ -227,6 +354,7 @@
|
|||
|
||||
</dependencies>
|
||||
</dependencyManagement>
|
||||
|
||||
<properties>
|
||||
|
||||
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
|
||||
|
@ -236,6 +364,7 @@
|
|||
<google.guava.version>15.0</google.guava.version>
|
||||
|
||||
<spark.version>2.2.0</spark.version>
|
||||
<jackson.version>2.6.6</jackson.version>
|
||||
|
||||
<commons.lang.version>2.6</commons.lang.version>
|
||||
<commons.io.version>2.4</commons.io.version>
|
||||
|
@ -243,6 +372,7 @@
|
|||
<commons.logging.version>1.1.3</commons.logging.version>
|
||||
|
||||
<junit.version>4.9</junit.version>
|
||||
<scala.version>2.11.8</scala.version>
|
||||
|
||||
<maven.javadoc.failOnError>false</maven.javadoc.failOnError>
|
||||
</properties>
|
||||
|
|
Loading…
Reference in New Issue