forked from D-Net/dnet-hadoop
implementation of the integration test, addition of document blocks to group entities after clustering
This commit is contained in:
parent
f7a3bdf3f8
commit
e8db8f2abb
252
dependencies.txt
252
dependencies.txt
|
@ -1,252 +0,0 @@
|
||||||
[INFO] Scanning for projects...
|
|
||||||
[INFO] ------------------------------------------------------------------------
|
|
||||||
[INFO] Reactor Build Order:
|
|
||||||
[INFO]
|
|
||||||
[INFO] dnet-dedup [pom]
|
|
||||||
[INFO] dnet-pace-core [jar]
|
|
||||||
[INFO] dnet-dedup-test [jar]
|
|
||||||
[INFO]
|
|
||||||
[INFO] -----------------------< eu.dnetlib:dnet-dedup >------------------------
|
|
||||||
[INFO] Building dnet-dedup 3.0.3-SNAPSHOT [1/3]
|
|
||||||
[INFO] --------------------------------[ pom ]---------------------------------
|
|
||||||
[INFO]
|
|
||||||
[INFO] --- maven-dependency-plugin:3.0.0:tree (default-cli) @ dnet-dedup ---
|
|
||||||
[INFO] eu.dnetlib:dnet-dedup:pom:3.0.3-SNAPSHOT
|
|
||||||
[INFO]
|
|
||||||
[INFO] ---------------------< eu.dnetlib:dnet-pace-core >----------------------
|
|
||||||
[INFO] Building dnet-pace-core 3.0.3-SNAPSHOT [2/3]
|
|
||||||
[INFO] --------------------------------[ jar ]---------------------------------
|
|
||||||
[INFO]
|
|
||||||
[INFO] --- maven-dependency-plugin:3.0.0:tree (default-cli) @ dnet-pace-core ---
|
|
||||||
[INFO] eu.dnetlib:dnet-pace-core:jar:3.0.3-SNAPSHOT
|
|
||||||
[INFO] +- edu.cmu:secondstring:jar:1.0.0:compile
|
|
||||||
[INFO] +- com.google.guava:guava:jar:15.0:compile
|
|
||||||
[INFO] +- com.google.code.gson:gson:jar:2.2.2:compile
|
|
||||||
[INFO] +- commons-lang:commons-lang:jar:2.6:compile
|
|
||||||
[INFO] +- commons-io:commons-io:jar:2.4:compile
|
|
||||||
[INFO] +- commons-collections:commons-collections:jar:3.2.1:compile
|
|
||||||
[INFO] +- com.googlecode.protobuf-java-format:protobuf-java-format:jar:1.2:compile
|
|
||||||
[INFO] +- org.antlr:stringtemplate:jar:3.2:compile
|
|
||||||
[INFO] | \- org.antlr:antlr:jar:2.7.7:compile
|
|
||||||
[INFO] +- commons-logging:commons-logging:jar:1.1.3:compile
|
|
||||||
[INFO] +- junit:junit:jar:4.9:test
|
|
||||||
[INFO] | \- org.hamcrest:hamcrest-core:jar:1.1:test
|
|
||||||
[INFO] +- org.reflections:reflections:jar:0.9.10:compile
|
|
||||||
[INFO] | +- org.javassist:javassist:jar:3.19.0-GA:compile
|
|
||||||
[INFO] | \- com.google.code.findbugs:annotations:jar:2.0.1:compile
|
|
||||||
[INFO] +- com.fasterxml.jackson.core:jackson-databind:jar:2.6.6:compile
|
|
||||||
[INFO] | +- com.fasterxml.jackson.core:jackson-annotations:jar:2.6.0:compile
|
|
||||||
[INFO] | \- com.fasterxml.jackson.core:jackson-core:jar:2.6.6:compile
|
|
||||||
[INFO] +- org.codehaus.jackson:jackson-mapper-asl:jar:1.9.13:compile
|
|
||||||
[INFO] | \- org.codehaus.jackson:jackson-core-asl:jar:1.9.13:compile
|
|
||||||
[INFO] \- org.apache.commons:commons-math3:jar:3.6.1:compile
|
|
||||||
[INFO]
|
|
||||||
[INFO] ---------------------< eu.dnetlib:dnet-dedup-test >---------------------
|
|
||||||
[INFO] Building dnet-dedup-test 3.0.3-SNAPSHOT [3/3]
|
|
||||||
[INFO] --------------------------------[ jar ]---------------------------------
|
|
||||||
[INFO]
|
|
||||||
[INFO] --- maven-dependency-plugin:3.0.0:tree (default-cli) @ dnet-dedup-test ---
|
|
||||||
[INFO] eu.dnetlib:dnet-dedup-test:jar:3.0.3-SNAPSHOT
|
|
||||||
[INFO] +- eu.dnetlib:dnet-pace-core:jar:3.0.3-SNAPSHOT:compile
|
|
||||||
[INFO] | +- edu.cmu:secondstring:jar:1.0.0:compile
|
|
||||||
[INFO] | +- com.google.guava:guava:jar:15.0:compile
|
|
||||||
[INFO] | +- com.google.code.gson:gson:jar:2.2.2:compile
|
|
||||||
[INFO] | +- commons-lang:commons-lang:jar:2.6:compile
|
|
||||||
[INFO] | +- commons-io:commons-io:jar:2.4:compile
|
|
||||||
[INFO] | +- commons-collections:commons-collections:jar:3.2.1:compile
|
|
||||||
[INFO] | +- com.googlecode.protobuf-java-format:protobuf-java-format:jar:1.2:compile
|
|
||||||
[INFO] | +- org.antlr:stringtemplate:jar:3.2:compile
|
|
||||||
[INFO] | | \- org.antlr:antlr:jar:2.7.7:compile
|
|
||||||
[INFO] | +- commons-logging:commons-logging:jar:1.1.3:compile
|
|
||||||
[INFO] | +- org.reflections:reflections:jar:0.9.10:compile
|
|
||||||
[INFO] | | +- org.javassist:javassist:jar:3.19.0-GA:compile
|
|
||||||
[INFO] | | \- com.google.code.findbugs:annotations:jar:2.0.1:compile
|
|
||||||
[INFO] | +- com.fasterxml.jackson.core:jackson-databind:jar:2.6.6:compile
|
|
||||||
[INFO] | | +- com.fasterxml.jackson.core:jackson-annotations:jar:2.6.0:compile
|
|
||||||
[INFO] | | \- com.fasterxml.jackson.core:jackson-core:jar:2.6.6:compile
|
|
||||||
[INFO] | +- org.codehaus.jackson:jackson-mapper-asl:jar:1.9.13:compile
|
|
||||||
[INFO] | | \- org.codehaus.jackson:jackson-core-asl:jar:1.9.13:compile
|
|
||||||
[INFO] | \- org.apache.commons:commons-math3:jar:3.6.1:compile
|
|
||||||
[INFO] +- eu.dnetlib:dnet-openaire-data-protos:jar:3.9.3-proto250:compile
|
|
||||||
[INFO] | +- com.google.protobuf:protobuf-java:jar:2.5.0:compile
|
|
||||||
[INFO] | \- log4j:log4j:jar:1.2.17:compile (version selected from constraint [1.2.17,1.2.17])
|
|
||||||
[INFO] +- org.apache.spark:spark-core_2.11:jar:2.2.0:provided
|
|
||||||
[INFO] | +- org.apache.avro:avro:jar:1.7.7:provided
|
|
||||||
[INFO] | | +- com.thoughtworks.paranamer:paranamer:jar:2.3:provided
|
|
||||||
[INFO] | | \- org.apache.commons:commons-compress:jar:1.4.1:provided
|
|
||||||
[INFO] | | \- org.tukaani:xz:jar:1.0:provided
|
|
||||||
[INFO] | +- org.apache.avro:avro-mapred:jar:hadoop2:1.7.7:provided
|
|
||||||
[INFO] | | +- org.apache.avro:avro-ipc:jar:1.7.7:provided
|
|
||||||
[INFO] | | \- org.apache.avro:avro-ipc:jar:tests:1.7.7:provided
|
|
||||||
[INFO] | +- com.twitter:chill_2.11:jar:0.8.0:provided
|
|
||||||
[INFO] | | \- com.esotericsoftware:kryo-shaded:jar:3.0.3:provided
|
|
||||||
[INFO] | | +- com.esotericsoftware:minlog:jar:1.3.0:provided
|
|
||||||
[INFO] | | \- org.objenesis:objenesis:jar:2.1:provided
|
|
||||||
[INFO] | +- com.twitter:chill-java:jar:0.8.0:provided
|
|
||||||
[INFO] | +- org.apache.xbean:xbean-asm5-shaded:jar:4.4:provided
|
|
||||||
[INFO] | +- org.apache.hadoop:hadoop-client:jar:2.6.5:provided
|
|
||||||
[INFO] | | +- org.apache.hadoop:hadoop-common:jar:2.6.5:provided
|
|
||||||
[INFO] | | | +- commons-cli:commons-cli:jar:1.2:provided
|
|
||||||
[INFO] | | | +- xmlenc:xmlenc:jar:0.52:provided
|
|
||||||
[INFO] | | | +- commons-httpclient:commons-httpclient:jar:3.1:provided
|
|
||||||
[INFO] | | | +- commons-configuration:commons-configuration:jar:1.6:provided
|
|
||||||
[INFO] | | | | +- commons-digester:commons-digester:jar:1.8:provided
|
|
||||||
[INFO] | | | | | \- commons-beanutils:commons-beanutils:jar:1.7.0:provided
|
|
||||||
[INFO] | | | | \- commons-beanutils:commons-beanutils-core:jar:1.8.0:provided
|
|
||||||
[INFO] | | | +- org.apache.hadoop:hadoop-auth:jar:2.6.5:provided
|
|
||||||
[INFO] | | | | \- org.apache.directory.server:apacheds-kerberos-codec:jar:2.0.0-M15:provided
|
|
||||||
[INFO] | | | | +- org.apache.directory.server:apacheds-i18n:jar:2.0.0-M15:provided
|
|
||||||
[INFO] | | | | +- org.apache.directory.api:api-asn1-api:jar:1.0.0-M20:provided
|
|
||||||
[INFO] | | | | \- org.apache.directory.api:api-util:jar:1.0.0-M20:provided
|
|
||||||
[INFO] | | | +- org.apache.curator:curator-client:jar:2.6.0:provided
|
|
||||||
[INFO] | | | \- org.htrace:htrace-core:jar:3.0.4:provided
|
|
||||||
[INFO] | | +- org.apache.hadoop:hadoop-hdfs:jar:2.6.5:provided
|
|
||||||
[INFO] | | | +- org.mortbay.jetty:jetty-util:jar:6.1.26:provided
|
|
||||||
[INFO] | | | \- xerces:xercesImpl:jar:2.9.1:provided
|
|
||||||
[INFO] | | | \- xml-apis:xml-apis:jar:1.3.04:provided
|
|
||||||
[INFO] | | +- org.apache.hadoop:hadoop-mapreduce-client-app:jar:2.6.5:provided
|
|
||||||
[INFO] | | | +- org.apache.hadoop:hadoop-mapreduce-client-common:jar:2.6.5:provided
|
|
||||||
[INFO] | | | | +- org.apache.hadoop:hadoop-yarn-client:jar:2.6.5:provided
|
|
||||||
[INFO] | | | | \- org.apache.hadoop:hadoop-yarn-server-common:jar:2.6.5:provided
|
|
||||||
[INFO] | | | \- org.apache.hadoop:hadoop-mapreduce-client-shuffle:jar:2.6.5:provided
|
|
||||||
[INFO] | | +- org.apache.hadoop:hadoop-yarn-api:jar:2.6.5:provided
|
|
||||||
[INFO] | | +- org.apache.hadoop:hadoop-mapreduce-client-core:jar:2.6.5:provided
|
|
||||||
[INFO] | | | \- org.apache.hadoop:hadoop-yarn-common:jar:2.6.5:provided
|
|
||||||
[INFO] | | | +- javax.xml.bind:jaxb-api:jar:2.2.2:provided
|
|
||||||
[INFO] | | | | \- javax.xml.stream:stax-api:jar:1.0-2:provided
|
|
||||||
[INFO] | | | +- org.codehaus.jackson:jackson-jaxrs:jar:1.9.13:provided
|
|
||||||
[INFO] | | | \- org.codehaus.jackson:jackson-xc:jar:1.9.13:provided
|
|
||||||
[INFO] | | +- org.apache.hadoop:hadoop-mapreduce-client-jobclient:jar:2.6.5:provided
|
|
||||||
[INFO] | | \- org.apache.hadoop:hadoop-annotations:jar:2.6.5:provided
|
|
||||||
[INFO] | +- org.apache.spark:spark-launcher_2.11:jar:2.2.0:provided
|
|
||||||
[INFO] | +- org.apache.spark:spark-network-common_2.11:jar:2.2.0:provided
|
|
||||||
[INFO] | | \- org.fusesource.leveldbjni:leveldbjni-all:jar:1.8:provided
|
|
||||||
[INFO] | +- org.apache.spark:spark-network-shuffle_2.11:jar:2.2.0:provided
|
|
||||||
[INFO] | +- org.apache.spark:spark-unsafe_2.11:jar:2.2.0:provided
|
|
||||||
[INFO] | +- net.java.dev.jets3t:jets3t:jar:0.9.3:provided
|
|
||||||
[INFO] | | +- org.apache.httpcomponents:httpcore:jar:4.3.3:provided
|
|
||||||
[INFO] | | +- org.apache.httpcomponents:httpclient:jar:4.3.6:provided
|
|
||||||
[INFO] | | +- javax.activation:activation:jar:1.1.1:provided
|
|
||||||
[INFO] | | +- mx4j:mx4j:jar:3.0.2:provided
|
|
||||||
[INFO] | | +- javax.mail:mail:jar:1.4.7:provided
|
|
||||||
[INFO] | | +- org.bouncycastle:bcprov-jdk15on:jar:1.51:provided
|
|
||||||
[INFO] | | \- com.jamesmurty.utils:java-xmlbuilder:jar:1.0:provided
|
|
||||||
[INFO] | | \- net.iharder:base64:jar:2.3.8:provided
|
|
||||||
[INFO] | +- org.apache.curator:curator-recipes:jar:2.6.0:provided
|
|
||||||
[INFO] | | +- org.apache.curator:curator-framework:jar:2.6.0:provided
|
|
||||||
[INFO] | | \- org.apache.zookeeper:zookeeper:jar:3.4.6:provided
|
|
||||||
[INFO] | +- javax.servlet:javax.servlet-api:jar:3.1.0:provided
|
|
||||||
[INFO] | +- org.apache.commons:commons-lang3:jar:3.5:provided
|
|
||||||
[INFO] | +- com.google.code.findbugs:jsr305:jar:1.3.9:provided
|
|
||||||
[INFO] | +- org.slf4j:slf4j-api:jar:1.7.16:provided
|
|
||||||
[INFO] | +- org.slf4j:jul-to-slf4j:jar:1.7.16:provided
|
|
||||||
[INFO] | +- org.slf4j:jcl-over-slf4j:jar:1.7.16:provided
|
|
||||||
[INFO] | +- org.slf4j:slf4j-log4j12:jar:1.7.16:provided
|
|
||||||
[INFO] | +- com.ning:compress-lzf:jar:1.0.3:provided
|
|
||||||
[INFO] | +- org.xerial.snappy:snappy-java:jar:1.1.2.6:provided
|
|
||||||
[INFO] | +- net.jpountz.lz4:lz4:jar:1.3.0:provided
|
|
||||||
[INFO] | +- org.roaringbitmap:RoaringBitmap:jar:0.5.11:provided
|
|
||||||
[INFO] | +- commons-net:commons-net:jar:2.2:provided
|
|
||||||
[INFO] | +- org.scala-lang:scala-library:jar:2.11.8:provided
|
|
||||||
[INFO] | +- org.json4s:json4s-jackson_2.11:jar:3.2.11:provided
|
|
||||||
[INFO] | | \- org.json4s:json4s-core_2.11:jar:3.2.11:provided
|
|
||||||
[INFO] | | +- org.json4s:json4s-ast_2.11:jar:3.2.11:provided
|
|
||||||
[INFO] | | \- org.scala-lang:scalap:jar:2.11.0:provided
|
|
||||||
[INFO] | | \- org.scala-lang:scala-compiler:jar:2.11.0:provided
|
|
||||||
[INFO] | | +- org.scala-lang.modules:scala-xml_2.11:jar:1.0.1:provided
|
|
||||||
[INFO] | | \- org.scala-lang.modules:scala-parser-combinators_2.11:jar:1.0.1:provided
|
|
||||||
[INFO] | +- org.glassfish.jersey.core:jersey-client:jar:2.22.2:provided
|
|
||||||
[INFO] | | +- javax.ws.rs:javax.ws.rs-api:jar:2.0.1:provided
|
|
||||||
[INFO] | | +- org.glassfish.hk2:hk2-api:jar:2.4.0-b34:provided
|
|
||||||
[INFO] | | | +- org.glassfish.hk2:hk2-utils:jar:2.4.0-b34:provided
|
|
||||||
[INFO] | | | \- org.glassfish.hk2.external:aopalliance-repackaged:jar:2.4.0-b34:provided
|
|
||||||
[INFO] | | +- org.glassfish.hk2.external:javax.inject:jar:2.4.0-b34:provided
|
|
||||||
[INFO] | | \- org.glassfish.hk2:hk2-locator:jar:2.4.0-b34:provided
|
|
||||||
[INFO] | +- org.glassfish.jersey.core:jersey-common:jar:2.22.2:provided
|
|
||||||
[INFO] | | +- javax.annotation:javax.annotation-api:jar:1.2:provided
|
|
||||||
[INFO] | | +- org.glassfish.jersey.bundles.repackaged:jersey-guava:jar:2.22.2:provided
|
|
||||||
[INFO] | | \- org.glassfish.hk2:osgi-resource-locator:jar:1.0.1:provided
|
|
||||||
[INFO] | +- org.glassfish.jersey.core:jersey-server:jar:2.22.2:provided
|
|
||||||
[INFO] | | +- org.glassfish.jersey.media:jersey-media-jaxb:jar:2.22.2:provided
|
|
||||||
[INFO] | | \- javax.validation:validation-api:jar:1.1.0.Final:provided
|
|
||||||
[INFO] | +- org.glassfish.jersey.containers:jersey-container-servlet:jar:2.22.2:provided
|
|
||||||
[INFO] | +- org.glassfish.jersey.containers:jersey-container-servlet-core:jar:2.22.2:provided
|
|
||||||
[INFO] | +- io.netty:netty-all:jar:4.0.43.Final:provided
|
|
||||||
[INFO] | +- io.netty:netty:jar:3.9.9.Final:provided
|
|
||||||
[INFO] | +- com.clearspring.analytics:stream:jar:2.7.0:provided
|
|
||||||
[INFO] | +- io.dropwizard.metrics:metrics-core:jar:3.1.2:provided
|
|
||||||
[INFO] | +- io.dropwizard.metrics:metrics-jvm:jar:3.1.2:provided
|
|
||||||
[INFO] | +- io.dropwizard.metrics:metrics-json:jar:3.1.2:provided
|
|
||||||
[INFO] | +- io.dropwizard.metrics:metrics-graphite:jar:3.1.2:provided
|
|
||||||
[INFO] | +- com.fasterxml.jackson.module:jackson-module-scala_2.11:jar:2.6.5:provided
|
|
||||||
[INFO] | | +- org.scala-lang:scala-reflect:jar:2.11.7:provided
|
|
||||||
[INFO] | | \- com.fasterxml.jackson.module:jackson-module-paranamer:jar:2.6.5:provided
|
|
||||||
[INFO] | +- org.apache.ivy:ivy:jar:2.4.0:provided
|
|
||||||
[INFO] | +- oro:oro:jar:2.0.8:provided
|
|
||||||
[INFO] | +- net.razorvine:pyrolite:jar:4.13:provided
|
|
||||||
[INFO] | +- net.sf.py4j:py4j:jar:0.10.4:provided
|
|
||||||
[INFO] | +- org.apache.spark:spark-tags_2.11:jar:2.2.0:provided
|
|
||||||
[INFO] | +- org.apache.commons:commons-crypto:jar:1.0.0:provided
|
|
||||||
[INFO] | \- org.spark-project.spark:unused:jar:1.0.0:provided
|
|
||||||
[INFO] +- org.apache.spark:spark-graphx_2.11:jar:2.2.0:provided
|
|
||||||
[INFO] | +- org.apache.spark:spark-mllib-local_2.11:jar:2.2.0:provided
|
|
||||||
[INFO] | | \- org.scalanlp:breeze_2.11:jar:0.13.1:provided
|
|
||||||
[INFO] | | +- org.scalanlp:breeze-macros_2.11:jar:0.13.1:provided
|
|
||||||
[INFO] | | +- net.sf.opencsv:opencsv:jar:2.3:provided
|
|
||||||
[INFO] | | +- com.github.rwl:jtransforms:jar:2.4.0:provided
|
|
||||||
[INFO] | | +- org.spire-math:spire_2.11:jar:0.13.0:provided
|
|
||||||
[INFO] | | | +- org.spire-math:spire-macros_2.11:jar:0.13.0:provided
|
|
||||||
[INFO] | | | \- org.typelevel:machinist_2.11:jar:0.6.1:provided
|
|
||||||
[INFO] | | \- com.chuusai:shapeless_2.11:jar:2.3.2:provided
|
|
||||||
[INFO] | | \- org.typelevel:macro-compat_2.11:jar:1.1.1:provided
|
|
||||||
[INFO] | +- com.github.fommil.netlib:core:jar:1.1.2:provided
|
|
||||||
[INFO] | \- net.sourceforge.f2j:arpack_combined_all:jar:0.1:provided
|
|
||||||
[INFO] +- org.apache.spark:spark-sql_2.11:jar:2.2.0:provided
|
|
||||||
[INFO] | +- com.univocity:univocity-parsers:jar:2.2.1:provided
|
|
||||||
[INFO] | +- org.apache.spark:spark-sketch_2.11:jar:2.2.0:provided
|
|
||||||
[INFO] | +- org.apache.spark:spark-catalyst_2.11:jar:2.2.0:provided
|
|
||||||
[INFO] | | +- org.codehaus.janino:janino:jar:3.0.0:provided
|
|
||||||
[INFO] | | +- org.codehaus.janino:commons-compiler:jar:3.0.0:provided
|
|
||||||
[INFO] | | \- org.antlr:antlr4-runtime:jar:4.5.3:provided
|
|
||||||
[INFO] | +- org.apache.parquet:parquet-column:jar:1.8.2:provided
|
|
||||||
[INFO] | | +- org.apache.parquet:parquet-common:jar:1.8.2:provided
|
|
||||||
[INFO] | | \- org.apache.parquet:parquet-encoding:jar:1.8.2:provided
|
|
||||||
[INFO] | \- org.apache.parquet:parquet-hadoop:jar:1.8.2:provided
|
|
||||||
[INFO] | +- org.apache.parquet:parquet-format:jar:2.3.1:provided
|
|
||||||
[INFO] | \- org.apache.parquet:parquet-jackson:jar:1.8.2:provided
|
|
||||||
[INFO] +- eu.dnetlib:dnet-openaireplus-mapping-utils:jar:6.2.18:test
|
|
||||||
[INFO] | +- com.ximpleware:vtd-xml:jar:2.13.4:test (version selected from constraint [2.12,3.0.0))
|
|
||||||
[INFO] | +- commons-codec:commons-codec:jar:1.9:provided
|
|
||||||
[INFO] | +- dom4j:dom4j:jar:1.6.1:test (version selected from constraint [1.6.1,1.6.1])
|
|
||||||
[INFO] | +- net.sf.supercsv:super-csv:jar:2.4.0:test
|
|
||||||
[INFO] | +- eu.dnetlib:cnr-misc-utils:jar:1.0.6-SNAPSHOT:test (version selected from constraint [1.0.0,2.0.0))
|
|
||||||
[INFO] | | +- jaxen:jaxen:jar:1.1.6:test
|
|
||||||
[INFO] | | +- saxonica:saxon:jar:9.1.0.8:test
|
|
||||||
[INFO] | | +- saxonica:saxon-dom:jar:9.1.0.8:test
|
|
||||||
[INFO] | | +- jgrapht:jgrapht:jar:0.7.2:test
|
|
||||||
[INFO] | | +- net.sf.ehcache:ehcache:jar:2.8.0:test
|
|
||||||
[INFO] | | \- org.springframework:spring-test:jar:4.2.5.RELEASE:test (version selected from constraint [4.2.5.RELEASE,4.2.5.RELEASE])
|
|
||||||
[INFO] | | \- org.springframework:spring-core:jar:4.2.5.RELEASE:test
|
|
||||||
[INFO] | +- eu.dnetlib:dnet-hadoop-commons:jar:2.0.2-SNAPSHOT:test (version selected from constraint [2.0.0,3.0.0))
|
|
||||||
[INFO] | | +- org.apache.hadoop:hadoop-core:jar:2.0.0-mr1-cdh4.7.0:test
|
|
||||||
[INFO] | | | +- commons-el:commons-el:jar:1.0:test
|
|
||||||
[INFO] | | | \- hsqldb:hsqldb:jar:1.8.0.10:test
|
|
||||||
[INFO] | | \- org.springframework:spring-beans:jar:4.2.5.RELEASE:test (version selected from constraint [4.2.5.RELEASE,4.2.5.RELEASE])
|
|
||||||
[INFO] | \- eu.dnetlib:dnet-index-solr-common:jar:1.3.1:test (version selected from constraint [1.0.0,1.3.1])
|
|
||||||
[INFO] | \- org.apache.solr:solr-solrj:jar:4.9.0:test
|
|
||||||
[INFO] | +- org.apache.httpcomponents:httpmime:jar:4.3.1:test
|
|
||||||
[INFO] | \- org.noggit:noggit:jar:0.5:test
|
|
||||||
[INFO] \- junit:junit:jar:4.9:test
|
|
||||||
[INFO] \- org.hamcrest:hamcrest-core:jar:1.1:test
|
|
||||||
[INFO] ------------------------------------------------------------------------
|
|
||||||
[INFO] Reactor Summary:
|
|
||||||
[INFO]
|
|
||||||
[INFO] dnet-dedup 3.0.3-SNAPSHOT .......................... SUCCESS [ 1.152 s]
|
|
||||||
[INFO] dnet-pace-core ..................................... SUCCESS [ 0.117 s]
|
|
||||||
[INFO] dnet-dedup-test 3.0.3-SNAPSHOT ..................... SUCCESS [ 1.407 s]
|
|
||||||
[INFO] ------------------------------------------------------------------------
|
|
||||||
[INFO] BUILD SUCCESS
|
|
||||||
[INFO] ------------------------------------------------------------------------
|
|
||||||
[INFO] Total time: 3.216 s
|
|
||||||
[INFO] Finished at: 2019-03-29T15:02:42+01:00
|
|
||||||
[INFO] ------------------------------------------------------------------------
|
|
|
@ -22,6 +22,7 @@ public class UrlClustering extends AbstractPaceFunctions implements ClusteringFu
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Collection<String> apply(List<Field> fields) {
|
public Collection<String> apply(List<Field> fields) {
|
||||||
|
try {
|
||||||
return fields.stream()
|
return fields.stream()
|
||||||
.filter(f -> !f.isEmpty())
|
.filter(f -> !f.isEmpty())
|
||||||
.map(Field::stringValue)
|
.map(Field::stringValue)
|
||||||
|
@ -29,13 +30,17 @@ public class UrlClustering extends AbstractPaceFunctions implements ClusteringFu
|
||||||
.map(URL::getHost)
|
.map(URL::getHost)
|
||||||
.collect(Collectors.toCollection(HashSet::new));
|
.collect(Collectors.toCollection(HashSet::new));
|
||||||
}
|
}
|
||||||
|
catch (IllegalStateException e){
|
||||||
|
return new HashSet<>();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Map<String, Integer> getParams() {
|
public Map<String, Integer> getParams() {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
private URL asUrl(final String value) {
|
private URL asUrl(String value) {
|
||||||
try {
|
try {
|
||||||
return new URL(value);
|
return new URL(value);
|
||||||
} catch (MalformedURLException e) {
|
} catch (MalformedURLException e) {
|
||||||
|
@ -44,4 +49,5 @@ public class UrlClustering extends AbstractPaceFunctions implements ClusteringFu
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -6,7 +6,6 @@ import eu.dnetlib.pace.condition.ConditionAlgo;
|
||||||
import eu.dnetlib.pace.model.ClusteringDef;
|
import eu.dnetlib.pace.model.ClusteringDef;
|
||||||
import eu.dnetlib.pace.model.CondDef;
|
import eu.dnetlib.pace.model.CondDef;
|
||||||
import eu.dnetlib.pace.model.FieldDef;
|
import eu.dnetlib.pace.model.FieldDef;
|
||||||
import eu.dnetlib.pace.model.TreeNodeDef;
|
|
||||||
import eu.dnetlib.pace.util.PaceResolver;
|
import eu.dnetlib.pace.util.PaceResolver;
|
||||||
import org.apache.commons.collections.CollectionUtils;
|
import org.apache.commons.collections.CollectionUtils;
|
||||||
import org.codehaus.jackson.annotate.JsonIgnore;
|
import org.codehaus.jackson.annotate.JsonIgnore;
|
||||||
|
@ -24,11 +23,9 @@ public class PaceConfig implements Serializable {
|
||||||
private List<ClusteringDef> clustering;
|
private List<ClusteringDef> clustering;
|
||||||
private Map<String, List<String>> blacklists;
|
private Map<String, List<String>> blacklists;
|
||||||
|
|
||||||
private Map<String, TreeNodeDef> decisionTree;
|
|
||||||
|
|
||||||
private Map<String, FieldDef> modelMap;
|
private Map<String, FieldDef> modelMap;
|
||||||
|
|
||||||
public static PaceResolver paceResolver;
|
// public PaceResolver paceResolver;
|
||||||
|
|
||||||
public PaceConfig() {}
|
public PaceConfig() {}
|
||||||
|
|
||||||
|
@ -38,7 +35,7 @@ public class PaceConfig implements Serializable {
|
||||||
modelMap.put(fd.getName(), fd);
|
modelMap.put(fd.getName(), fd);
|
||||||
}
|
}
|
||||||
|
|
||||||
paceResolver = new PaceResolver();
|
// paceResolver = new PaceResolver();
|
||||||
}
|
}
|
||||||
|
|
||||||
public List<FieldDef> getModel() {
|
public List<FieldDef> getModel() {
|
||||||
|
@ -61,14 +58,6 @@ public class PaceConfig implements Serializable {
|
||||||
return conditions;
|
return conditions;
|
||||||
}
|
}
|
||||||
|
|
||||||
public Map<String, TreeNodeDef> getDecisionTree() {
|
|
||||||
return decisionTree;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void setDecisionTree(Map<String, TreeNodeDef> decisionTree) {
|
|
||||||
this.decisionTree = decisionTree;
|
|
||||||
}
|
|
||||||
|
|
||||||
@JsonIgnore
|
@JsonIgnore
|
||||||
public List<ConditionAlgo> getConditionAlgos() {
|
public List<ConditionAlgo> getConditionAlgos() {
|
||||||
return asConditionAlgos(getConditions());
|
return asConditionAlgos(getConditions());
|
||||||
|
|
|
@ -32,6 +32,9 @@ public class LevensteinTitleIgnoreVersion extends SecondStringDistanceAlgo {
|
||||||
ca = ca.replaceAll("\\d", "").replaceAll(getRomans(ca), "").trim();
|
ca = ca.replaceAll("\\d", "").replaceAll(getRomans(ca), "").trim();
|
||||||
cb = cb.replaceAll("\\d", "").replaceAll(getRomans(cb), "").trim();
|
cb = cb.replaceAll("\\d", "").replaceAll(getRomans(cb), "").trim();
|
||||||
|
|
||||||
|
ca = filterAllStopWords(ca);
|
||||||
|
cb = filterAllStopWords(cb);
|
||||||
|
|
||||||
final String cca = finalCleanup(ca);
|
final String cca = finalCleanup(ca);
|
||||||
final String ccb = finalCleanup(cb);
|
final String ccb = finalCleanup(cb);
|
||||||
|
|
||||||
|
|
|
@ -2,12 +2,17 @@ package eu.dnetlib.pace.model;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.Serializable;
|
import java.io.Serializable;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.HashMap;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
|
||||||
import eu.dnetlib.pace.clustering.*;
|
import eu.dnetlib.pace.clustering.*;
|
||||||
import eu.dnetlib.pace.config.PaceConfig;
|
import eu.dnetlib.pace.config.PaceConfig;
|
||||||
import eu.dnetlib.pace.util.PaceException;
|
import eu.dnetlib.pace.util.PaceException;
|
||||||
|
import eu.dnetlib.pace.util.PaceResolver;
|
||||||
|
import org.apache.commons.logging.Log;
|
||||||
|
import org.apache.commons.logging.LogFactory;
|
||||||
import org.codehaus.jackson.map.ObjectMapper;
|
import org.codehaus.jackson.map.ObjectMapper;
|
||||||
|
|
||||||
public class ClusteringDef implements Serializable {
|
public class ClusteringDef implements Serializable {
|
||||||
|
@ -18,6 +23,8 @@ public class ClusteringDef implements Serializable {
|
||||||
|
|
||||||
private Map<String, Integer> params;
|
private Map<String, Integer> params;
|
||||||
|
|
||||||
|
PaceResolver paceResolver = new PaceResolver();
|
||||||
|
|
||||||
public ClusteringDef() {}
|
public ClusteringDef() {}
|
||||||
|
|
||||||
public String getName() {
|
public String getName() {
|
||||||
|
@ -30,7 +37,7 @@ public class ClusteringDef implements Serializable {
|
||||||
|
|
||||||
public ClusteringFunction clusteringFunction() {
|
public ClusteringFunction clusteringFunction() {
|
||||||
try {
|
try {
|
||||||
return PaceConfig.paceResolver.getClusteringFunction(getName(), params);
|
return paceResolver.getClusteringFunction(getName(), params);
|
||||||
} catch (PaceException e) {
|
} catch (PaceException e) {
|
||||||
e.printStackTrace();
|
e.printStackTrace();
|
||||||
return null;
|
return null;
|
||||||
|
|
|
@ -7,6 +7,7 @@ import java.util.List;
|
||||||
import eu.dnetlib.pace.condition.*;
|
import eu.dnetlib.pace.condition.*;
|
||||||
import eu.dnetlib.pace.config.PaceConfig;
|
import eu.dnetlib.pace.config.PaceConfig;
|
||||||
import eu.dnetlib.pace.util.PaceException;
|
import eu.dnetlib.pace.util.PaceException;
|
||||||
|
import eu.dnetlib.pace.util.PaceResolver;
|
||||||
import org.codehaus.jackson.map.ObjectMapper;
|
import org.codehaus.jackson.map.ObjectMapper;
|
||||||
|
|
||||||
public class CondDef implements Serializable {
|
public class CondDef implements Serializable {
|
||||||
|
@ -15,10 +16,12 @@ public class CondDef implements Serializable {
|
||||||
|
|
||||||
private List<String> fields;
|
private List<String> fields;
|
||||||
|
|
||||||
|
PaceResolver paceResolver = new PaceResolver();
|
||||||
|
|
||||||
public CondDef() {}
|
public CondDef() {}
|
||||||
|
|
||||||
public ConditionAlgo conditionAlgo(final List<FieldDef> fields){
|
public ConditionAlgo conditionAlgo(final List<FieldDef> fields){
|
||||||
return PaceConfig.paceResolver.getConditionAlgo(getName(), fields);
|
return paceResolver.getConditionAlgo(getName(), fields);
|
||||||
}
|
}
|
||||||
|
|
||||||
public String getName() {
|
public String getName() {
|
||||||
|
|
|
@ -6,6 +6,7 @@ import com.google.gson.Gson;
|
||||||
import eu.dnetlib.pace.config.PaceConfig;
|
import eu.dnetlib.pace.config.PaceConfig;
|
||||||
import eu.dnetlib.pace.config.Type;
|
import eu.dnetlib.pace.config.Type;
|
||||||
import eu.dnetlib.pace.distance.DistanceAlgo;
|
import eu.dnetlib.pace.distance.DistanceAlgo;
|
||||||
|
import eu.dnetlib.pace.util.PaceResolver;
|
||||||
|
|
||||||
import java.io.Serializable;
|
import java.io.Serializable;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
|
@ -33,6 +34,8 @@ public class FieldDef implements Serializable {
|
||||||
|
|
||||||
private double weight;
|
private double weight;
|
||||||
|
|
||||||
|
PaceResolver paceResolver = new PaceResolver();
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Sets maximum size for the repeatable fields in the model. -1 for unbounded size.
|
* Sets maximum size for the repeatable fields in the model. -1 for unbounded size.
|
||||||
*/
|
*/
|
||||||
|
@ -85,7 +88,7 @@ public class FieldDef implements Serializable {
|
||||||
params.put("length", getLength());
|
params.put("length", getLength());
|
||||||
*/
|
*/
|
||||||
params.put("weight", getWeight());
|
params.put("weight", getWeight());
|
||||||
return PaceConfig.paceResolver.getDistanceAlgo(getAlgo(), params);
|
return paceResolver.getDistanceAlgo(getAlgo(), params);
|
||||||
}
|
}
|
||||||
|
|
||||||
public boolean isIgnoreMissing() {
|
public boolean isIgnoreMissing() {
|
||||||
|
|
|
@ -1,145 +0,0 @@
|
||||||
package eu.dnetlib.pace.model;
|
|
||||||
|
|
||||||
import eu.dnetlib.pace.config.PaceConfig;
|
|
||||||
import eu.dnetlib.pace.tree.Comparator;
|
|
||||||
import eu.dnetlib.pace.tree.support.AggType;
|
|
||||||
import eu.dnetlib.pace.util.PaceException;
|
|
||||||
import org.apache.commons.math3.stat.descriptive.DescriptiveStatistics;
|
|
||||||
import org.codehaus.jackson.map.ObjectMapper;
|
|
||||||
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.io.Serializable;
|
|
||||||
import java.util.List;
|
|
||||||
|
|
||||||
public class TreeNodeDef implements Serializable {
|
|
||||||
|
|
||||||
private List<FieldConf> fields; //list of fields involved in the tree node (contains comparators to be used and field on which apply the comparator)
|
|
||||||
private AggType aggregation; //how to aggregate similarity measures for every field
|
|
||||||
|
|
||||||
private double threshold; //threshold on the similarity measure
|
|
||||||
|
|
||||||
private String positive; //specifies the next node in case of positive result: similarity>=th
|
|
||||||
private String negative; //specifies the next node in case of negative result: similarity<th
|
|
||||||
private String undefined; //specifies the next node in case of undefined result: similarity=-1
|
|
||||||
|
|
||||||
boolean ignoreMissing = true; //specifies what to do in case of missing field
|
|
||||||
|
|
||||||
public TreeNodeDef() {
|
|
||||||
}
|
|
||||||
|
|
||||||
//compute the similarity measure between two documents
|
|
||||||
public double evaluate(MapDocument doc1, MapDocument doc2) {
|
|
||||||
|
|
||||||
DescriptiveStatistics stats = new DescriptiveStatistics();
|
|
||||||
|
|
||||||
for (FieldConf fieldConf : fields) {
|
|
||||||
|
|
||||||
double weight = fieldConf.getWeight();
|
|
||||||
|
|
||||||
double similarity = comparator(fieldConf).compare(doc1.getFieldMap().get(fieldConf.getField()), doc2.getFieldMap().get(fieldConf.getField()));
|
|
||||||
|
|
||||||
//if similarity is -1 means that a comparator gave undefined, do not add result to the stats
|
|
||||||
if (similarity != -1) {
|
|
||||||
stats.addValue(weight * similarity);
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
if (!ignoreMissing) //if the missing value has not to be ignored, return -1
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
switch (aggregation){
|
|
||||||
|
|
||||||
case AVG:
|
|
||||||
return stats.getMean();
|
|
||||||
case SUM:
|
|
||||||
return stats.getSum();
|
|
||||||
case MAX:
|
|
||||||
return stats.getMax();
|
|
||||||
case MIN:
|
|
||||||
return stats.getMin();
|
|
||||||
default:
|
|
||||||
return 0.0;
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
private Comparator comparator(final FieldConf field){
|
|
||||||
|
|
||||||
return PaceConfig.paceResolver.getComparator(field.getComparator(), field.getParams());
|
|
||||||
}
|
|
||||||
|
|
||||||
public TreeNodeDef(List<FieldConf> fields, double threshold, AggType aggregation, String positive, String negative, String undefined) {
|
|
||||||
this.fields = fields;
|
|
||||||
this.threshold = threshold;
|
|
||||||
this.aggregation = aggregation;
|
|
||||||
this.positive = positive;
|
|
||||||
this.negative = negative;
|
|
||||||
this.undefined = undefined;
|
|
||||||
}
|
|
||||||
|
|
||||||
public boolean isIgnoreMissing() {
|
|
||||||
return ignoreMissing;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void setIgnoreMissing(boolean ignoreMissing) {
|
|
||||||
this.ignoreMissing = ignoreMissing;
|
|
||||||
}
|
|
||||||
|
|
||||||
public List<FieldConf> getFields() {
|
|
||||||
return fields;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void setFields(List<FieldConf> fields) {
|
|
||||||
this.fields = fields;
|
|
||||||
}
|
|
||||||
|
|
||||||
public double getThreshold() {
|
|
||||||
return threshold;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void setThreshold(double threshold) {
|
|
||||||
this.threshold = threshold;
|
|
||||||
}
|
|
||||||
|
|
||||||
public AggType getAggregation() {
|
|
||||||
return aggregation;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void setAggregation(AggType aggregation) {
|
|
||||||
this.aggregation = aggregation;
|
|
||||||
}
|
|
||||||
|
|
||||||
public String getPositive() {
|
|
||||||
return positive;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void setPositive(String positive) {
|
|
||||||
this.positive = positive;
|
|
||||||
}
|
|
||||||
|
|
||||||
public String getNegative() {
|
|
||||||
return negative;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void setNegative(String negative) {
|
|
||||||
this.negative = negative;
|
|
||||||
}
|
|
||||||
|
|
||||||
public String getUndefined() {
|
|
||||||
return undefined;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void setUndefined(String undefined) {
|
|
||||||
this.undefined = undefined;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public String toString() {
|
|
||||||
try {
|
|
||||||
return new ObjectMapper().writeValueAsString(this);
|
|
||||||
} catch (IOException e) {
|
|
||||||
throw new PaceException("Impossible to convert to JSON: ", e);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
|
@ -1,33 +0,0 @@
|
||||||
package eu.dnetlib.pace.tree;
|
|
||||||
|
|
||||||
import eu.dnetlib.pace.model.Field;
|
|
||||||
import org.apache.commons.lang.StringUtils;
|
|
||||||
|
|
||||||
import java.util.Map;
|
|
||||||
|
|
||||||
abstract class AbstractComparator implements Comparator {
|
|
||||||
|
|
||||||
Map<String, Number> params;
|
|
||||||
|
|
||||||
public AbstractComparator(Map<String, Number> params){
|
|
||||||
this.params = params;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public double compare(Field a, Field b) {
|
|
||||||
return 0.0;
|
|
||||||
}
|
|
||||||
|
|
||||||
public static double stringSimilarity(String s1, String s2) {
|
|
||||||
String longer = s1, shorter = s2;
|
|
||||||
if (s1.length() < s2.length()) { // longer should always have greater length
|
|
||||||
longer = s2; shorter = s1;
|
|
||||||
}
|
|
||||||
int longerLength = longer.length();
|
|
||||||
if (longerLength == 0) //if strings have 0 length return 0 (no similarity)
|
|
||||||
return 0.0;
|
|
||||||
|
|
||||||
return (longerLength - StringUtils.getLevenshteinDistance(longer, shorter)) / (double) longerLength;
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
|
@ -1,42 +0,0 @@
|
||||||
package eu.dnetlib.pace.tree;
|
|
||||||
|
|
||||||
import eu.dnetlib.pace.model.Field;
|
|
||||||
import eu.dnetlib.pace.model.FieldList;
|
|
||||||
|
|
||||||
import java.util.List;
|
|
||||||
import java.util.Map;
|
|
||||||
|
|
||||||
@ComparatorClass("coauthorsMatch")
|
|
||||||
public class CoauthorsMatch extends AbstractComparator {
|
|
||||||
|
|
||||||
public CoauthorsMatch(Map<String, Number> params) {
|
|
||||||
super(params);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public double compare(Field a, Field b) {
|
|
||||||
|
|
||||||
final List<String> c1 = ((FieldList) a).stringList();
|
|
||||||
final List<String> c2 = ((FieldList) b).stringList();
|
|
||||||
|
|
||||||
int size1 = c1.size();
|
|
||||||
int size2 = c2.size();
|
|
||||||
|
|
||||||
//few coauthors or too many coauthors
|
|
||||||
if (size1 < params.getOrDefault("minCoauthors", 5).intValue() || size2 < params.getOrDefault("minCoauthors", 5).intValue() || (size1+size2 > params.getOrDefault("maxCoauthors", 200).intValue()))
|
|
||||||
return -1;
|
|
||||||
|
|
||||||
int coauthorship = 0;
|
|
||||||
for (String ca1: c1){
|
|
||||||
|
|
||||||
for (String ca2: c2){
|
|
||||||
|
|
||||||
if (stringSimilarity(ca1.replaceAll("\\.","").replaceAll(" ",""), ca2.replaceAll("\\.","").replaceAll(" ",""))>= params.getOrDefault("simTh", 0.7).doubleValue())
|
|
||||||
coauthorship++;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return coauthorship;
|
|
||||||
|
|
||||||
}
|
|
||||||
}
|
|
|
@ -1,10 +0,0 @@
|
||||||
package eu.dnetlib.pace.tree;
|
|
||||||
|
|
||||||
import eu.dnetlib.pace.model.Field;
|
|
||||||
|
|
||||||
public interface Comparator {
|
|
||||||
|
|
||||||
//compare two fields and returns: the distace measure, -1 if undefined
|
|
||||||
public double compare(Field a, Field b);
|
|
||||||
|
|
||||||
}
|
|
|
@ -1,14 +0,0 @@
|
||||||
package eu.dnetlib.pace.tree;
|
|
||||||
|
|
||||||
import java.lang.annotation.ElementType;
|
|
||||||
import java.lang.annotation.Retention;
|
|
||||||
import java.lang.annotation.RetentionPolicy;
|
|
||||||
import java.lang.annotation.Target;
|
|
||||||
|
|
||||||
/**
 * Type-level annotation, retained at runtime, that carries a string value for the
 * annotated class.
 */
@Target(ElementType.TYPE)
@Retention(RetentionPolicy.RUNTIME)
public @interface ComparatorClass {

    /** @return the string value attached to the annotated type */
    String value();
}
|
|
||||||
|
|
|
@ -1,25 +0,0 @@
|
||||||
package eu.dnetlib.pace.tree;
|
|
||||||
|
|
||||||
import eu.dnetlib.pace.model.Field;
|
|
||||||
|
|
||||||
import java.util.Map;
|
|
||||||
|
|
||||||
@ComparatorClass("exactMatch")
|
|
||||||
public class ExactMatch extends AbstractComparator {
|
|
||||||
|
|
||||||
public ExactMatch(Map<String, Number> params) {
|
|
||||||
super(params);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public double compare(Field a, Field b) {
|
|
||||||
|
|
||||||
if (a.stringValue().isEmpty() || b.stringValue().isEmpty())
|
|
||||||
return -1;
|
|
||||||
else if (a.stringValue().equals(b.stringValue()))
|
|
||||||
return 1;
|
|
||||||
else
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
|
@ -1,31 +0,0 @@
|
||||||
package eu.dnetlib.pace.tree;
|
|
||||||
|
|
||||||
import eu.dnetlib.pace.model.Field;
|
|
||||||
|
|
||||||
import java.util.Map;
|
|
||||||
|
|
||||||
@ComparatorClass("similar")
|
|
||||||
public class SimilarMatch extends AbstractComparator {
|
|
||||||
|
|
||||||
public SimilarMatch(Map<String, Number> params) {
|
|
||||||
super(params);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public double compare(Field a, Field b) {
|
|
||||||
|
|
||||||
if (a.stringValue().isEmpty() || b.stringValue().isEmpty())
|
|
||||||
return -1; //undefined if one name is missing
|
|
||||||
|
|
||||||
//take only the first name
|
|
||||||
String firstname1 = a.stringValue().split(" ")[0];
|
|
||||||
String firstname2 = b.stringValue().split(" ")[0];
|
|
||||||
|
|
||||||
if (firstname1.toLowerCase().trim().replaceAll("\\.","").replaceAll("\\s","").length()<=2 || firstname2.toLowerCase().replaceAll("\\.", "").replaceAll("\\s","").length()<=2) //too short names (considered similar)
|
|
||||||
return 1;
|
|
||||||
|
|
||||||
return stringSimilarity(firstname1,firstname2);
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
|
@ -1,36 +0,0 @@
|
||||||
package eu.dnetlib.pace.tree;
|
|
||||||
|
|
||||||
import eu.dnetlib.pace.model.Field;
|
|
||||||
import eu.dnetlib.pace.model.FieldListImpl;
|
|
||||||
|
|
||||||
import java.util.Map;
|
|
||||||
|
|
||||||
@ComparatorClass("topicsMatch")
|
|
||||||
public class TopicsMatch extends AbstractComparator {
|
|
||||||
|
|
||||||
public TopicsMatch(Map<String, Number> params) {
|
|
||||||
super(params);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public double compare(Field a, Field b) {
|
|
||||||
|
|
||||||
double[] t1 = ((FieldListImpl) a).doubleArray();
|
|
||||||
double[] t2 = ((FieldListImpl) b).doubleArray();
|
|
||||||
|
|
||||||
if (t1 == null || t2 == null)
|
|
||||||
return -1; //0 similarity if no topics in one of the authors or in both
|
|
||||||
|
|
||||||
double area = 0.0;
|
|
||||||
|
|
||||||
double min_value[] = new double[t1.length];
|
|
||||||
for(int i=0; i<t1.length; i++){
|
|
||||||
|
|
||||||
min_value[i] = (t1[i]<t2[i])?t1[i]:t2[i];
|
|
||||||
area += min_value[i];
|
|
||||||
}
|
|
||||||
|
|
||||||
return area;
|
|
||||||
|
|
||||||
}
|
|
||||||
}
|
|
|
@ -1,22 +0,0 @@
|
||||||
package eu.dnetlib.pace.tree;
|
|
||||||
|
|
||||||
import eu.dnetlib.pace.model.Field;
|
|
||||||
import eu.dnetlib.pace.model.FieldList;
|
|
||||||
|
|
||||||
import java.util.List;
|
|
||||||
import java.util.Map;
|
|
||||||
|
|
||||||
@ComparatorClass("undefined")
|
|
||||||
public class UndefinedNode implements Comparator {
|
|
||||||
|
|
||||||
Map<String, Number> params;
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public double compare(Field a, Field b) {
|
|
||||||
|
|
||||||
final List<String> sa = ((FieldList) a).stringList();
|
|
||||||
final List<String> sb = ((FieldList) b).stringList();
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
}
|
|
|
@ -1,21 +0,0 @@
|
||||||
package eu.dnetlib.pace.tree.support;
|
|
||||||
|
|
||||||
import eu.dnetlib.pace.util.PaceException;
|
|
||||||
|
|
||||||
public enum AggType {
|
|
||||||
|
|
||||||
AVG,
|
|
||||||
SUM,
|
|
||||||
MAX,
|
|
||||||
MIN;
|
|
||||||
|
|
||||||
public static AggType getEnum(String value) {
|
|
||||||
|
|
||||||
try {
|
|
||||||
return AggType.valueOf(value);
|
|
||||||
}
|
|
||||||
catch (IllegalArgumentException e) {
|
|
||||||
throw new PaceException("Undefined aggregation type", e);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
|
@ -1,20 +0,0 @@
|
||||||
package eu.dnetlib.pace.tree.support;
|
|
||||||
|
|
||||||
/**
 * Match outcomes; {@link #UNDEFINED} doubles as the fallback for unrecognised names.
 */
public enum MatchType {

    ORCID_MATCH,
    COAUTHORS_MATCH,
    TOPICS_MATCH,
    NO_MATCH,
    UNDEFINED;

    /**
     * Converts a constant name to the matching {@code MatchType}.
     *
     * @param value exact name of a constant (case-sensitive); may be {@code null}
     * @return the matching constant, or {@link #UNDEFINED} when {@code value} is
     *         {@code null} or names no constant
     */
    public static MatchType getEnum(String value) {

        // valueOf(null) throws NullPointerException, which the catch below would not cover
        if (value == null) {
            return MatchType.UNDEFINED;
        }

        try {
            return MatchType.valueOf(value);
        }
        catch (IllegalArgumentException ignored) {
            // unknown name: fall back to UNDEFINED by design
            return MatchType.UNDEFINED;
        }
    }
}
|
|
|
@ -7,8 +7,6 @@ import eu.dnetlib.pace.condition.ConditionClass;
|
||||||
import eu.dnetlib.pace.distance.DistanceAlgo;
|
import eu.dnetlib.pace.distance.DistanceAlgo;
|
||||||
import eu.dnetlib.pace.distance.DistanceClass;
|
import eu.dnetlib.pace.distance.DistanceClass;
|
||||||
import eu.dnetlib.pace.model.FieldDef;
|
import eu.dnetlib.pace.model.FieldDef;
|
||||||
import eu.dnetlib.pace.tree.Comparator;
|
|
||||||
import eu.dnetlib.pace.tree.ComparatorClass;
|
|
||||||
import org.reflections.Reflections;
|
import org.reflections.Reflections;
|
||||||
|
|
||||||
import java.io.Serializable;
|
import java.io.Serializable;
|
||||||
|
@ -22,7 +20,6 @@ public class PaceResolver implements Serializable {
|
||||||
private final Map<String, Class<ClusteringFunction>> clusteringFunctions;
|
private final Map<String, Class<ClusteringFunction>> clusteringFunctions;
|
||||||
private final Map<String, Class<ConditionAlgo>> conditionAlgos;
|
private final Map<String, Class<ConditionAlgo>> conditionAlgos;
|
||||||
private final Map<String, Class<DistanceAlgo>> distanceAlgos;
|
private final Map<String, Class<DistanceAlgo>> distanceAlgos;
|
||||||
private final Map<String, Class<Comparator>> comparators;
|
|
||||||
|
|
||||||
public PaceResolver() {
|
public PaceResolver() {
|
||||||
|
|
||||||
|
@ -37,10 +34,6 @@ public class PaceResolver implements Serializable {
|
||||||
this.distanceAlgos = new Reflections("eu.dnetlib").getTypesAnnotatedWith(DistanceClass.class).stream()
|
this.distanceAlgos = new Reflections("eu.dnetlib").getTypesAnnotatedWith(DistanceClass.class).stream()
|
||||||
.filter(DistanceAlgo.class::isAssignableFrom)
|
.filter(DistanceAlgo.class::isAssignableFrom)
|
||||||
.collect(Collectors.toMap(cl -> cl.getAnnotation(DistanceClass.class).value(), cl -> (Class<DistanceAlgo>)cl));
|
.collect(Collectors.toMap(cl -> cl.getAnnotation(DistanceClass.class).value(), cl -> (Class<DistanceAlgo>)cl));
|
||||||
|
|
||||||
this.comparators = new Reflections("eu.dnetlib").getTypesAnnotatedWith(ComparatorClass.class).stream()
|
|
||||||
.filter(Comparator.class::isAssignableFrom)
|
|
||||||
.collect(Collectors.toMap(cl -> cl.getAnnotation(ComparatorClass.class).value(), cl -> (Class<Comparator>) cl));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public ClusteringFunction getClusteringFunction(String name, Map<String, Integer> params) throws PaceException {
|
public ClusteringFunction getClusteringFunction(String name, Map<String, Integer> params) throws PaceException {
|
||||||
|
@ -67,12 +60,4 @@ public class PaceResolver implements Serializable {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public Comparator getComparator(String name, Map<String, Number> params) throws PaceException {
|
|
||||||
try {
|
|
||||||
return comparators.get(name).getDeclaredConstructor(Map.class).newInstance(params);
|
|
||||||
} catch (InstantiationException | IllegalAccessException | InvocationTargetException | NoSuchMethodException | NullPointerException e) {
|
|
||||||
throw new PaceException(name + " not found ", e);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue