Merge branch 'master' of https://github.com/dnet-team/dnet-dedup
This commit is contained in:
commit
e6944249ca
252
dependencies.txt
252
dependencies.txt
|
@ -1,252 +0,0 @@
|
|||
[INFO] Scanning for projects...
|
||||
[INFO] ------------------------------------------------------------------------
|
||||
[INFO] Reactor Build Order:
|
||||
[INFO]
|
||||
[INFO] dnet-dedup [pom]
|
||||
[INFO] dnet-pace-core [jar]
|
||||
[INFO] dnet-dedup-test [jar]
|
||||
[INFO]
|
||||
[INFO] -----------------------< eu.dnetlib:dnet-dedup >------------------------
|
||||
[INFO] Building dnet-dedup 3.0.3-SNAPSHOT [1/3]
|
||||
[INFO] --------------------------------[ pom ]---------------------------------
|
||||
[INFO]
|
||||
[INFO] --- maven-dependency-plugin:3.0.0:tree (default-cli) @ dnet-dedup ---
|
||||
[INFO] eu.dnetlib:dnet-dedup:pom:3.0.3-SNAPSHOT
|
||||
[INFO]
|
||||
[INFO] ---------------------< eu.dnetlib:dnet-pace-core >----------------------
|
||||
[INFO] Building dnet-pace-core 3.0.3-SNAPSHOT [2/3]
|
||||
[INFO] --------------------------------[ jar ]---------------------------------
|
||||
[INFO]
|
||||
[INFO] --- maven-dependency-plugin:3.0.0:tree (default-cli) @ dnet-pace-core ---
|
||||
[INFO] eu.dnetlib:dnet-pace-core:jar:3.0.3-SNAPSHOT
|
||||
[INFO] +- edu.cmu:secondstring:jar:1.0.0:compile
|
||||
[INFO] +- com.google.guava:guava:jar:15.0:compile
|
||||
[INFO] +- com.google.code.gson:gson:jar:2.2.2:compile
|
||||
[INFO] +- commons-lang:commons-lang:jar:2.6:compile
|
||||
[INFO] +- commons-io:commons-io:jar:2.4:compile
|
||||
[INFO] +- commons-collections:commons-collections:jar:3.2.1:compile
|
||||
[INFO] +- com.googlecode.protobuf-java-format:protobuf-java-format:jar:1.2:compile
|
||||
[INFO] +- org.antlr:stringtemplate:jar:3.2:compile
|
||||
[INFO] | \- org.antlr:antlr:jar:2.7.7:compile
|
||||
[INFO] +- commons-logging:commons-logging:jar:1.1.3:compile
|
||||
[INFO] +- junit:junit:jar:4.9:test
|
||||
[INFO] | \- org.hamcrest:hamcrest-core:jar:1.1:test
|
||||
[INFO] +- org.reflections:reflections:jar:0.9.10:compile
|
||||
[INFO] | +- org.javassist:javassist:jar:3.19.0-GA:compile
|
||||
[INFO] | \- com.google.code.findbugs:annotations:jar:2.0.1:compile
|
||||
[INFO] +- com.fasterxml.jackson.core:jackson-databind:jar:2.6.6:compile
|
||||
[INFO] | +- com.fasterxml.jackson.core:jackson-annotations:jar:2.6.0:compile
|
||||
[INFO] | \- com.fasterxml.jackson.core:jackson-core:jar:2.6.6:compile
|
||||
[INFO] +- org.codehaus.jackson:jackson-mapper-asl:jar:1.9.13:compile
|
||||
[INFO] | \- org.codehaus.jackson:jackson-core-asl:jar:1.9.13:compile
|
||||
[INFO] \- org.apache.commons:commons-math3:jar:3.6.1:compile
|
||||
[INFO]
|
||||
[INFO] ---------------------< eu.dnetlib:dnet-dedup-test >---------------------
|
||||
[INFO] Building dnet-dedup-test 3.0.3-SNAPSHOT [3/3]
|
||||
[INFO] --------------------------------[ jar ]---------------------------------
|
||||
[INFO]
|
||||
[INFO] --- maven-dependency-plugin:3.0.0:tree (default-cli) @ dnet-dedup-test ---
|
||||
[INFO] eu.dnetlib:dnet-dedup-test:jar:3.0.3-SNAPSHOT
|
||||
[INFO] +- eu.dnetlib:dnet-pace-core:jar:3.0.3-SNAPSHOT:compile
|
||||
[INFO] | +- edu.cmu:secondstring:jar:1.0.0:compile
|
||||
[INFO] | +- com.google.guava:guava:jar:15.0:compile
|
||||
[INFO] | +- com.google.code.gson:gson:jar:2.2.2:compile
|
||||
[INFO] | +- commons-lang:commons-lang:jar:2.6:compile
|
||||
[INFO] | +- commons-io:commons-io:jar:2.4:compile
|
||||
[INFO] | +- commons-collections:commons-collections:jar:3.2.1:compile
|
||||
[INFO] | +- com.googlecode.protobuf-java-format:protobuf-java-format:jar:1.2:compile
|
||||
[INFO] | +- org.antlr:stringtemplate:jar:3.2:compile
|
||||
[INFO] | | \- org.antlr:antlr:jar:2.7.7:compile
|
||||
[INFO] | +- commons-logging:commons-logging:jar:1.1.3:compile
|
||||
[INFO] | +- org.reflections:reflections:jar:0.9.10:compile
|
||||
[INFO] | | +- org.javassist:javassist:jar:3.19.0-GA:compile
|
||||
[INFO] | | \- com.google.code.findbugs:annotations:jar:2.0.1:compile
|
||||
[INFO] | +- com.fasterxml.jackson.core:jackson-databind:jar:2.6.6:compile
|
||||
[INFO] | | +- com.fasterxml.jackson.core:jackson-annotations:jar:2.6.0:compile
|
||||
[INFO] | | \- com.fasterxml.jackson.core:jackson-core:jar:2.6.6:compile
|
||||
[INFO] | +- org.codehaus.jackson:jackson-mapper-asl:jar:1.9.13:compile
|
||||
[INFO] | | \- org.codehaus.jackson:jackson-core-asl:jar:1.9.13:compile
|
||||
[INFO] | \- org.apache.commons:commons-math3:jar:3.6.1:compile
|
||||
[INFO] +- eu.dnetlib:dnet-openaire-data-protos:jar:3.9.3-proto250:compile
|
||||
[INFO] | +- com.google.protobuf:protobuf-java:jar:2.5.0:compile
|
||||
[INFO] | \- log4j:log4j:jar:1.2.17:compile (version selected from constraint [1.2.17,1.2.17])
|
||||
[INFO] +- org.apache.spark:spark-core_2.11:jar:2.2.0:provided
|
||||
[INFO] | +- org.apache.avro:avro:jar:1.7.7:provided
|
||||
[INFO] | | +- com.thoughtworks.paranamer:paranamer:jar:2.3:provided
|
||||
[INFO] | | \- org.apache.commons:commons-compress:jar:1.4.1:provided
|
||||
[INFO] | | \- org.tukaani:xz:jar:1.0:provided
|
||||
[INFO] | +- org.apache.avro:avro-mapred:jar:hadoop2:1.7.7:provided
|
||||
[INFO] | | +- org.apache.avro:avro-ipc:jar:1.7.7:provided
|
||||
[INFO] | | \- org.apache.avro:avro-ipc:jar:tests:1.7.7:provided
|
||||
[INFO] | +- com.twitter:chill_2.11:jar:0.8.0:provided
|
||||
[INFO] | | \- com.esotericsoftware:kryo-shaded:jar:3.0.3:provided
|
||||
[INFO] | | +- com.esotericsoftware:minlog:jar:1.3.0:provided
|
||||
[INFO] | | \- org.objenesis:objenesis:jar:2.1:provided
|
||||
[INFO] | +- com.twitter:chill-java:jar:0.8.0:provided
|
||||
[INFO] | +- org.apache.xbean:xbean-asm5-shaded:jar:4.4:provided
|
||||
[INFO] | +- org.apache.hadoop:hadoop-client:jar:2.6.5:provided
|
||||
[INFO] | | +- org.apache.hadoop:hadoop-common:jar:2.6.5:provided
|
||||
[INFO] | | | +- commons-cli:commons-cli:jar:1.2:provided
|
||||
[INFO] | | | +- xmlenc:xmlenc:jar:0.52:provided
|
||||
[INFO] | | | +- commons-httpclient:commons-httpclient:jar:3.1:provided
|
||||
[INFO] | | | +- commons-configuration:commons-configuration:jar:1.6:provided
|
||||
[INFO] | | | | +- commons-digester:commons-digester:jar:1.8:provided
|
||||
[INFO] | | | | | \- commons-beanutils:commons-beanutils:jar:1.7.0:provided
|
||||
[INFO] | | | | \- commons-beanutils:commons-beanutils-core:jar:1.8.0:provided
|
||||
[INFO] | | | +- org.apache.hadoop:hadoop-auth:jar:2.6.5:provided
|
||||
[INFO] | | | | \- org.apache.directory.server:apacheds-kerberos-codec:jar:2.0.0-M15:provided
|
||||
[INFO] | | | | +- org.apache.directory.server:apacheds-i18n:jar:2.0.0-M15:provided
|
||||
[INFO] | | | | +- org.apache.directory.api:api-asn1-api:jar:1.0.0-M20:provided
|
||||
[INFO] | | | | \- org.apache.directory.api:api-util:jar:1.0.0-M20:provided
|
||||
[INFO] | | | +- org.apache.curator:curator-client:jar:2.6.0:provided
|
||||
[INFO] | | | \- org.htrace:htrace-core:jar:3.0.4:provided
|
||||
[INFO] | | +- org.apache.hadoop:hadoop-hdfs:jar:2.6.5:provided
|
||||
[INFO] | | | +- org.mortbay.jetty:jetty-util:jar:6.1.26:provided
|
||||
[INFO] | | | \- xerces:xercesImpl:jar:2.9.1:provided
|
||||
[INFO] | | | \- xml-apis:xml-apis:jar:1.3.04:provided
|
||||
[INFO] | | +- org.apache.hadoop:hadoop-mapreduce-client-app:jar:2.6.5:provided
|
||||
[INFO] | | | +- org.apache.hadoop:hadoop-mapreduce-client-common:jar:2.6.5:provided
|
||||
[INFO] | | | | +- org.apache.hadoop:hadoop-yarn-client:jar:2.6.5:provided
|
||||
[INFO] | | | | \- org.apache.hadoop:hadoop-yarn-server-common:jar:2.6.5:provided
|
||||
[INFO] | | | \- org.apache.hadoop:hadoop-mapreduce-client-shuffle:jar:2.6.5:provided
|
||||
[INFO] | | +- org.apache.hadoop:hadoop-yarn-api:jar:2.6.5:provided
|
||||
[INFO] | | +- org.apache.hadoop:hadoop-mapreduce-client-core:jar:2.6.5:provided
|
||||
[INFO] | | | \- org.apache.hadoop:hadoop-yarn-common:jar:2.6.5:provided
|
||||
[INFO] | | | +- javax.xml.bind:jaxb-api:jar:2.2.2:provided
|
||||
[INFO] | | | | \- javax.xml.stream:stax-api:jar:1.0-2:provided
|
||||
[INFO] | | | +- org.codehaus.jackson:jackson-jaxrs:jar:1.9.13:provided
|
||||
[INFO] | | | \- org.codehaus.jackson:jackson-xc:jar:1.9.13:provided
|
||||
[INFO] | | +- org.apache.hadoop:hadoop-mapreduce-client-jobclient:jar:2.6.5:provided
|
||||
[INFO] | | \- org.apache.hadoop:hadoop-annotations:jar:2.6.5:provided
|
||||
[INFO] | +- org.apache.spark:spark-launcher_2.11:jar:2.2.0:provided
|
||||
[INFO] | +- org.apache.spark:spark-network-common_2.11:jar:2.2.0:provided
|
||||
[INFO] | | \- org.fusesource.leveldbjni:leveldbjni-all:jar:1.8:provided
|
||||
[INFO] | +- org.apache.spark:spark-network-shuffle_2.11:jar:2.2.0:provided
|
||||
[INFO] | +- org.apache.spark:spark-unsafe_2.11:jar:2.2.0:provided
|
||||
[INFO] | +- net.java.dev.jets3t:jets3t:jar:0.9.3:provided
|
||||
[INFO] | | +- org.apache.httpcomponents:httpcore:jar:4.3.3:provided
|
||||
[INFO] | | +- org.apache.httpcomponents:httpclient:jar:4.3.6:provided
|
||||
[INFO] | | +- javax.activation:activation:jar:1.1.1:provided
|
||||
[INFO] | | +- mx4j:mx4j:jar:3.0.2:provided
|
||||
[INFO] | | +- javax.mail:mail:jar:1.4.7:provided
|
||||
[INFO] | | +- org.bouncycastle:bcprov-jdk15on:jar:1.51:provided
|
||||
[INFO] | | \- com.jamesmurty.utils:java-xmlbuilder:jar:1.0:provided
|
||||
[INFO] | | \- net.iharder:base64:jar:2.3.8:provided
|
||||
[INFO] | +- org.apache.curator:curator-recipes:jar:2.6.0:provided
|
||||
[INFO] | | +- org.apache.curator:curator-framework:jar:2.6.0:provided
|
||||
[INFO] | | \- org.apache.zookeeper:zookeeper:jar:3.4.6:provided
|
||||
[INFO] | +- javax.servlet:javax.servlet-api:jar:3.1.0:provided
|
||||
[INFO] | +- org.apache.commons:commons-lang3:jar:3.5:provided
|
||||
[INFO] | +- com.google.code.findbugs:jsr305:jar:1.3.9:provided
|
||||
[INFO] | +- org.slf4j:slf4j-api:jar:1.7.16:provided
|
||||
[INFO] | +- org.slf4j:jul-to-slf4j:jar:1.7.16:provided
|
||||
[INFO] | +- org.slf4j:jcl-over-slf4j:jar:1.7.16:provided
|
||||
[INFO] | +- org.slf4j:slf4j-log4j12:jar:1.7.16:provided
|
||||
[INFO] | +- com.ning:compress-lzf:jar:1.0.3:provided
|
||||
[INFO] | +- org.xerial.snappy:snappy-java:jar:1.1.2.6:provided
|
||||
[INFO] | +- net.jpountz.lz4:lz4:jar:1.3.0:provided
|
||||
[INFO] | +- org.roaringbitmap:RoaringBitmap:jar:0.5.11:provided
|
||||
[INFO] | +- commons-net:commons-net:jar:2.2:provided
|
||||
[INFO] | +- org.scala-lang:scala-library:jar:2.11.8:provided
|
||||
[INFO] | +- org.json4s:json4s-jackson_2.11:jar:3.2.11:provided
|
||||
[INFO] | | \- org.json4s:json4s-core_2.11:jar:3.2.11:provided
|
||||
[INFO] | | +- org.json4s:json4s-ast_2.11:jar:3.2.11:provided
|
||||
[INFO] | | \- org.scala-lang:scalap:jar:2.11.0:provided
|
||||
[INFO] | | \- org.scala-lang:scala-compiler:jar:2.11.0:provided
|
||||
[INFO] | | +- org.scala-lang.modules:scala-xml_2.11:jar:1.0.1:provided
|
||||
[INFO] | | \- org.scala-lang.modules:scala-parser-combinators_2.11:jar:1.0.1:provided
|
||||
[INFO] | +- org.glassfish.jersey.core:jersey-client:jar:2.22.2:provided
|
||||
[INFO] | | +- javax.ws.rs:javax.ws.rs-api:jar:2.0.1:provided
|
||||
[INFO] | | +- org.glassfish.hk2:hk2-api:jar:2.4.0-b34:provided
|
||||
[INFO] | | | +- org.glassfish.hk2:hk2-utils:jar:2.4.0-b34:provided
|
||||
[INFO] | | | \- org.glassfish.hk2.external:aopalliance-repackaged:jar:2.4.0-b34:provided
|
||||
[INFO] | | +- org.glassfish.hk2.external:javax.inject:jar:2.4.0-b34:provided
|
||||
[INFO] | | \- org.glassfish.hk2:hk2-locator:jar:2.4.0-b34:provided
|
||||
[INFO] | +- org.glassfish.jersey.core:jersey-common:jar:2.22.2:provided
|
||||
[INFO] | | +- javax.annotation:javax.annotation-api:jar:1.2:provided
|
||||
[INFO] | | +- org.glassfish.jersey.bundles.repackaged:jersey-guava:jar:2.22.2:provided
|
||||
[INFO] | | \- org.glassfish.hk2:osgi-resource-locator:jar:1.0.1:provided
|
||||
[INFO] | +- org.glassfish.jersey.core:jersey-server:jar:2.22.2:provided
|
||||
[INFO] | | +- org.glassfish.jersey.media:jersey-media-jaxb:jar:2.22.2:provided
|
||||
[INFO] | | \- javax.validation:validation-api:jar:1.1.0.Final:provided
|
||||
[INFO] | +- org.glassfish.jersey.containers:jersey-container-servlet:jar:2.22.2:provided
|
||||
[INFO] | +- org.glassfish.jersey.containers:jersey-container-servlet-core:jar:2.22.2:provided
|
||||
[INFO] | +- io.netty:netty-all:jar:4.0.43.Final:provided
|
||||
[INFO] | +- io.netty:netty:jar:3.9.9.Final:provided
|
||||
[INFO] | +- com.clearspring.analytics:stream:jar:2.7.0:provided
|
||||
[INFO] | +- io.dropwizard.metrics:metrics-core:jar:3.1.2:provided
|
||||
[INFO] | +- io.dropwizard.metrics:metrics-jvm:jar:3.1.2:provided
|
||||
[INFO] | +- io.dropwizard.metrics:metrics-json:jar:3.1.2:provided
|
||||
[INFO] | +- io.dropwizard.metrics:metrics-graphite:jar:3.1.2:provided
|
||||
[INFO] | +- com.fasterxml.jackson.module:jackson-module-scala_2.11:jar:2.6.5:provided
|
||||
[INFO] | | +- org.scala-lang:scala-reflect:jar:2.11.7:provided
|
||||
[INFO] | | \- com.fasterxml.jackson.module:jackson-module-paranamer:jar:2.6.5:provided
|
||||
[INFO] | +- org.apache.ivy:ivy:jar:2.4.0:provided
|
||||
[INFO] | +- oro:oro:jar:2.0.8:provided
|
||||
[INFO] | +- net.razorvine:pyrolite:jar:4.13:provided
|
||||
[INFO] | +- net.sf.py4j:py4j:jar:0.10.4:provided
|
||||
[INFO] | +- org.apache.spark:spark-tags_2.11:jar:2.2.0:provided
|
||||
[INFO] | +- org.apache.commons:commons-crypto:jar:1.0.0:provided
|
||||
[INFO] | \- org.spark-project.spark:unused:jar:1.0.0:provided
|
||||
[INFO] +- org.apache.spark:spark-graphx_2.11:jar:2.2.0:provided
|
||||
[INFO] | +- org.apache.spark:spark-mllib-local_2.11:jar:2.2.0:provided
|
||||
[INFO] | | \- org.scalanlp:breeze_2.11:jar:0.13.1:provided
|
||||
[INFO] | | +- org.scalanlp:breeze-macros_2.11:jar:0.13.1:provided
|
||||
[INFO] | | +- net.sf.opencsv:opencsv:jar:2.3:provided
|
||||
[INFO] | | +- com.github.rwl:jtransforms:jar:2.4.0:provided
|
||||
[INFO] | | +- org.spire-math:spire_2.11:jar:0.13.0:provided
|
||||
[INFO] | | | +- org.spire-math:spire-macros_2.11:jar:0.13.0:provided
|
||||
[INFO] | | | \- org.typelevel:machinist_2.11:jar:0.6.1:provided
|
||||
[INFO] | | \- com.chuusai:shapeless_2.11:jar:2.3.2:provided
|
||||
[INFO] | | \- org.typelevel:macro-compat_2.11:jar:1.1.1:provided
|
||||
[INFO] | +- com.github.fommil.netlib:core:jar:1.1.2:provided
|
||||
[INFO] | \- net.sourceforge.f2j:arpack_combined_all:jar:0.1:provided
|
||||
[INFO] +- org.apache.spark:spark-sql_2.11:jar:2.2.0:provided
|
||||
[INFO] | +- com.univocity:univocity-parsers:jar:2.2.1:provided
|
||||
[INFO] | +- org.apache.spark:spark-sketch_2.11:jar:2.2.0:provided
|
||||
[INFO] | +- org.apache.spark:spark-catalyst_2.11:jar:2.2.0:provided
|
||||
[INFO] | | +- org.codehaus.janino:janino:jar:3.0.0:provided
|
||||
[INFO] | | +- org.codehaus.janino:commons-compiler:jar:3.0.0:provided
|
||||
[INFO] | | \- org.antlr:antlr4-runtime:jar:4.5.3:provided
|
||||
[INFO] | +- org.apache.parquet:parquet-column:jar:1.8.2:provided
|
||||
[INFO] | | +- org.apache.parquet:parquet-common:jar:1.8.2:provided
|
||||
[INFO] | | \- org.apache.parquet:parquet-encoding:jar:1.8.2:provided
|
||||
[INFO] | \- org.apache.parquet:parquet-hadoop:jar:1.8.2:provided
|
||||
[INFO] | +- org.apache.parquet:parquet-format:jar:2.3.1:provided
|
||||
[INFO] | \- org.apache.parquet:parquet-jackson:jar:1.8.2:provided
|
||||
[INFO] +- eu.dnetlib:dnet-openaireplus-mapping-utils:jar:6.2.18:test
|
||||
[INFO] | +- com.ximpleware:vtd-xml:jar:2.13.4:test (version selected from constraint [2.12,3.0.0))
|
||||
[INFO] | +- commons-codec:commons-codec:jar:1.9:provided
|
||||
[INFO] | +- dom4j:dom4j:jar:1.6.1:test (version selected from constraint [1.6.1,1.6.1])
|
||||
[INFO] | +- net.sf.supercsv:super-csv:jar:2.4.0:test
|
||||
[INFO] | +- eu.dnetlib:cnr-misc-utils:jar:1.0.6-SNAPSHOT:test (version selected from constraint [1.0.0,2.0.0))
|
||||
[INFO] | | +- jaxen:jaxen:jar:1.1.6:test
|
||||
[INFO] | | +- saxonica:saxon:jar:9.1.0.8:test
|
||||
[INFO] | | +- saxonica:saxon-dom:jar:9.1.0.8:test
|
||||
[INFO] | | +- jgrapht:jgrapht:jar:0.7.2:test
|
||||
[INFO] | | +- net.sf.ehcache:ehcache:jar:2.8.0:test
|
||||
[INFO] | | \- org.springframework:spring-test:jar:4.2.5.RELEASE:test (version selected from constraint [4.2.5.RELEASE,4.2.5.RELEASE])
|
||||
[INFO] | | \- org.springframework:spring-core:jar:4.2.5.RELEASE:test
|
||||
[INFO] | +- eu.dnetlib:dnet-hadoop-commons:jar:2.0.2-SNAPSHOT:test (version selected from constraint [2.0.0,3.0.0))
|
||||
[INFO] | | +- org.apache.hadoop:hadoop-core:jar:2.0.0-mr1-cdh4.7.0:test
|
||||
[INFO] | | | +- commons-el:commons-el:jar:1.0:test
|
||||
[INFO] | | | \- hsqldb:hsqldb:jar:1.8.0.10:test
|
||||
[INFO] | | \- org.springframework:spring-beans:jar:4.2.5.RELEASE:test (version selected from constraint [4.2.5.RELEASE,4.2.5.RELEASE])
|
||||
[INFO] | \- eu.dnetlib:dnet-index-solr-common:jar:1.3.1:test (version selected from constraint [1.0.0,1.3.1])
|
||||
[INFO] | \- org.apache.solr:solr-solrj:jar:4.9.0:test
|
||||
[INFO] | +- org.apache.httpcomponents:httpmime:jar:4.3.1:test
|
||||
[INFO] | \- org.noggit:noggit:jar:0.5:test
|
||||
[INFO] \- junit:junit:jar:4.9:test
|
||||
[INFO] \- org.hamcrest:hamcrest-core:jar:1.1:test
|
||||
[INFO] ------------------------------------------------------------------------
|
||||
[INFO] Reactor Summary:
|
||||
[INFO]
|
||||
[INFO] dnet-dedup 3.0.3-SNAPSHOT .......................... SUCCESS [ 1.152 s]
|
||||
[INFO] dnet-pace-core ..................................... SUCCESS [ 0.117 s]
|
||||
[INFO] dnet-dedup-test 3.0.3-SNAPSHOT ..................... SUCCESS [ 1.407 s]
|
||||
[INFO] ------------------------------------------------------------------------
|
||||
[INFO] BUILD SUCCESS
|
||||
[INFO] ------------------------------------------------------------------------
|
||||
[INFO] Total time: 3.216 s
|
||||
[INFO] Finished at: 2019-03-29T15:02:42+01:00
|
||||
[INFO] ------------------------------------------------------------------------
|
|
@ -1,13 +1,16 @@
|
|||
package eu.dnetlib;
|
||||
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import eu.dnetlib.pace.model.Field;
|
||||
import eu.dnetlib.pace.model.MapDocument;
|
||||
import eu.dnetlib.pace.util.PaceException;
|
||||
import org.codehaus.jackson.annotate.JsonIgnore;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.Serializable;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
|
@ -15,13 +18,15 @@ public class ConnectedComponent implements Serializable {
|
|||
|
||||
private Set<MapDocument> docs;
|
||||
private String id;
|
||||
private Map<String, Field> fieldMap;
|
||||
|
||||
/**
 * Creates an empty connected component; no field is assigned here.
 * NOTE(review): presumably kept so serialization frameworks can instantiate the class — confirm.
 */
public ConnectedComponent() {
|
||||
}
|
||||
|
||||
public ConnectedComponent(String id, Set<MapDocument> docs) {
|
||||
this.id = id;
|
||||
public ConnectedComponent(Set<MapDocument> docs) {
|
||||
this.docs = docs;
|
||||
this.id = createID(docs);
|
||||
this.fieldMap = chooseFields(docs);
|
||||
}
|
||||
|
||||
public Set<MapDocument> getDocs() {
|
||||
|
@ -40,14 +45,28 @@ public class ConnectedComponent implements Serializable {
|
|||
this.id = id;
|
||||
}
|
||||
|
||||
public void initializeID() {
|
||||
public Map<String, Field> chooseFields(Set<MapDocument> docs) {
|
||||
|
||||
int maxLength = 0;
|
||||
Map<String, Field> maxFieldMap = new HashMap<>();
|
||||
for (MapDocument doc : docs) {
|
||||
if (doc.toString().length()>maxLength){
|
||||
maxFieldMap = doc.getFieldMap();
|
||||
maxLength = doc.toString().length();
|
||||
}
|
||||
}
|
||||
|
||||
return maxFieldMap;
|
||||
}
|
||||
|
||||
public String createID(Set<MapDocument> docs) {
|
||||
if (docs.size() > 1) {
|
||||
String ccID = getMin(docs.stream().map(doc -> doc.getIdentifier()).collect(Collectors.toList()));
|
||||
String prefix = ccID.split("\\|")[0];
|
||||
String id = ccID.split("::")[1];
|
||||
this.id = prefix + "|dedup_______::" + id;
|
||||
return prefix + "|dedup_______::" + id;
|
||||
} else {
|
||||
this.id = docs.iterator().next().getIdentifier();
|
||||
return docs.iterator().next().getIdentifier();
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -72,4 +91,12 @@ public class ConnectedComponent implements Serializable {
|
|||
throw new PaceException("Failed to create Json: ", e);
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Returns the field map currently held by this component (the same reference, not a copy).
 *
 * @return the value of the {@code fieldMap} field
 */
public Map<String, Field> getFieldMap() {
|
||||
return fieldMap;
|
||||
}
|
||||
|
||||
/**
 * Replaces the field map held by this component; the given reference is stored as is.
 *
 * @param fieldMap the new value of the {@code fieldMap} field
 */
public void setFieldMap(Map<String, Field> fieldMap) {
|
||||
this.fieldMap = fieldMap;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1,52 @@
|
|||
package eu.dnetlib;
|
||||
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import com.google.common.collect.Sets;
|
||||
import eu.dnetlib.pace.model.MapDocument;
|
||||
import eu.dnetlib.pace.util.PaceException;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.Serializable;
|
||||
import java.util.Set;
|
||||
|
||||
public class DocumentsBlock implements Serializable {
|
||||
|
||||
String key;
|
||||
Set<MapDocument> documents;
|
||||
|
||||
public DocumentsBlock(String key, Set<MapDocument> documents) {
|
||||
this.key = key;
|
||||
this.documents = documents;
|
||||
}
|
||||
|
||||
public DocumentsBlock(String key, Iterable<MapDocument> documents) {
|
||||
this.key = key;
|
||||
this.documents = Sets.newHashSet(documents);
|
||||
}
|
||||
|
||||
public String getKey() {
|
||||
return key;
|
||||
}
|
||||
|
||||
public void setKey(String key) {
|
||||
this.key = key;
|
||||
}
|
||||
|
||||
public Iterable<MapDocument> getDocuments() {
|
||||
return documents;
|
||||
}
|
||||
|
||||
public void setDocuments(Set<MapDocument> documents) {
|
||||
this.documents = documents;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString(){
|
||||
ObjectMapper mapper = new ObjectMapper();
|
||||
try {
|
||||
return mapper.writeValueAsString(this);
|
||||
} catch (IOException e) {
|
||||
throw new PaceException("Failed to create Json: ", e);
|
||||
}
|
||||
}
|
||||
}
|
|
@ -1,11 +1,11 @@
|
|||
package eu.dnetlib;
|
||||
|
||||
import com.google.common.collect.Iterables;
|
||||
import eu.dnetlib.graph.GraphProcessor;
|
||||
import eu.dnetlib.pace.config.DedupConfig;
|
||||
import eu.dnetlib.pace.model.MapDocument;
|
||||
import eu.dnetlib.pace.util.BlockProcessor;
|
||||
import eu.dnetlib.pace.utils.PaceUtils;
|
||||
import eu.dnetlib.reporter.SparkCounter;
|
||||
import eu.dnetlib.reporter.SparkBlockProcessor;
|
||||
import eu.dnetlib.reporter.SparkReporter;
|
||||
import org.apache.spark.api.java.JavaPairRDD;
|
||||
import org.apache.spark.api.java.JavaRDD;
|
||||
|
@ -13,13 +13,14 @@ import org.apache.spark.api.java.JavaSparkContext;
|
|||
import org.apache.spark.graphx.Edge;
|
||||
import org.apache.spark.rdd.RDD;
|
||||
import org.apache.spark.sql.SparkSession;
|
||||
import org.apache.spark.util.LongAccumulator;
|
||||
import scala.Tuple2;
|
||||
|
||||
import java.net.URL;
|
||||
import java.util.Map;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
public class SparkLocalTest {
|
||||
public static SparkCounter counter ;
|
||||
|
||||
public static void main(String[] args) {
|
||||
|
||||
|
@ -31,27 +32,23 @@ public class SparkLocalTest {
|
|||
|
||||
final JavaSparkContext context = new JavaSparkContext(spark.sparkContext());
|
||||
|
||||
final URL dataset = SparkTest.class.getResource("/eu/dnetlib/pace/organization.to.fix.json");
|
||||
final URL dataset = SparkLocalTest.class.getResource("/eu/dnetlib/pace/organization.to.fix.json");
|
||||
final JavaRDD<String> dataRDD = context.textFile(dataset.getPath());
|
||||
|
||||
counter = new SparkCounter(context);
|
||||
|
||||
//read the configuration from the classpath
|
||||
final DedupConfig config = DedupConfig.load(Utility.readFromClasspath("/eu/dnetlib/pace/org.curr.conf"));
|
||||
final DedupConfig config = DedupConfig.load(Utility.readFromClasspath("/eu/dnetlib/pace/org.curr.conf", SparkLocalTest.class));
|
||||
|
||||
BlockProcessor.constructAccumulator(config);
|
||||
BlockProcessor.accumulators.forEach(acc -> {
|
||||
|
||||
final String[] values = acc.split("::");
|
||||
counter.incrementCounter(values[0], values[1], 0);
|
||||
|
||||
});
|
||||
Map<String, LongAccumulator> accumulators = Utility.constructAccumulator(config, context.sc());
|
||||
|
||||
//create vertexes of the graph: <ID, MapDocument>
|
||||
JavaPairRDD<String, MapDocument> mapDocs = dataRDD.mapToPair(it -> {
|
||||
MapDocument mapDocument = PaceUtils.asMapDocument(config, it);
|
||||
return new Tuple2<>(mapDocument.getIdentifier(), mapDocument);
|
||||
});
|
||||
|
||||
// mapDocs.filter(d -> d._2().getFieldMap().get("doi").stringValue().length() > 0).foreach(d -> System.out.println(d));
|
||||
// mapDocs.filter(d -> d._2().getFieldMap().get("documentationUrl").stringValue().length() > 0).foreach(d -> System.out.println(d));
|
||||
|
||||
RDD<Tuple2<Object, MapDocument>> vertexes = mapDocs.mapToPair(t -> new Tuple2<Object, MapDocument>( (long) t._1().hashCode(), t._2())).rdd();
|
||||
|
||||
//create relations between documents
|
||||
|
@ -64,19 +61,16 @@ public class SparkLocalTest {
|
|||
.map(it -> new Tuple2<>(it, currentDocument)).collect(Collectors.toList()).iterator();
|
||||
}).groupByKey();//group documents basing on the key
|
||||
|
||||
// blocks = blocks.filter(b -> Iterables.size(b._2())>2);
|
||||
// vertexes = blocks.flatMap(b -> b._2().iterator()).map(t -> new Tuple2<Object, MapDocument>((long) t.getIdentifier().hashCode(), t)).rdd();
|
||||
|
||||
//print blocks
|
||||
blocks.foreach(b -> {
|
||||
String print = b._1() + ": ";
|
||||
for (MapDocument doc : b._2()) {
|
||||
print += doc.getIdentifier() + " ";
|
||||
}
|
||||
System.out.println(print);
|
||||
});
|
||||
// blocks.map(group -> new DocumentsBlock(group._1(), group._2())).foreach(b -> System.out.println(b));
|
||||
|
||||
//create relations by comparing only elements in the same group
|
||||
final JavaPairRDD<String, String> relationRDD = blocks.flatMapToPair(it -> {
|
||||
final SparkReporter reporter = new SparkReporter(counter);
|
||||
new BlockProcessor(config).process(it._1(), it._2(), reporter);
|
||||
final SparkReporter reporter = new SparkReporter();
|
||||
new SparkBlockProcessor(config).process(it._1(), it._2(), reporter, accumulators);
|
||||
return reporter.getReport().iterator();
|
||||
});
|
||||
|
||||
|
@ -87,28 +81,24 @@ public class SparkLocalTest {
|
|||
final JavaRDD<ConnectedComponent> connectedComponents = ccs.filter(cc -> cc.getDocs().size()>1);
|
||||
final JavaRDD<ConnectedComponent> nonDeduplicated = ccs.filter(cc -> cc.getDocs().size()==1);
|
||||
|
||||
//print deduped
|
||||
connectedComponents.foreach(cc -> {
|
||||
System.out.println(cc);
|
||||
});
|
||||
//print nondeduped
|
||||
nonDeduplicated.foreach(cc -> {
|
||||
System.out.println(cc);
|
||||
});
|
||||
|
||||
System.out.println("Non duplicates: " + nonDeduplicated.count());
|
||||
System.out.println("Duplicates: " + connectedComponents.flatMap(cc -> cc.getDocs().iterator()).count());
|
||||
System.out.println("Connected Components: " + connectedComponents.count());
|
||||
|
||||
counter.getAccumulators().values().forEach(it-> System.out.println(it.getGroup()+" "+it.getName()+" -->"+it.value()));
|
||||
accumulators.forEach((name, acc) -> System.out.println(name + " -> " + acc.value()));
|
||||
|
||||
//print deduped
|
||||
connectedComponents.foreach(cc -> {
|
||||
System.out.println("cc = " + cc.getId());
|
||||
for (MapDocument doc: cc.getDocs()) {
|
||||
System.out.println(doc.getIdentifier() + "; ln: " + doc.getFieldMap().get("legalname").stringValue() + "; sn: " + doc.getFieldMap().get("legalshortname").stringValue());
|
||||
}
|
||||
});
|
||||
//print nondeduped
|
||||
nonDeduplicated.foreach(cc -> {
|
||||
System.out.println("nd = " + cc.getId());
|
||||
System.out.println(cc.getDocs().iterator().next().getFieldMap().get("legalname").stringValue() + "; sn: " + cc.getDocs().iterator().next().getFieldMap().get("legalshortname").stringValue());
|
||||
});
|
||||
|
||||
//print ids
|
||||
//// ccs.foreach(cc -> System.out.println(cc.getId()));
|
||||
//// connectedComponents.saveAsTextFile("file:///Users/miconis/Downloads/dumps/organizations_dedup");
|
||||
// //print ids
|
||||
// ccs.foreach(cc -> System.out.println(cc.getId()));
|
||||
// connectedComponents.saveAsTextFile("file:///Users/miconis/Downloads/dumps/organizations_dedup");
|
||||
|
||||
}
|
||||
|
||||
|
|
|
@ -3,9 +3,8 @@ package eu.dnetlib;
|
|||
import eu.dnetlib.graph.GraphProcessor;
|
||||
import eu.dnetlib.pace.config.DedupConfig;
|
||||
import eu.dnetlib.pace.model.MapDocument;
|
||||
import eu.dnetlib.pace.util.BlockProcessor;
|
||||
import eu.dnetlib.pace.utils.PaceUtils;
|
||||
import eu.dnetlib.reporter.SparkCounter;
|
||||
import eu.dnetlib.reporter.SparkBlockProcessor;
|
||||
import eu.dnetlib.reporter.SparkReporter;
|
||||
import org.apache.spark.api.java.JavaPairRDD;
|
||||
import org.apache.spark.api.java.JavaRDD;
|
||||
|
@ -13,16 +12,22 @@ import org.apache.spark.api.java.JavaSparkContext;
|
|||
import org.apache.spark.graphx.Edge;
|
||||
import org.apache.spark.rdd.RDD;
|
||||
import org.apache.spark.sql.SparkSession;
|
||||
import org.apache.spark.util.LongAccumulator;
|
||||
import scala.Tuple2;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Map;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
public class SparkTest {
|
||||
public static SparkCounter counter ;
|
||||
|
||||
public static void main(String[] args) throws IOException {
|
||||
|
||||
final String inputSpacePath = args[0];
|
||||
final String dedupConfigPath = args[1];
|
||||
final String groupsPath = args[2] + "_groups";
|
||||
final String outputPath = args[2] + "_output";
|
||||
|
||||
final SparkSession spark = SparkSession
|
||||
.builder()
|
||||
.appName("Deduplication")
|
||||
|
@ -31,19 +36,11 @@ public class SparkTest {
|
|||
|
||||
final JavaSparkContext context = new JavaSparkContext(spark.sparkContext());
|
||||
|
||||
final JavaRDD<String> dataRDD = Utility.loadDataFromHDFS(args[0], context);
|
||||
final JavaRDD<String> dataRDD = Utility.loadDataFromHDFS(inputSpacePath, context);
|
||||
|
||||
counter = new SparkCounter(context);
|
||||
final DedupConfig config = Utility.loadConfigFromHDFS(dedupConfigPath);
|
||||
|
||||
final DedupConfig config = Utility.loadConfigFromHDFS(args[1]);
|
||||
|
||||
BlockProcessor.constructAccumulator(config);
|
||||
BlockProcessor.accumulators.forEach(acc -> {
|
||||
|
||||
final String[] values = acc.split("::");
|
||||
counter.incrementCounter(values[0], values[1], 0);
|
||||
|
||||
});
|
||||
Map<String, LongAccumulator> accumulators = Utility.constructAccumulator(config, context.sc());
|
||||
|
||||
//create vertexes of the graph: <ID, MapDocument>
|
||||
JavaPairRDD<String, MapDocument> mapDocs = dataRDD.mapToPair(it -> {
|
||||
|
@ -52,7 +49,7 @@ public class SparkTest {
|
|||
});
|
||||
RDD<Tuple2<Object, MapDocument>> vertexes = mapDocs.mapToPair(t -> new Tuple2<Object, MapDocument>( (long) t._1().hashCode(), t._2())).rdd();
|
||||
|
||||
//create relations between documents
|
||||
//group documents basing on clustering
|
||||
JavaPairRDD<String, Iterable<MapDocument>> blocks = mapDocs.reduceByKey((a, b) -> a) //the reduce is just to be sure that we haven't document with same id
|
||||
//Clustering: from <id, doc> to List<groupkey,doc>
|
||||
.flatMapToPair(a -> {
|
||||
|
@ -60,53 +57,33 @@ public class SparkTest {
|
|||
|
||||
return Utility.getGroupingKeys(config, currentDocument).stream()
|
||||
.map(it -> new Tuple2<>(it, currentDocument)).collect(Collectors.toList()).iterator();
|
||||
}).groupByKey();//group documents basing on the key
|
||||
}).groupByKey();
|
||||
|
||||
//print blocks
|
||||
blocks.foreach(b -> {
|
||||
String print = b._1() + ": ";
|
||||
for (MapDocument doc : b._2()) {
|
||||
print += doc.getIdentifier() + " ";
|
||||
}
|
||||
System.out.println(print);
|
||||
});
|
||||
Utility.deleteIfExists(groupsPath);
|
||||
blocks.map(group -> new DocumentsBlock(group._1(), group._2())).saveAsTextFile(groupsPath);
|
||||
|
||||
//create relations by comparing only elements in the same group
|
||||
final JavaPairRDD<String, String> relationRDD = blocks.flatMapToPair(it -> {
|
||||
final SparkReporter reporter = new SparkReporter(counter);
|
||||
new BlockProcessor(config).process(it._1(), it._2(), reporter);
|
||||
return reporter.getReport().iterator();
|
||||
});
|
||||
final SparkReporter reporter = new SparkReporter();
|
||||
new SparkBlockProcessor(config).process(it._1(), it._2(), reporter, accumulators);
|
||||
return reporter.getReport().iterator();
|
||||
});
|
||||
|
||||
final RDD<Edge<String>> edgeRdd = relationRDD.map(it -> new Edge<>(it._1().hashCode(),it._2().hashCode(), "similarTo")).rdd();
|
||||
|
||||
JavaRDD<ConnectedComponent> ccs = GraphProcessor.findCCs(vertexes, edgeRdd, 20).toJavaRDD();
|
||||
|
||||
//save connected components on textfile
|
||||
Utility.deleteIfExists(outputPath);
|
||||
ccs.saveAsTextFile(outputPath);
|
||||
|
||||
final JavaRDD<ConnectedComponent> connectedComponents = ccs.filter(cc -> cc.getDocs().size()>1);
|
||||
final JavaRDD<ConnectedComponent> nonDeduplicated = ccs.filter(cc -> cc.getDocs().size()==1);
|
||||
|
||||
System.out.println("Non duplicates: " + nonDeduplicated.count());
|
||||
System.out.println("Duplicates: " + connectedComponents.flatMap(cc -> cc.getDocs().iterator()).count());
|
||||
System.out.println("Connected Components: " + connectedComponents.count());
|
||||
|
||||
counter.getAccumulators().values().forEach(it-> System.out.println(it.getGroup()+" "+it.getName()+" -->"+it.value()));
|
||||
|
||||
//print deduped
|
||||
connectedComponents.foreach(cc -> {
|
||||
System.out.println("cc = " + cc.getId());
|
||||
for (MapDocument doc: cc.getDocs()) {
|
||||
System.out.println(doc.getIdentifier() + "; ln: " + doc.getFieldMap().get("legalname").stringValue() + "; sn: " + doc.getFieldMap().get("legalshortname").stringValue());
|
||||
}
|
||||
});
|
||||
//print nondeduped
|
||||
nonDeduplicated.foreach(cc -> {
|
||||
System.out.println("nd = " + cc.getId());
|
||||
System.out.println(cc.getDocs().iterator().next().getFieldMap().get("legalname").stringValue() + "; sn: " + cc.getDocs().iterator().next().getFieldMap().get("legalshortname").stringValue());
|
||||
});
|
||||
|
||||
// print ids
|
||||
// ccs.foreach(cc -> System.out.println(cc.getId()));
|
||||
// connectedComponents.saveAsTextFile("file:///Users/miconis/Downloads/dumps/organizations_dedup");
|
||||
accumulators.forEach((name, acc) -> System.out.println(name + " -> " + acc.value()));
|
||||
|
||||
}
|
||||
|
||||
|
|
|
@ -9,24 +9,55 @@ import org.apache.hadoop.conf.Configuration;
|
|||
import org.apache.hadoop.fs.FSDataInputStream;
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.spark.SparkContext;
|
||||
import org.apache.spark.api.java.JavaRDD;
|
||||
import org.apache.spark.api.java.JavaSparkContext;
|
||||
import org.apache.spark.util.LongAccumulator;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.StringWriter;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
public class Utility {
|
||||
|
||||
public static Map<String, LongAccumulator> constructAccumulator(final DedupConfig dedupConf, final SparkContext context) {
|
||||
|
||||
Map<String, LongAccumulator> accumulators = new HashMap<>();
|
||||
|
||||
String acc1 = String.format("%s::%s",dedupConf.getWf().getEntityType(), "records per hash key = 1");
|
||||
accumulators.put(acc1, context.longAccumulator(acc1));
|
||||
String acc2 = String.format("%s::%s",dedupConf.getWf().getEntityType(), "missing " + dedupConf.getWf().getOrderField());
|
||||
accumulators.put(acc2, context.longAccumulator(acc2));
|
||||
String acc3 = String.format("%s::%s",dedupConf.getWf().getEntityType(), String.format("Skipped records for count(%s) >= %s", dedupConf.getWf().getOrderField(), dedupConf.getWf().getGroupMaxSize()));
|
||||
accumulators.put(acc3, context.longAccumulator(acc3));
|
||||
String acc4 = String.format("%s::%s",dedupConf.getWf().getEntityType(), "skip list");
|
||||
accumulators.put(acc4, context.longAccumulator(acc4));
|
||||
String acc5 = String.format("%s::%s",dedupConf.getWf().getEntityType(), "dedupSimilarity (x2)");
|
||||
accumulators.put(acc5, context.longAccumulator(acc5));
|
||||
String acc6 = String.format("%s::%s",dedupConf.getWf().getEntityType(), "d < " + dedupConf.getWf().getThreshold());
|
||||
accumulators.put(acc6, context.longAccumulator(acc6));
|
||||
|
||||
return accumulators;
|
||||
}
|
||||
|
||||
public static JavaRDD<String> loadDataFromHDFS(String path, JavaSparkContext context) {
|
||||
return context.textFile(path);
|
||||
}
|
||||
|
||||
public static void deleteIfExists(String path) throws IOException {
|
||||
Configuration conf = new Configuration();
|
||||
FileSystem fileSystem = FileSystem.get(conf);
|
||||
if (fileSystem.exists(new Path(path))){
|
||||
fileSystem.delete(new Path(path), true);
|
||||
}
|
||||
}
|
||||
|
||||
public static DedupConfig loadConfigFromHDFS(String path) throws IOException {
|
||||
|
||||
Configuration conf = new Configuration();
|
||||
// conf.set("fs.defaultFS", "");
|
||||
FileSystem fileSystem = FileSystem.get(conf);
|
||||
FSDataInputStream inputStream = new FSDataInputStream(fileSystem.open(new Path(path)));
|
||||
|
||||
|
@ -34,10 +65,10 @@ public class Utility {
|
|||
|
||||
}
|
||||
|
||||
static String readFromClasspath(final String filename) {
|
||||
static <T> String readFromClasspath(final String filename, final Class<T> clazz) {
|
||||
final StringWriter sw = new StringWriter();
|
||||
try {
|
||||
IOUtils.copy(SparkTest.class.getResourceAsStream(filename), sw);
|
||||
IOUtils.copy(clazz.getResourceAsStream(filename), sw);
|
||||
return sw.toString();
|
||||
} catch (final IOException e) {
|
||||
throw new RuntimeException("cannot load resource from classpath: " + filename);
|
||||
|
|
|
@ -32,8 +32,7 @@ object GraphProcessor {
|
|||
|
||||
def asConnectedComponent(group: (VertexId, Iterable[MapDocument])): ConnectedComponent = {
|
||||
val docs = group._2.toSet[MapDocument]
|
||||
val connectedComponent = new ConnectedComponent("empty", JavaConversions.setAsJavaSet[MapDocument](docs));
|
||||
connectedComponent.initializeID();
|
||||
val connectedComponent = new ConnectedComponent(JavaConversions.setAsJavaSet[MapDocument](docs));
|
||||
connectedComponent
|
||||
}
|
||||
|
||||
|
|
|
@ -83,6 +83,7 @@ public class PaceUtils {
|
|||
try {
|
||||
JsonFormat.merge(json, b);
|
||||
} catch (JsonFormat.ParseException e) {
|
||||
System.out.println("**************************** " + json);
|
||||
throw new IllegalArgumentException(e);
|
||||
}
|
||||
return ProtoDocumentBuilder.newInstance(b.getId(), b.build(), conf.getPace().getModel());
|
||||
|
|
|
@ -0,0 +1,191 @@
|
|||
package eu.dnetlib.reporter;
|
||||
import com.google.common.collect.Lists;
|
||||
import eu.dnetlib.pace.clustering.NGramUtils;
|
||||
import eu.dnetlib.pace.config.DedupConfig;
|
||||
import eu.dnetlib.pace.config.WfConfig;
|
||||
import eu.dnetlib.pace.distance.PaceDocumentDistance;
|
||||
import eu.dnetlib.pace.distance.eval.ScoreResult;
|
||||
import eu.dnetlib.pace.model.Field;
|
||||
import eu.dnetlib.pace.model.MapDocument;
|
||||
import eu.dnetlib.pace.model.MapDocumentComparator;
|
||||
import org.apache.commons.lang.StringUtils;
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.apache.spark.util.LongAccumulator;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
public class SparkBlockProcessor {
|
||||
|
||||
public static final List<String> accumulators= new ArrayList<>();
|
||||
|
||||
private static final Log log = LogFactory.getLog(SparkBlockProcessor.class);
|
||||
|
||||
private DedupConfig dedupConf;
|
||||
|
||||
public SparkBlockProcessor(DedupConfig dedupConf) {
|
||||
this.dedupConf = dedupConf;
|
||||
}
|
||||
|
||||
public void process(final String key, final Iterable<MapDocument> documents, final SparkReporter context, Map<String, LongAccumulator> accumulators) {
|
||||
|
||||
final Queue<MapDocument> q = prepare(documents);
|
||||
|
||||
if (q.size() > 1) {
|
||||
// log.info("reducing key: '" + key + "' records: " + q.size());
|
||||
//process(q, context);
|
||||
process(simplifyQueue(q, key, context, accumulators), context, accumulators);
|
||||
|
||||
} else {
|
||||
context.incrementCounter(dedupConf.getWf().getEntityType(), "records per hash key = 1", 1, accumulators);
|
||||
}
|
||||
}
|
||||
|
||||
private Queue<MapDocument> prepare(final Iterable<MapDocument> documents) {
|
||||
final Queue<MapDocument> queue = new PriorityQueue<>(100, new MapDocumentComparator(dedupConf.getWf().getOrderField()));
|
||||
|
||||
final Set<String> seen = new HashSet<String>();
|
||||
final int queueMaxSize = dedupConf.getWf().getQueueMaxSize();
|
||||
|
||||
documents.forEach(doc -> {
|
||||
if (queue.size() <= queueMaxSize) {
|
||||
final String id = doc.getIdentifier();
|
||||
|
||||
if (!seen.contains(id)) {
|
||||
seen.add(id);
|
||||
queue.add(doc);
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
return queue;
|
||||
}
|
||||
|
||||
private Queue<MapDocument> simplifyQueue(final Queue<MapDocument> queue, final String ngram, final SparkReporter context, Map<String, LongAccumulator> accumulators) {
|
||||
final Queue<MapDocument> q = new LinkedList<>();
|
||||
|
||||
String fieldRef = "";
|
||||
final List<MapDocument> tempResults = Lists.newArrayList();
|
||||
|
||||
while (!queue.isEmpty()) {
|
||||
final MapDocument result = queue.remove();
|
||||
|
||||
final String orderFieldName = dedupConf.getWf().getOrderField();
|
||||
final Field orderFieldValue = result.values(orderFieldName);
|
||||
if (!orderFieldValue.isEmpty()) {
|
||||
final String field = NGramUtils.cleanupForOrdering(orderFieldValue.stringValue());
|
||||
if (field.equals(fieldRef)) {
|
||||
tempResults.add(result);
|
||||
} else {
|
||||
populateSimplifiedQueue(q, tempResults, context, fieldRef, ngram, accumulators);
|
||||
tempResults.clear();
|
||||
tempResults.add(result);
|
||||
fieldRef = field;
|
||||
}
|
||||
} else {
|
||||
context.incrementCounter(dedupConf.getWf().getEntityType(), "missing " + dedupConf.getWf().getOrderField(), 1, accumulators);
|
||||
}
|
||||
}
|
||||
populateSimplifiedQueue(q, tempResults, context, fieldRef, ngram, accumulators);
|
||||
|
||||
return q;
|
||||
}
|
||||
|
||||
private void populateSimplifiedQueue(final Queue<MapDocument> q,
|
||||
final List<MapDocument> tempResults,
|
||||
final SparkReporter context,
|
||||
final String fieldRef,
|
||||
final String ngram,
|
||||
Map<String, LongAccumulator> accumulators) {
|
||||
WfConfig wf = dedupConf.getWf();
|
||||
if (tempResults.size() < wf.getGroupMaxSize()) {
|
||||
q.addAll(tempResults);
|
||||
} else {
|
||||
context.incrementCounter(wf.getEntityType(), String.format("Skipped records for count(%s) >= %s", wf.getOrderField(), wf.getGroupMaxSize()), tempResults.size(), accumulators);
|
||||
// log.info("Skipped field: " + fieldRef + " - size: " + tempResults.size() + " - ngram: " + ngram);
|
||||
}
|
||||
}
|
||||
|
||||
private void process(final Queue<MapDocument> queue, final SparkReporter context, Map<String, LongAccumulator> accumulators) {
|
||||
|
||||
final PaceDocumentDistance algo = new PaceDocumentDistance();
|
||||
|
||||
while (!queue.isEmpty()) {
|
||||
|
||||
final MapDocument pivot = queue.remove();
|
||||
final String idPivot = pivot.getIdentifier();
|
||||
|
||||
WfConfig wf = dedupConf.getWf();
|
||||
final Field fieldsPivot = pivot.values(wf.getOrderField());
|
||||
final String fieldPivot = (fieldsPivot == null) || fieldsPivot.isEmpty() ? null : fieldsPivot.stringValue();
|
||||
|
||||
if (fieldPivot != null) {
|
||||
// System.out.println(idPivot + " --> " + fieldPivot);
|
||||
|
||||
int i = 0;
|
||||
for (final MapDocument curr : queue) {
|
||||
final String idCurr = curr.getIdentifier();
|
||||
|
||||
if (mustSkip(idCurr)) {
|
||||
|
||||
context.incrementCounter(wf.getEntityType(), "skip list", 1, accumulators);
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
if (i > wf.getSlidingWindowSize()) {
|
||||
break;
|
||||
}
|
||||
|
||||
final Field fieldsCurr = curr.values(wf.getOrderField());
|
||||
final String fieldCurr = (fieldsCurr == null) || fieldsCurr.isEmpty() ? null : fieldsCurr.stringValue();
|
||||
|
||||
if (!idCurr.equals(idPivot) && (fieldCurr != null)) {
|
||||
|
||||
final ScoreResult sr = similarity(algo, pivot, curr);
|
||||
// log.info(sr.toString()+"SCORE "+ sr.getScore());
|
||||
emitOutput(sr, idPivot, idCurr, context, accumulators);
|
||||
i++;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private void emitOutput(final ScoreResult sr, final String idPivot, final String idCurr, final SparkReporter context, Map<String, LongAccumulator> accumulators) {
|
||||
final double d = sr.getScore();
|
||||
|
||||
if (d >= dedupConf.getWf().getThreshold()) {
|
||||
|
||||
writeSimilarity(context, idPivot, idCurr);
|
||||
context.incrementCounter(dedupConf.getWf().getEntityType(), "dedupSimilarity (x2)", 1, accumulators);
|
||||
} else {
|
||||
context.incrementCounter(dedupConf.getWf().getEntityType(), "d < " + dedupConf.getWf().getThreshold(), 1, accumulators);
|
||||
}
|
||||
}
|
||||
|
||||
private ScoreResult similarity(final PaceDocumentDistance algo, final MapDocument a, final MapDocument b) {
|
||||
try {
|
||||
return algo.between(a, b, dedupConf);
|
||||
} catch(Throwable e) {
|
||||
log.error(String.format("\nA: %s\n----------------------\nB: %s", a, b), e);
|
||||
throw new IllegalArgumentException(e);
|
||||
}
|
||||
}
|
||||
|
||||
private boolean mustSkip(final String idPivot) {
|
||||
return dedupConf.getWf().getSkipList().contains(getNsPrefix(idPivot));
|
||||
}
|
||||
|
||||
private String getNsPrefix(final String id) {
|
||||
return StringUtils.substringBetween(id, "|", "::");
|
||||
}
|
||||
|
||||
private void writeSimilarity(final SparkReporter context, final String from, final String to) {
|
||||
final String type = dedupConf.getWf().getEntityType();
|
||||
|
||||
context.emit(type, from, to);
|
||||
// context.emit(type, to, from);
|
||||
}
|
||||
|
||||
}
|
|
@ -1,36 +0,0 @@
|
|||
package eu.dnetlib.reporter;
|
||||
|
||||
import eu.dnetlib.DnetAccumulator;
|
||||
import org.apache.spark.api.java.JavaSparkContext;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
public class SparkCounter {
|
||||
final JavaSparkContext javaSparkContext;
|
||||
|
||||
|
||||
public SparkCounter(final JavaSparkContext context){
|
||||
this.javaSparkContext = context;
|
||||
}
|
||||
|
||||
|
||||
final Map<String, DnetAccumulator> accumulators = new HashMap<>();
|
||||
|
||||
public void incrementCounter(String counterGroup, String counterName, long delta) {
|
||||
final String accumulatorName = String.format("%s::%s", counterGroup, counterName);
|
||||
DnetAccumulator currentAccumulator = null;
|
||||
if (!accumulators.containsKey(accumulatorName)) {
|
||||
currentAccumulator = new DnetAccumulator(counterGroup, counterName);
|
||||
javaSparkContext.sc().register(currentAccumulator,accumulatorName);
|
||||
accumulators.put(accumulatorName, currentAccumulator);
|
||||
} else {
|
||||
currentAccumulator = accumulators.get(accumulatorName);
|
||||
}
|
||||
currentAccumulator.add(delta);
|
||||
}
|
||||
|
||||
public Map<String, DnetAccumulator> getAccumulators() {
|
||||
return accumulators;
|
||||
}
|
||||
}
|
|
@ -3,29 +3,30 @@ package eu.dnetlib.reporter;
|
|||
import eu.dnetlib.pace.util.Reporter;
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.apache.spark.util.LongAccumulator;
|
||||
import scala.Serializable;
|
||||
import scala.Tuple2;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
public class SparkReporter implements Reporter {
|
||||
|
||||
final SparkCounter counter;
|
||||
public class SparkReporter implements Serializable {
|
||||
|
||||
final List<Tuple2<String, String>> report = new ArrayList<>();
|
||||
private static final Log log = LogFactory.getLog(SparkReporter.class);
|
||||
|
||||
public SparkReporter(SparkCounter counter){
|
||||
this.counter = counter;
|
||||
public SparkReporter(){}
|
||||
|
||||
public void incrementCounter(String counterGroup, String counterName, long delta, Map<String, LongAccumulator> accumulators) {
|
||||
|
||||
final String accumulatorName = String.format("%s::%s", counterGroup, counterName);
|
||||
if (accumulators.containsKey(accumulatorName)){
|
||||
accumulators.get(accumulatorName).add(delta);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public void incrementCounter(String counterGroup, String counterName, long delta) {
|
||||
counter.incrementCounter(counterGroup, counterName, delta);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void emit(String type, String from, String to) {
|
||||
report.add(new Tuple2<>(from, to));
|
||||
}
|
||||
|
|
|
@ -5,7 +5,7 @@
|
|||
"entityType" : "organization",
|
||||
"orderField" : "legalname",
|
||||
"queueMaxSize" : "2000",
|
||||
"groupMaxSize" : "10",
|
||||
"groupMaxSize" : "50",
|
||||
"slidingWindowSize" : "200",
|
||||
"rootBuilder" : [ "organization", "projectOrganization_participation_isParticipant", "datasourceOrganization_provision_isProvidedBy" ],
|
||||
"includeChildren" : "true"
|
||||
|
@ -20,13 +20,12 @@
|
|||
{ "name" : "exactMatch", "fields" : [ "gridid" ] }
|
||||
],
|
||||
"conditions" : [
|
||||
{ "name" : "exactMatch", "fields" : [ "country" ] },
|
||||
{ "name" : "DomainExactMatch", "fields" : [ "websiteurl" ] }
|
||||
{ "name" : "DomainExactMatch", "fields" : [ "websiteurl" ] },
|
||||
{ "name" : "exactMatch", "fields" : [ "country" ] }
|
||||
],
|
||||
"model" : [
|
||||
{ "name" : "legalname", "algo" : "Null", "type" : "String", "weight" : "0", "ignoreMissing" : "false", "path" : "organization/metadata/legalname/value" },
|
||||
{ "name" : "country", "algo" : "Null", "type" : "String", "weight" : "0", "ignoreMissing" : "true", "path" : "organization/metadata/country/classid" },
|
||||
{ "name" : "legalshortname", "algo" : "JaroWinklerNormalizedName", "type" : "String", "weight" : "0.1", "ignoreMissing" : "true", "path" : "organization/metadata/legalshortname/value" },
|
||||
{ "name" : "country", "algo" : "Null", "type" : "String", "weight" : "0", "ignoreMissing" : "false", "path" : "organization/metadata/country/classid" },
|
||||
{ "name" : "legalshortname", "algo" : "JaroWinklerNormalizedName", "type" : "String", "weight" : "0.1", "ignoreMissing" : "false", "path" : "organization/metadata/legalshortname/value" },
|
||||
{ "name" : "legalname", "algo" : "JaroWinklerNormalizedName", "type" : "String", "weight" : "0.9", "ignoreMissing" : "false", "path" : "organization/metadata/legalname/value", "params" : {"windowSize" : 4, "threshold" : 0.5} },
|
||||
{ "name" : "websiteurl", "algo" : "Null", "type" : "URL", "weight" : "0", "ignoreMissing" : "true", "path" : "organization/metadata/websiteurl/value", "params" : { "host" : 0.5, "path" : 0.5 } },
|
||||
{ "name" : "gridid", "algo" : "Null", "type" : "String", "weight" : "0.0", "ignoreMissing" : "true", "path" : "pid[qualifier#classid = {grid}]/value" }
|
||||
|
|
|
@ -1,24 +1,30 @@
|
|||
{"dateoftransformation": "2018-09-13", "originalId": ["opendoar____::Fonds_zur_F\u00f6rderung_der_wissenschaftlichen_Forschung_(Austrian_Science_Fund)"], "collectedfrom": [{"value": "OpenDOAR", "key": "10|openaire____::47ce9e9f4fad46e732cff06419ecaabb"}], "organization": {"metadata": {"eclegalbody": {"value": "false"}, "eclegalperson": {"value": "false"}, "ecinternationalorganization": {"value": "false"}, "legalshortname": {"value": "FWF"}, "ecresearchorganization": {"value": "false"}, "ecnonprofit": {"value": "false"}, "ecenterprise": {"value": "false"}, "websiteurl": {"value": "http://www.fwf.ac.at/"}, "ecnutscode": {"value": "false"}, "ecinternationalorganizationeurinterests": {"value": "false"}, "legalname": {"value": "Fonds zur F\u00f6rderung der wissenschaftlichen Forschung (Austrian Science Fund)"}, "country": {"classid": "AT", "classname": "Austria", "schemename": "dnet:countries", "schemeid": "dnet:countries"}, "echighereducation": {"value": "false"}, "ecsmevalidated": {"value": "false"}}}, "dateofcollection": "2015-08-24", "type": 20, "id": "20|opendoar____::77e7cd67c60d0c18aa835ea6ea58122c"}
|
||||
{"dateoftransformation": "2018-12-15", "originalId": ["corda__h2020::998735960"], "collectedfrom": [{"value": "CORDA - COmmon Research DAta Warehouse - Horizon 2020", "key": "10|openaire____::a55eb91348674d853191f4f4fd73d078"}], "organization": {"metadata": {"eclegalbody": {"value": "true"}, "eclegalperson": {"value": "true"}, "ecinternationalorganization": {"value": "false"}, "legalshortname": {"value": "FWF"}, "ecresearchorganization": {"value": "false"}, "ecnonprofit": {"value": "true"}, "ecenterprise": {"value": "false"}, "websiteurl": {"value": "http://www.fwf.ac.at"}, "ecnutscode": {"value": "false"}, "ecinternationalorganizationeurinterests": {"value": "false"}, "legalname": {"value": "FONDS ZUR F\u00d6RDERUNG DER WISSENSCHAFTLICHEN FORSCHUNG"}, "country": {"classid": "AT", "classname": "Austria", "schemename": "dnet:countries", "schemeid": "dnet:countries"}, "echighereducation": {"value": "false"}, "ecsmevalidated": {"value": "false"}}}, "dateofcollection": "2018-03-12", "type": 20, "id": "20|corda__h2020::83f579158b682262181b9a8ffdfa1124"}
|
||||
{"dateoftransformation": "2018-11-20", "originalId": ["corda_______::998735960"], "collectedfrom": [{"value": "CORDA - COmmon Research DAta Warehouse", "key": "10|openaire____::b30dac7baac631f3da7c2bb18dd9891f"}], "organization": {"metadata": {"eclegalbody": {"value": "true"}, "eclegalperson": {"value": "true"}, "ecinternationalorganization": {"value": "false"}, "legalshortname": {"value": "FWF"}, "ecresearchorganization": {"value": "false"}, "ecnonprofit": {"value": "true"}, "ecenterprise": {"value": "false"}, "websiteurl": {"value": "http://www.fwf.ac.at"}, "ecnutscode": {"value": "false"}, "ecinternationalorganizationeurinterests": {"value": "false"}, "legalname": {"value": "FONDS ZUR F\u00d6RDERUNG DER WISSENSCHAFTLICHEN FORSCHUNG"}, "country": {"classid": "AT", "classname": "Austria", "schemename": "dnet:countries", "schemeid": "dnet:countries"}, "echighereducation": {"value": "false"}}}, "dateofcollection": "2018-03-12", "type": 20, "id": "20|corda_______::83f579158b682262181b9a8ffdfa1124"}
|
||||
{"dateoftransformation": "2018-09-27", "originalId": ["re3data_____::9f4430cdb5474d6db4bf84834533a7c9"], "collectedfrom": [{"value": "Registry of Research Data Repository", "key": "10|openaire____::21f8a223b9925c2f87c404096080b046"}], "organization": {"metadata": {"eclegalbody": {"value": "false"}, "eclegalperson": {"value": "false"}, "ecinternationalorganization": {"value": "false"}, "legalshortname": {"value": "FWF"}, "ecresearchorganization": {"value": "false"}, "ecnonprofit": {"value": "false"}, "ecenterprise": {"value": "false"}, "websiteurl": {"value": "https://www.fwf.ac.at/en/"}, "ecnutscode": {"value": "false"}, "ecinternationalorganizationeurinterests": {"value": "false"}, "legalname": {"value": "Fonds zur F\u00f6rderung der wissenschaftlichen Forschung"}, "country": {"classid": "AT", "classname": "Austria", "schemename": "dnet:countries", "schemeid": "dnet:countries"}, "echighereducation": {"value": "false"}, "ecsmevalidated": {"value": "false"}}}, "dateofcollection": "2018-09-27", "type": 20, "id": "20|re3data_____::a3ac0376cc2a582357d821cec70a3e5b"}
|
||||
{"dateoftransformation": "2018-12-15", "originalId": ["corda__h2020::999861936"], "collectedfrom": [{"value": "CORDA - COmmon Research DAta Warehouse - Horizon 2020", "key": "10|openaire____::a55eb91348674d853191f4f4fd73d078"}], "organization": {"metadata": {"eclegalbody": {"value": "true"}, "eclegalperson": {"value": "true"}, "ecinternationalorganization": {"value": "false"}, "legalshortname": {"value": "UNITO"}, "ecresearchorganization": {"value": "true"}, "ecnonprofit": {"value": "true"}, "ecenterprise": {"value": "false"}, "websiteurl": {"value": "http://www.unito.it"}, "ecnutscode": {"value": "false"}, "ecinternationalorganizationeurinterests": {"value": "false"}, "legalname": {"value": "UNIVERSITA DEGLI STUDI DI TORINO"}, "country": {"classid": "IT", "classname": "Italy", "schemename": "dnet:countries", "schemeid": "dnet:countries"}, "echighereducation": {"value": "true"}, "ecsmevalidated": {"value": "false"}}}, "dateofcollection": "2018-03-12", "type": 20, "id": "20|corda__h2020::ef77a7bbe5796b0b47aa60947a5c6f41"}
|
||||
{"dateoftransformation": "2018-11-20", "originalId": ["corda_______::999861936"], "collectedfrom": [{"value": "CORDA - COmmon Research DAta Warehouse", "key": "10|openaire____::b30dac7baac631f3da7c2bb18dd9891f"}], "organization": {"metadata": {"eclegalbody": {"value": "true"}, "eclegalperson": {"value": "true"}, "ecinternationalorganization": {"value": "false"}, "legalshortname": {"value": "UNITO"}, "ecresearchorganization": {"value": "true"}, "ecnonprofit": {"value": "true"}, "ecenterprise": {"value": "false"}, "websiteurl": {"value": "http://www.unito.it"}, "ecnutscode": {"value": "false"}, "ecinternationalorganizationeurinterests": {"value": "false"}, "legalname": {"value": "UNIVERSITA DEGLI STUDI DI TORINO"}, "country": {"classid": "IT", "classname": "Italy", "schemename": "dnet:countries", "schemeid": "dnet:countries"}, "echighereducation": {"value": "true"}, "ecsmevalidated": {"value": "false"}}}, "dateofcollection": "2018-03-12", "type": 20, "id": "20|corda_______::ef77a7bbe5796b0b47aa60947a5c6f41"}
|
||||
{"dateoftransformation": "2018-09-13", "originalId": ["nih_________::UNIVERSITA_DI_TORINO"], "collectedfrom": [{"value": "NIH - National Institutes of Health", "key": "10|openaire____::9e9e8c76d739212c63eff362e321ba33"}], "organization": {"metadata": {"eclegalbody": {"value": "false"}, "eclegalperson": {"value": "false"}, "ecinternationalorganization": {"value": "false"}, "ecnonprofit": {"value": "false"}, "ecresearchorganization": {"value": "false"}, "ecenterprise": {"value": "false"}, "ecnutscode": {"value": "false"}, "ecinternationalorganizationeurinterests": {"value": "false"}, "legalname": {"value": "UNIVERSITA DI TORINO"}, "echighereducation": {"value": "false"}, "ecsmevalidated": {"value": "false"}}}, "dateofcollection": "2016-07-11", "type": 20, "id": "20|nih_________::fdd37fcef9df7c69ae7d620bf21ab272"}
|
||||
{"dateoftransformation": "2018-09-19", "originalId": ["doajarticles::Universit\u00e0_degli_Studi_di_Torino"], "collectedfrom": [{"value": "DOAJ-Articles", "key": "10|driver______::bee53aa31dc2cbb538c10c2b65fa5824"}], "organization": {"metadata": {"eclegalbody": {"value": "false"}, "eclegalperson": {"value": "false"}, "ecinternationalorganization": {"value": "false"}, "legalshortname": {"value": "Universit\u00e0 degli Studi di Torino"}, "ecresearchorganization": {"value": "false"}, "ecnonprofit": {"value": "false"}, "ecenterprise": {"value": "false"}, "ecnutscode": {"value": "false"}, "ecinternationalorganizationeurinterests": {"value": "false"}, "legalname": {"value": "Universit\u00e0 degli Studi di Torino"}, "country": {"classid": "IT", "classname": "Italy", "schemename": "dnet:countries", "schemeid": "dnet:countries"}, "echighereducation": {"value": "false"}, "ecsmevalidated": {"value": "false"}}}, "dateofcollection": "2018-09-19", "type": 20, "id": "20|doajarticles::f7ef827f8fe1d870b6464ef1affc9605"}
|
||||
{"dateoftransformation": "2018-11-12", "originalId": ["opendoar____::Universit\u00e0_degli_Studi_di_Torino"], "collectedfrom": [{"value": "OpenDOAR", "key": "10|openaire____::47ce9e9f4fad46e732cff06419ecaabb"}], "organization": {"metadata": {"eclegalbody": {"value": "false"}, "eclegalperson": {"value": "false"}, "ecinternationalorganization": {"value": "false"}, "ecnonprofit": {"value": "false"}, "ecresearchorganization": {"value": "false"}, "ecenterprise": {"value": "false"}, "websiteurl": {"value": "http://www.unito.it/"}, "ecnutscode": {"value": "false"}, "ecinternationalorganizationeurinterests": {"value": "false"}, "legalname": {"value": "Universit\u00e0 degli Studi di Torino"}, "country": {"classid": "IT", "classname": "Italy", "schemename": "dnet:countries", "schemeid": "dnet:countries"}, "echighereducation": {"value": "false"}, "ecsmevalidated": {"value": "false"}}}, "dateofcollection": "2018-11-12", "type": 20, "id": "20|opendoar____::f7ef827f8fe1d870b6464ef1affc9605"}
|
||||
{"collectedfrom": [{"value": "GRID - Global Research Identifier Database", "key": "10|openaire____::ff4a008470319a22d9cf3d14af485977"}], "organization": {"metadata": {"legalshortname": {"value": "RPF"}, "websiteurl": {"value": "http://www.research.org.cy/EN/index.html/"}, "country": {"classid": "CY", "classname": "Cyprus", "schemename": "dnet:countries", "schemeid": "dnet:countries"}, "legalname": {"value": "RPF"}}}, "pid": [{"qualifier": {"classid": "grid", "classname": "grid", "schemename": "dnet:pid_types", "schemeid": "dnet:pid_types"}, "value": "grid.14751.36"}], "id": "20|grid________::4f35352983a82950563eadfea49dc867", "type": 20}
|
||||
{"collectedfrom": [{"value": "GRID - Global Research Identifier Database", "key": "10|openaire____::ff4a008470319a22d9cf3d14af485977"}], "organization": {"metadata": {"legalshortname": {"value": "RPF"}, "websiteurl": {"value": "http://www.research.org.cy/EN/index.html/"}, "country": {"classid": "CY", "classname": "Cyprus", "schemename": "dnet:countries", "schemeid": "dnet:countries"}, "legalname": {"value": "Research Promotion Foundation"}}}, "pid": [{"qualifier": {"classid": "grid", "classname": "grid", "schemename": "dnet:pid_types", "schemeid": "dnet:pid_types"}, "value": "grid.14751.36"}], "id": "20|grid________::a42b3c67ea94b54ee941fb42fefd51d6", "type": 20}
|
||||
{"dateoftransformation": "2018-08-08", "originalId": ["corda__h2020::999946035"], "collectedfrom": [{"value": "CORDA - COmmon Research DAta Warehouse - Horizon 2020", "key": "10|openaire____::a55eb91348674d853191f4f4fd73d078"}], "organization": {"metadata": {"eclegalbody": {"value": "false"}, "eclegalperson": {"value": "true"}, "ecinternationalorganization": {"value": "false"}, "legalshortname": {"value": "RPF"}, "ecresearchorganization": {"value": "false"}, "ecnonprofit": {"value": "true"}, "ecenterprise": {"value": "false"}, "websiteurl": {"value": "http://www.research.org.cy"}, "ecnutscode": {"value": "false"}, "ecinternationalorganizationeurinterests": {"value": "false"}, "legalname": {"value": "IDRYMA PROOTHISIS EREVNAS"}, "country": {"classid": "CY", "classname": "Cyprus", "schemename": "dnet:countries", "schemeid": "dnet:countries"}, "echighereducation": {"value": "false"}, "ecsmevalidated": {"value": "false"}}}, "dateofcollection": "2016-01-21", "type": 20, "id": "20|corda__h2020::a16918f80d830bf2b6daa5ec304f0e31"}
|
||||
{"dateoftransformation": "2018-08-08", "originalId": ["corda_______::999946035"], "collectedfrom": [{"value": "CORDA - COmmon Research DAta Warehouse", "key": "10|openaire____::b30dac7baac631f3da7c2bb18dd9891f"}], "organization": {"metadata": {"eclegalbody": {"value": "false"}, "eclegalperson": {"value": "true"}, "ecinternationalorganization": {"value": "false"}, "legalshortname": {"value": "RPF"}, "ecresearchorganization": {"value": "false"}, "ecnonprofit": {"value": "true"}, "ecenterprise": {"value": "false"}, "websiteurl": {"value": "http://www.research.org.cy"}, "ecnutscode": {"value": "false"}, "ecinternationalorganizationeurinterests": {"value": "false"}, "legalname": {"value": "RESEARCH PROMOTION FOUNDATION"}, "country": {"classid": "CY", "classname": "Cyprus", "schemename": "dnet:countries", "schemeid": "dnet:countries"}, "echighereducation": {"value": "false"}, "ecsmevalidated": {"value": "false"}}}, "dateofcollection": "2015-09-10", "type": 20, "id": "20|corda_______::a16918f80d830bf2b6daa5ec304f0e31"}
|
||||
{"collectedfrom": [{"value": "GRID - Global Research Identifier Database", "key": "10|openaire____::ff4a008470319a22d9cf3d14af485977"}], "organization": {"metadata": {"legalshortname": {"value": "DFG"}, "websiteurl": {"value": "http://www.dfg.de/en/"}, "country": {"classid": "DE", "classname": "Germany", "schemename": "dnet:countries", "schemeid": "dnet:countries"}, "legalname": {"value": "Deutsche Forschungsgemeinschaft"}}}, "pid": [{"qualifier": {"classid": "grid", "classname": "grid", "schemename": "dnet:pid_types", "schemeid": "dnet:pid_types"}, "value": "grid.424150.6"}], "id": "20|grid________::7d83de934ecd5091d83334f752cef22c", "type": 20}
|
||||
{"dateoftransformation": "2018-08-08", "originalId": ["corda_______::999547462"], "collectedfrom": [{"value": "CORDA - COmmon Research DAta Warehouse", "key": "10|openaire____::b30dac7baac631f3da7c2bb18dd9891f"}], "organization": {"metadata": {"eclegalbody": {"value": "true"}, "eclegalperson": {"value": "true"}, "country": {"classid": "DE", "classname": "Germany", "schemename": "dnet:countries", "schemeid": "dnet:countries"}, "ecnonprofit": {"value": "true"}, "websiteurl": {"value": "http://www.dfg.de"}, "ecnutscode": {"value": "false"}, "legalname": {"value": "DEUTSCHE FORSCHUNGSGEMEINSCHAFT"}}}, "dateofcollection": "2015-09-10", "type": 20, "id": "20|corda_______::3f41cfb7d56cfea69f3ce9792b822eb4"}
|
||||
{"dateoftransformation": "2018-09-28", "originalId": ["dfgf________::DFG"], "collectedfrom": [{"value": "", "key": ""}], "organization": {"metadata": {"eclegalbody": {"value": "false"}, "eclegalperson": {"value": "false"}, "ecinternationalorganization": {"value": "false"}, "legalshortname": {"value": "DFG"}, "ecresearchorganization": {"value": "false"}, "ecnonprofit": {"value": "false"}, "ecenterprise": {"value": "false"}, "ecnutscode": {"value": "false"}, "ecinternationalorganizationeurinterests": {"value": "false"}, "legalname": {"value": "Deutsche Forschungsgemeinschaft"}, "echighereducation": {"value": "false"}, "ecsmevalidated": {"value": "false"}}}, "dateofcollection": "2018-09-28", "type": 20, "id": "20|dfgf________::3bbe57698e353a2acaa03306316658bb"}
|
||||
{"dateoftransformation": "2018-09-28", "originalId": ["dfgf________::DFGF"], "collectedfrom": [{"value": "", "key": ""}], "organization": {"metadata": {"eclegalbody": {"value": "false"}, "eclegalperson": {"value": "false"}, "ecinternationalorganization": {"value": "false"}, "legalshortname": {"value": "DFG"}, "ecresearchorganization": {"value": "false"}, "ecnonprofit": {"value": "false"}, "ecenterprise": {"value": "false"}, "ecnutscode": {"value": "false"}, "ecinternationalorganizationeurinterests": {"value": "false"}, "legalname": {"value": "Deutsche Forschungsgemeinschaft"}, "echighereducation": {"value": "false"}, "ecsmevalidated": {"value": "false"}}}, "dateofcollection": "2018-09-28", "type": 20, "id": "20|dfgf________::14a2847759c496334d510ff8fafbd464"}
|
||||
{"dateoftransformation": "2018-06-04", "originalId": ["re3data_____::bf9c8e5c69ff222e3ee2ff0fc4d2b289"], "collectedfrom": [{"value": "Registry of Research Data Repository", "key": "10|openaire____::21f8a223b9925c2f87c404096080b046"}], "organization": {"metadata": {"eclegalbody": {"value": "false"}, "eclegalperson": {"value": "false"}, "ecinternationalorganization": {"value": "false"}, "legalshortname": {"value": "German Research Foundation"}, "ecresearchorganization": {"value": "false"}, "ecnonprofit": {"value": "false"}, "ecenterprise": {"value": "false"}, "websiteurl": {"value": "http://www.dfg.de/"}, "ecnutscode": {"value": "false"}, "ecinternationalorganizationeurinterests": {"value": "false"}, "legalname": {"value": "Deutsche Forschungsgemeinschaft"}, "country": {"classid": "DE", "classname": "Germany", "schemename": "dnet:countries", "schemeid": "dnet:countries"}, "echighereducation": {"value": "false"}, "ecsmevalidated": {"value": "false"}}}, "dateofcollection": "2016-01-07", "type": 20, "id": "20|re3data_____::fbb08ab5e8cf8cd1056f61b84ddf05dd"}
|
||||
{"originalId": ["https://academic.microsoft.com/#/detail/87707601"], "pid": [{"qualifier": {"classid": "urn", "classname": "urn", "schemename": "dnet:pid_types", "schemeid": "dnet:pid_types"}, "value": "http://en.wikipedia.org/wiki/Deutsche_Forschungsgemeinschaft"}, {"qualifier": {"classid": "mag_id", "classname": "Microsoft Academic Graph Identifier", "schemename": "dnet:pid_types", "schemeid": "dnet:pid_types"}, "value": "https://academic.microsoft.com/#/detail/87707601"}, {"qualifier": {"classid": "grid", "classname": "grid", "schemename": "dnet:pid_types", "schemeid": "dnet:pid_types"}, "value": "grid.424150.6"}], "collectedfrom": [{"value": "Microsoft Academic Graph", "key": "10|openaire____::5f532a3fc4f1ea403f37070f59a7a53a"}], "organization": {"metadata": {"websiteurl": {"value": "http://www.dfg.de/"}, "legalname": {"value": "Deutsche Forschungsgemeinschaft"}}}, "type": 20, "id": "20|microsoft___::e2edddabcc31b692b4ca7b89456e750a"}
|
||||
{"dateoftransformation": "2018-08-08", "originalId": ["corda__h2020::999547462"], "collectedfrom": [{"value": "CORDA - COmmon Research DAta Warehouse - Horizon 2020", "key": "10|openaire____::a55eb91348674d853191f4f4fd73d078"}], "organization": {"metadata": {"eclegalbody": {"value": "true"}, "eclegalperson": {"value": "true"}, "ecinternationalorganization": {"value": "false"}, "legalshortname": {"value": "DFG"}, "ecresearchorganization": {"value": "false"}, "ecnonprofit": {"value": "true"}, "ecenterprise": {"value": "false"}, "websiteurl": {"value": "http://www.dfg.de"}, "ecnutscode": {"value": "false"}, "ecinternationalorganizationeurinterests": {"value": "false"}, "legalname": {"value": "DEUTSCHE FORSCHUNGSGEMEINSCHAFT"}, "country": {"classid": "DE", "classname": "Germany", "schemename": "dnet:countries", "schemeid": "dnet:countries"}, "echighereducation": {"value": "false"}, "ecsmevalidated": {"value": "false"}}}, "dateofcollection": "2016-01-21", "type": 20, "id": "20|corda__h2020::3f41cfb7d56cfea69f3ce9792b822eb4"}
|
||||
{"dateoftransformation": "2018-06-04", "originalId": ["re3data_____::64ef0759fcfccf84cca028ba3c21aa23"], "collectedfrom": [{"value": "Registry of Research Data Repository", "key": "10|openaire____::21f8a223b9925c2f87c404096080b046"}], "organization": {"metadata": {"eclegalbody": {"value": "false"}, "eclegalperson": {"value": "false"}, "ecinternationalorganization": {"value": "false"}, "legalshortname": {"value": "Deutsche Forschungsgemeinschaft"}, "ecresearchorganization": {"value": "false"}, "ecnonprofit": {"value": "false"}, "ecenterprise": {"value": "false"}, "websiteurl": {"value": "http://www.dfg.de/en/index.jsp"}, "ecnutscode": {"value": "false"}, "ecinternationalorganizationeurinterests": {"value": "false"}, "legalname": {"value": "German Research Foundation"}, "country": {"classid": "DE", "classname": "Germany", "schemename": "dnet:countries", "schemeid": "dnet:countries"}, "echighereducation": {"value": "false"}, "ecsmevalidated": {"value": "false"}}}, "dateofcollection": "2016-01-07", "type": 20, "id": "20|re3data_____::e029b7e0de6cafc0c7126615c65458f0"}
|
||||
{"dateoftransformation": "2018-06-04", "originalId": ["re3data_____::37e3bba353f88b4649d459c698483f6e"], "collectedfrom": [{"value": "Registry of Research Data Repository", "key": "10|openaire____::21f8a223b9925c2f87c404096080b046"}], "organization": {"metadata": {"eclegalbody": {"value": "false"}, "eclegalperson": {"value": "false"}, "ecinternationalorganization": {"value": "false"}, "legalshortname": {"value": "Deutsche Forschungsgemeinschaft"}, "ecresearchorganization": {"value": "false"}, "ecnonprofit": {"value": "false"}, "ecenterprise": {"value": "false"}, "websiteurl": {"value": "http://www.dfg.de/en/index.jsp"}, "ecnutscode": {"value": "false"}, "ecinternationalorganizationeurinterests": {"value": "false"}, "legalname": {"value": "German Research Association"}, "country": {"classid": "DE", "classname": "Germany", "schemename": "dnet:countries", "schemeid": "dnet:countries"}, "echighereducation": {"value": "false"}, "ecsmevalidated": {"value": "false"}}}, "dateofcollection": "2016-01-07", "type": 20, "id": "20|re3data_____::2080dc170e6cd7c6c06f403f8a08c1be"}
|
||||
{"collectedfrom": [{"value": "GRID - Global Research Identifier Database", "key": "10|openaire____::ff4a008470319a22d9cf3d14af485977"}], "organization": {"metadata": {"legalshortname": {"value": "DFG"}, "websiteurl": {"value": "http://www.dfg.de/en/"}, "country": {"classid": "DE", "classname": "Germany", "schemename": "dnet:countries", "schemeid": "dnet:countries"}, "legalname": {"value": "DFG"}}}, "pid": [{"qualifier": {"classid": "grid", "classname": "grid", "schemename": "dnet:pid_types", "schemeid": "dnet:pid_types"}, "value": "grid.424150.6"}], "id": "20|grid________::085fd89ec6f3f92c354e0bc027de2a58", "type": 20}
|
||||
{"collectedfrom": [{"value": "GRID - Global Research Identifier Database", "key": "10|openaire____::ff4a008470319a22d9cf3d14af485977"}], "organization": {"metadata": {"legalshortname": {"value": "DFG"}, "websiteurl": {"value": "http://www.dfg.de/en/"}, "country": {"classid": "DE", "classname": "Germany", "schemename": "dnet:countries", "schemeid": "dnet:countries"}, "legalname": {"value": "German Research Foundation"}}}, "pid": [{"qualifier": {"classid": "grid", "classname": "grid", "schemename": "dnet:pid_types", "schemeid": "dnet:pid_types"}, "value": "grid.424150.6"}], "id": "20|grid________::f0d88189673738d2a565aff99eeb59a2", "type": 20}
|
||||
{"dateoftransformation":"2018-11-12","originalId":["opendoar____::Humboldt_State_University"],"collectedfrom":[{"value":"OpenDOAR","key":"10|openaire____::47ce9e9f4fad46e732cff06419ecaabb"}],"organization":{"metadata":{"eclegalbody":{"value":"false"},"eclegalperson":{"value":"false"},"ecinternationalorganization":{"value":"false"},"ecresearchorganization":{"value":"false"},"ecnonprofit":{"value":"false"},"ecenterprise":{"value":"false"},"websiteurl":{"value":"http://www.humboldt.edu/"},"ecnutscode":{"value":"false"},"ecinternationalorganizationeurinterests":{"value":"false"},"legalname":{"value":"Humboldt State University"},"country":{"classid":"US","classname":"United States","schemename":"dnet:countries","schemeid":"dnet:countries"},"echighereducation":{"value":"false"},"ecsmevalidated":{"value":"false"}}},"dateofcollection":"2018-11-12","type":20,"id":"20|opendoar____::9c3522c59aef0edab19b8a3f0aeb39ed"}
|
||||
{"dateoftransformation":"2019-05-19","originalId":["rcuk________::9758583A-FF1E-41C4-9176-B875E8FAC110"],"collectedfrom":[{"value":"Research Councils UK","key":"10|openaire____::ab2d3310741ea80d3b8726f651502858"}],"organization":{"metadata":{"eclegalbody":{"value":"false"},"eclegalperson":{"value":"false"},"ecinternationalorganization":{"value":"false"},"ecnonprofit":{"value":"false"},"ecresearchorganization":{"value":"false"},"ecenterprise":{"value":"false"},"ecnutscode":{"value":"false"},"ecinternationalorganizationeurinterests":{"value":"false"},"legalname":{"value":"Humboldt State University"},"echighereducation":{"value":"false"},"ecsmevalidated":{"value":"false"}}},"dateofcollection":"2017-11-04","type":20,"id":"20|rcuk________::7715018b4838eaf1d57242c788e222d4"}
|
||||
{"dateoftransformation":"2018-09-13","originalId":["opendoar____::Humboldt-Universität_zu_Berlin"],"collectedfrom":[{"value":"OpenDOAR","key":"10|openaire____::47ce9e9f4fad46e732cff06419ecaabb"}],"organization":{"metadata":{"eclegalbody":{"value":"false"},"eclegalperson":{"value":"false"},"ecinternationalorganization":{"value":"false"},"legalshortname":{"value":"HU"},"ecresearchorganization":{"value":"false"},"ecnonprofit":{"value":"false"},"ecenterprise":{"value":"false"},"websiteurl":{"value":"https://www.hu-berlin.de/"},"ecnutscode":{"value":"false"},"ecinternationalorganizationeurinterests":{"value":"false"},"legalname":{"value":"Humboldt-Universität zu Berlin"},"country":{"classid":"DE","classname":"Germany","schemename":"dnet:countries","schemeid":"dnet:countries"},"echighereducation":{"value":"false"},"ecsmevalidated":{"value":"false"}}},"dateofcollection":"2015-08-24","type":20,"id":"20|opendoar____::13ab9ef517038d3751f4b0e31aea9ac7"}
|
||||
{"dateoftransformation":"2018-09-27","originalId":["re3data_____::678d9d5a712331f6e2fce7b7b764090f"],"collectedfrom":[{"value":"Registry of Research Data Repository","key":"10|openaire____::21f8a223b9925c2f87c404096080b046"}],"organization":{"metadata":{"eclegalbody":{"value":"false"},"eclegalperson":{"value":"false"},"ecinternationalorganization":{"value":"false"},"ecresearchorganization":{"value":"false"},"ecnonprofit":{"value":"true"},"ecenterprise":{"value":"false"},"websiteurl":{"value":"https://www.hu-berlin.de/de/"},"ecnutscode":{"value":"false"},"ecinternationalorganizationeurinterests":{"value":"false"},"legalname":{"value":"Humboldt-Universität Berlin"},"country":{"classid":"DE","classname":"Germany","schemename":"dnet:countries","schemeid":"dnet:countries"},"echighereducation":{"value":"false"},"ecsmevalidated":{"value":"false"}}},"dateofcollection":"2018-09-27","type":20,"id":"20|re3data_____::aeb488fd15eb1be77b998b5602450910"}
|
||||
{"dateoftransformation":"2019-05-19","originalId":["rcuk________::CFF4C944-5CF1-4AE3-8C03-BE361D6DEDC3"],"collectedfrom":[{"value":"Research Councils UK","key":"10|openaire____::ab2d3310741ea80d3b8726f651502858"}],"organization":{"metadata":{"eclegalbody":{"value":"false"},"eclegalperson":{"value":"false"},"ecinternationalorganization":{"value":"false"},"ecnonprofit":{"value":"false"},"ecresearchorganization":{"value":"false"},"ecenterprise":{"value":"false"},"ecnutscode":{"value":"false"},"ecinternationalorganizationeurinterests":{"value":"false"},"legalname":{"value":"Humboldt University Berlin"},"country":{"classid":"DE","classname":"Germany","schemename":"dnet:countries","schemeid":"dnet:countries"},"echighereducation":{"value":"false"},"ecsmevalidated":{"value":"false"}}},"dateofcollection":"2017-11-03","type":20,"id":"20|rcuk________::ff1bccdf9520b3fadd2fc26103231de0"}
|
||||
{"dateoftransformation":"2018-09-27","originalId":["re3data_____::4bda5f07be19914ce8e2e4652a72151c"],"collectedfrom":[{"value":"Registry of Research Data Repository","key":"10|openaire____::21f8a223b9925c2f87c404096080b046"}],"organization":{"metadata":{"eclegalbody":{"value":"false"},"eclegalperson":{"value":"false"},"ecinternationalorganization":{"value":"false"},"ecresearchorganization":{"value":"false"},"ecnonprofit":{"value":"true"},"ecenterprise":{"value":"false"},"websiteurl":{"value":"https://www.hu-berlin.de/de"},"ecnutscode":{"value":"false"},"ecinternationalorganizationeurinterests":{"value":"false"},"legalname":{"value":"Humboldt-Universität zu Berlin"},"country":{"classid":"DE","classname":"Germany","schemename":"dnet:countries","schemeid":"dnet:countries"},"echighereducation":{"value":"false"},"ecsmevalidated":{"value":"false"}}},"dateofcollection":"2018-09-27","type":20,"id":"20|re3data_____::d72a4f4665f7df9b48a22d4cfde0dd3c"}
|
||||
{"dateoftransformation":"2018-09-13","originalId":["nih_________::HUMBOLDT_STATE_UNIVERSITY"],"collectedfrom":[{"value":"NIH - National Institutes of Health","key":"10|openaire____::9e9e8c76d739212c63eff362e321ba33"}],"organization":{"metadata":{"eclegalbody":{"value":"false"},"eclegalperson":{"value":"false"},"ecinternationalorganization":{"value":"false"},"ecnonprofit":{"value":"false"},"ecresearchorganization":{"value":"false"},"ecenterprise":{"value":"false"},"ecnutscode":{"value":"false"},"ecinternationalorganizationeurinterests":{"value":"false"},"legalname":{"value":"HUMBOLDT STATE UNIVERSITY"},"echighereducation":{"value":"false"},"ecsmevalidated":{"value":"false"}}},"dateofcollection":"2016-07-14","type":20,"id":"20|nih_________::8aec7ec3198fc69ce74e24b8f6aa9a59"}
|
||||
{"dateoftransformation":"2018-09-19","originalId":["doajarticles::Humboldt-Universität_zu_Berlin"],"collectedfrom":[{"value":"DOAJ-Articles","key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824"}],"organization":{"metadata":{"eclegalbody":{"value":"false"},"eclegalperson":{"value":"false"},"ecinternationalorganization":{"value":"false"},"legalshortname":{"value":"Humboldt-Universität zu Berlin"},"ecresearchorganization":{"value":"false"},"ecnonprofit":{"value":"false"},"ecenterprise":{"value":"false"},"ecnutscode":{"value":"false"},"ecinternationalorganizationeurinterests":{"value":"false"},"legalname":{"value":"Humboldt-Universität zu Berlin"},"country":{"classid":"DE","classname":"Germany","schemename":"dnet:countries","schemeid":"dnet:countries"},"echighereducation":{"value":"false"},"ecsmevalidated":{"value":"false"}}},"dateofcollection":"2018-09-19","type":20,"id":"20|doajarticles::13ab9ef517038d3751f4b0e31aea9ac7"}
|
||||
{"dateoftransformation":"2018-11-20","originalId":["corda_______::999850781"],"collectedfrom":[{"value":"CORDA - COmmon Research DAta Warehouse","key":"10|openaire____::b30dac7baac631f3da7c2bb18dd9891f"}],"organization":{"metadata":{"eclegalbody":{"value":"true"},"eclegalperson":{"value":"true"},"ecinternationalorganization":{"value":"false"},"legalshortname":{"value":"UBER"},"ecresearchorganization":{"value":"true"},"ecnonprofit":{"value":"true"},"ecenterprise":{"value":"false"},"websiteurl":{"value":"http://www.hu-berlin.de"},"ecnutscode":{"value":"false"},"ecinternationalorganizationeurinterests":{"value":"false"},"legalname":{"value":"HUMBOLDT-UNIVERSITAT ZU BERLIN"},"country":{"classid":"DE","classname":"Germany","schemename":"dnet:countries","schemeid":"dnet:countries"},"echighereducation":{"value":"true"}}},"dateofcollection":"2018-03-12","type":20,"id":"20|corda_______::d999b4c2dc81ccd40100056ab0543088"}
|
||||
{"dateoftransformation":"2018-12-15","originalId":["corda__h2020::999850781"],"collectedfrom":[{"value":"CORDA - COmmon Research DAta Warehouse - Horizon 2020","key":"10|openaire____::a55eb91348674d853191f4f4fd73d078"}],"organization":{"metadata":{"eclegalbody":{"value":"true"},"eclegalperson":{"value":"true"},"ecinternationalorganization":{"value":"false"},"legalshortname":{"value":"UBER"},"ecresearchorganization":{"value":"true"},"ecnonprofit":{"value":"true"},"ecenterprise":{"value":"false"},"websiteurl":{"value":"http://www.hu-berlin.de"},"ecnutscode":{"value":"false"},"ecinternationalorganizationeurinterests":{"value":"false"},"legalname":{"value":"HUMBOLDT-UNIVERSITAET ZU BERLIN"},"country":{"classid":"DE","classname":"Germany","schemename":"dnet:countries","schemeid":"dnet:countries"},"echighereducation":{"value":"true"},"ecsmevalidated":{"value":"false"}}},"dateofcollection":"2018-03-12","type":20,"id":"20|corda__h2020::d999b4c2dc81ccd40100056ab0543088"}
|
||||
{"dateoftransformation": "2019-05-19", "originalId": ["rcuk________::9169966C-E38A-41D7-AF04-F7470963CBED"], "collectedfrom": [{"key": "10|openaire____::ab2d3310741ea80d3b8726f651502858", "value": "Research Councils UK"}], "organization": {"metadata": {"eclegalbody": {"value": "false"}, "eclegalperson": {"value": "false"}, "ecinternationalorganization": {"value": "false"}, "ecnonprofit": {"value": "false"}, "ecresearchorganization": {"value": "false"}, "ecenterprise": {"value": "false"}, "ecnutscode": {"value": "false"}, "ecinternationalorganizationeurinterests": {"value": "false"}, "legalname": {"value": "Institute of Physics"}, "echighereducation": {"value": "false"}, "ecsmevalidated": {"value": "false"}}}, "dateofcollection": "2017-11-04", "type": 20, "id": "20|rcuk________::3eb464c9a21582d7dbb3f115710d863c"}
|
||||
{"dateoftransformation": "2019-05-19", "originalId": ["rcuk________::CEEF86B3-BB98-4CAE-848D-00837C745DEC"], "collectedfrom": [{"key": "10|openaire____::ab2d3310741ea80d3b8726f651502858", "value": "Research Councils UK"}], "organization": {"metadata": {"eclegalbody": {"value": "false"}, "eclegalperson": {"value": "false"}, "ecinternationalorganization": {"value": "false"}, "ecnonprofit": {"value": "false"}, "ecresearchorganization": {"value": "false"}, "ecenterprise": {"value": "false"}, "ecnutscode": {"value": "false"}, "ecinternationalorganizationeurinterests": {"value": "false"}, "legalname": {"value": "Yerevan Physics Institute"}, "echighereducation": {"value": "false"}, "ecsmevalidated": {"value": "false"}}}, "dateofcollection": "2017-11-03", "type": 20, "id": "20|rcuk________::f8790ac2aa4eb6fb7cc2980eb7971ee6"}
|
||||
{"dateoftransformation": "2018-09-13", "originalId": ["opendoar____::Aalto_University"], "collectedfrom": [{"key": "10|openaire____::47ce9e9f4fad46e732cff06419ecaabb", "value": "OpenDOAR"}], "organization": {"metadata": {"eclegalbody": {"value": "false"}, "eclegalperson": {"value": "false"}, "ecinternationalorganization": {"value": "false"}, "ecnonprofit": {"value": "false"}, "ecresearchorganization": {"value": "false"}, "ecenterprise": {"value": "false"}, "websiteurl": {"value": "http://www.aalto.fi/en/"}, "ecnutscode": {"value": "false"}, "ecinternationalorganizationeurinterests": {"value": "false"}, "legalname": {"value": "Aalto University"}, "country": {"classid": "FI", "classname": "Finland", "schemeid": "dnet:countries", "schemename": "dnet:countries"}, "echighereducation": {"value": "false"}, "ecsmevalidated": {"value": "false"}}}, "dateofcollection": "2015-08-24", "type": 20, "id": "20|opendoar____::98845925f422ef4987294d6bfac525dd"}
|
||||
{"dateoftransformation": "2018-09-13", "originalId": ["nsf_________::Institute_of_Physics"], "collectedfrom": [{"key": "10|openaire____::dd69b4a1513c9de9f46faf24048da1e8", "value": "NSF - National Science Foundation"}], "organization": {"metadata": {"eclegalbody": {"value": "false"}, "eclegalperson": {"value": "false"}, "ecinternationalorganization": {"value": "false"}, "ecnonprofit": {"value": "false"}, "ecresearchorganization": {"value": "false"}, "ecenterprise": {"value": "false"}, "ecnutscode": {"value": "false"}, "ecinternationalorganizationeurinterests": {"value": "false"}, "legalname": {"value": "Institute of Physics"}, "country": {"classid": "YU", "classname": "Yugoslavia", "schemeid": "dnet:countries", "schemename": "dnet:countries"}, "echighereducation": {"value": "false"}, "ecsmevalidated": {"value": "false"}}}, "dateofcollection": "2016-03-10", "type": 20, "id": "20|nsf_________::d540e5e89b1ace31d5dd0b8f658056ec"}
|
||||
{"dateoftransformation": "2019-05-19", "originalId": ["rcuk________::EE4840D3-84C0-47A3-9109-30F67D0D550F"], "collectedfrom": [{"key": "10|openaire____::ab2d3310741ea80d3b8726f651502858", "value": "Research Councils UK"}], "organization": {"metadata": {"eclegalbody": {"value": "false"}, "eclegalperson": {"value": "false"}, "ecinternationalorganization": {"value": "false"}, "ecnonprofit": {"value": "false"}, "ecresearchorganization": {"value": "false"}, "ecenterprise": {"value": "false"}, "ecnutscode": {"value": "false"}, "ecinternationalorganizationeurinterests": {"value": "false"}, "legalname": {"value": "Aalto University"}, "country": {"classid": "FI", "classname": "Finland", "schemeid": "dnet:countries", "schemename": "dnet:countries"}, "echighereducation": {"value": "false"}, "ecsmevalidated": {"value": "false"}}}, "dateofcollection": "2017-11-03", "type": 20, "id": "20|rcuk________::7559c8bbff5125d74919775a1f290496"}
|
||||
{"dateoftransformation": "2019-05-29", "originalId": ["irb_hr______::Institute of Physics, Zagreb"], "collectedfrom": [{"key": "10|openaire____::db600878200645bd752cf7fd96a37df5", "value": "Rudjer Boskovic Institute Library - Croatian Projects"}], "organization": {"metadata": {"eclegalbody": {"value": "false"}, "eclegalperson": {"value": "false"}, "ecinternationalorganization": {"value": "false"}, "ecnonprofit": {"value": "false"}, "ecresearchorganization": {"value": "false"}, "ecenterprise": {"value": "false"}, "ecnutscode": {"value": "false"}, "ecinternationalorganizationeurinterests": {"value": "false"}, "legalname": {"value": "Institute of Physics, Zagreb"}, "country": {"classid": "HR", "classname": "Croatia", "schemeid": "dnet:countries", "schemename": "dnet:countries"}, "echighereducation": {"value": "false"}, "ecsmevalidated": {"value": "false"}}}, "dateofcollection": "2018-06-15", "type": 20, "id": "20|irb_hr______::d0147c5dfa57d00b5bbd8405366d5ed9"}
|
||||
{"dateoftransformation": "2018-11-20", "originalId": ["corda_______::918297740"], "collectedfrom": [{"key": "10|openaire____::b30dac7baac631f3da7c2bb18dd9891f", "value": "CORDA - COmmon Research DAta Warehouse"}], "organization": {"metadata": {"eclegalbody": {"value": "true"}, "eclegalperson": {"value": "true"}, "ecinternationalorganization": {"value": "false"}, "legalshortname": {"value": "INSTITUUT FYSIEKE VEILIGHEID"}, "ecnonprofit": {"value": "true"}, "ecresearchorganization": {"value": "false"}, "websiteurl": {"value": "http://www.ifv.nl"}, "ecnutscode": {"value": "false"}, "ecinternationalorganizationeurinterests": {"value": "false"}, "legalname": {"value": "INSTITUUT FYSIEKE VEILIGHEID"}, "country": {"classid": "NL", "classname": "Netherlands", "schemeid": "dnet:countries", "schemename": "dnet:countries"}, "echighereducation": {"value": "false"}}}, "dateofcollection": "2018-03-12", "type": 20, "id": "20|corda_______::490e6333fc4b5b2f0bfbb94875b57911"}
|
||||
{"dateoftransformation": "2018-11-20", "originalId": ["corda_______::987994083"], "collectedfrom": [{"key": "10|openaire____::b30dac7baac631f3da7c2bb18dd9891f", "value": "CORDA - COmmon Research DAta Warehouse"}], "organization": {"metadata": {"eclegalbody": {"value": "true"}, "eclegalperson": {"value": "true"}, "ecinternationalorganization": {"value": "false"}, "legalshortname": {"value": "INSTITUUT FYSIEKE VEILIGHEID"}, "ecnonprofit": {"value": "true"}, "ecresearchorganization": {"value": "false"}, "websiteurl": {"value": "http://www.ifv.nl"}, "ecnutscode": {"value": "false"}, "ecinternationalorganizationeurinterests": {"value": "false"}, "legalname": {"value": "INSTITUUT FYSIEKE VEILIGHEID"}, "country": {"classid": "NL", "classname": "Netherlands", "schemeid": "dnet:countries", "schemename": "dnet:countries"}, "echighereducation": {"value": "false"}}}, "dateofcollection": "2018-03-12", "type": 20, "id": "20|corda_______::3ace7e70172b7ddce2ffc8db335e7cd3"}
|
||||
{"dateoftransformation": "2018-11-20", "originalId": ["corda_______::999637672"], "collectedfrom": [{"key": "10|openaire____::b30dac7baac631f3da7c2bb18dd9891f", "value": "CORDA - COmmon Research DAta Warehouse"}], "organization": {"metadata": {"eclegalbody": {"value": "false"}, "eclegalperson": {"value": "true"}, "ecinternationalorganization": {"value": "false"}, "legalshortname": {"value": "ANL"}, "ecnonprofit": {"value": "true"}, "ecresearchorganization": {"value": "true"}, "ecenterprise": {"value": "false"}, "ecnutscode": {"value": "false"}, "ecinternationalorganizationeurinterests": {"value": "false"}, "legalname": {"value": "A I ALIKHANYAN NATIONAL SCIENCE LABORATORY"}, "country": {"classid": "AM", "classname": "Armenia", "schemeid": "dnet:countries", "schemename": "dnet:countries"}, "echighereducation": {"value": "false"}}}, "dateofcollection": "2018-03-12", "type": 20, "id": "20|corda_______::f76c86a31f38609cd3b7930279d9c7c6"}
|
||||
{"dateoftransformation": "2019-04-16", "originalId": ["aka_________::3117bf00abc3330b48bb270494d46ce4"], "collectedfrom": [{"key": "10|openaire____::6ac933301a3933c8a22ceebea7000326", "value": "Academy of Finland"}], "organization": {"metadata": {"eclegalbody": {"value": "false"}, "eclegalperson": {"value": "false"}, "ecinternationalorganization": {"value": "false"}, "ecnonprofit": {"value": "false"}, "ecresearchorganization": {"value": "false"}, "ecenterprise": {"value": "false"}, "ecnutscode": {"value": "false"}, "ecinternationalorganizationeurinterests": {"value": "false"}, "legalname": {"value": "Aalto University"}, "country": {"classid": "FI", "classname": "Finland", "schemeid": "dnet:countries", "schemename": "dnet:countries"}, "echighereducation": {"value": "false"}, "ecsmevalidated": {"value": "false"}}}, "dateofcollection": "2019-01-25", "type": 20, "id": "20|aka_________::c32beace3046af7a121b15237b1e4747"}
|
||||
{"dateoftransformation":"2019-05-04","originalId":["opendoar____::Free_University_of_Bozen_-_Bolzano"],"collectedfrom":[{"value":"OpenDOAR","key":"10|openaire____::47ce9e9f4fad46e732cff06419ecaabb"}],"organization":{"metadata":{"eclegalbody":{"value":"false"},"eclegalperson":{"value":"false"},"ecinternationalorganization":{"value":"false"},"ecresearchorganization":{"value":"false"},"ecnonprofit":{"value":"false"},"ecenterprise":{"value":"false"},"websiteurl":{"value":"http://www.unibz.it"},"ecnutscode":{"value":"false"},"ecinternationalorganizationeurinterests":{"value":"false"},"legalname":{"value":"Free University of Bozen - Bolzano"},"country":{"classid":"IT","classname":"Italy","schemename":"dnet:countries","schemeid":"dnet:countries"},"echighereducation":{"value":"false"},"ecsmevalidated":{"value":"false"}}},"dateofcollection":"2019-05-04","type":20,"id":"20|opendoar____::c230b60ca1a8a95150c3163e40899e5d"}
|
||||
{"dateoftransformation":"2019-05-19","originalId":["rcuk________::E7C60D41-51F7-4C46-89DC-4E8F6D7DC64B"],"collectedfrom":[{"value":"Research Councils UK","key":"10|openaire____::ab2d3310741ea80d3b8726f651502858"}],"organization":{"metadata":{"eclegalbody":{"value":"false"},"eclegalperson":{"value":"false"},"ecinternationalorganization":{"value":"false"},"ecnonprofit":{"value":"false"},"ecresearchorganization":{"value":"false"},"ecenterprise":{"value":"false"},"ecnutscode":{"value":"false"},"ecinternationalorganizationeurinterests":{"value":"false"},"legalname":{"value":"Free University of Bozen-Bolzano"},"echighereducation":{"value":"false"},"ecsmevalidated":{"value":"false"}}},"dateofcollection":"2017-11-07","type":20,"id":"20|rcuk________::47a7d6a960f874fdd7c2678f16276cbf"}
|
||||
{"dateoftransformation":"2019-05-19","originalId":["rcuk________::69417031-F8F1-4557-BF08-49096CDBF321"],"collectedfrom":[{"value":"Research Councils UK","key":"10|openaire____::ab2d3310741ea80d3b8726f651502858"}],"organization":{"metadata":{"eclegalbody":{"value":"false"},"eclegalperson":{"value":"false"},"ecinternationalorganization":{"value":"false"},"ecnonprofit":{"value":"false"},"ecresearchorganization":{"value":"false"},"ecenterprise":{"value":"false"},"ecnutscode":{"value":"false"},"ecinternationalorganizationeurinterests":{"value":"false"},"legalname":{"value":"University of the Free State"},"country":{"classid":"ZA","classname":"South Africa","schemename":"dnet:countries","schemeid":"dnet:countries"},"echighereducation":{"value":"false"},"ecsmevalidated":{"value":"false"}}},"dateofcollection":"2017-11-03","type":20,"id":"20|rcuk________::91d972791a1a3945078724a4ede959d4"}
|
||||
{"dateoftransformation":"2019-05-19","originalId":["rcuk________::9218106A-E8CE-46A5-AABC-B4C8ED148690"],"collectedfrom":[{"value":"Research Councils UK","key":"10|openaire____::ab2d3310741ea80d3b8726f651502858"}],"organization":{"metadata":{"eclegalbody":{"value":"false"},"eclegalperson":{"value":"false"},"ecinternationalorganization":{"value":"false"},"ecnonprofit":{"value":"false"},"ecresearchorganization":{"value":"false"},"ecenterprise":{"value":"false"},"ecnutscode":{"value":"false"},"ecinternationalorganizationeurinterests":{"value":"false"},"legalname":{"value":"University of Amsterdam"},"country":{"classid":"NL","classname":"Netherlands","schemename":"dnet:countries","schemeid":"dnet:countries"},"echighereducation":{"value":"false"},"ecsmevalidated":{"value":"false"}}},"dateofcollection":"2017-11-03","type":20,"id":"20|rcuk________::77c8206d9739a62c542db14a00d51fc9"}
|
||||
{"dateoftransformation":"2018-09-13","originalId":["opendoar____::Université_Libre_de_Bruxelles"],"collectedfrom":[{"value":"OpenDOAR","key":"10|openaire____::47ce9e9f4fad46e732cff06419ecaabb"}],"organization":{"metadata":{"eclegalbody":{"value":"false"},"eclegalperson":{"value":"false"},"ecinternationalorganization":{"value":"false"},"ecresearchorganization":{"value":"false"},"ecnonprofit":{"value":"false"},"ecenterprise":{"value":"false"},"websiteurl":{"value":"http://www.ulb.ac.be/"},"ecnutscode":{"value":"false"},"ecinternationalorganizationeurinterests":{"value":"false"},"legalname":{"value":"Université Libre de Bruxelles"},"country":{"classid":"BE","classname":"Belgium","schemename":"dnet:countries","schemeid":"dnet:countries"},"echighereducation":{"value":"false"},"ecsmevalidated":{"value":"false"}}},"dateofcollection":"2015-08-24","type":20,"id":"20|opendoar____::4348f2277945e85ff4fa371c89d5209e"}
|
||||
{"dateoftransformation":"2018-09-13","originalId":["nsf_________::Liberty_University__Inc_"],"collectedfrom":[{"value":"NSF - National Science Foundation","key":"10|openaire____::dd69b4a1513c9de9f46faf24048da1e8"}],"organization":{"metadata":{"eclegalbody":{"value":"false"},"eclegalperson":{"value":"false"},"ecinternationalorganization":{"value":"false"},"ecnonprofit":{"value":"false"},"ecresearchorganization":{"value":"false"},"ecenterprise":{"value":"false"},"ecnutscode":{"value":"false"},"ecinternationalorganizationeurinterests":{"value":"false"},"legalname":{"value":"Liberty University, Inc."},"country":{"classid":"US","classname":"United States","schemename":"dnet:countries","schemeid":"dnet:countries"},"echighereducation":{"value":"false"},"ecsmevalidated":{"value":"false"}}},"dateofcollection":"2016-03-10","type":20,"id":"20|nsf_________::ef77f1e8314313a4d53ec4f19054b733"}
|
||||
{"dateoftransformation":"2018-09-13","originalId":["opendoar____::Vrije_Universiteit_Amsterdam"],"collectedfrom":[{"value":"OpenDOAR","key":"10|openaire____::47ce9e9f4fad46e732cff06419ecaabb"}],"organization":{"metadata":{"eclegalbody":{"value":"false"},"eclegalperson":{"value":"false"},"ecinternationalorganization":{"value":"false"},"legalshortname":{"value":"VU"},"ecresearchorganization":{"value":"false"},"ecnonprofit":{"value":"false"},"ecenterprise":{"value":"false"},"websiteurl":{"value":"http://www.vu.nl/"},"ecnutscode":{"value":"false"},"ecinternationalorganizationeurinterests":{"value":"false"},"legalname":{"value":"Vrije Universiteit Amsterdam"},"country":{"classid":"NL","classname":"Netherlands","schemename":"dnet:countries","schemeid":"dnet:countries"},"echighereducation":{"value":"false"},"ecsmevalidated":{"value":"false"}}},"dateofcollection":"2015-08-24","type":20,"id":"20|opendoar____::40e0928728ca1ea6ebb147ad307fc7db"}
|
||||
{"dateoftransformation":"2018-11-12","originalId":["opendoar____::Burgas_Free_University"],"collectedfrom":[{"value":"OpenDOAR","key":"10|openaire____::47ce9e9f4fad46e732cff06419ecaabb"}],"organization":{"metadata":{"eclegalbody":{"value":"false"},"eclegalperson":{"value":"false"},"ecinternationalorganization":{"value":"false"},"legalshortname":{"value":"Бургаски свободен университет"},"ecresearchorganization":{"value":"false"},"ecnonprofit":{"value":"false"},"ecenterprise":{"value":"false"},"websiteurl":{"value":"http://www.bfu.bg/"},"ecnutscode":{"value":"false"},"ecinternationalorganizationeurinterests":{"value":"false"},"legalname":{"value":"Burgas Free University"},"country":{"classid":"BG","classname":"Bulgaria","schemename":"dnet:countries","schemeid":"dnet:countries"},"echighereducation":{"value":"false"},"ecsmevalidated":{"value":"false"}}},"dateofcollection":"2018-11-12","type":20,"id":"20|opendoar____::28a99bd2330504b0dfb6c44192757bde"}
|
||||
{"dateoftransformation":"2018-09-13","originalId":["opendoar____::Université_libre_de_Bruxelles"],"collectedfrom":[{"value":"OpenDOAR","key":"10|openaire____::47ce9e9f4fad46e732cff06419ecaabb"}],"organization":{"metadata":{"eclegalbody":{"value":"false"},"eclegalperson":{"value":"false"},"ecinternationalorganization":{"value":"false"},"ecresearchorganization":{"value":"false"},"ecnonprofit":{"value":"false"},"ecenterprise":{"value":"false"},"websiteurl":{"value":"http://www.ulb.ac.be/"},"ecnutscode":{"value":"false"},"ecinternationalorganizationeurinterests":{"value":"false"},"legalname":{"value":"Université libre de Bruxelles"},"country":{"classid":"BE","classname":"Belgium","schemename":"dnet:countries","schemeid":"dnet:countries"},"echighereducation":{"value":"false"},"ecsmevalidated":{"value":"false"}}},"dateofcollection":"2016-07-21","type":20,"id":"20|opendoar____::0e3d292f95a8f13fed04d7b3ac872b9f"}
|
||||
{"dateoftransformation":"2018-11-12","originalId":["opendoar____::Freie_Universitat_Berlin"],"collectedfrom":[{"value":"OpenDOAR","key":"10|openaire____::47ce9e9f4fad46e732cff06419ecaabb"}],"organization":{"metadata":{"eclegalbody":{"value":"false"},"eclegalperson":{"value":"false"},"ecinternationalorganization":{"value":"false"},"ecnonprofit":{"value":"false"},"ecresearchorganization":{"value":"false"},"ecenterprise":{"value":"false"},"ecnutscode":{"value":"false"},"ecinternationalorganizationeurinterests":{"value":"false"},"legalname":{"value":"Freie Universitat Berlin"},"country":{"classid":"DE","classname":"Germany","schemename":"dnet:countries","schemeid":"dnet:countries"},"echighereducation":{"value":"false"},"ecsmevalidated":{"value":"false"}}},"dateofcollection":"2018-11-12","type":20,"id":"20|opendoar____::5054b113a655361d929493a95d29e6f1"}
|
|
@ -0,0 +1,40 @@
|
|||
{
|
||||
"wf" : {
|
||||
"threshold" : "0.99",
|
||||
"dedupRun" : "001",
|
||||
"entityType" : "result",
|
||||
"subEntityType" : "resulttype",
|
||||
"subEntityValue" : "software",
|
||||
"orderField" : "title",
|
||||
"queueMaxSize" : "2000",
|
||||
"groupMaxSize" : "10",
|
||||
"slidingWindowSize" : "200",
|
||||
"rootBuilder" : [ "result", "resultProject_outcome_isProducedBy", "resultResult_publicationDataset_isRelatedTo", "resultResult_similarity_isAmongTopNSimilarDocuments", "resultResult_similarity_hasAmongTopNSimilarDocuments", "resultOrganization_affiliation_isAffiliatedWith", "resultResult_part_hasPart", "resultResult_part_isPartOf", "resultResult_supplement_isSupplementTo", "resultResult_supplement_isSupplementedBy", "resultResult_version_isVersionOf" ],
|
||||
"includeChildren" : "true"
|
||||
},
|
||||
"pace" : {
|
||||
"clustering" : [
|
||||
{ "name" : "ngrampairs", "fields" : [ "title" ], "params" : { "max" : "1", "ngramLen" : "3"} },
|
||||
{ "name" : "suffixprefix", "fields" : [ "title" ], "params" : { "max" : "1", "len" : "3" } },
|
||||
{ "name" : "lowercase", "fields" : [ "doi" ], "params" : { } },
|
||||
{ "name" : "urlclustering", "fields": [ "url" ], "params" : { } }
|
||||
],
|
||||
"strictConditions" : [
|
||||
{ "name" : "doiExactMatch", "fields": [ "doi" ] },
|
||||
{ "name" : "exactMatch", "fields" : [ "url", "documentationUrl" ] }
|
||||
],
|
||||
"conditions" : [
|
||||
{ "name" : "exactMatch", "fields" : ["resulttype"] }
|
||||
],
|
||||
"model" : [
|
||||
{ "name" : "doi", "algo" : "Null", "type" : "String", "weight" : "0.0", "ignoreMissing" : "true", "path" : "pid[qualifier#classid = {doi}]/value" },
|
||||
{ "name" : "title", "algo" : "LevensteinTitleIgnoreVersion", "type" : "String", "weight" : "1.0", "ignoreMissing" : "false", "path" : "result/metadata/title[qualifier#classid = {main title}]/value", "length" : 250, "size" : 5 },
|
||||
{ "name" : "url", "algo" : "Null", "type" : "String", "weight" : "0.0", "ignoreMissing" : "true", "path" : "result/instance/url" },
|
||||
{ "name" : "resulttype", "algo" : "Null", "type" : "String", "weight" : "0.0", "ignoreMissing" : "false", "path" : "result/metadata/resulttype/classid" },
|
||||
{ "name" : "documentationUrl", "algo" : "Null", "type" : "String", "weight" : "0.0", "ignoreMissing" : "false", "path" : "result/metadata/documentationUrl/value" }
|
||||
],
|
||||
"blacklists" : {
|
||||
|
||||
}
|
||||
}
|
||||
}
|
File diff suppressed because one or more lines are too long
|
@ -1,12 +1,11 @@
|
|||
package eu.dnetlib.pace;
|
||||
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.apache.oozie.client.OozieClient;
|
||||
import org.apache.oozie.client.OozieClientException;
|
||||
import org.apache.oozie.client.WorkflowJob;
|
||||
import org.junit.Test;
|
||||
|
||||
import java.io.*;
|
||||
import java.io.IOException;
|
||||
import java.util.Properties;
|
||||
|
||||
import static junit.framework.Assert.assertEquals;
|
||||
|
@ -28,14 +27,15 @@ public class DedupTestIT {
|
|||
conf.setProperty(OozieClient.APP_PATH, "hdfs://hadoop-rm1.garr-pa1.d4science.org:8020/user/michele.debonis/oozieJob/workflow.xml");
|
||||
conf.setProperty(OozieClient.USER_NAME, "michele.debonis");
|
||||
conf.setProperty("oozie.action.sharelib.for.spark", "spark2");
|
||||
conf.setProperty("oozie.use.system.libpath", "true");
|
||||
|
||||
// setting workflow parameters
|
||||
conf.setProperty("jobTracker", "hadoop-rm3.garr-pa1.d4science.org:8032");
|
||||
conf.setProperty("nameNode", "hdfs://hadoop-rm1.garr-pa1.d4science.org:8020");
|
||||
conf.setProperty("dedupConfiguration", prop.getProperty("dedup.configuration"));
|
||||
conf.setProperty("inputSpace", prop.getProperty("input.space"));
|
||||
// conf.setProperty("inputDir", "/usr/tucu/inputdir");
|
||||
// conf.setProperty("outputDir", "/usr/tucu/outputdir");
|
||||
conf.setProperty("outputPath", prop.getProperty("output"));
|
||||
conf.setProperty("statisticsPath", prop.getProperty("dedup.statistics"));
|
||||
|
||||
// submit and start the workflow job
|
||||
String jobId = wc.run(conf);
|
||||
|
@ -49,9 +49,10 @@ public class DedupTestIT {
|
|||
|
||||
// print the final status of the workflow job
|
||||
System.out.println(wc.getJobInfo(jobId));
|
||||
System.out.println("JOB LOG = " + wc.getJobLog(jobId));
|
||||
// System.out.println("JOB LOG = " + wc.getJobLog(jobId));
|
||||
|
||||
assertEquals(WorkflowJob.Status.SUCCEEDED, wc.getJobInfo(jobId).getStatus());
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
@ -66,14 +67,4 @@ public class DedupTestIT {
|
|||
return prop;
|
||||
}
|
||||
|
||||
static String readFromClasspath(final String filename) {
|
||||
final StringWriter sw = new StringWriter();
|
||||
try {
|
||||
IOUtils.copy(DedupTestIT.class.getResourceAsStream(filename), sw);
|
||||
return sw.toString();
|
||||
} catch (final IOException e) {
|
||||
throw new RuntimeException("cannot load resource from classpath: " + filename);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -1,17 +0,0 @@
|
|||
package eu.dnetlib.pace;
|
||||
|
||||
import eu.dnetlib.SparkLocalTest;
|
||||
import org.junit.Test;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
public class SparkTester {
|
||||
|
||||
@Test
|
||||
public void sparkLocalTest() throws IOException {
|
||||
|
||||
SparkLocalTest.main(new String[]{});
|
||||
}
|
||||
|
||||
|
||||
}
|
|
@ -1,2 +1,3 @@
|
|||
input.space = /eu/dnetlib/pace/organization.to.fix.json
|
||||
dedup.configuration = /eu/dnetlib/pace/org.curr.conf
|
||||
input.space = oozieJob/inputSpace/organization.to.fix.json
|
||||
dedup.configuration = oozieJob/dedupConfig/org.curr.conf
|
||||
output.base.name = oozieJob/output/orgdedup
|
|
@ -22,12 +22,17 @@ public class UrlClustering extends AbstractPaceFunctions implements ClusteringFu
|
|||
|
||||
@Override
|
||||
public Collection<String> apply(List<Field> fields) {
|
||||
return fields.stream()
|
||||
.filter(f -> !f.isEmpty())
|
||||
.map(Field::stringValue)
|
||||
.map(this::asUrl)
|
||||
.map(URL::getHost)
|
||||
.collect(Collectors.toCollection(HashSet::new));
|
||||
try {
|
||||
return fields.stream()
|
||||
.filter(f -> !f.isEmpty())
|
||||
.map(Field::stringValue)
|
||||
.map(this::asUrl)
|
||||
.map(URL::getHost)
|
||||
.collect(Collectors.toCollection(HashSet::new));
|
||||
}
|
||||
catch (IllegalStateException e){
|
||||
return new HashSet<>();
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -35,7 +40,7 @@ public class UrlClustering extends AbstractPaceFunctions implements ClusteringFu
|
|||
return null;
|
||||
}
|
||||
|
||||
private URL asUrl(final String value) {
|
||||
private URL asUrl(String value) {
|
||||
try {
|
||||
return new URL(value);
|
||||
} catch (MalformedURLException e) {
|
||||
|
@ -44,4 +49,5 @@ public class UrlClustering extends AbstractPaceFunctions implements ClusteringFu
|
|||
}
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
|
|
@ -35,11 +35,32 @@ public abstract class AbstractCondition extends AbstractPaceFunctions implements
|
|||
final Field va = a.values(fd.getName());
|
||||
final Field vb = b.values(fd.getName());
|
||||
|
||||
if ((va.isEmpty() || vb.isEmpty()) && fd.isIgnoreMissing()) {
|
||||
res.put(fd.getName(), new ConditionEval(cond, va, vb, 0));
|
||||
} else {
|
||||
if (fd.isIgnoreMissing()) {
|
||||
res.put(fd.getName(), verify(fd, va, vb));
|
||||
}
|
||||
else {
|
||||
if (va.isEmpty() || vb.isEmpty()) {
|
||||
res.put(fd.getName(), new ConditionEval(cond, va, vb, -1));
|
||||
}
|
||||
else {
|
||||
res.put(fd.getName(), verify(fd, va, vb));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
// // if ignoreMissing=true always return undefined (0) in case of missing
|
||||
// if ((va.isEmpty() || vb.isEmpty()) && fd.isIgnoreMissing()) {
|
||||
// res.put(fd.getName(), new ConditionEval(cond, va, vb, 0));
|
||||
// } else {
|
||||
// if (va.isEmpty()&&vb.isEmpty()) {
|
||||
// res.put(fd.getName(), new ConditionEval(cond, va, vb, -1));
|
||||
// }
|
||||
// else {
|
||||
// res.put(fd.getName(), verify(fd, va, vb));
|
||||
// }
|
||||
// }
|
||||
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
|
|
@ -21,6 +21,8 @@ public class DomainExactMatch extends ExactMatchIgnoreCase {
|
|||
|
||||
private URL asUrl(final String value) {
|
||||
try {
|
||||
if (value.isEmpty())
|
||||
return new URL("http://");
|
||||
return new URL(value);
|
||||
} catch (MalformedURLException e) {
|
||||
// should not happen as checked by pace typing
|
||||
|
|
|
@ -27,7 +27,14 @@ public class ExactMatch extends AbstractCondition {
|
|||
|
||||
int res;
|
||||
|
||||
if (StringUtils.isBlank(fa) && StringUtils.isBlank(fb)) {
|
||||
// if (StringUtils.isBlank(fa) && StringUtils.isBlank(fb)) {
|
||||
// res = 0;
|
||||
// } else {
|
||||
// res = fa.equals(fb) ? 1 : -1;
|
||||
// }
|
||||
|
||||
//if there is a blank, undefined result
|
||||
if (StringUtils.isBlank(fa) || StringUtils.isBlank(fb)) {
|
||||
res = 0;
|
||||
} else {
|
||||
res = fa.equals(fb) ? 1 : -1;
|
||||
|
|
|
@ -5,6 +5,7 @@ import java.util.List;
|
|||
import eu.dnetlib.pace.distance.eval.ConditionEval;
|
||||
import eu.dnetlib.pace.model.Field;
|
||||
import eu.dnetlib.pace.model.FieldDef;
|
||||
import org.apache.commons.lang.StringUtils;
|
||||
|
||||
/**
|
||||
* The Class ExactMatch.
|
||||
|
@ -24,7 +25,15 @@ public class ExactMatchIgnoreCase extends AbstractCondition {
|
|||
final String fa = getValue(a);
|
||||
final String fb = getValue(b);
|
||||
|
||||
return new ConditionEval(cond, a, b, fa.equalsIgnoreCase(fb) ? 1 : -1);
|
||||
int res;
|
||||
|
||||
if (StringUtils.isBlank(fa) || StringUtils.isBlank(fb)) {
|
||||
res = 0;
|
||||
} else {
|
||||
res = fa.equalsIgnoreCase(fb) ? 1 : -1;
|
||||
}
|
||||
|
||||
return new ConditionEval(cond, a, b, res);
|
||||
}
|
||||
|
||||
protected String getValue(final Field f) {
|
||||
|
|
|
@ -6,7 +6,6 @@ import eu.dnetlib.pace.condition.ConditionAlgo;
|
|||
import eu.dnetlib.pace.model.ClusteringDef;
|
||||
import eu.dnetlib.pace.model.CondDef;
|
||||
import eu.dnetlib.pace.model.FieldDef;
|
||||
import eu.dnetlib.pace.model.TreeNodeDef;
|
||||
import eu.dnetlib.pace.util.PaceResolver;
|
||||
import org.apache.commons.collections.CollectionUtils;
|
||||
import org.codehaus.jackson.annotate.JsonIgnore;
|
||||
|
@ -24,11 +23,9 @@ public class PaceConfig implements Serializable {
|
|||
private List<ClusteringDef> clustering;
|
||||
private Map<String, List<String>> blacklists;
|
||||
|
||||
private Map<String, TreeNodeDef> decisionTree;
|
||||
|
||||
private Map<String, FieldDef> modelMap;
|
||||
|
||||
public static PaceResolver paceResolver;
|
||||
// public PaceResolver paceResolver;
|
||||
|
||||
public PaceConfig() {}
|
||||
|
||||
|
@ -38,7 +35,7 @@ public class PaceConfig implements Serializable {
|
|||
modelMap.put(fd.getName(), fd);
|
||||
}
|
||||
|
||||
paceResolver = new PaceResolver();
|
||||
// paceResolver = new PaceResolver();
|
||||
}
|
||||
|
||||
public List<FieldDef> getModel() {
|
||||
|
@ -61,14 +58,6 @@ public class PaceConfig implements Serializable {
|
|||
return conditions;
|
||||
}
|
||||
|
||||
public Map<String, TreeNodeDef> getDecisionTree() {
|
||||
return decisionTree;
|
||||
}
|
||||
|
||||
public void setDecisionTree(Map<String, TreeNodeDef> decisionTree) {
|
||||
this.decisionTree = decisionTree;
|
||||
}
|
||||
|
||||
@JsonIgnore
|
||||
public List<ConditionAlgo> getConditionAlgos() {
|
||||
return asConditionAlgos(getConditions());
|
||||
|
|
|
@ -32,6 +32,9 @@ public class LevensteinTitleIgnoreVersion extends SecondStringDistanceAlgo {
|
|||
ca = ca.replaceAll("\\d", "").replaceAll(getRomans(ca), "").trim();
|
||||
cb = cb.replaceAll("\\d", "").replaceAll(getRomans(cb), "").trim();
|
||||
|
||||
ca = filterAllStopWords(ca);
|
||||
cb = filterAllStopWords(cb);
|
||||
|
||||
final String cca = finalCleanup(ca);
|
||||
final String ccb = finalCleanup(cb);
|
||||
|
||||
|
|
|
@ -2,12 +2,17 @@ package eu.dnetlib.pace.model;
|
|||
|
||||
import java.io.IOException;
|
||||
import java.io.Serializable;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
import eu.dnetlib.pace.clustering.*;
|
||||
import eu.dnetlib.pace.config.PaceConfig;
|
||||
import eu.dnetlib.pace.util.PaceException;
|
||||
import eu.dnetlib.pace.util.PaceResolver;
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.codehaus.jackson.map.ObjectMapper;
|
||||
|
||||
public class ClusteringDef implements Serializable {
|
||||
|
@ -18,6 +23,8 @@ public class ClusteringDef implements Serializable {
|
|||
|
||||
private Map<String, Integer> params;
|
||||
|
||||
PaceResolver paceResolver = new PaceResolver();
|
||||
|
||||
public ClusteringDef() {}
|
||||
|
||||
public String getName() {
|
||||
|
@ -30,7 +37,7 @@ public class ClusteringDef implements Serializable {
|
|||
|
||||
public ClusteringFunction clusteringFunction() {
|
||||
try {
|
||||
return PaceConfig.paceResolver.getClusteringFunction(getName(), params);
|
||||
return paceResolver.getClusteringFunction(getName(), params);
|
||||
} catch (PaceException e) {
|
||||
e.printStackTrace();
|
||||
return null;
|
||||
|
|
|
@ -7,6 +7,7 @@ import java.util.List;
|
|||
import eu.dnetlib.pace.condition.*;
|
||||
import eu.dnetlib.pace.config.PaceConfig;
|
||||
import eu.dnetlib.pace.util.PaceException;
|
||||
import eu.dnetlib.pace.util.PaceResolver;
|
||||
import org.codehaus.jackson.map.ObjectMapper;
|
||||
|
||||
public class CondDef implements Serializable {
|
||||
|
@ -15,10 +16,12 @@ public class CondDef implements Serializable {
|
|||
|
||||
private List<String> fields;
|
||||
|
||||
PaceResolver paceResolver = new PaceResolver();
|
||||
|
||||
public CondDef() {}
|
||||
|
||||
public ConditionAlgo conditionAlgo(final List<FieldDef> fields){
|
||||
return PaceConfig.paceResolver.getConditionAlgo(getName(), fields);
|
||||
return paceResolver.getConditionAlgo(getName(), fields);
|
||||
}
|
||||
|
||||
public String getName() {
|
||||
|
|
|
@ -6,6 +6,7 @@ import com.google.gson.Gson;
|
|||
import eu.dnetlib.pace.config.PaceConfig;
|
||||
import eu.dnetlib.pace.config.Type;
|
||||
import eu.dnetlib.pace.distance.DistanceAlgo;
|
||||
import eu.dnetlib.pace.util.PaceResolver;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.util.HashMap;
|
||||
|
@ -33,6 +34,8 @@ public class FieldDef implements Serializable {
|
|||
|
||||
private double weight;
|
||||
|
||||
PaceResolver paceResolver = new PaceResolver();
|
||||
|
||||
/**
|
||||
* Sets maximum size for the repeatable fields in the model. -1 for unbounded size.
|
||||
*/
|
||||
|
@ -85,7 +88,7 @@ public class FieldDef implements Serializable {
|
|||
params.put("length", getLength());
|
||||
*/
|
||||
params.put("weight", getWeight());
|
||||
return PaceConfig.paceResolver.getDistanceAlgo(getAlgo(), params);
|
||||
return paceResolver.getDistanceAlgo(getAlgo(), params);
|
||||
}
|
||||
|
||||
public boolean isIgnoreMissing() {
|
||||
|
|
|
@ -1,145 +0,0 @@
|
|||
package eu.dnetlib.pace.model;
|
||||
|
||||
import eu.dnetlib.pace.config.PaceConfig;
|
||||
import eu.dnetlib.pace.tree.Comparator;
|
||||
import eu.dnetlib.pace.tree.support.AggType;
|
||||
import eu.dnetlib.pace.util.PaceException;
|
||||
import org.apache.commons.math3.stat.descriptive.DescriptiveStatistics;
|
||||
import org.codehaus.jackson.map.ObjectMapper;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.Serializable;
|
||||
import java.util.List;
|
||||
|
||||
public class TreeNodeDef implements Serializable {
|
||||
|
||||
private List<FieldConf> fields; //list of fields involved in the tree node (contains comparators to be used and field on which apply the comparator)
|
||||
private AggType aggregation; //how to aggregate similarity measures for every field
|
||||
|
||||
private double threshold; //threshold on the similarity measure
|
||||
|
||||
private String positive; //specifies the next node in case of positive result: similarity>=th
|
||||
private String negative; //specifies the next node in case of negative result: similarity<th
|
||||
private String undefined; //specifies the next node in case of undefined result: similarity=-1
|
||||
|
||||
boolean ignoreMissing = true; //specifies what to do in case of missing field
|
||||
|
||||
public TreeNodeDef() {
|
||||
}
|
||||
|
||||
//compute the similarity measure between two documents
|
||||
public double evaluate(MapDocument doc1, MapDocument doc2) {
|
||||
|
||||
DescriptiveStatistics stats = new DescriptiveStatistics();
|
||||
|
||||
for (FieldConf fieldConf : fields) {
|
||||
|
||||
double weight = fieldConf.getWeight();
|
||||
|
||||
double similarity = comparator(fieldConf).compare(doc1.getFieldMap().get(fieldConf.getField()), doc2.getFieldMap().get(fieldConf.getField()));
|
||||
|
||||
//if similarity is -1 means that a comparator gave undefined, do not add result to the stats
|
||||
if (similarity != -1) {
|
||||
stats.addValue(weight * similarity);
|
||||
}
|
||||
else {
|
||||
if (!ignoreMissing) //if the missing value has not to be ignored, return -1
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
switch (aggregation){
|
||||
|
||||
case AVG:
|
||||
return stats.getMean();
|
||||
case SUM:
|
||||
return stats.getSum();
|
||||
case MAX:
|
||||
return stats.getMax();
|
||||
case MIN:
|
||||
return stats.getMin();
|
||||
default:
|
||||
return 0.0;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
private Comparator comparator(final FieldConf field){
|
||||
|
||||
return PaceConfig.paceResolver.getComparator(field.getComparator(), field.getParams());
|
||||
}
|
||||
|
||||
public TreeNodeDef(List<FieldConf> fields, double threshold, AggType aggregation, String positive, String negative, String undefined) {
|
||||
this.fields = fields;
|
||||
this.threshold = threshold;
|
||||
this.aggregation = aggregation;
|
||||
this.positive = positive;
|
||||
this.negative = negative;
|
||||
this.undefined = undefined;
|
||||
}
|
||||
|
||||
public boolean isIgnoreMissing() {
|
||||
return ignoreMissing;
|
||||
}
|
||||
|
||||
public void setIgnoreMissing(boolean ignoreMissing) {
|
||||
this.ignoreMissing = ignoreMissing;
|
||||
}
|
||||
|
||||
public List<FieldConf> getFields() {
|
||||
return fields;
|
||||
}
|
||||
|
||||
public void setFields(List<FieldConf> fields) {
|
||||
this.fields = fields;
|
||||
}
|
||||
|
||||
public double getThreshold() {
|
||||
return threshold;
|
||||
}
|
||||
|
||||
public void setThreshold(double threshold) {
|
||||
this.threshold = threshold;
|
||||
}
|
||||
|
||||
public AggType getAggregation() {
|
||||
return aggregation;
|
||||
}
|
||||
|
||||
public void setAggregation(AggType aggregation) {
|
||||
this.aggregation = aggregation;
|
||||
}
|
||||
|
||||
public String getPositive() {
|
||||
return positive;
|
||||
}
|
||||
|
||||
public void setPositive(String positive) {
|
||||
this.positive = positive;
|
||||
}
|
||||
|
||||
public String getNegative() {
|
||||
return negative;
|
||||
}
|
||||
|
||||
public void setNegative(String negative) {
|
||||
this.negative = negative;
|
||||
}
|
||||
|
||||
public String getUndefined() {
|
||||
return undefined;
|
||||
}
|
||||
|
||||
public void setUndefined(String undefined) {
|
||||
this.undefined = undefined;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
try {
|
||||
return new ObjectMapper().writeValueAsString(this);
|
||||
} catch (IOException e) {
|
||||
throw new PaceException("Impossible to convert to JSON: ", e);
|
||||
}
|
||||
}
|
||||
}
|
|
@ -1,33 +0,0 @@
|
|||
package eu.dnetlib.pace.tree;
|
||||
|
||||
import eu.dnetlib.pace.model.Field;
|
||||
import org.apache.commons.lang.StringUtils;
|
||||
|
||||
import java.util.Map;
|
||||
|
||||
abstract class AbstractComparator implements Comparator {
|
||||
|
||||
Map<String, Number> params;
|
||||
|
||||
public AbstractComparator(Map<String, Number> params){
|
||||
this.params = params;
|
||||
}
|
||||
|
||||
@Override
|
||||
public double compare(Field a, Field b) {
|
||||
return 0.0;
|
||||
}
|
||||
|
||||
public static double stringSimilarity(String s1, String s2) {
|
||||
String longer = s1, shorter = s2;
|
||||
if (s1.length() < s2.length()) { // longer should always have greater length
|
||||
longer = s2; shorter = s1;
|
||||
}
|
||||
int longerLength = longer.length();
|
||||
if (longerLength == 0) //if strings have 0 length return 0 (no similarity)
|
||||
return 0.0;
|
||||
|
||||
return (longerLength - StringUtils.getLevenshteinDistance(longer, shorter)) / (double) longerLength;
|
||||
}
|
||||
|
||||
}
|
|
@ -1,42 +0,0 @@
|
|||
package eu.dnetlib.pace.tree;
|
||||
|
||||
import eu.dnetlib.pace.model.Field;
|
||||
import eu.dnetlib.pace.model.FieldList;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
@ComparatorClass("coauthorsMatch")
|
||||
public class CoauthorsMatch extends AbstractComparator {
|
||||
|
||||
public CoauthorsMatch(Map<String, Number> params) {
|
||||
super(params);
|
||||
}
|
||||
|
||||
@Override
|
||||
public double compare(Field a, Field b) {
|
||||
|
||||
final List<String> c1 = ((FieldList) a).stringList();
|
||||
final List<String> c2 = ((FieldList) b).stringList();
|
||||
|
||||
int size1 = c1.size();
|
||||
int size2 = c2.size();
|
||||
|
||||
//few coauthors or too many coauthors
|
||||
if (size1 < params.getOrDefault("minCoauthors", 5).intValue() || size2 < params.getOrDefault("minCoauthors", 5).intValue() || (size1+size2 > params.getOrDefault("maxCoauthors", 200).intValue()))
|
||||
return -1;
|
||||
|
||||
int coauthorship = 0;
|
||||
for (String ca1: c1){
|
||||
|
||||
for (String ca2: c2){
|
||||
|
||||
if (stringSimilarity(ca1.replaceAll("\\.","").replaceAll(" ",""), ca2.replaceAll("\\.","").replaceAll(" ",""))>= params.getOrDefault("simTh", 0.7).doubleValue())
|
||||
coauthorship++;
|
||||
}
|
||||
}
|
||||
|
||||
return coauthorship;
|
||||
|
||||
}
|
||||
}
|
|
@ -1,10 +0,0 @@
|
|||
package eu.dnetlib.pace.tree;
|
||||
|
||||
import eu.dnetlib.pace.model.Field;
|
||||
|
||||
public interface Comparator {
|
||||
|
||||
//compare two fields and returns: the distace measure, -1 if undefined
|
||||
public double compare(Field a, Field b);
|
||||
|
||||
}
|
|
@ -1,14 +0,0 @@
|
|||
package eu.dnetlib.pace.tree;
|
||||
|
||||
import java.lang.annotation.ElementType;
|
||||
import java.lang.annotation.Retention;
|
||||
import java.lang.annotation.RetentionPolicy;
|
||||
import java.lang.annotation.Target;
|
||||
|
||||
/**
 * Type-level annotation, retained at runtime, that attaches a name to a class.
 */
@Target(ElementType.TYPE)
@Retention(RetentionPolicy.RUNTIME)
public @interface ComparatorClass {

    /** The name attached to the annotated class. */
    String value();
}
|
||||
|
|
@ -1,25 +0,0 @@
|
|||
package eu.dnetlib.pace.tree;
|
||||
|
||||
import eu.dnetlib.pace.model.Field;
|
||||
|
||||
import java.util.Map;
|
||||
|
||||
@ComparatorClass("exactMatch")
|
||||
public class ExactMatch extends AbstractComparator {
|
||||
|
||||
public ExactMatch(Map<String, Number> params) {
|
||||
super(params);
|
||||
}
|
||||
|
||||
@Override
|
||||
public double compare(Field a, Field b) {
|
||||
|
||||
if (a.stringValue().isEmpty() || b.stringValue().isEmpty())
|
||||
return -1;
|
||||
else if (a.stringValue().equals(b.stringValue()))
|
||||
return 1;
|
||||
else
|
||||
return 0;
|
||||
}
|
||||
|
||||
}
|
|
@ -1,31 +0,0 @@
|
|||
package eu.dnetlib.pace.tree;
|
||||
|
||||
import eu.dnetlib.pace.model.Field;
|
||||
|
||||
import java.util.Map;
|
||||
|
||||
@ComparatorClass("similar")
|
||||
public class SimilarMatch extends AbstractComparator {
|
||||
|
||||
public SimilarMatch(Map<String, Number> params) {
|
||||
super(params);
|
||||
}
|
||||
|
||||
@Override
|
||||
public double compare(Field a, Field b) {
|
||||
|
||||
if (a.stringValue().isEmpty() || b.stringValue().isEmpty())
|
||||
return -1; //undefined if one name is missing
|
||||
|
||||
//take only the first name
|
||||
String firstname1 = a.stringValue().split(" ")[0];
|
||||
String firstname2 = b.stringValue().split(" ")[0];
|
||||
|
||||
if (firstname1.toLowerCase().trim().replaceAll("\\.","").replaceAll("\\s","").length()<=2 || firstname2.toLowerCase().replaceAll("\\.", "").replaceAll("\\s","").length()<=2) //too short names (considered similar)
|
||||
return 1;
|
||||
|
||||
return stringSimilarity(firstname1,firstname2);
|
||||
|
||||
}
|
||||
|
||||
}
|
|
@ -1,36 +0,0 @@
|
|||
package eu.dnetlib.pace.tree;
|
||||
|
||||
import eu.dnetlib.pace.model.Field;
|
||||
import eu.dnetlib.pace.model.FieldListImpl;
|
||||
|
||||
import java.util.Map;
|
||||
|
||||
@ComparatorClass("topicsMatch")
|
||||
public class TopicsMatch extends AbstractComparator {
|
||||
|
||||
public TopicsMatch(Map<String, Number> params) {
|
||||
super(params);
|
||||
}
|
||||
|
||||
@Override
|
||||
public double compare(Field a, Field b) {
|
||||
|
||||
double[] t1 = ((FieldListImpl) a).doubleArray();
|
||||
double[] t2 = ((FieldListImpl) b).doubleArray();
|
||||
|
||||
if (t1 == null || t2 == null)
|
||||
return -1; //0 similarity if no topics in one of the authors or in both
|
||||
|
||||
double area = 0.0;
|
||||
|
||||
double min_value[] = new double[t1.length];
|
||||
for(int i=0; i<t1.length; i++){
|
||||
|
||||
min_value[i] = (t1[i]<t2[i])?t1[i]:t2[i];
|
||||
area += min_value[i];
|
||||
}
|
||||
|
||||
return area;
|
||||
|
||||
}
|
||||
}
|
|
@ -1,22 +0,0 @@
|
|||
package eu.dnetlib.pace.tree;
|
||||
|
||||
import eu.dnetlib.pace.model.Field;
|
||||
import eu.dnetlib.pace.model.FieldList;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
@ComparatorClass("undefined")
|
||||
public class UndefinedNode implements Comparator {
|
||||
|
||||
Map<String, Number> params;
|
||||
|
||||
@Override
|
||||
public double compare(Field a, Field b) {
|
||||
|
||||
final List<String> sa = ((FieldList) a).stringList();
|
||||
final List<String> sb = ((FieldList) b).stringList();
|
||||
|
||||
return 0;
|
||||
}
|
||||
}
|
|
@ -1,21 +0,0 @@
|
|||
package eu.dnetlib.pace.tree.support;
|
||||
|
||||
import eu.dnetlib.pace.util.PaceException;
|
||||
|
||||
public enum AggType {
|
||||
|
||||
AVG,
|
||||
SUM,
|
||||
MAX,
|
||||
MIN;
|
||||
|
||||
public static AggType getEnum(String value) {
|
||||
|
||||
try {
|
||||
return AggType.valueOf(value);
|
||||
}
|
||||
catch (IllegalArgumentException e) {
|
||||
throw new PaceException("Undefined aggregation type", e);
|
||||
}
|
||||
}
|
||||
}
|
|
@ -1,20 +0,0 @@
|
|||
package eu.dnetlib.pace.tree.support;
|
||||
|
||||
/**
 * Kinds of match.
 */
public enum MatchType {

    ORCID_MATCH,
    COAUTHORS_MATCH,
    TOPICS_MATCH,
    NO_MATCH,
    UNDEFINED;

    /**
     * Returns the constant with exactly the given name, falling back to
     * {@link #UNDEFINED} instead of failing.
     *
     * @param value the constant name; may be null
     * @return the matching constant, or {@code UNDEFINED} if {@code value} is null
     *         or is not the name of a constant
     */
    public static MatchType getEnum(String value) {

        if (value == null) {
            // valueOf(null) throws NullPointerException rather than IllegalArgumentException
            return UNDEFINED;
        }

        try {
            return MatchType.valueOf(value);
        }
        catch (IllegalArgumentException ignored) {
            // unknown names are deliberately mapped to UNDEFINED
            return UNDEFINED;
        }
    }
}
|
|
@ -7,8 +7,6 @@ import eu.dnetlib.pace.condition.ConditionClass;
|
|||
import eu.dnetlib.pace.distance.DistanceAlgo;
|
||||
import eu.dnetlib.pace.distance.DistanceClass;
|
||||
import eu.dnetlib.pace.model.FieldDef;
|
||||
import eu.dnetlib.pace.tree.Comparator;
|
||||
import eu.dnetlib.pace.tree.ComparatorClass;
|
||||
import org.reflections.Reflections;
|
||||
|
||||
import java.io.Serializable;
|
||||
|
@ -22,7 +20,6 @@ public class PaceResolver implements Serializable {
|
|||
private final Map<String, Class<ClusteringFunction>> clusteringFunctions;
|
||||
private final Map<String, Class<ConditionAlgo>> conditionAlgos;
|
||||
private final Map<String, Class<DistanceAlgo>> distanceAlgos;
|
||||
private final Map<String, Class<Comparator>> comparators;
|
||||
|
||||
public PaceResolver() {
|
||||
|
||||
|
@ -37,10 +34,6 @@ public class PaceResolver implements Serializable {
|
|||
this.distanceAlgos = new Reflections("eu.dnetlib").getTypesAnnotatedWith(DistanceClass.class).stream()
|
||||
.filter(DistanceAlgo.class::isAssignableFrom)
|
||||
.collect(Collectors.toMap(cl -> cl.getAnnotation(DistanceClass.class).value(), cl -> (Class<DistanceAlgo>)cl));
|
||||
|
||||
this.comparators = new Reflections("eu.dnetlib").getTypesAnnotatedWith(ComparatorClass.class).stream()
|
||||
.filter(Comparator.class::isAssignableFrom)
|
||||
.collect(Collectors.toMap(cl -> cl.getAnnotation(ComparatorClass.class).value(), cl -> (Class<Comparator>) cl));
|
||||
}
|
||||
|
||||
public ClusteringFunction getClusteringFunction(String name, Map<String, Integer> params) throws PaceException {
|
||||
|
@ -67,12 +60,4 @@ public class PaceResolver implements Serializable {
|
|||
}
|
||||
}
|
||||
|
||||
public Comparator getComparator(String name, Map<String, Number> params) throws PaceException {
|
||||
try {
|
||||
return comparators.get(name).getDeclaredConstructor(Map.class).newInstance(params);
|
||||
} catch (InstantiationException | IllegalAccessException | InvocationTargetException | NoSuchMethodException | NullPointerException e) {
|
||||
throw new PaceException(name + " not found ", e);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
key::1;university;università;università studi;universitario;universitaria;université;universitaire;universitaires;universidad;universitade;Universität;Uniwersytet;университет;universiteit;πανεπιστήμιο
|
||||
key::1;university;università;università studi;universitario;universitaria;université;universitaire;universitaires;universidad;universitade;Universität;universitaet;Uniwersytet;университет;universiteit;πανεπιστήμιο
|
||||
key::2;studies;studi;études;estudios;estudos;Studien;studia;исследования;studies;σπουδές
|
||||
key::3;advanced;superiore;supérieur;supérieure;supérieurs;supérieures;avancado;avancados;fortgeschrittene;fortgeschritten;zaawansowany;передовой;gevorderd;gevorderde;προχωρημένος;προχωρημένη;προχωρημένο;προχωρημένες;προχωρημένα
|
||||
key::4;institute;istituto;institut;instituto;instituto;Institut;instytut;институт;instituut;ινστιτούτο
|
||||
|
|
|
|
@ -48,9 +48,10 @@ public class DistanceAlgoTest extends AbstractPaceFunctions {
|
|||
@Test
|
||||
public void testJaroWinklerNormalizedName() {
|
||||
final JaroWinklerNormalizedName jaroWinklerNormalizedName = new JaroWinklerNormalizedName(params);
|
||||
double result = jaroWinklerNormalizedName.distance("Universita di Pisa", "Universita di Parma");
|
||||
double result = jaroWinklerNormalizedName.distance("Free University of Bozen-Bolzano", "University of the Free State");
|
||||
|
||||
assertEquals(result, 0.0);
|
||||
System.out.println("result = " + result);
|
||||
assertEquals(1.0, result);
|
||||
}
|
||||
|
||||
@Test
|
||||
|
|
Loading…
Reference in New Issue