diff --git a/dnet-dedup-test/src/test/java/eu/dnetlib/pace/DedupLocalTest.java b/dnet-dedup-test/src/test/java/eu/dnetlib/pace/DedupLocalTest.java index a5eb3cb..62a56c8 100644 --- a/dnet-dedup-test/src/test/java/eu/dnetlib/pace/DedupLocalTest.java +++ b/dnet-dedup-test/src/test/java/eu/dnetlib/pace/DedupLocalTest.java @@ -6,10 +6,8 @@ import eu.dnetlib.pace.utils.Utility; import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.sql.SparkSession; import org.junit.Before; -import org.junit.Ignore; import org.junit.Test; -@Ignore public class DedupLocalTest extends DedupTestUtils { SparkSession spark; diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/WordsSuffixPrefix.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/WordsSuffixPrefix.java index 1e94b34..6086ac0 100644 --- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/WordsSuffixPrefix.java +++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/WordsSuffixPrefix.java @@ -22,21 +22,6 @@ public class WordsSuffixPrefix extends AbstractClusteringFunction { private Collection suffixPrefix(String s, int len, int max) { final int words = s.split(" ").length; - - // adjust the token length according to the number of words - switch (words) { - case 1: - return Sets.newLinkedHashSet(); - case 2: - return doSuffixPrefix(s, len+2, max, words); - case 3: - return doSuffixPrefix(s, len+1, max, words); - default: - return doSuffixPrefix(s, len, max, words); - } - } - - private Collection doSuffixPrefix(String s, int len, int max, int words) { final Set bigrams = Sets.newLinkedHashSet(); int i = 0; while (++i < s.length() && bigrams.size() < max) { @@ -54,4 +39,4 @@ public class WordsSuffixPrefix extends AbstractClusteringFunction { return bigrams; } -} \ No newline at end of file +} diff --git a/pom.xml.releaseBackup b/pom.xml.releaseBackup new file mode 100644 index 0000000..d840af1 --- /dev/null +++ b/pom.xml.releaseBackup @@ -0,0 +1,333 @@ + + + + 4.0.0 + + eu.dnetlib + dnet-dedup + 4.0.2-SNAPSHOT + + pom + + http://www.d-net.research-infrastructures.eu + + + + The Apache Software License, Version 2.0 + http://www.apache.org/licenses/LICENSE-2.0.txt + repo + A business-friendly OSS license + + + + + scm:git:https://code-repo.d4science.org/D-Net/dnet-dedup.git + HEAD + + + + dnet-pace-core + dnet-dedup-test + + + + Redmine + https://issue.openaire.research-infrastructures.eu/projects/openaire + + + + + + + dnet45-releases + D-Net 45 Releases + http://maven.research-infrastructures.eu/nexus/content/repositories/dnet45-releases + default + + + + + + dnet-deps + dnet-dependencies + http://maven.research-infrastructures.eu/nexus/content/repositories/dnet-deps + default + + + dnet45-releases + D-Net 45 Releases + http://maven.research-infrastructures.eu/nexus/content/repositories/dnet45-releases + default + + true + + + + dnet45-snapshots + D-Net 45 Snapshots + http://maven.research-infrastructures.eu/nexus/content/repositories/dnet45-snapshots + default + + true + + + + + cloudera + Cloudera Repository + https://repository.cloudera.com/artifactory/cloudera-repos + + true + + + false + + + + + target + target/classes + ${project.artifactId}-${project.version} + target/test-classes + + + + + + + + + + + org.apache.maven.plugins + maven-compiler-plugin + 3.6.0 + + 1.8 + 1.8 + ${project.build.sourceEncoding} + + + + + org.apache.maven.plugins + maven-jar-plugin + 3.0.2 + + + + org.apache.maven.plugins + maven-source-plugin + 3.0.1 + + + attach-sources + verify + + jar-no-fork + + + + + + + org.apache.maven.plugins + maven-surefire-plugin + 2.19.1 + + true + + + + + org.apache.maven.plugins + maven-javadoc-plugin + 2.10.4 + + true + + + + + org.apache.maven.plugins + maven-dependency-plugin + 3.0.0 + + + + org.apache.maven.plugins + maven-failsafe-plugin + 2.13 + + + integration-test + + integration-test + + + + verify + + verify + + + + + + + + + + + + org.apache.maven.plugins + maven-release-plugin + 2.5.3 + + + + + + + + + edu.cmu + secondstring + 1.0.0 + + + org.antlr + stringtemplate + 3.2 + + + + com.fasterxml.jackson.core + jackson-databind + ${jackson.version} + + + + com.fasterxml.jackson.dataformat + jackson-dataformat-xml + ${jackson.version} + + + com.fasterxml.jackson.module + jackson-module-jsonSchema + ${jackson.version} + + + + + + org.apache.commons + commons-math3 + 3.6.1 + + + + com.google.guava + guava + ${google.guava.version} + + + com.google.code.gson + gson + ${google.gson.version} + + + + org.apache.commons + commons-lang3 + ${commons.lang.version} + + + + commons-io + commons-io + ${commons.io.version} + + + commons-collections + commons-collections + ${commons.collections.version} + + + commons-logging + commons-logging + ${commons.logging.version} + + + org.apache.spark + spark-core_2.11 + ${spark.version} + provided + + + org.apache.spark + spark-graphx_2.11 + ${spark.version} + provided + + + org.apache.spark + spark-sql_2.11 + ${spark.version} + provided + + + junit + junit + ${junit.version} + test + + + org.reflections + reflections + 0.9.10 + + + + org.scala-lang + scala-library + ${scala.version} + + + + org.apache.oozie + oozie-client + 5.1.0 + + + com.jayway.jsonpath + json-path + 2.4.0 + + + + + + + + + UTF-8 + UTF-8 + + 2.2.2 + 15.0 + + 2.2.0 + 2.6.6 + + 3.5 + 2.4 + 3.2.1 + 1.1.3 + + 4.9 + 2.11.8 + + false + + diff --git a/release.properties b/release.properties new file mode 100644 index 0000000..70a3c40 --- /dev/null +++ b/release.properties @@ -0,0 +1,22 @@ +#release configuration +#Thu Jul 02 17:06:39 CEST 2020 +scm.commentPrefix=[maven-release-plugin] +pushChanges=true +project.rel.eu.dnetlib\:dnet-dedup-test=4.0.2 +scm.tag=dnet-dedup-4.0.2 +remoteTagging=true +project.scm.eu.dnetlib\:dnet-dedup-test.empty=true +projectVersionPolicyId=default +scm.url=scm\:git\:https\://code-repo.d4science.org/D-Net/dnet-dedup.git +scm.tagNameFormat=@{project.artifactId}-@{project.version} +project.rel.eu.dnetlib\:dnet-dedup=4.0.2 +project.dev.eu.dnetlib\:dnet-pace-core=4.0.3-SNAPSHOT +preparationGoals=clean verify +project.scm.eu.dnetlib\:dnet-dedup.tag=HEAD +project.scm.eu.dnetlib\:dnet-dedup.developerConnection=scm\:git\:https\://code-repo.d4science.org/D-Net/dnet-dedup.git +exec.snapshotReleasePluginAllowed=false +project.dev.eu.dnetlib\:dnet-dedup=4.0.3-SNAPSHOT +project.scm.eu.dnetlib\:dnet-pace-core.empty=true +project.dev.eu.dnetlib\:dnet-dedup-test=4.0.3-SNAPSHOT +completedPhase=end-release +project.rel.eu.dnetlib\:dnet-pace-core=4.0.2