From 3c6f8d1e44948c5cf77b44324ec3781e65211467 Mon Sep 17 00:00:00 2001 From: miconis Date: Mon, 8 Jul 2019 11:01:49 +0200 Subject: [PATCH] bug fixing in the keywordsclustering class --- .../dnetlib/pace/clustering/KeywordsClustering.java | 9 +++++---- .../eu/dnetlib/pace/common/AbstractPaceFunctions.java | 4 ++-- .../pace/clustering/ClusteringFunctionTest.java | 7 +++++++ release.properties | 11 +++++++++++ 4 files changed, 25 insertions(+), 6 deletions(-) create mode 100644 release.properties diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/KeywordsClustering.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/KeywordsClustering.java index 1aac6c8a5..dfd59384b 100644 --- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/KeywordsClustering.java +++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/clustering/KeywordsClustering.java @@ -19,16 +19,17 @@ public class KeywordsClustering extends AbstractClusteringFunction { @Override protected Collection doApply(String s) { - List keywords = getCodes(s, translationMap, params.getOrDefault("windowSize", 4)); - List cities = getCodes(s, cityMap, params.getOrDefault("windowSize", 4)); + //takes city codes and keywords codes without duplicates + Set keywords = getCodes(s, translationMap, params.getOrDefault("windowSize", 4)); + Set cities = getCodes(s, cityMap, params.getOrDefault("windowSize", 4)); + //list of combination to return as result final Collection combinations = new LinkedHashSet(); - int size = 0; for (String keyword: keywords){ for (String city: cities) { combinations.add(keyword+"-"+city); - if (++size>params.getOrDefault("max", 2)) { + if (combinations.size()>=params.getOrDefault("max", 2)) { return combinations; } } diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/common/AbstractPaceFunctions.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/common/AbstractPaceFunctions.java index 8c99342a7..748eec9a6 100644 --- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/common/AbstractPaceFunctions.java +++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/common/AbstractPaceFunctions.java @@ -320,7 +320,7 @@ public abstract class AbstractPaceFunctions { } //get the list of codes into the input string - public List getCodes(String s1, Map translationMap, int windowSize){ + public Set getCodes(String s1, Map translationMap, int windowSize){ String s = cleanup(s1); @@ -328,7 +328,7 @@ public abstract class AbstractPaceFunctions { List tokens = Arrays.asList(s.toLowerCase().split(" ")); - List codes = new ArrayList<>(); + Set codes = new HashSet<>(); if (tokens.size()