Master branch updates from beta September 2023 #337
|
@ -25,7 +25,7 @@ public class KeywordsClustering extends AbstractClusteringFunction {
|
|||
//list of combination to return as result
|
||||
final Collection<String> combinations = new LinkedHashSet<String>();
|
||||
|
||||
for (String keyword: keywordsToCodes(keywords)){
|
||||
for (String keyword: keywordsToCodes(keywords, conf.translationMap())){
|
||||
for (String city: citiesToCodes(cities)) {
|
||||
combinations.add(keyword+"-"+city);
|
||||
if (combinations.size()>=params.getOrDefault("max", 2)) {
|
||||
|
|
|
@ -17,10 +17,7 @@ import java.io.IOException;
|
|||
import java.io.StringWriter;
|
||||
import java.text.Normalizer;
|
||||
import java.util.*;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
import java.util.stream.Collectors;
|
||||
import java.util.stream.Stream;
|
||||
|
||||
/**
|
||||
* Set of common functions
|
||||
|
@ -30,7 +27,6 @@ import java.util.stream.Stream;
|
|||
*/
|
||||
public abstract class AbstractPaceFunctions {
|
||||
|
||||
private static Map<String,String> translationMap = AbstractPaceFunctions.loadMapFromClasspath("/eu/dnetlib/pace/config/translation_map.csv");
|
||||
private static Map<String,String> cityMap = AbstractPaceFunctions.loadMapFromClasspath("/eu/dnetlib/pace/config/city_map.csv");
|
||||
|
||||
protected static Set<String> stopwords_en = loadFromClasspath("/eu/dnetlib/pace/config/stopwords_en.txt");
|
||||
|
@ -243,10 +239,10 @@ public abstract class AbstractPaceFunctions {
|
|||
}
|
||||
|
||||
|
||||
public double keywordsCompare(Set<String> s1, Set<String> s2){
|
||||
public double keywordsCompare(Set<String> s1, Set<String> s2, Map<String, String> translationMap){
|
||||
|
||||
Set<String> k1 = keywordsToCodes(s1);
|
||||
Set<String> k2 = keywordsToCodes(s2);
|
||||
Set<String> k1 = keywordsToCodes(s1, translationMap);
|
||||
Set<String> k2 = keywordsToCodes(s2, translationMap);
|
||||
|
||||
int longer = (k1.size()>k2.size())?k1.size():k2.size();
|
||||
|
||||
|
@ -278,7 +274,7 @@ public abstract class AbstractPaceFunctions {
|
|||
return keywords.stream().map(s -> translationMap.get(s)).collect(Collectors.toSet());
|
||||
}
|
||||
|
||||
public Set<String> keywordsToCodes(Set<String> keywords) {
|
||||
public Set<String> keywordsToCodes(Set<String> keywords, Map<String, String> translationMap) {
|
||||
return toCodes(keywords, translationMap);
|
||||
}
|
||||
|
||||
|
|
|
@ -2,6 +2,7 @@ package eu.dnetlib.pace.config;
|
|||
|
||||
import com.google.common.collect.Lists;
|
||||
import com.google.common.collect.Maps;
|
||||
import eu.dnetlib.pace.common.AbstractPaceFunctions;
|
||||
import eu.dnetlib.pace.condition.ConditionAlgo;
|
||||
import eu.dnetlib.pace.model.ClusteringDef;
|
||||
import eu.dnetlib.pace.model.CondDef;
|
||||
|
|
|
@ -46,7 +46,7 @@ public class JaroWinklerNormalizedName extends SecondStringDistanceAlgo {
|
|||
|
||||
if (sameCity(cities1,cities2)) {
|
||||
|
||||
if (keywordsCompare(keywords1, keywords2)>params.getOrDefault("threshold", 0.5).doubleValue()) {
|
||||
if (keywordsCompare(keywords1, keywords2, conf.translationMap())>params.getOrDefault("threshold", 0.5).doubleValue()) {
|
||||
|
||||
ca = removeKeywords(ca, keywords1);
|
||||
ca = removeKeywords(ca, cities1);
|
||||
|
|
Loading…
Reference in New Issue