Master branch updates from beta September 2023 #337

Manually merged
claudio.atzori merged 1271 commits from beta into master 2023-09-06 11:31:09 +02:00
4 changed files with 7 additions and 10 deletions
Showing only changes of commit 5499ca17c3 - Show all commits

View File

@@ -25,7 +25,7 @@ public class KeywordsClustering extends AbstractClusteringFunction {
//list of combination to return as result
final Collection<String> combinations = new LinkedHashSet<String>();
for (String keyword: keywordsToCodes(keywords)){
for (String keyword: keywordsToCodes(keywords, conf.translationMap())){
for (String city: citiesToCodes(cities)) {
combinations.add(keyword+"-"+city);
if (combinations.size()>=params.getOrDefault("max", 2)) {

View File

@@ -17,10 +17,7 @@ import java.io.IOException;
import java.io.StringWriter;
import java.text.Normalizer;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import java.util.stream.Stream;
/**
* Set of common functions
@@ -30,7 +27,6 @@ import java.util.stream.Stream;
*/
public abstract class AbstractPaceFunctions {
private static Map<String,String> translationMap = AbstractPaceFunctions.loadMapFromClasspath("/eu/dnetlib/pace/config/translation_map.csv");
private static Map<String,String> cityMap = AbstractPaceFunctions.loadMapFromClasspath("/eu/dnetlib/pace/config/city_map.csv");
protected static Set<String> stopwords_en = loadFromClasspath("/eu/dnetlib/pace/config/stopwords_en.txt");
@@ -243,10 +239,10 @@ public abstract class AbstractPaceFunctions {
}
public double keywordsCompare(Set<String> s1, Set<String> s2){
public double keywordsCompare(Set<String> s1, Set<String> s2, Map<String, String> translationMap){
Set<String> k1 = keywordsToCodes(s1);
Set<String> k2 = keywordsToCodes(s2);
Set<String> k1 = keywordsToCodes(s1, translationMap);
Set<String> k2 = keywordsToCodes(s2, translationMap);
int longer = (k1.size()>k2.size())?k1.size():k2.size();
@@ -278,7 +274,7 @@ public abstract class AbstractPaceFunctions {
return keywords.stream().map(s -> translationMap.get(s)).collect(Collectors.toSet());
}
public Set<String> keywordsToCodes(Set<String> keywords) {
public Set<String> keywordsToCodes(Set<String> keywords, Map<String, String> translationMap) {
return toCodes(keywords, translationMap);
}

View File

@@ -2,6 +2,7 @@ package eu.dnetlib.pace.config;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import eu.dnetlib.pace.common.AbstractPaceFunctions;
import eu.dnetlib.pace.condition.ConditionAlgo;
import eu.dnetlib.pace.model.ClusteringDef;
import eu.dnetlib.pace.model.CondDef;

View File

@@ -46,7 +46,7 @@ public class JaroWinklerNormalizedName extends SecondStringDistanceAlgo {
if (sameCity(cities1,cities2)) {
if (keywordsCompare(keywords1, keywords2)>params.getOrDefault("threshold", 0.5).doubleValue()) {
if (keywordsCompare(keywords1, keywords2, conf.translationMap())>params.getOrDefault("threshold", 0.5).doubleValue()) {
ca = removeKeywords(ca, keywords1);
ca = removeKeywords(ca, cities1);