Added normalization of the terms in the translation map

This commit is contained in:
miconis 2019-10-08 15:13:45 +02:00
parent 26b383fea2
commit 50b7a12b3f
2 changed files with 13 additions and 2 deletions

View File

@ -11,6 +11,7 @@ import org.apache.commons.collections.CollectionUtils;
import org.codehaus.jackson.annotate.JsonIgnore;
import java.io.Serializable;
import java.text.Normalizer;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
@ -46,7 +47,9 @@ public class PaceConfig implements Serializable {
translationMap = Maps.newHashMap();
for (String key : synonyms.keySet()) {
for (String term : synonyms.get(key)){
translationMap.put(term.toLowerCase(), key);
translationMap.put(
Normalizer.normalize(term.toLowerCase(), Normalizer.Form.NFD),
key);
}
}
}

View File

@ -10,7 +10,7 @@ public class ConfigTest extends AbstractPaceTest {
@Test
public void dedupConfigSerializationTest() {
final DedupConfig cfgFromClasspath = DedupConfig.load(readFromClasspath("result.pace.conf.json"));
final DedupConfig cfgFromClasspath = DedupConfig.load(readFromClasspath("org.curr.conf"));
final String conf = cfgFromClasspath.toString();
@ -37,4 +37,12 @@ public class ConfigTest extends AbstractPaceTest {
System.out.println(load.toString());
}
/**
 * Loads the "org.curr.conf" configuration via {@code readFromClasspath} and prints the
 * translation map exposed by its pace section to standard output.
 *
 * <p>This test makes no assertions; it only exercises loading and prints the map for inspection.
 */
@Test
public void translationMapTest() {
    final DedupConfig config = DedupConfig.load(readFromClasspath("org.curr.conf"));
    // Render the map first so the printed line is assembled from a single named value.
    final String renderedMap = config.getPace().translationMap().toString();
    System.out.println("translationMap = " + renderedMap);
}
}