forked from D-Net/dnet-hadoop
Added normalization of the terms in the translation map
This commit is contained in:
parent
26b383fea2
commit
50b7a12b3f
|
@@ -11,6 +11,7 @@ import org.apache.commons.collections.CollectionUtils;
|
|||
import org.codehaus.jackson.annotate.JsonIgnore;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.text.Normalizer;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.stream.Collectors;
|
||||
|
@@ -46,7 +47,9 @@ public class PaceConfig implements Serializable {
|
|||
translationMap = Maps.newHashMap();
|
||||
for (String key : synonyms.keySet()) {
|
||||
for (String term : synonyms.get(key)){
|
||||
translationMap.put(term.toLowerCase(), key);
|
||||
translationMap.put(
|
||||
Normalizer.normalize(term.toLowerCase(), Normalizer.Form.NFD),
|
||||
key);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@@ -10,7 +10,7 @@ public class ConfigTest extends AbstractPaceTest {
|
|||
|
||||
@Test
|
||||
public void dedupConfigSerializationTest() {
|
||||
final DedupConfig cfgFromClasspath = DedupConfig.load(readFromClasspath("result.pace.conf.json"));
|
||||
final DedupConfig cfgFromClasspath = DedupConfig.load(readFromClasspath("org.curr.conf"));
|
||||
|
||||
final String conf = cfgFromClasspath.toString();
|
||||
|
||||
|
@@ -37,4 +37,12 @@ public class ConfigTest extends AbstractPaceTest {
|
|||
System.out.println(load.toString());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void translationMapTest() {
|
||||
|
||||
DedupConfig load = DedupConfig.load(readFromClasspath("org.curr.conf"));
|
||||
|
||||
System.out.println("translationMap = " + load.getPace().translationMap().toString());
|
||||
}
|
||||
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue