forked from D-Net/dnet-hadoop
changed upper bound for whitelist
This commit is contained in:
parent
96d50080b4
commit
f4fee8f43c
|
@ -390,11 +390,14 @@ public class MakeReportSparkJob implements Serializable {
|
|||
.createDocument();
|
||||
}).collect(Collectors.toList());
|
||||
if (matchService.applyMatchByDocId(documentList).entrySet().size() == 0) {
|
||||
if (FuzzySearch.ratio(input[0][1], input[1][1]) < 20) {
|
||||
reportInfo.setLevel("less than 20 in fuzzywuzzy");
|
||||
double out = FuzzySearch.ratio(input[0][1], input[1][1]);
|
||||
if (out < 29) {
|
||||
reportInfo.setLevel("less than 29 in fuzzywuzzy");
|
||||
return new Tuple2<>("wrong", reportInfo);
|
||||
} else {
|
||||
reportInfo.setLevel("more than 20 in fuzzywuzzy");
|
||||
// TODO: extend this check to catch the range of fuzzywuzzy scores that could still be wrong
|
||||
// try using Soundex techniques, merging with the previous implementation, or both
|
||||
reportInfo.setLevel("more than 29 in fuzzywuzzy");
|
||||
return new Tuple2<>("check", reportInfo);
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue