changed upper bound for whitelist

This commit is contained in:
Miriam Baglioni 2020-11-18 14:04:17 +01:00
parent 96d50080b4
commit f4fee8f43c
1 changed file with 6 additions and 3 deletions

View File

@@ -390,11 +390,14 @@ public class MakeReportSparkJob implements Serializable {
.createDocument();
}).collect(Collectors.toList());
if (matchService.applyMatchByDocId(documentList).entrySet().size() == 0) {
if (FuzzySearch.ratio(input[0][1], input[1][1]) < 20) {
reportInfo.setLevel("less than 20 in fuzzywuzzy");
double out = FuzzySearch.ratio(input[0][1], input[1][1]);
if (out < 29) {
reportInfo.setLevel("less than 29 in fuzzywuzzy");
return new Tuple2<>("wrong", reportInfo);
} else {
reportInfo.setLevel("more than 20 in fuzzywuzzy");
// TODO extend the checking to catch range of fuzzy wuzzy that could be wrong
// try using soundex techniques or merge with previous implementation or both
reportInfo.setLevel("more than 29 in fuzzywuzzy");
return new Tuple2<>("check", reportInfo);
}