This commit is contained in:
Miriam Baglioni 2020-11-20 10:07:50 +01:00
parent d08dca0745
commit 33c27b6f75
2 changed files with 25 additions and 2 deletions

View File

@ -424,6 +424,9 @@ public class MakeReportSparkJob implements Serializable {
return new Tuple2<>("check", reportInfo);
}
}else{
reportInfo.setLevel("not found a match in name matching");
return new Tuple2<>("check", reportInfo);
}
}

View File

@ -12,6 +12,7 @@ import java.util.stream.Collectors;
import org.apache.commons.io.FileUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.text.similarity.CosineDistance;
import org.apache.commons.text.similarity.FuzzyScore;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.security.WhitelistBasedResolver;
import org.apache.neethi.Assertion;
@ -160,8 +161,15 @@ public class CleanOrcidTest {
"21", MakeReportSparkJob.handleNameSurname("Matthew Jones")
},
{
"20", MakeReportSparkJob.handleNameSurname("Martin Fenner")
}
"22", MakeReportSparkJob.handleNameSurname("Martin Fenner")
},
{
"23", MakeReportSparkJob.handleNameSurname("Surachai Karnjanakom")
},
{
"24", MakeReportSparkJob.handleNameSurname("Panya Maneechakr")
}
};
@ -544,6 +552,18 @@ public class CleanOrcidTest {
});
}
/**
 * Prints the Apache Commons Text {@link FuzzyScore} for each consecutive pair of rows in
 * {@code input}: rows (0,1), (2,3), and so on, comparing the second element of each row.
 * A trailing row without a partner is skipped.
 */
@Test
public void testAuthorFuzzyApache() {
	// The scorer is identical for every pair, so build it once instead of once per iteration.
	final FuzzyScore fuzzyScore = new FuzzyScore(Locale.getDefault());
	// Bound on i + 1: the body reads input[i + 1], so an odd-length input would otherwise
	// throw ArrayIndexOutOfBoundsException on the last iteration.
	for (int i = 0; i + 1 < input.length; i += 2) {
		System.out
			.println(
				"FuzzyScore of '" + input[i][1] + "' & '" + input[i + 1][1] + "' | Similarity ratio "
					+ fuzzyScore.fuzzyScore(input[i][1], input[i + 1][1]));
	}
}
@Test
public void FuzzyWuzzyTest() {
applyFuzzyWuzzy(input);