When generating similarities, use the component with the lexicographically smaller id as the 'from' component

This commit is contained in:
Giambattista Bloisi 2023-07-10 15:45:49 +02:00
parent dcc08cc512
commit 745e70e0d7
1 changed file with 5 additions and 1 deletion

View File

@@ -121,7 +121,11 @@ public class BlockProcessor {
private void emitOutput(final boolean result, final String idPivot, final String idCurr, final Reporter context) {
if (result) {
writeSimilarity(context, idPivot, idCurr);
if (idPivot.compareTo(idCurr) <= 0) {
writeSimilarity(context, idPivot, idCurr);
} else {
writeSimilarity(context, idCurr, idPivot);
}
context.incrementCounter(dedupConf.getWf().getEntityType(), "dedupSimilarity (x2)", 1);
} else {
context.incrementCounter(dedupConf.getWf().getEntityType(), "d < " + dedupConf.getWf().getThreshold(), 1);