added new properties to FieldDef (size, length) to limit the information mapped onto each MapDocument

This commit is contained in:
Claudio Atzori 2018-11-19 17:37:57 +01:00
parent 394fcafd41
commit c2d4cb3ba6
2 changed files with 35 additions and 21 deletions

View File

@ -36,7 +36,15 @@ public class FieldDef implements Serializable {
private double weight; private double weight;
private int limit = -1; /**
* Maximum size for the repeatable fields in the model; -1 means unbounded size.
*/
private int size = -1;
/**
* Maximum length for field values in the model; -1 means unbounded length.
*/
private int length = -1;
private Map<String, Number> params; private Map<String, Number> params;
@ -73,7 +81,12 @@ public class FieldDef implements Serializable {
if (params == null) { if (params == null) {
params = new HashMap<>(); params = new HashMap<>();
} }
params.put("limit", getLimit());
//TODO verify that the init signatures for the distance algos are all the same!
/*
params.put("size", getSize());
params.put("length", getLength());
*/
params.put("weight", getWeight()); params.put("weight", getWeight());
return PaceConfig.paceResolver.getDistanceAlgo(getAlgo(), params); return PaceConfig.paceResolver.getDistanceAlgo(getAlgo(), params);
} }
@ -98,11 +111,6 @@ public class FieldDef implements Serializable {
this.overrideMatch = overrideMatch; this.overrideMatch = overrideMatch;
} }
@Override
public String toString() {
return new Gson().toJson(this);
}
public double getWeight() { public double getWeight() {
return weight; return weight;
} }
@ -119,12 +127,21 @@ public class FieldDef implements Serializable {
this.algo = algo; this.algo = algo;
} }
public int getLimit() {
return limit; public int getSize() {
return size;
} }
public void setLimit(final int limit) { public void setSize(int size) {
this.limit = limit; this.size = size;
}
public int getLength() {
return length;
}
public void setLength(int length) {
this.length = length;
} }
public Map<String, Number> getParams() { public Map<String, Number> getParams() {
@ -146,4 +163,10 @@ public class FieldDef implements Serializable {
public void setIgnoreMissing(boolean ignoreMissing) { public void setIgnoreMissing(boolean ignoreMissing) {
this.ignoreMissing = ignoreMissing; this.ignoreMissing = ignoreMissing;
} }
@Override
public String toString() {
return new Gson().toJson(this);
}
} }

View File

@ -149,7 +149,7 @@ public class BlockProcessor {
if (!idCurr.equals(idPivot) && (fieldCurr != null)) { if (!idCurr.equals(idPivot) && (fieldCurr != null)) {
final ScoreResult sr = similarity(algo, pivot, curr); final ScoreResult sr = algo.between(pivot, curr, dedupConf);
log.debug(sr.toString()+"SCORE "+ sr.getScore()); log.debug(sr.toString()+"SCORE "+ sr.getScore());
emitOutput(sr, idPivot, idCurr, context); emitOutput(sr, idPivot, idCurr, context);
i++; i++;
@ -171,15 +171,6 @@ public class BlockProcessor {
} }
} }
private ScoreResult similarity(final PaceDocumentDistance algo, final MapDocument a, final MapDocument b) {
try {
return algo.between(a, b, dedupConf);
} catch(Throwable e) {
log.error(String.format("\nA: %s\n----------------------\nB: %s", a, b), e);
throw new IllegalArgumentException(e);
}
}
private boolean mustSkip(final String idPivot) { private boolean mustSkip(final String idPivot) {
return dedupConf.getWf().getSkipList().contains(getNsPrefix(idPivot)); return dedupConf.getWf().getSkipList().contains(getNsPrefix(idPivot));
} }