forked from D-Net/dnet-hadoop
Added new properties (size, length) to FieldDef to limit the information mapped onto each MapDocument.
This commit is contained in:
parent
394fcafd41
commit
c2d4cb3ba6
|
@ -36,7 +36,15 @@ public class FieldDef implements Serializable {
|
||||||
|
|
||||||
private double weight;
|
private double weight;
|
||||||
|
|
||||||
private int limit = -1;
|
/**
|
||||||
|
* Maximum size for repeatable fields in the model; -1 means unbounded size.
|
||||||
|
*/
|
||||||
|
private int size = -1;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Maximum length for field values in the model; -1 means unbounded length.
|
||||||
|
*/
|
||||||
|
private int length = -1;
|
||||||
|
|
||||||
private Map<String, Number> params;
|
private Map<String, Number> params;
|
||||||
|
|
||||||
|
@ -73,7 +81,12 @@ public class FieldDef implements Serializable {
|
||||||
if (params == null) {
|
if (params == null) {
|
||||||
params = new HashMap<>();
|
params = new HashMap<>();
|
||||||
}
|
}
|
||||||
params.put("limit", getLimit());
|
|
||||||
|
//TODO verify that the init signatures for the distance algos are all the same!
|
||||||
|
/*
|
||||||
|
params.put("size", getSize());
|
||||||
|
params.put("length", getLength());
|
||||||
|
*/
|
||||||
params.put("weight", getWeight());
|
params.put("weight", getWeight());
|
||||||
return PaceConfig.paceResolver.getDistanceAlgo(getAlgo(), params);
|
return PaceConfig.paceResolver.getDistanceAlgo(getAlgo(), params);
|
||||||
}
|
}
|
||||||
|
@ -98,11 +111,6 @@ public class FieldDef implements Serializable {
|
||||||
this.overrideMatch = overrideMatch;
|
this.overrideMatch = overrideMatch;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
|
||||||
public String toString() {
|
|
||||||
return new Gson().toJson(this);
|
|
||||||
}
|
|
||||||
|
|
||||||
public double getWeight() {
|
public double getWeight() {
|
||||||
return weight;
|
return weight;
|
||||||
}
|
}
|
||||||
|
@ -119,12 +127,21 @@ public class FieldDef implements Serializable {
|
||||||
this.algo = algo;
|
this.algo = algo;
|
||||||
}
|
}
|
||||||
|
|
||||||
public int getLimit() {
|
|
||||||
return limit;
|
public int getSize() {
|
||||||
|
return size;
|
||||||
}
|
}
|
||||||
|
|
||||||
public void setLimit(final int limit) {
|
public void setSize(int size) {
|
||||||
this.limit = limit;
|
this.size = size;
|
||||||
|
}
|
||||||
|
|
||||||
|
public int getLength() {
|
||||||
|
return length;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setLength(int length) {
|
||||||
|
this.length = length;
|
||||||
}
|
}
|
||||||
|
|
||||||
public Map<String, Number> getParams() {
|
public Map<String, Number> getParams() {
|
||||||
|
@ -146,4 +163,10 @@ public class FieldDef implements Serializable {
|
||||||
public void setIgnoreMissing(boolean ignoreMissing) {
|
public void setIgnoreMissing(boolean ignoreMissing) {
|
||||||
this.ignoreMissing = ignoreMissing;
|
this.ignoreMissing = ignoreMissing;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String toString() {
|
||||||
|
return new Gson().toJson(this);
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -149,7 +149,7 @@ public class BlockProcessor {
|
||||||
|
|
||||||
if (!idCurr.equals(idPivot) && (fieldCurr != null)) {
|
if (!idCurr.equals(idPivot) && (fieldCurr != null)) {
|
||||||
|
|
||||||
final ScoreResult sr = similarity(algo, pivot, curr);
|
final ScoreResult sr = algo.between(pivot, curr, dedupConf);
|
||||||
log.debug(sr.toString()+"SCORE "+ sr.getScore());
|
log.debug(sr.toString()+"SCORE "+ sr.getScore());
|
||||||
emitOutput(sr, idPivot, idCurr, context);
|
emitOutput(sr, idPivot, idCurr, context);
|
||||||
i++;
|
i++;
|
||||||
|
@ -171,15 +171,6 @@ public class BlockProcessor {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private ScoreResult similarity(final PaceDocumentDistance algo, final MapDocument a, final MapDocument b) {
|
|
||||||
try {
|
|
||||||
return algo.between(a, b, dedupConf);
|
|
||||||
} catch(Throwable e) {
|
|
||||||
log.error(String.format("\nA: %s\n----------------------\nB: %s", a, b), e);
|
|
||||||
throw new IllegalArgumentException(e);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private boolean mustSkip(final String idPivot) {
|
private boolean mustSkip(final String idPivot) {
|
||||||
return dedupConf.getWf().getSkipList().contains(getNsPrefix(idPivot));
|
return dedupConf.getWf().getSkipList().contains(getNsPrefix(idPivot));
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue