forked from D-Net/dnet-hadoop
added new properties to FieldDef (size, length) to limit the information mapped onto each MapDocument
parent 394fcafd41
commit c2d4cb3ba6
FieldDef.java

@@ -36,7 +36,15 @@ public class FieldDef implements Serializable {

 	private double weight;
 
 	private int limit = -1;
 
+	/**
+	 * Sets maximum size for the repeatable fields in the model. -1 for unbounded size.
+	 */
+	private int size = -1;
+
+	/**
+	 * Sets maximum length for field values in the model. -1 for unbounded length.
+	 */
+	private int length = -1;
+
 	private Map<String, Number> params;
 

@@ -73,7 +81,12 @@ public class FieldDef implements Serializable {

 		if (params == null) {
 			params = new HashMap<>();
 		}
 		params.put("limit", getLimit());
+
+		//TODO verify that the init signatures for the distance algos are all the same!
+		/*
+		params.put("size", getSize());
+		params.put("length", getLength());
+		*/
 		params.put("weight", getWeight());
 		return PaceConfig.paceResolver.getDistanceAlgo(getAlgo(), params);
 	}

@@ -98,11 +111,6 @@ public class FieldDef implements Serializable {

 		this.overrideMatch = overrideMatch;
 	}
 
-	@Override
-	public String toString() {
-		return new Gson().toJson(this);
-	}
-
 	public double getWeight() {
 		return weight;
 	}
@@ -119,12 +127,21 @@ public class FieldDef implements Serializable {

 		this.algo = algo;
 	}
 
-	public int getLimit() {
-		return limit;
+	public int getSize() {
+		return size;
 	}
 
-	public void setLimit(final int limit) {
-		this.limit = limit;
+	public void setSize(int size) {
+		this.size = size;
 	}
+
+	public int getLength() {
+		return length;
+	}
+
+	public void setLength(int length) {
+		this.length = length;
+	}
 
 	public Map<String, Number> getParams() {

@@ -146,4 +163,10 @@ public class FieldDef implements Serializable {

 	public void setIgnoreMissing(boolean ignoreMissing) {
 		this.ignoreMissing = ignoreMissing;
 	}
+
+	@Override
+	public String toString() {
+		return new Gson().toJson(this);
+	}
 }
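The mapping step that actually enforces the new size and length limits is not part of this commit; the stand-alone snippet below is only a minimal sketch of how a repeatable field could be capped to at most size values of at most length characters each, with -1 meaning unbounded as in the new FieldDef defaults. The class and method names here are hypothetical illustrations, not code from dnet-pace-core.

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

public class FieldLimitSketch {

	/**
	 * Keeps at most 'size' values of a repeatable field and cuts every value
	 * down to 'length' characters; -1 means "unbounded", as in FieldDef.
	 */
	static List<String> truncate(final List<String> values, final int size, final int length) {
		final int maxValues = (size < 0) ? values.size() : Math.min(size, values.size());
		final List<String> res = new ArrayList<>();
		for (final String v : values.subList(0, maxValues)) {
			res.add((length < 0 || v.length() <= length) ? v : v.substring(0, length));
		}
		return res;
	}

	public static void main(final String[] args) {
		// size = 2, length = 5: keep two values, each trimmed to five characters
		System.out.println(truncate(Arrays.asList("alpha", "beta-gamma", "delta"), 2, 5));
		// prints [alpha, beta-]
	}
}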
BlockProcessor.java

@@ -149,7 +149,7 @@ public class BlockProcessor {

 			if (!idCurr.equals(idPivot) && (fieldCurr != null)) {
 
-				final ScoreResult sr = similarity(algo, pivot, curr);
+				final ScoreResult sr = algo.between(pivot, curr, dedupConf);
 				log.debug(sr.toString()+"SCORE "+ sr.getScore());
 				emitOutput(sr, idPivot, idCurr, context);
 				i++;
 

@@ -171,15 +171,6 @@ public class BlockProcessor {

 			}
 		}
 
-	private ScoreResult similarity(final PaceDocumentDistance algo, final MapDocument a, final MapDocument b) {
-		try {
-			return algo.between(a, b, dedupConf);
-		} catch(Throwable e) {
-			log.error(String.format("\nA: %s\n----------------------\nB: %s", a, b), e);
-			throw new IllegalArgumentException(e);
-		}
-	}
-
 	private boolean mustSkip(final String idPivot) {
 		return dedupConf.getWf().getSkipList().contains(getNsPrefix(idPivot));
 	}
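One side effect of inlining algo.between(pivot, curr, dedupConf) is that the try/catch removed above no longer logs the two documents when a comparison throws. If that diagnostic is still useful, the same pattern can be kept around any comparison call; the snippet below is only a generic, self-contained sketch of it, with a dummy comparison standing in for PaceDocumentDistance.between.

import java.util.function.BiFunction;

public class CompareWithContextSketch {

	/**
	 * Runs a comparison and, if it throws, reports both inputs before
	 * propagating the error -- the behaviour of the removed similarity(...) helper.
	 */
	static <T, R> R compareWithContext(final T a, final T b, final BiFunction<T, T, R> cmp) {
		try {
			return cmp.apply(a, b);
		} catch (final Throwable e) {
			System.err.println(String.format("%nA: %s%n----------------------%nB: %s", a, b));
			throw new IllegalArgumentException(e);
		}
	}

	public static void main(final String[] args) {
		// a dummy "distance" that fails on empty input, to exercise the logging path
		final BiFunction<String, String, Integer> dist =
			(x, y) -> Math.abs(x.charAt(0) - y.charAt(0));

		System.out.println(compareWithContext("alpha", "beta", dist)); // prints 1
		compareWithContext("", "beta", dist);                          // logs both inputs, then throws
	}
}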