From c2d4cb3ba6cb7a4f7212bf48e7d868ad2f96c074 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Mon, 19 Nov 2018 17:37:57 +0100 Subject: [PATCH] added new properties to FieldDef (size, length) to limit the information mapped onto each MapDocument --- .../java/eu/dnetlib/pace/model/FieldDef.java | 45 ++++++++++++++----- .../eu/dnetlib/pace/util/BlockProcessor.java | 11 +---- 2 files changed, 35 insertions(+), 21 deletions(-) diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/FieldDef.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/FieldDef.java index bcc96c6c0..736a255d7 100644 --- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/FieldDef.java +++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/model/FieldDef.java @@ -36,7 +36,15 @@ public class FieldDef implements Serializable { private double weight; - private int limit = -1; + /** + * Sets maximum size for the repeatable fields in the model. -1 for unbounded size. + */ + private int size = -1; + + /** + * Sets maximum length for field values in the model. -1 for unbounded length. + */ + private int length = -1; private Map params; @@ -73,7 +81,12 @@ public class FieldDef implements Serializable { if (params == null) { params = new HashMap<>(); } - params.put("limit", getLimit()); + + //TODO verify that the init signatures for the distance algos are all the same! + /* + params.put("size", getSize()); + params.put("length", getLength()); + */ params.put("weight", getWeight()); return PaceConfig.paceResolver.getDistanceAlgo(getAlgo(), params); } @@ -98,11 +111,6 @@ public class FieldDef implements Serializable { this.overrideMatch = overrideMatch; } - @Override - public String toString() { - return new Gson().toJson(this); - } - public double getWeight() { return weight; } @@ -119,12 +127,21 @@ public class FieldDef implements Serializable { this.algo = algo; } - public int getLimit() { - return limit; + + public int getSize() { + return size; } - public void setLimit(final int limit) { - this.limit = limit; + public void setSize(int size) { + this.size = size; + } + + public int getLength() { + return length; + } + + public void setLength(int length) { + this.length = length; } public Map getParams() { @@ -146,4 +163,10 @@ public class FieldDef implements Serializable { public void setIgnoreMissing(boolean ignoreMissing) { this.ignoreMissing = ignoreMissing; } + + @Override + public String toString() { + return new Gson().toJson(this); + } + } diff --git a/dnet-pace-core/src/main/java/eu/dnetlib/pace/util/BlockProcessor.java b/dnet-pace-core/src/main/java/eu/dnetlib/pace/util/BlockProcessor.java index f5a41f5c5..2b2ddf02d 100644 --- a/dnet-pace-core/src/main/java/eu/dnetlib/pace/util/BlockProcessor.java +++ b/dnet-pace-core/src/main/java/eu/dnetlib/pace/util/BlockProcessor.java @@ -149,7 +149,7 @@ public class BlockProcessor { if (!idCurr.equals(idPivot) && (fieldCurr != null)) { - final ScoreResult sr = similarity(algo, pivot, curr); + final ScoreResult sr = algo.between(pivot, curr, dedupConf); log.debug(sr.toString()+"SCORE "+ sr.getScore()); emitOutput(sr, idPivot, idCurr, context); i++; @@ -171,15 +171,6 @@ public class BlockProcessor { } } - private ScoreResult similarity(final PaceDocumentDistance algo, final MapDocument a, final MapDocument b) { - try { - return algo.between(a, b, dedupConf); - } catch(Throwable e) { - log.error(String.format("\nA: %s\n----------------------\nB: %s", a, b), e); - throw new IllegalArgumentException(e); - } - } - private boolean mustSkip(final String idPivot) { return dedupConf.getWf().getSkipList().contains(getNsPrefix(idPivot)); }