forked from D-Net/dnet-hadoop
Merge pull request 'fixed dedup configuration management in the Broker workflow' (#341) from fix_8997 into master
Reviewed-on: D-Net/dnet-hadoop#341
This commit is contained in:
commit
ef02648399
|
@ -81,7 +81,7 @@ case class SparkModel(conf: DedupConfig) {
|
|||
MapDocumentUtil.truncateList(
|
||||
MapDocumentUtil.getJPathList(fdef.getPath, documentContext, fdef.getType),
|
||||
fdef.getSize
|
||||
).toArray
|
||||
).asScala
|
||||
|
||||
case Type.StringConcat =>
|
||||
val jpaths = CONCAT_REGEX.split(fdef.getPath)
|
||||
|
|
|
@ -2,7 +2,9 @@
|
|||
package eu.dnetlib.dhp.broker.oa.util;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.apache.spark.sql.Row;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
@ -27,10 +29,14 @@ public class TrustUtils {
|
|||
static {
|
||||
mapper = new ObjectMapper();
|
||||
try {
|
||||
dedupConfig = mapper
|
||||
.readValue(
|
||||
DedupConfig.class.getResourceAsStream("/eu/dnetlib/dhp/broker/oa/dedupConfig/dedupConfig.json"),
|
||||
DedupConfig.class);
|
||||
dedupConfig = DedupConfig
|
||||
.load(
|
||||
IOUtils
|
||||
.toString(
|
||||
DedupConfig.class
|
||||
.getResourceAsStream("/eu/dnetlib/dhp/broker/oa/dedupConfig/dedupConfig.json"),
|
||||
StandardCharsets.UTF_8));
|
||||
|
||||
deduper = new SparkDeduper(dedupConfig);
|
||||
} catch (final IOException e) {
|
||||
log.error("Error loading dedupConfig, e");
|
||||
|
@ -57,7 +63,7 @@ public class TrustUtils {
|
|||
return TrustUtils.rescale(score, threshold);
|
||||
} catch (final Exception e) {
|
||||
log.error("Error computing score between results", e);
|
||||
return BrokerConstants.MIN_TRUST;
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue