Merge pull request 'fixed dedup configuration management in the Broker workflow' (#341) from fix_8997 into master

Reviewed-on: #341
This commit is contained in:
Claudio Atzori 2023-10-02 11:03:50 +02:00
commit ef02648399
2 changed files with 12 additions and 6 deletions

View File

@ -81,7 +81,7 @@ case class SparkModel(conf: DedupConfig) {
MapDocumentUtil.truncateList( MapDocumentUtil.truncateList(
MapDocumentUtil.getJPathList(fdef.getPath, documentContext, fdef.getType), MapDocumentUtil.getJPathList(fdef.getPath, documentContext, fdef.getType),
fdef.getSize fdef.getSize
).toArray ).asScala
case Type.StringConcat => case Type.StringConcat =>
val jpaths = CONCAT_REGEX.split(fdef.getPath) val jpaths = CONCAT_REGEX.split(fdef.getPath)

View File

@ -2,7 +2,9 @@
package eu.dnetlib.dhp.broker.oa.util; package eu.dnetlib.dhp.broker.oa.util;
import java.io.IOException; import java.io.IOException;
import java.nio.charset.StandardCharsets;
import org.apache.commons.io.IOUtils;
import org.apache.spark.sql.Row; import org.apache.spark.sql.Row;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
@ -27,10 +29,14 @@ public class TrustUtils {
static { static {
mapper = new ObjectMapper(); mapper = new ObjectMapper();
try { try {
dedupConfig = mapper dedupConfig = DedupConfig
.readValue( .load(
DedupConfig.class.getResourceAsStream("/eu/dnetlib/dhp/broker/oa/dedupConfig/dedupConfig.json"), IOUtils
DedupConfig.class); .toString(
DedupConfig.class
.getResourceAsStream("/eu/dnetlib/dhp/broker/oa/dedupConfig/dedupConfig.json"),
StandardCharsets.UTF_8));
deduper = new SparkDeduper(dedupConfig); deduper = new SparkDeduper(dedupConfig);
} catch (final IOException e) { } catch (final IOException e) {
log.error("Error loading dedupConfig, e"); log.error("Error loading dedupConfig, e");
@ -57,7 +63,7 @@ public class TrustUtils {
return TrustUtils.rescale(score, threshold); return TrustUtils.rescale(score, threshold);
} catch (final Exception e) { } catch (final Exception e) {
log.error("Error computing score between results", e); log.error("Error computing score between results", e);
return BrokerConstants.MIN_TRUST; throw new RuntimeException(e);
} }
} }