forked from D-Net/dnet-hadoop
Fix defect #8997: GenerateEventsJob generates huge amounts of logs because the broker entity similarity calculation consistently fails
This commit is contained in:
parent
0935d7757c
commit
e239b81740
|
@ -78,10 +78,10 @@ case class SparkModel(conf: DedupConfig) {
|
||||||
uv
|
uv
|
||||||
|
|
||||||
case Type.List | Type.JSON =>
|
case Type.List | Type.JSON =>
|
||||||
MapDocumentUtil.truncateList(
|
Seq(MapDocumentUtil.truncateList(
|
||||||
MapDocumentUtil.getJPathList(fdef.getPath, documentContext, fdef.getType),
|
MapDocumentUtil.getJPathList(fdef.getPath, documentContext, fdef.getType),
|
||||||
fdef.getSize
|
fdef.getSize
|
||||||
).toArray
|
))
|
||||||
|
|
||||||
case Type.StringConcat =>
|
case Type.StringConcat =>
|
||||||
val jpaths = CONCAT_REGEX.split(fdef.getPath)
|
val jpaths = CONCAT_REGEX.split(fdef.getPath)
|
||||||
|
|
|
@ -1,18 +1,18 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.broker.oa.util;
|
package eu.dnetlib.dhp.broker.oa.util;
|
||||||
|
|
||||||
import java.io.IOException;
|
|
||||||
|
|
||||||
import org.apache.spark.sql.Row;
|
|
||||||
import org.slf4j.Logger;
|
|
||||||
import org.slf4j.LoggerFactory;
|
|
||||||
|
|
||||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
|
|
||||||
import eu.dnetlib.broker.objects.OaBrokerMainEntity;
|
import eu.dnetlib.broker.objects.OaBrokerMainEntity;
|
||||||
import eu.dnetlib.pace.config.DedupConfig;
|
import eu.dnetlib.pace.config.DedupConfig;
|
||||||
import eu.dnetlib.pace.model.SparkDeduper;
|
import eu.dnetlib.pace.model.SparkDeduper;
|
||||||
import eu.dnetlib.pace.tree.support.TreeProcessor;
|
import eu.dnetlib.pace.tree.support.TreeProcessor;
|
||||||
|
import org.apache.commons.io.IOUtils;
|
||||||
|
import org.apache.spark.sql.Row;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.nio.charset.StandardCharsets;
|
||||||
|
|
||||||
public class TrustUtils {
|
public class TrustUtils {
|
||||||
|
|
||||||
|
@ -27,10 +27,8 @@ public class TrustUtils {
|
||||||
static {
|
static {
|
||||||
mapper = new ObjectMapper();
|
mapper = new ObjectMapper();
|
||||||
try {
|
try {
|
||||||
dedupConfig = mapper
|
dedupConfig = DedupConfig.load(IOUtils.toString(DedupConfig.class.getResourceAsStream("/eu/dnetlib/dhp/broker/oa/dedupConfig/dedupConfig.json"), StandardCharsets.UTF_8));
|
||||||
.readValue(
|
|
||||||
DedupConfig.class.getResourceAsStream("/eu/dnetlib/dhp/broker/oa/dedupConfig/dedupConfig.json"),
|
|
||||||
DedupConfig.class);
|
|
||||||
deduper = new SparkDeduper(dedupConfig);
|
deduper = new SparkDeduper(dedupConfig);
|
||||||
} catch (final IOException e) {
|
} catch (final IOException e) {
|
||||||
log.error("Error loading dedupConfig, e");
|
log.error("Error loading dedupConfig, e");
|
||||||
|
@ -57,7 +55,7 @@ public class TrustUtils {
|
||||||
return TrustUtils.rescale(score, threshold);
|
return TrustUtils.rescale(score, threshold);
|
||||||
} catch (final Exception e) {
|
} catch (final Exception e) {
|
||||||
log.error("Error computing score between results", e);
|
log.error("Error computing score between results", e);
|
||||||
return BrokerConstants.MIN_TRUST;
|
throw new RuntimeException(e);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue