forked from D-Net/dnet-hadoop
Changed the behavior of the transformation job so that it no longer fails at the first error
This commit is contained in:
parent
3c6fc2096c
commit
aed29156c7
|
@ -7,6 +7,7 @@ import static eu.dnetlib.dhp.utils.DHPUtils.*;
|
|||
|
||||
import java.io.IOException;
|
||||
import java.util.Map;
|
||||
import java.util.Objects;
|
||||
import java.util.Optional;
|
||||
|
||||
import org.apache.commons.io.IOUtils;
|
||||
|
@ -126,7 +127,8 @@ public class TransformSparkJobNode {
|
|||
JavaRDD<MetadataRecord> mdstore = inputMDStore
|
||||
.javaRDD()
|
||||
.repartition(getRepartitionNumber(totalInput, rpt))
|
||||
.map((Function<MetadataRecord, MetadataRecord>) x::call);
|
||||
.map((Function<MetadataRecord, MetadataRecord>) x::call)
|
||||
.filter((Function<MetadataRecord, Boolean>) Objects::nonNull);
|
||||
saveDataset(spark.createDataset(mdstore.rdd(), encoder), outputBasePath + MDSTORE_DATA_PATH);
|
||||
|
||||
log.info("Transformed item {}", ct.getProcessedItems().count());
|
||||
|
|
|
@ -81,7 +81,8 @@ public class XSLTTransformationFunction implements MapFunction<MetadataRecord, M
|
|||
return value;
|
||||
} catch (Throwable e) {
|
||||
aggregationCounter.getErrorItems().add(1);
|
||||
throw new RuntimeException(e);
|
||||
return null;
|
||||
// throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue