Changed behavior in the transformation job so that it doesn't fail at the first error

This commit is contained in:
Sandro La Bruzzo 2021-09-07 19:05:46 +02:00
parent 3c6fc2096c
commit aed29156c7
2 changed files with 5 additions and 2 deletions

View File

@ -7,6 +7,7 @@ import static eu.dnetlib.dhp.utils.DHPUtils.*;
import java.io.IOException;
import java.util.Map;
import java.util.Objects;
import java.util.Optional;
import org.apache.commons.io.IOUtils;
@ -126,7 +127,8 @@ public class TransformSparkJobNode {
JavaRDD<MetadataRecord> mdstore = inputMDStore
.javaRDD()
.repartition(getRepartitionNumber(totalInput, rpt))
.map((Function<MetadataRecord, MetadataRecord>) x::call);
.map((Function<MetadataRecord, MetadataRecord>) x::call)
.filter((Function<MetadataRecord, Boolean>) Objects::nonNull);
saveDataset(spark.createDataset(mdstore.rdd(), encoder), outputBasePath + MDSTORE_DATA_PATH);
log.info("Transformed item {}", ct.getProcessedItems().count());

View File

@ -81,7 +81,8 @@ public class XSLTTransformationFunction implements MapFunction<MetadataRecord, M
return value;
} catch (Throwable e) {
aggregationCounter.getErrorItems().add(1);
throw new RuntimeException(e);
return null;
// throw new RuntimeException(e);
}
}