forked from D-Net/dnet-hadoop
changed behavior in the transformation job so that it no longer fails at the first error
This commit is contained in:
parent
3c6fc2096c
commit
aed29156c7
|
@ -7,6 +7,7 @@ import static eu.dnetlib.dhp.utils.DHPUtils.*;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
import java.util.Objects;
|
||||||
import java.util.Optional;
|
import java.util.Optional;
|
||||||
|
|
||||||
import org.apache.commons.io.IOUtils;
|
import org.apache.commons.io.IOUtils;
|
||||||
|
@ -126,7 +127,8 @@ public class TransformSparkJobNode {
|
||||||
JavaRDD<MetadataRecord> mdstore = inputMDStore
|
JavaRDD<MetadataRecord> mdstore = inputMDStore
|
||||||
.javaRDD()
|
.javaRDD()
|
||||||
.repartition(getRepartitionNumber(totalInput, rpt))
|
.repartition(getRepartitionNumber(totalInput, rpt))
|
||||||
.map((Function<MetadataRecord, MetadataRecord>) x::call);
|
.map((Function<MetadataRecord, MetadataRecord>) x::call)
|
||||||
|
.filter((Function<MetadataRecord, Boolean>) Objects::nonNull);
|
||||||
saveDataset(spark.createDataset(mdstore.rdd(), encoder), outputBasePath + MDSTORE_DATA_PATH);
|
saveDataset(spark.createDataset(mdstore.rdd(), encoder), outputBasePath + MDSTORE_DATA_PATH);
|
||||||
|
|
||||||
log.info("Transformed item {}", ct.getProcessedItems().count());
|
log.info("Transformed item {}", ct.getProcessedItems().count());
|
||||||
|
|
|
@ -81,7 +81,8 @@ public class XSLTTransformationFunction implements MapFunction<MetadataRecord, M
|
||||||
return value;
|
return value;
|
||||||
} catch (Throwable e) {
|
} catch (Throwable e) {
|
||||||
aggregationCounter.getErrorItems().add(1);
|
aggregationCounter.getErrorItems().add(1);
|
||||||
throw new RuntimeException(e);
|
return null;
|
||||||
|
// throw new RuntimeException(e);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue