Added code to filter out null entries from the originalId list in the dump

This commit is contained in:
Miriam Baglioni 2020-07-29 18:28:21 +02:00
parent ef1d8aef17
commit 76bcab98ce
1 changed file with 2 additions and 2 deletions

View File

@ -82,7 +82,7 @@ public class DumpGraphEntities implements Serializable {
Utils Utils
.readPath(spark, inputPath, inputClazz) .readPath(spark, inputPath, inputClazz)
.map(d -> mapDatasource((eu.dnetlib.dhp.schema.oaf.Datasource) d), Encoders.bean(Datasource.class)) .map(d -> mapDatasource((eu.dnetlib.dhp.schema.oaf.Datasource) d), Encoders.bean(Datasource.class))
.filter(Objects::nonNull) .filter(Objects::nonNull)
.write() .write()
.mode(SaveMode.Overwrite) .mode(SaveMode.Overwrite)
.option("compression", "gzip") .option("compression", "gzip")
@ -113,7 +113,7 @@ public class DumpGraphEntities implements Serializable {
datasource.setId(d.getId()); datasource.setId(d.getId());
Optional.ofNullable(d.getOriginalId()).ifPresent(oId -> datasource.setOriginalId(oId)); Optional.ofNullable(d.getOriginalId()).ifPresent(oId -> datasource.setOriginalId(oId.stream().filter(Objects::nonNull).collect(Collectors.toList())));
Optional Optional
.ofNullable(d.getPid()) .ofNullable(d.getPid())