Added code to filter out null originalId entries from the dump

This commit is contained in:
Miriam Baglioni 2020-07-29 18:28:21 +02:00
parent ef1d8aef17
commit 76bcab98ce
1 changed file with 2 additions and 2 deletions

View File

@@ -82,7 +82,7 @@ public class DumpGraphEntities implements Serializable {
Utils
.readPath(spark, inputPath, inputClazz)
.map(d -> mapDatasource((eu.dnetlib.dhp.schema.oaf.Datasource) d), Encoders.bean(Datasource.class))
.filter(Objects::nonNull)
.filter(Objects::nonNull)
.write()
.mode(SaveMode.Overwrite)
.option("compression", "gzip")
@@ -113,7 +113,7 @@ public class DumpGraphEntities implements Serializable {
datasource.setId(d.getId());
Optional.ofNullable(d.getOriginalId()).ifPresent(oId -> datasource.setOriginalId(oId));
Optional.ofNullable(d.getOriginalId()).ifPresent(oId -> datasource.setOriginalId(oId.stream().filter(Objects::nonNull).collect(Collectors.toList())));
Optional
.ofNullable(d.getPid())