forked from D-Net/dnet-hadoop
Added code to filter out null entries of originalId from the dump
This commit is contained in:
parent
ef1d8aef17
commit
76bcab98ce
|
@ -82,7 +82,7 @@ public class DumpGraphEntities implements Serializable {
|
||||||
Utils
|
Utils
|
||||||
.readPath(spark, inputPath, inputClazz)
|
.readPath(spark, inputPath, inputClazz)
|
||||||
.map(d -> mapDatasource((eu.dnetlib.dhp.schema.oaf.Datasource) d), Encoders.bean(Datasource.class))
|
.map(d -> mapDatasource((eu.dnetlib.dhp.schema.oaf.Datasource) d), Encoders.bean(Datasource.class))
|
||||||
.filter(Objects::nonNull)
|
.filter(Objects::nonNull)
|
||||||
.write()
|
.write()
|
||||||
.mode(SaveMode.Overwrite)
|
.mode(SaveMode.Overwrite)
|
||||||
.option("compression", "gzip")
|
.option("compression", "gzip")
|
||||||
|
@ -113,7 +113,7 @@ public class DumpGraphEntities implements Serializable {
|
||||||
|
|
||||||
datasource.setId(d.getId());
|
datasource.setId(d.getId());
|
||||||
|
|
||||||
Optional.ofNullable(d.getOriginalId()).ifPresent(oId -> datasource.setOriginalId(oId));
|
Optional.ofNullable(d.getOriginalId()).ifPresent(oId -> datasource.setOriginalId(oId.stream().filter(Objects::nonNull).collect(Collectors.toList())));
|
||||||
|
|
||||||
Optional
|
Optional
|
||||||
.ofNullable(d.getPid())
|
.ofNullable(d.getPid())
|
||||||
|
|
Loading…
Reference in New Issue