added check to not dump datasources related to funders

This commit is contained in:
Miriam Baglioni 2020-07-29 17:56:18 +02:00
parent b48934f6df
commit 40e194fe2f
1 changed file with 10 additions and 4 deletions

View File

@@ -5,10 +5,7 @@ import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
import java.io.Serializable; import java.io.Serializable;
import java.io.StringReader; import java.io.StringReader;
import java.util.ArrayList; import java.util.*;
import java.util.Arrays;
import java.util.List;
import java.util.Optional;
import java.util.stream.Collectors; import java.util.stream.Collectors;
import javax.swing.text.html.Option; import javax.swing.text.html.Option;
@@ -85,6 +82,7 @@ public class DumpGraphEntities implements Serializable {
Utils Utils
.readPath(spark, inputPath, inputClazz) .readPath(spark, inputPath, inputClazz)
.map(d -> mapDatasource((eu.dnetlib.dhp.schema.oaf.Datasource) d), Encoders.bean(Datasource.class)) .map(d -> mapDatasource((eu.dnetlib.dhp.schema.oaf.Datasource) d), Encoders.bean(Datasource.class))
.filter(Objects::nonNull)
.write() .write()
.mode(SaveMode.Overwrite) .mode(SaveMode.Overwrite)
.option("compression", "gzip") .option("compression", "gzip")
@@ -105,6 +103,14 @@ public class DumpGraphEntities implements Serializable {
private static Datasource mapDatasource(eu.dnetlib.dhp.schema.oaf.Datasource d) { private static Datasource mapDatasource(eu.dnetlib.dhp.schema.oaf.Datasource d) {
Datasource datasource = new Datasource(); Datasource datasource = new Datasource();
Optional<eu.dnetlib.dhp.schema.oaf.Qualifier> odstype = Optional.ofNullable(d.getDatasourcetype());
if(odstype.isPresent()){
if (odstype.get().getClassid().equals(Constants.FUNDER_DS)){
return null;
}
}
datasource.setId(d.getId()); datasource.setId(d.getId());
Optional.ofNullable(d.getOriginalId()).ifPresent(oId -> datasource.setOriginalId(oId)); Optional.ofNullable(d.getOriginalId()).ifPresent(oId -> datasource.setOriginalId(oId));