
refactoring due to compilation

Miriam Baglioni 2020-08-24 17:07:34 +02:00
parent 428f1022fd
commit 8694bb9b31
3 changed files with 14 additions and 9 deletions
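
The compilation problem worked around here is the overloaded Dataset.map in Spark's Java API: one overload takes a Scala Function1, the other a Java MapFunction, both paired with an Encoder, so a bare Java lambda is ambiguous and javac rejects the call with "reference to map is ambiguous". The cast added in SparkDumpRelationJob below selects the MapFunction overload explicitly; DumpGraphEntities already had the casts and is mostly reflowed plus trimmed of unused imports. A minimal, self-contained sketch of the ambiguity and the fix (class name and dataset contents are illustrative, not from this repository):

import java.util.Arrays;

import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SparkSession;

public class MapOverloadSketch {
	public static void main(String[] args) {
		SparkSession spark = SparkSession.builder()
			.appName("map-overload-sketch")
			.master("local[*]")
			.getOrCreate();

		Dataset<String> ids = spark.createDataset(
			Arrays.asList("a", "bb", "ccc"), Encoders.STRING());

		// Without the explicit cast the next line does not compile:
		// javac cannot choose between map(Function1, Encoder) and
		// map(MapFunction, Encoder) when given a bare lambda or
		// method reference.
		Dataset<Integer> lengths = ids
			.map((MapFunction<String, Integer>) String::length, Encoders.INT());

		lengths.show();
		spark.stop();
	}
}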

DumpGraphEntities.java

@@ -8,8 +8,6 @@ import java.io.StringReader;
 import java.util.*;
 import java.util.stream.Collectors;
-import eu.dnetlib.dhp.schema.dump.oaf.graph.Funder;
-import eu.dnetlib.dhp.schema.dump.oaf.graph.Project;
 import org.apache.spark.SparkConf;
 import org.apache.spark.api.java.function.MapFunction;
 import org.apache.spark.sql.Encoders;
@@ -91,7 +89,9 @@ public class DumpGraphEntities implements Serializable {
 			Class<E> inputClazz) {
 		Utils
 			.readPath(spark, inputPath, inputClazz)
-			.map((MapFunction<E, Datasource>) d -> mapDatasource((eu.dnetlib.dhp.schema.oaf.Datasource) d), Encoders.bean(Datasource.class))
+			.map(
+				(MapFunction<E, Datasource>) d -> mapDatasource((eu.dnetlib.dhp.schema.oaf.Datasource) d),
+				Encoders.bean(Datasource.class))
 			.filter(Objects::nonNull)
 			.write()
 			.mode(SaveMode.Overwrite)
@@ -103,7 +103,9 @@ public class DumpGraphEntities implements Serializable {
 			Class<E> inputClazz) {
 		Utils
 			.readPath(spark, inputPath, inputClazz)
-			.map((MapFunction<E,Project>) p -> mapProject((eu.dnetlib.dhp.schema.oaf.Project) p), Encoders.bean(Project.class))
+			.map(
+				(MapFunction<E, Project>) p -> mapProject((eu.dnetlib.dhp.schema.oaf.Project) p),
+				Encoders.bean(Project.class))
 			.write()
 			.mode(SaveMode.Overwrite)
 			.option("compression", "gzip")
@@ -445,7 +447,9 @@ public class DumpGraphEntities implements Serializable {
 			Class<E> inputClazz) {
 		Utils
 			.readPath(spark, inputPath, inputClazz)
-			.map((MapFunction<E, Organization>) o -> mapOrganization((eu.dnetlib.dhp.schema.oaf.Organization) o), Encoders.bean(Organization.class))
+			.map(
+				(MapFunction<E, Organization>) o -> mapOrganization((eu.dnetlib.dhp.schema.oaf.Organization) o),
+				Encoders.bean(Organization.class))
 			.write()
 			.mode(SaveMode.Overwrite)
 			.option("compression", "gzip")

SparkDumpRelationJob.java

@@ -8,6 +8,7 @@ import java.util.Optional;
 import org.apache.commons.io.IOUtils;
 import org.apache.spark.SparkConf;
+import org.apache.spark.api.java.function.MapFunction;
 import org.apache.spark.sql.Encoders;
 import org.apache.spark.sql.SaveMode;
 import org.apache.spark.sql.SparkSession;
@@ -67,7 +68,7 @@ public class SparkDumpRelationJob implements Serializable {
 	private static void dumpRelation(SparkSession spark, String inputPath, String outputPath) {
 		Utils
 			.readPath(spark, inputPath, Relation.class)
-			.map(relation -> {
+			.map((MapFunction<Relation, eu.dnetlib.dhp.schema.dump.oaf.graph.Relation>) relation -> {
 				eu.dnetlib.dhp.schema.dump.oaf.graph.Relation rel = new eu.dnetlib.dhp.schema.dump.oaf.graph.Relation();
 				rel
 					.setSource(