dump #50
|
@ -8,8 +8,6 @@ import java.io.StringReader;
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
import java.util.stream.Collectors;
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
import eu.dnetlib.dhp.schema.dump.oaf.graph.Funder;
|
|
||||||
import eu.dnetlib.dhp.schema.dump.oaf.graph.Project;
|
|
||||||
import org.apache.spark.SparkConf;
|
import org.apache.spark.SparkConf;
|
||||||
import org.apache.spark.api.java.function.MapFunction;
|
import org.apache.spark.api.java.function.MapFunction;
|
||||||
import org.apache.spark.sql.Encoders;
|
import org.apache.spark.sql.Encoders;
|
||||||
|
@ -91,7 +89,9 @@ public class DumpGraphEntities implements Serializable {
|
||||||
Class<E> inputClazz) {
|
Class<E> inputClazz) {
|
||||||
Utils
|
Utils
|
||||||
.readPath(spark, inputPath, inputClazz)
|
.readPath(spark, inputPath, inputClazz)
|
||||||
.map((MapFunction<E, Datasource>) d -> mapDatasource((eu.dnetlib.dhp.schema.oaf.Datasource) d), Encoders.bean(Datasource.class))
|
.map(
|
||||||
|
(MapFunction<E, Datasource>) d -> mapDatasource((eu.dnetlib.dhp.schema.oaf.Datasource) d),
|
||||||
|
Encoders.bean(Datasource.class))
|
||||||
.filter(Objects::nonNull)
|
.filter(Objects::nonNull)
|
||||||
.write()
|
.write()
|
||||||
.mode(SaveMode.Overwrite)
|
.mode(SaveMode.Overwrite)
|
||||||
|
@ -103,7 +103,9 @@ public class DumpGraphEntities implements Serializable {
|
||||||
Class<E> inputClazz) {
|
Class<E> inputClazz) {
|
||||||
Utils
|
Utils
|
||||||
.readPath(spark, inputPath, inputClazz)
|
.readPath(spark, inputPath, inputClazz)
|
||||||
.map((MapFunction<E,Project>) p -> mapProject((eu.dnetlib.dhp.schema.oaf.Project) p), Encoders.bean(Project.class))
|
.map(
|
||||||
|
(MapFunction<E, Project>) p -> mapProject((eu.dnetlib.dhp.schema.oaf.Project) p),
|
||||||
|
Encoders.bean(Project.class))
|
||||||
.write()
|
.write()
|
||||||
.mode(SaveMode.Overwrite)
|
.mode(SaveMode.Overwrite)
|
||||||
.option("compression", "gzip")
|
.option("compression", "gzip")
|
||||||
|
@ -445,7 +447,9 @@ public class DumpGraphEntities implements Serializable {
|
||||||
Class<E> inputClazz) {
|
Class<E> inputClazz) {
|
||||||
Utils
|
Utils
|
||||||
.readPath(spark, inputPath, inputClazz)
|
.readPath(spark, inputPath, inputClazz)
|
||||||
.map((MapFunction<E, Organization>) o -> mapOrganization((eu.dnetlib.dhp.schema.oaf.Organization) o), Encoders.bean(Organization.class))
|
.map(
|
||||||
|
(MapFunction<E, Organization>) o -> mapOrganization((eu.dnetlib.dhp.schema.oaf.Organization) o),
|
||||||
|
Encoders.bean(Organization.class))
|
||||||
.write()
|
.write()
|
||||||
.mode(SaveMode.Overwrite)
|
.mode(SaveMode.Overwrite)
|
||||||
.option("compression", "gzip")
|
.option("compression", "gzip")
|
||||||
|
|
|
@ -49,9 +49,9 @@ public class SparkCollectAndSave implements Serializable {
|
||||||
log.info("outputPath: {}", outputPath);
|
log.info("outputPath: {}", outputPath);
|
||||||
|
|
||||||
final Boolean aggregateResult = Optional
|
final Boolean aggregateResult = Optional
|
||||||
.ofNullable(parser.get("resultAggregation"))
|
.ofNullable(parser.get("resultAggregation"))
|
||||||
.map(Boolean::valueOf)
|
.map(Boolean::valueOf)
|
||||||
.orElse(Boolean.TRUE);
|
.orElse(Boolean.TRUE);
|
||||||
|
|
||||||
SparkConf conf = new SparkConf();
|
SparkConf conf = new SparkConf();
|
||||||
|
|
||||||
|
|
|
@ -8,6 +8,7 @@ import java.util.Optional;
|
||||||
|
|
||||||
import org.apache.commons.io.IOUtils;
|
import org.apache.commons.io.IOUtils;
|
||||||
import org.apache.spark.SparkConf;
|
import org.apache.spark.SparkConf;
|
||||||
|
import org.apache.spark.api.java.function.MapFunction;
|
||||||
import org.apache.spark.sql.Encoders;
|
import org.apache.spark.sql.Encoders;
|
||||||
import org.apache.spark.sql.SaveMode;
|
import org.apache.spark.sql.SaveMode;
|
||||||
import org.apache.spark.sql.SparkSession;
|
import org.apache.spark.sql.SparkSession;
|
||||||
|
@ -67,7 +68,7 @@ public class SparkDumpRelationJob implements Serializable {
|
||||||
private static void dumpRelation(SparkSession spark, String inputPath, String outputPath) {
|
private static void dumpRelation(SparkSession spark, String inputPath, String outputPath) {
|
||||||
Utils
|
Utils
|
||||||
.readPath(spark, inputPath, Relation.class)
|
.readPath(spark, inputPath, Relation.class)
|
||||||
.map(relation -> {
|
.map((MapFunction<Relation, eu.dnetlib.dhp.schema.dump.oaf.graph.Relation>) relation -> {
|
||||||
eu.dnetlib.dhp.schema.dump.oaf.graph.Relation rel = new eu.dnetlib.dhp.schema.dump.oaf.graph.Relation();
|
eu.dnetlib.dhp.schema.dump.oaf.graph.Relation rel = new eu.dnetlib.dhp.schema.dump.oaf.graph.Relation();
|
||||||
rel
|
rel
|
||||||
.setSource(
|
.setSource(
|
||||||
|
|
Loading…
Reference in New Issue