forked from antonis.lempesis/dnet-hadoop
code formatting
This commit is contained in:
parent
db8b90a156
commit
1781609508
|
@ -1,11 +1,11 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.oa.graph.merge;
|
package eu.dnetlib.dhp.oa.graph.merge;
|
||||||
|
|
||||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
|
||||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
|
||||||
import eu.dnetlib.dhp.common.HdfsSupport;
|
import java.util.Objects;
|
||||||
import eu.dnetlib.dhp.oa.graph.clean.CleanGraphSparkJob;
|
import java.util.Optional;
|
||||||
import eu.dnetlib.dhp.schema.common.ModelSupport;
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.*;
|
|
||||||
import org.apache.commons.io.IOUtils;
|
import org.apache.commons.io.IOUtils;
|
||||||
import org.apache.spark.SparkConf;
|
import org.apache.spark.SparkConf;
|
||||||
import org.apache.spark.api.java.function.FilterFunction;
|
import org.apache.spark.api.java.function.FilterFunction;
|
||||||
|
@ -16,13 +16,16 @@ import org.apache.spark.sql.SaveMode;
|
||||||
import org.apache.spark.sql.SparkSession;
|
import org.apache.spark.sql.SparkSession;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||||
|
import eu.dnetlib.dhp.common.HdfsSupport;
|
||||||
|
import eu.dnetlib.dhp.oa.graph.clean.CleanGraphSparkJob;
|
||||||
|
import eu.dnetlib.dhp.schema.common.ModelSupport;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.*;
|
||||||
import scala.Tuple2;
|
import scala.Tuple2;
|
||||||
|
|
||||||
import java.util.Objects;
|
|
||||||
import java.util.Optional;
|
|
||||||
|
|
||||||
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Combines the content from two aggregator graph tables of the same type, entities (or relationships) with the same ids
|
* Combines the content from two aggregator graph tables of the same type, entities (or relationships) with the same ids
|
||||||
* are picked preferring those from the BETA aggregator rather then from PROD. The identity of a relationship is defined
|
* are picked preferring those from the BETA aggregator rather then from PROD. The identity of a relationship is defined
|
||||||
|
@ -88,7 +91,8 @@ public class MergeGraphSparkJob {
|
||||||
Dataset<Tuple2<String, B>> beta = readTableFromPath(spark, betaInputPath, b_clazz);
|
Dataset<Tuple2<String, B>> beta = readTableFromPath(spark, betaInputPath, b_clazz);
|
||||||
Dataset<Tuple2<String, P>> prod = readTableFromPath(spark, prodInputPath, p_clazz);
|
Dataset<Tuple2<String, P>> prod = readTableFromPath(spark, prodInputPath, p_clazz);
|
||||||
|
|
||||||
prod.joinWith(beta, prod.col("_1").equalTo(beta.col("_1")), "full_outer")
|
prod
|
||||||
|
.joinWith(beta, prod.col("_1").equalTo(beta.col("_1")), "full_outer")
|
||||||
.map((MapFunction<Tuple2<Tuple2<String, P>, Tuple2<String, B>>, P>) value -> {
|
.map((MapFunction<Tuple2<Tuple2<String, P>, Tuple2<String, B>>, P>) value -> {
|
||||||
Optional<P> p = Optional.ofNullable(value._1()).map(Tuple2::_2);
|
Optional<P> p = Optional.ofNullable(value._1()).map(Tuple2::_2);
|
||||||
Optional<B> b = Optional.ofNullable(value._2()).map(Tuple2::_2);
|
Optional<B> b = Optional.ofNullable(value._2()).map(Tuple2::_2);
|
||||||
|
|
Loading…
Reference in New Issue