1
0
Fork 0

Removed unneeded variable

This commit is contained in:
Miriam Baglioni 2020-08-12 10:03:33 +02:00
parent 98d28bab5c
commit 7400cd019d
1 changed files with 15 additions and 9 deletions

View File

@ -8,11 +8,13 @@ import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
import java.io.Serializable; import java.io.Serializable;
import java.util.*; import java.util.*;
import java.util.function.Consumer;
import org.apache.commons.io.IOUtils; import org.apache.commons.io.IOUtils;
import org.apache.spark.SparkConf; import org.apache.spark.SparkConf;
import org.apache.spark.api.java.function.MapFunction; import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.sql.*; import org.apache.spark.sql.*;
import org.jetbrains.annotations.NotNull;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
@ -74,7 +76,6 @@ public class SparkOrganizationRelation implements Serializable {
Dataset<Relation> relationDataset = Utils.readPath(spark, inputPath, Relation.class); Dataset<Relation> relationDataset = Utils.readPath(spark, inputPath, Relation.class);
relationDataset.createOrReplaceTempView("relation"); relationDataset.createOrReplaceTempView("relation");
Set<String> organizationSet = organizationMap.keySet();
List<eu.dnetlib.dhp.schema.dump.oaf.graph.Relation> relList = new ArrayList<>(); List<eu.dnetlib.dhp.schema.dump.oaf.graph.Relation> relList = new ArrayList<>();
@ -95,15 +96,9 @@ public class SparkOrganizationRelation implements Serializable {
}, Encoders.bean(MergedRels.class)) }, Encoders.bean(MergedRels.class))
.filter(Objects::nonNull) .filter(Objects::nonNull)
.collectAsList() .collectAsList()
.forEach(mergedRels -> { .forEach(getMergedRelsConsumer(organizationMap, relList));
String oId = mergedRels.getOrganizationId();
organizationSet.remove(oId);
organizationMap
.get(oId)
.forEach(community -> addRelations(relList, community, mergedRels.getRepresentativeId()));
});
organizationSet organizationMap.keySet()
.forEach( .forEach(
oId -> organizationMap oId -> organizationMap
.get(oId) .get(oId)
@ -118,6 +113,17 @@ public class SparkOrganizationRelation implements Serializable {
} }
/**
 * Builds the callback applied to each {@link MergedRels} entry.
 *
 * <p>For every entry the returned consumer looks up, in {@code organizationMap}, the communities
 * stored under the entry's organization id and calls {@code addRelations} once per community,
 * passing {@code relList}, the community and the entry's representative id. It then removes that
 * organization id from {@code organizationMap}.
 *
 * @param organizationMap lookup from organization id to its communities; mutated by the returned
 *            consumer, which removes each organization id it has handled
 * @param relList list handed to {@code addRelations} for every community
 * @return a consumer performing the steps described above
 */
@NotNull
private static Consumer<MergedRels> getMergedRelsConsumer(
	OrganizationMap organizationMap,
	List<eu.dnetlib.dhp.schema.dump.oaf.graph.Relation> relList) {
	return merged -> {
		final String organizationId = merged.getOrganizationId();
		organizationMap
			.get(organizationId)
			.forEach(communityId -> addRelations(relList, communityId, merged.getRepresentativeId()));
		// Remove the id only after its communities have been read from the map.
		organizationMap.remove(organizationId);
	};
}
private static void addRelations(List<eu.dnetlib.dhp.schema.dump.oaf.graph.Relation> relList, String community, private static void addRelations(List<eu.dnetlib.dhp.schema.dump.oaf.graph.Relation> relList, String community,
String organization) { String organization) {