changed method signature from set of mapkey entries to String representing path on file system where to find the map

2020-08-07 17:32:27 +02:00 · 2020-08-07 17:32:27 +02:00 · ae1b7fbfdb
parent 931fa2ff00
commit ae1b7fbfdb
1 changed files with 12 additions and 5 deletions
--- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/CommunitySplit.java
+++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/CommunitySplit.java
@ -1,10 +1,17 @@
-
+/**
 * This class splits the dumped results according to the research community - research initiative/infrastructure
 * they are related to. The information about the community is found in the element "context.id" in the result.
 * Since the context that can be found in the result can be associated not only to communities, a community Map
 * is provided. It will guide the splitting process.
 * Note: the repartition(1) just before writing the results related to a community. This is a choice due
 * to uploading constraints (just one file for each community) As soon as a better solution will be in place
 * remove the repartition
 */
 package eu.dnetlib.dhp.oa.graph.dump.community;
 import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
 import java.io.Serializable;
 import java.util.Objects;
 import java.util.Optional;
 import java.util.Set;
 import java.util.stream.Collectors;
@ -19,19 +26,19 @@ import eu.dnetlib.dhp.schema.dump.oaf.community.CommunityResult;
 public class CommunitySplit implements Serializable {
-	public void run(Boolean isSparkSessionManaged, String inputPath, String outputPath, CommunityMap communityMap) {
+	public void run(Boolean isSparkSessionManaged, String inputPath, String outputPath, String communityMapPath) {
 		SparkConf conf = new SparkConf();
 		runWithSparkSession(
 			conf,
 			isSparkSessionManaged,
 			spark -> {
 				Utils.removeOutputDir(spark, outputPath);
-				execSplit(spark, inputPath, outputPath, communityMap.keySet());// , inputClazz);
+				execSplit(spark, inputPath, outputPath, Utils.getCommunityMap(spark, communityMapPath).keySet());
 			});
 	}
 	private static void execSplit(SparkSession spark, String inputPath, String outputPath,
-		Set<String> communities) {// }, Class<R> inputClazz) {
+		Set<String> communities) {
 		Dataset<CommunityResult> result = Utils
 			.readPath(spark, inputPath + "/publication", CommunityResult.class)