From 7df492fa346bb20f614a2d06e7dbeb5085338f49 Mon Sep 17 00:00:00 2001
From: Miriam Baglioni <miriam.baglioni@isti.cnr.it>
Date: Tue, 6 Aug 2024 16:27:35 +0200
Subject: [PATCH] [AffiliationFromPublisher] refactoring: extract createActionSet method

---
 .../PrepareAffiliationRelations.java          | 82 ++++++++++---------
 1 file changed, 42 insertions(+), 40 deletions(-)
diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipaffiliations/PrepareAffiliationRelations.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipaffiliations/PrepareAffiliationRelations.java
index 03b8fa73a..1964edce1 100644
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipaffiliations/PrepareAffiliationRelations.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipaffiliations/PrepareAffiliationRelations.java
@@ -89,49 +89,51 @@ public class PrepareAffiliationRelations implements Serializable {
 			isSparkSessionManaged,
 			spark -> {
 				Constants.removeOutputDir(spark, outputPath);
-
-				List<KeyValue> collectedFromCrossref = OafMapperUtils
-					.listKeyValues(ModelConstants.CROSSREF_ID, "Crossref");
-				JavaPairRDD<Text, Text> crossrefRelations = prepareAffiliationRelations(
-					spark, crossrefInputPath, collectedFromCrossref);
-
-				List<KeyValue> collectedFromPubmed = OafMapperUtils
-					.listKeyValues(ModelConstants.PUBMED_CENTRAL_ID, "Pubmed");
-				JavaPairRDD<Text, Text> pubmedRelations = prepareAffiliationRelations(
-					spark, pubmedInputPath, collectedFromPubmed);
-
-				List<KeyValue> collectedFromOpenAPC = OafMapperUtils
-					.listKeyValues(ModelConstants.OPEN_APC_ID, "OpenAPC");
-				JavaPairRDD<Text, Text> openAPCRelations = prepareAffiliationRelations(
-					spark, openapcInputPath, collectedFromOpenAPC);
-
-				List<KeyValue> collectedFromDatacite = OafMapperUtils
-					.listKeyValues(ModelConstants.DATACITE_ID, "Datacite");
-				JavaPairRDD<Text, Text> dataciteRelations = prepareAffiliationRelations(
-					spark, dataciteInputPath, collectedFromDatacite);
-
-				List<KeyValue> collectedFromWebCrawl = OafMapperUtils
-					.listKeyValues(OPENAIRE_DATASOURCE_ID, OPENAIRE_DATASOURCE_NAME);
-				JavaPairRDD<Text, Text> webCrawlRelations = prepareAffiliationRelations(
-					spark, webcrawlInputPath, collectedFromWebCrawl);
-
-				List<KeyValue> collectedfromPublisher = OafMapperUtils
-						.listKeyValues(OPENAIRE_DATASOURCE_ID, OPENAIRE_DATASOURCE_NAME);
-				JavaPairRDD<Text, Text> publisherRelations = prepareAffiliationRelationFromPublisher(
-						spark, publisherlInputPath, collectedfromPublisher);
-
-				crossrefRelations
-					.union(pubmedRelations)
-					.union(openAPCRelations)
-					.union(dataciteRelations)
-					.union(webCrawlRelations)
-						.union(publisherRelations)
-					.saveAsHadoopFile(
-						outputPath, Text.class, Text.class, SequenceFileOutputFormat.class, BZip2Codec.class);
-
+				createActionSet(spark, crossrefInputPath, pubmedInputPath, openapcInputPath, dataciteInputPath, webcrawlInputPath, publisherlInputPath, outputPath);
 			});
 	}
 
+	private static void createActionSet(SparkSession spark, String crossrefInputPath, String pubmedInputPath, String openapcInputPath, String dataciteInputPath, String webcrawlInputPath, String publisherlInputPath, String outputPath) {
+		List<KeyValue> collectedFromCrossref = OafMapperUtils
+			.listKeyValues(ModelConstants.CROSSREF_ID, "Crossref");
+		JavaPairRDD<Text, Text> crossrefRelations = prepareAffiliationRelations(
+				spark, crossrefInputPath, collectedFromCrossref);
+
+		List<KeyValue> collectedFromPubmed = OafMapperUtils
+			.listKeyValues(ModelConstants.PUBMED_CENTRAL_ID, "Pubmed");
+		JavaPairRDD<Text, Text> pubmedRelations = prepareAffiliationRelations(
+				spark, pubmedInputPath, collectedFromPubmed);
+
+		List<KeyValue> collectedFromOpenAPC = OafMapperUtils
+			.listKeyValues(ModelConstants.OPEN_APC_ID, "OpenAPC");
+		JavaPairRDD<Text, Text> openAPCRelations = prepareAffiliationRelations(
+				spark, openapcInputPath, collectedFromOpenAPC);
+
+		List<KeyValue> collectedFromDatacite = OafMapperUtils
+			.listKeyValues(ModelConstants.DATACITE_ID, "Datacite");
+		JavaPairRDD<Text, Text> dataciteRelations = prepareAffiliationRelations(
+				spark, dataciteInputPath, collectedFromDatacite);
+
+		List<KeyValue> collectedFromWebCrawl = OafMapperUtils
+			.listKeyValues(OPENAIRE_DATASOURCE_ID, OPENAIRE_DATASOURCE_NAME);
+		JavaPairRDD<Text, Text> webCrawlRelations = prepareAffiliationRelations(
+				spark, webcrawlInputPath, collectedFromWebCrawl);
+
+		List<KeyValue> collectedfromPublisher = OafMapperUtils
+				.listKeyValues(OPENAIRE_DATASOURCE_ID, OPENAIRE_DATASOURCE_NAME);
+		JavaPairRDD<Text, Text> publisherRelations = prepareAffiliationRelationFromPublisher(
+				spark, publisherlInputPath, collectedfromPublisher);
+
+		crossrefRelations
+			.union(pubmedRelations)
+			.union(openAPCRelations)
+			.union(dataciteRelations)
+			.union(webCrawlRelations)
+				.union(publisherRelations)
+			.saveAsHadoopFile(
+					outputPath, Text.class, Text.class, SequenceFileOutputFormat.class, BZip2Codec.class);
+	}
+
 	private static JavaPairRDD<Text,Text> prepareAffiliationRelationFromPublisher(SparkSession spark, String inputPath,
 																				  List<KeyValue> collectedfrom){
 		Dataset<Row> df = spark