[AffiliationFromPublisher]refactoring

This commit is contained in:
Miriam Baglioni 2024-08-06 16:27:35 +02:00
parent 675de07138
commit 7df492fa34
1 changed files with 42 additions and 40 deletions

View File

@ -89,7 +89,11 @@ public class PrepareAffiliationRelations implements Serializable {
isSparkSessionManaged,
spark -> {
Constants.removeOutputDir(spark, outputPath);
createActionSet(spark, crossrefInputPath, pubmedInputPath, openapcInputPath, dataciteInputPath, webcrawlInputPath, publisherlInputPath, outputPath);
});
}
private static void createActionSet(SparkSession spark, String crossrefInputPath, String pubmedInputPath, String openapcInputPath, String dataciteInputPath, String webcrawlInputPath, String publisherlInputPath, String outputPath) {
List<KeyValue> collectedFromCrossref = OafMapperUtils
.listKeyValues(ModelConstants.CROSSREF_ID, "Crossref");
JavaPairRDD<Text, Text> crossrefRelations = prepareAffiliationRelations(
@ -128,8 +132,6 @@ public class PrepareAffiliationRelations implements Serializable {
.union(publisherRelations)
.saveAsHadoopFile(
outputPath, Text.class, Text.class, SequenceFileOutputFormat.class, BZip2Codec.class);
});
}
private static JavaPairRDD<Text,Text> prepareAffiliationRelationFromPublisher(SparkSession spark, String inputPath,