[AffiliationFromPublisher]refactoring
This commit is contained in:
parent
675de07138
commit
7df492fa34
|
@ -89,7 +89,11 @@ public class PrepareAffiliationRelations implements Serializable {
|
||||||
isSparkSessionManaged,
|
isSparkSessionManaged,
|
||||||
spark -> {
|
spark -> {
|
||||||
Constants.removeOutputDir(spark, outputPath);
|
Constants.removeOutputDir(spark, outputPath);
|
||||||
|
createActionSet(spark, crossrefInputPath, pubmedInputPath, openapcInputPath, dataciteInputPath, webcrawlInputPath, publisherlInputPath, outputPath);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
private static void createActionSet(SparkSession spark, String crossrefInputPath, String pubmedInputPath, String openapcInputPath, String dataciteInputPath, String webcrawlInputPath, String publisherlInputPath, String outputPath) {
|
||||||
List<KeyValue> collectedFromCrossref = OafMapperUtils
|
List<KeyValue> collectedFromCrossref = OafMapperUtils
|
||||||
.listKeyValues(ModelConstants.CROSSREF_ID, "Crossref");
|
.listKeyValues(ModelConstants.CROSSREF_ID, "Crossref");
|
||||||
JavaPairRDD<Text, Text> crossrefRelations = prepareAffiliationRelations(
|
JavaPairRDD<Text, Text> crossrefRelations = prepareAffiliationRelations(
|
||||||
|
@ -128,8 +132,6 @@ public class PrepareAffiliationRelations implements Serializable {
|
||||||
.union(publisherRelations)
|
.union(publisherRelations)
|
||||||
.saveAsHadoopFile(
|
.saveAsHadoopFile(
|
||||||
outputPath, Text.class, Text.class, SequenceFileOutputFormat.class, BZip2Codec.class);
|
outputPath, Text.class, Text.class, SequenceFileOutputFormat.class, BZip2Codec.class);
|
||||||
|
|
||||||
});
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private static JavaPairRDD<Text,Text> prepareAffiliationRelationFromPublisher(SparkSession spark, String inputPath,
|
private static JavaPairRDD<Text,Text> prepareAffiliationRelationFromPublisher(SparkSession spark, String inputPath,
|
||||||
|
|
Loading…
Reference in New Issue