[AffRo] Use OpenAIRE as the collectedfrom for all the relations imported from the AffRo output

This commit is contained in:
Miriam Baglioni 2024-09-25 17:23:49 +02:00
parent d0eba032cd
commit 0765641979
1 changed files with 9 additions and 18 deletions

View File

@ -98,35 +98,26 @@ public class PrepareAffiliationRelations implements Serializable {
private static void createActionSet(SparkSession spark, String crossrefInputPath, String pubmedInputPath, private static void createActionSet(SparkSession spark, String crossrefInputPath, String pubmedInputPath,
String openapcInputPath, String dataciteInputPath, String webcrawlInputPath, String publisherlInputPath, String openapcInputPath, String dataciteInputPath, String webcrawlInputPath, String publisherlInputPath,
String outputPath) { String outputPath) {
List<KeyValue> collectedFromCrossref = OafMapperUtils List<KeyValue> collectedfromOpenAIRE = OafMapperUtils
.listKeyValues(ModelConstants.CROSSREF_ID, "Crossref"); .listKeyValues(OPENAIRE_DATASOURCE_ID, OPENAIRE_DATASOURCE_NAME);
JavaPairRDD<Text, Text> crossrefRelations = prepareAffiliationRelations( JavaPairRDD<Text, Text> crossrefRelations = prepareAffiliationRelations(
spark, crossrefInputPath, collectedFromCrossref); spark, crossrefInputPath, collectedfromOpenAIRE);
List<KeyValue> collectedFromPubmed = OafMapperUtils
.listKeyValues(ModelConstants.PUBMED_CENTRAL_ID, "Pubmed");
JavaPairRDD<Text, Text> pubmedRelations = prepareAffiliationRelations( JavaPairRDD<Text, Text> pubmedRelations = prepareAffiliationRelations(
spark, pubmedInputPath, collectedFromPubmed); spark, pubmedInputPath, collectedfromOpenAIRE);
List<KeyValue> collectedFromOpenAPC = OafMapperUtils
.listKeyValues(ModelConstants.OPEN_APC_ID, "OpenAPC");
JavaPairRDD<Text, Text> openAPCRelations = prepareAffiliationRelations( JavaPairRDD<Text, Text> openAPCRelations = prepareAffiliationRelations(
spark, openapcInputPath, collectedFromOpenAPC); spark, openapcInputPath, collectedfromOpenAIRE);
List<KeyValue> collectedFromDatacite = OafMapperUtils
.listKeyValues(ModelConstants.DATACITE_ID, "Datacite");
JavaPairRDD<Text, Text> dataciteRelations = prepareAffiliationRelations( JavaPairRDD<Text, Text> dataciteRelations = prepareAffiliationRelations(
spark, dataciteInputPath, collectedFromDatacite); spark, dataciteInputPath, collectedfromOpenAIRE);
List<KeyValue> collectedFromWebCrawl = OafMapperUtils
.listKeyValues(OPENAIRE_DATASOURCE_ID, OPENAIRE_DATASOURCE_NAME);
JavaPairRDD<Text, Text> webCrawlRelations = prepareAffiliationRelations( JavaPairRDD<Text, Text> webCrawlRelations = prepareAffiliationRelations(
spark, webcrawlInputPath, collectedFromWebCrawl); spark, webcrawlInputPath, collectedfromOpenAIRE);
List<KeyValue> collectedfromPublisher = OafMapperUtils
.listKeyValues(OPENAIRE_DATASOURCE_ID, OPENAIRE_DATASOURCE_NAME);
JavaPairRDD<Text, Text> publisherRelations = prepareAffiliationRelationFromPublisher( JavaPairRDD<Text, Text> publisherRelations = prepareAffiliationRelationFromPublisher(
spark, publisherlInputPath, collectedfromPublisher); spark, publisherlInputPath, collectedfromOpenAIRE);
crossrefRelations crossrefRelations
.union(pubmedRelations) .union(pubmedRelations)