9117_pubmed_affiliations_prod #357

Merged
claudio.atzori merged 4 commits from 9117_pubmed_affiliations_prod into master 2023-11-03 11:45:35 +01:00
4 changed files with 10 additions and 10 deletions
Showing only changes of commit 7e34dde774 - Show all commits

View File

@@ -58,8 +58,8 @@ public class PrepareAffiliationRelations implements Serializable {
 Boolean isSparkSessionManaged = Constants.isSparkSessionManaged(parser);
 log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
-final String inputPath = parser.get("inputPath");
-log.info("inputPath: {}", inputPath);
+final String crossrefInputPath = parser.get("crossrefInputPath");
+log.info("crossrefInputPath: {}", crossrefInputPath);
 final String pubmedInputPath = parser.get("pubmedInputPath");
 log.info("pubmedInputPath: {}", pubmedInputPath);
@@ -78,12 +78,12 @@ public class PrepareAffiliationRelations implements Serializable {
 List<KeyValue> collectedFromCrossref = OafMapperUtils
 .listKeyValues(ModelConstants.CROSSREF_ID, "Crossref");
 JavaPairRDD<Text, Text> crossrefRelations = prepareAffiliationRelations(
-spark, inputPath, collectedFromCrossref);
+spark, crossrefInputPath, collectedFromCrossref);
 List<KeyValue> collectedFromPubmed = OafMapperUtils
 .listKeyValues(ModelConstants.PUBMED_CENTRAL_ID, "Pubmed");
 JavaPairRDD<Text, Text> pubmedRelations = prepareAffiliationRelations(
-spark, inputPath, collectedFromPubmed);
+spark, pubmedInputPath, collectedFromPubmed);
 crossrefRelations
 .union(pubmedRelations)

View File

@@ -6,8 +6,8 @@
 "paramRequired": false
 },
 {
-"paramName": "ip",
-"paramLongName": "inputPath",
+"paramName": "cip",
+"paramLongName": "crossrefInputPath",
 "paramDescription": "the path to get the input data from Crossref",
 "paramRequired": true
 },

View File

@@ -31,6 +31,6 @@ spark2SqlQueryExecutionListeners=com.cloudera.spark.lineage.NavigatorQueryListen
 # The following is needed as a property of a workflow
 oozie.wf.application.path=${oozieTopWfApplicationPath}
-inputPath=/data/bip-affiliations/data.json
-pubmedInputPath=/data/bip-affiiations/pubmed-data.json
+crossrefInputPath=/data/bip-affiliations/data.json
+pubmedInputPath=/data/bip-affiliations/pubmed-data.json
 outputPath=/tmp/crossref-affiliations-output-v5

View File

@@ -2,7 +2,7 @@
 <parameters>
 <property>
-<name>inputPath</name>
+<name>crossrefInputPath</name>
 <description>the path where to find the inferred affiliation relations from Crossref</description>
 </property>
 <property>
@@ -100,7 +100,7 @@
 --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
 --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
 </spark-opts>
-<arg>--inputPath</arg><arg>${inputPath}</arg>
+<arg>--crossrefInputPath</arg><arg>${crossrefInputPath}</arg>
 <arg>--pubmedInputPath</arg><arg>${pubmedInputPath}</arg>
 <arg>--outputPath</arg><arg>${outputPath}</arg>
 </spark>