1
0
Fork 0

Rename the Crossref input path parameter from inputPath to crossrefInputPath, and read the Pubmed relations from pubmedInputPath

This commit is contained in:
Serafeim Chatzopoulos 2023-10-25 12:05:02 -07:00
parent aad5982bf1
commit a82aaf57b2
4 changed files with 10 additions and 10 deletions

View File

@ -58,8 +58,8 @@ public class PrepareAffiliationRelations implements Serializable {
Boolean isSparkSessionManaged = Constants.isSparkSessionManaged(parser); Boolean isSparkSessionManaged = Constants.isSparkSessionManaged(parser);
log.info("isSparkSessionManaged: {}", isSparkSessionManaged); log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
final String inputPath = parser.get("inputPath"); final String crossrefInputPath = parser.get("crossrefInputPath");
log.info("inputPath: {}", inputPath); log.info("crossrefInputPath: {}", crossrefInputPath);
final String pubmedInputPath = parser.get("pubmedInputPath"); final String pubmedInputPath = parser.get("pubmedInputPath");
log.info("pubmedInputPath: {}", pubmedInputPath); log.info("pubmedInputPath: {}", pubmedInputPath);
@ -78,12 +78,12 @@ public class PrepareAffiliationRelations implements Serializable {
List<KeyValue> collectedFromCrossref = OafMapperUtils List<KeyValue> collectedFromCrossref = OafMapperUtils
.listKeyValues(ModelConstants.CROSSREF_ID, "Crossref"); .listKeyValues(ModelConstants.CROSSREF_ID, "Crossref");
JavaPairRDD<Text, Text> crossrefRelations = prepareAffiliationRelations( JavaPairRDD<Text, Text> crossrefRelations = prepareAffiliationRelations(
spark, inputPath, collectedFromCrossref); spark, crossrefInputPath, collectedFromCrossref);
List<KeyValue> collectedFromPubmed = OafMapperUtils List<KeyValue> collectedFromPubmed = OafMapperUtils
.listKeyValues(ModelConstants.PUBMED_CENTRAL_ID, "Pubmed"); .listKeyValues(ModelConstants.PUBMED_CENTRAL_ID, "Pubmed");
JavaPairRDD<Text, Text> pubmedRelations = prepareAffiliationRelations( JavaPairRDD<Text, Text> pubmedRelations = prepareAffiliationRelations(
spark, inputPath, collectedFromPubmed); spark, pubmedInputPath, collectedFromPubmed);
crossrefRelations crossrefRelations
.union(pubmedRelations) .union(pubmedRelations)

View File

@ -6,8 +6,8 @@
"paramRequired": false "paramRequired": false
}, },
{ {
"paramName": "ip", "paramName": "cip",
"paramLongName": "inputPath", "paramLongName": "crossrefInputPath",
"paramDescription": "the path to get the input data from Crossref", "paramDescription": "the path to get the input data from Crossref",
"paramRequired": true "paramRequired": true
}, },

View File

@ -31,6 +31,6 @@ spark2SqlQueryExecutionListeners=com.cloudera.spark.lineage.NavigatorQueryListen
# The following is needed as a property of a workflow # The following is needed as a property of a workflow
oozie.wf.application.path=${oozieTopWfApplicationPath} oozie.wf.application.path=${oozieTopWfApplicationPath}
inputPath=/data/bip-affiliations/data.json crossrefInputPath=/data/bip-affiliations/data.json
pubmedInputPath=/data/bip-affiiations/pubmed-data.json pubmedInputPath=/data/bip-affiliations/pubmed-data.json
outputPath=/tmp/crossref-affiliations-output-v5 outputPath=/tmp/crossref-affiliations-output-v5

View File

@ -2,7 +2,7 @@
<parameters> <parameters>
<property> <property>
<name>inputPath</name> <name>crossrefInputPath</name>
<description>the path where to find the inferred affiliation relations from Crossref</description> <description>the path where to find the inferred affiliation relations from Crossref</description>
</property> </property>
<property> <property>
@ -100,7 +100,7 @@
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
</spark-opts> </spark-opts>
<arg>--inputPath</arg><arg>${inputPath}</arg> <arg>--crossrefInputPath</arg><arg>${crossrefInputPath}</arg>
<arg>--pubmedInputPath</arg><arg>${pubmedInputPath}</arg> <arg>--pubmedInputPath</arg><arg>${pubmedInputPath}</arg>
<arg>--outputPath</arg><arg>${outputPath}</arg> <arg>--outputPath</arg><arg>${outputPath}</arg>
</spark> </spark>