forked from D-Net/dnet-hadoop
Renaming input param for crossref input path
This commit is contained in:
parent
aad5982bf1
commit
a82aaf57b2
|
@ -58,8 +58,8 @@ public class PrepareAffiliationRelations implements Serializable {
|
||||||
Boolean isSparkSessionManaged = Constants.isSparkSessionManaged(parser);
|
Boolean isSparkSessionManaged = Constants.isSparkSessionManaged(parser);
|
||||||
log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
|
log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
|
||||||
|
|
||||||
final String inputPath = parser.get("inputPath");
|
final String crossrefInputPath = parser.get("crossrefInputPath");
|
||||||
log.info("inputPath: {}", inputPath);
|
log.info("crossrefInputPath: {}", crossrefInputPath);
|
||||||
|
|
||||||
final String pubmedInputPath = parser.get("pubmedInputPath");
|
final String pubmedInputPath = parser.get("pubmedInputPath");
|
||||||
log.info("pubmedInputPath: {}", pubmedInputPath);
|
log.info("pubmedInputPath: {}", pubmedInputPath);
|
||||||
|
@ -78,12 +78,12 @@ public class PrepareAffiliationRelations implements Serializable {
|
||||||
List<KeyValue> collectedFromCrossref = OafMapperUtils
|
List<KeyValue> collectedFromCrossref = OafMapperUtils
|
||||||
.listKeyValues(ModelConstants.CROSSREF_ID, "Crossref");
|
.listKeyValues(ModelConstants.CROSSREF_ID, "Crossref");
|
||||||
JavaPairRDD<Text, Text> crossrefRelations = prepareAffiliationRelations(
|
JavaPairRDD<Text, Text> crossrefRelations = prepareAffiliationRelations(
|
||||||
spark, inputPath, collectedFromCrossref);
|
spark, crossrefInputPath, collectedFromCrossref);
|
||||||
|
|
||||||
List<KeyValue> collectedFromPubmed = OafMapperUtils
|
List<KeyValue> collectedFromPubmed = OafMapperUtils
|
||||||
.listKeyValues(ModelConstants.PUBMED_CENTRAL_ID, "Pubmed");
|
.listKeyValues(ModelConstants.PUBMED_CENTRAL_ID, "Pubmed");
|
||||||
JavaPairRDD<Text, Text> pubmedRelations = prepareAffiliationRelations(
|
JavaPairRDD<Text, Text> pubmedRelations = prepareAffiliationRelations(
|
||||||
spark, inputPath, collectedFromPubmed);
|
spark, pubmedInputPath, collectedFromPubmed);
|
||||||
|
|
||||||
crossrefRelations
|
crossrefRelations
|
||||||
.union(pubmedRelations)
|
.union(pubmedRelations)
|
||||||
|
|
|
@ -6,8 +6,8 @@
|
||||||
"paramRequired": false
|
"paramRequired": false
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"paramName": "ip",
|
"paramName": "cip",
|
||||||
"paramLongName": "inputPath",
|
"paramLongName": "crossrefInputPath",
|
||||||
"paramDescription": "the path to get the input data from Crossref",
|
"paramDescription": "the path to get the input data from Crossref",
|
||||||
"paramRequired": true
|
"paramRequired": true
|
||||||
},
|
},
|
||||||
|
|
|
@ -31,6 +31,6 @@ spark2SqlQueryExecutionListeners=com.cloudera.spark.lineage.NavigatorQueryListen
|
||||||
# The following is needed as a property of a workflow
|
# The following is needed as a property of a workflow
|
||||||
oozie.wf.application.path=${oozieTopWfApplicationPath}
|
oozie.wf.application.path=${oozieTopWfApplicationPath}
|
||||||
|
|
||||||
inputPath=/data/bip-affiliations/data.json
|
crossrefInputPath=/data/bip-affiliations/data.json
|
||||||
pubmedInputPath=/data/bip-affiiations/pubmed-data.json
|
pubmedInputPath=/data/bip-affiliations/pubmed-data.json
|
||||||
outputPath=/tmp/crossref-affiliations-output-v5
|
outputPath=/tmp/crossref-affiliations-output-v5
|
||||||
|
|
|
@ -2,7 +2,7 @@
|
||||||
<parameters>
|
<parameters>
|
||||||
|
|
||||||
<property>
|
<property>
|
||||||
<name>inputPath</name>
|
<name>crossrefInputPath</name>
|
||||||
<description>the path where to find the inferred affiliation relations from Crossref</description>
|
<description>the path where to find the inferred affiliation relations from Crossref</description>
|
||||||
</property>
|
</property>
|
||||||
<property>
|
<property>
|
||||||
|
@ -100,7 +100,7 @@
|
||||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||||
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
|
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
|
||||||
</spark-opts>
|
</spark-opts>
|
||||||
<arg>--inputPath</arg><arg>${inputPath}</arg>
|
<arg>--crossrefInputPath</arg><arg>${crossrefInputPath}</arg>
|
||||||
<arg>--pubmedInputPath</arg><arg>${pubmedInputPath}</arg>
|
<arg>--pubmedInputPath</arg><arg>${pubmedInputPath}</arg>
|
||||||
<arg>--outputPath</arg><arg>${outputPath}</arg>
|
<arg>--outputPath</arg><arg>${outputPath}</arg>
|
||||||
</spark>
|
</spark>
|
||||||
|
|
Loading…
Reference in New Issue