9117_pubmed_affiliations_prod #357
|
@ -58,8 +58,8 @@ public class PrepareAffiliationRelations implements Serializable {
|
|||
Boolean isSparkSessionManaged = Constants.isSparkSessionManaged(parser);
|
||||
log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
|
||||
|
||||
final String inputPath = parser.get("inputPath");
|
||||
log.info("inputPath: {}", inputPath);
|
||||
final String crossrefInputPath = parser.get("crossrefInputPath");
|
||||
log.info("crossrefInputPath: {}", crossrefInputPath);
|
||||
|
||||
final String pubmedInputPath = parser.get("pubmedInputPath");
|
||||
log.info("pubmedInputPath: {}", pubmedInputPath);
|
||||
|
@ -78,12 +78,12 @@ public class PrepareAffiliationRelations implements Serializable {
|
|||
List<KeyValue> collectedFromCrossref = OafMapperUtils
|
||||
.listKeyValues(ModelConstants.CROSSREF_ID, "Crossref");
|
||||
JavaPairRDD<Text, Text> crossrefRelations = prepareAffiliationRelations(
|
||||
spark, inputPath, collectedFromCrossref);
|
||||
spark, crossrefInputPath, collectedFromCrossref);
|
||||
|
||||
List<KeyValue> collectedFromPubmed = OafMapperUtils
|
||||
.listKeyValues(ModelConstants.PUBMED_CENTRAL_ID, "Pubmed");
|
||||
JavaPairRDD<Text, Text> pubmedRelations = prepareAffiliationRelations(
|
||||
spark, inputPath, collectedFromPubmed);
|
||||
spark, pubmedInputPath, collectedFromPubmed);
|
||||
|
||||
crossrefRelations
|
||||
.union(pubmedRelations)
|
||||
|
|
|
@ -6,8 +6,8 @@
|
|||
"paramRequired": false
|
||||
},
|
||||
{
|
||||
"paramName": "ip",
|
||||
"paramLongName": "inputPath",
|
||||
"paramName": "cip",
|
||||
"paramLongName": "crossrefInputPath",
|
||||
"paramDescription": "the path to get the input data from Crossref",
|
||||
"paramRequired": true
|
||||
},
|
||||
|
|
|
@ -31,6 +31,6 @@ spark2SqlQueryExecutionListeners=com.cloudera.spark.lineage.NavigatorQueryListen
|
|||
# The following is needed as a property of a workflow
|
||||
oozie.wf.application.path=${oozieTopWfApplicationPath}
|
||||
|
||||
inputPath=/data/bip-affiliations/data.json
|
||||
pubmedInputPath=/data/bip-affiiations/pubmed-data.json
|
||||
crossrefInputPath=/data/bip-affiliations/data.json
|
||||
pubmedInputPath=/data/bip-affiliations/pubmed-data.json
|
||||
outputPath=/tmp/crossref-affiliations-output-v5
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
<parameters>
|
||||
|
||||
<property>
|
||||
<name>inputPath</name>
|
||||
<name>crossrefInputPath</name>
|
||||
<description>the path to the inferred affiliation relations from Crossref</description>
|
||||
</property>
|
||||
<property>
|
||||
|
@ -100,7 +100,7 @@
|
|||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
|
||||
</spark-opts>
|
||||
<arg>--inputPath</arg><arg>${inputPath}</arg>
|
||||
<arg>--crossrefInputPath</arg><arg>${crossrefInputPath}</arg>
|
||||
<arg>--pubmedInputPath</arg><arg>${pubmedInputPath}</arg>
|
||||
<arg>--outputPath</arg><arg>${outputPath}</arg>
|
||||
</spark>
|
||||
|
|
Loading…
Reference in New Issue