forked from D-Net/dnet-hadoop
[bulktagging] fixed workflow parameters
This commit is contained in:
parent
7c3041b276
commit
3c3bdb8318
|
@ -135,7 +135,7 @@ public class SparkBulkTagJob {
|
||||||
ModelSupport.entityTypes
|
ModelSupport.entityTypes
|
||||||
.keySet()
|
.keySet()
|
||||||
.parallelStream()
|
.parallelStream()
|
||||||
.filter(e -> ModelSupport.isResult(e))
|
.filter(ModelSupport::isResult)
|
||||||
.forEach(e -> {
|
.forEach(e -> {
|
||||||
removeOutputDir(spark, outputPath + e.name());
|
removeOutputDir(spark, outputPath + e.name());
|
||||||
ResultTagger resultTagger = new ResultTagger();
|
ResultTagger resultTagger = new ResultTagger();
|
||||||
|
|
|
@ -1,5 +1,4 @@
|
||||||
[
|
[
|
||||||
|
|
||||||
{
|
{
|
||||||
"paramName":"s",
|
"paramName":"s",
|
||||||
"paramLongName":"sourcePath",
|
"paramLongName":"sourcePath",
|
||||||
|
@ -12,7 +11,6 @@
|
||||||
"paramDescription": "the json path associated to each selection field",
|
"paramDescription": "the json path associated to each selection field",
|
||||||
"paramRequired": true
|
"paramRequired": true
|
||||||
},
|
},
|
||||||
|
|
||||||
{
|
{
|
||||||
"paramName": "out",
|
"paramName": "out",
|
||||||
"paramLongName": "outputPath",
|
"paramLongName": "outputPath",
|
||||||
|
@ -25,19 +23,16 @@
|
||||||
"paramDescription": "true if the spark session is managed, false otherwise",
|
"paramDescription": "true if the spark session is managed, false otherwise",
|
||||||
"paramRequired": false
|
"paramRequired": false
|
||||||
},
|
},
|
||||||
|
|
||||||
{
|
{
|
||||||
"paramName": "tg",
|
"paramName": "tg",
|
||||||
"paramLongName": "taggingConf",
|
"paramLongName": "taggingConf",
|
||||||
"paramDescription": "this parameter is intended for testing purposes only. It is a possible tagging configuration obtained via the XQUERY. Intended to be removed",
|
"paramDescription": "this parameter is intended for testing purposes only. It is a possible tagging configuration obtained via the XQUERY. Intended to be removed",
|
||||||
"paramRequired": false
|
"paramRequired": false
|
||||||
},
|
},
|
||||||
|
|
||||||
{
|
{
|
||||||
"paramName": "bu",
|
"paramName": "bu",
|
||||||
"paramLongName": "baseURL",
|
"paramLongName": "baseURL",
|
||||||
"paramDescription": "this parameter is to specify the api to be queried (beta or production)",
|
"paramDescription": "this parameter is to specify the api to be queried (beta or production)",
|
||||||
"paramRequired": false
|
"paramRequired": false
|
||||||
}
|
}
|
||||||
|
|
||||||
]
|
]
|
|
@ -12,21 +12,10 @@
|
||||||
<name>outputPath</name>
|
<name>outputPath</name>
|
||||||
<description>the output path</description>
|
<description>the output path</description>
|
||||||
</property>
|
</property>
|
||||||
|
|
||||||
|
|
||||||
<property>
|
<property>
|
||||||
<name>postgresURL</name>
|
<name>baseURL</name>
|
||||||
<description>the url of the postgress server to query</description>
|
<description>the community API base URL</description>
|
||||||
</property>
|
</property>
|
||||||
<property>
|
|
||||||
<name>postgresUser</name>
|
|
||||||
<description>the username to access the postgres db</description>
|
|
||||||
</property>
|
|
||||||
<property>
|
|
||||||
<name>postgresPassword</name>
|
|
||||||
<description>the postgres password</description>
|
|
||||||
</property>
|
|
||||||
|
|
||||||
</parameters>
|
</parameters>
|
||||||
|
|
||||||
<global>
|
<global>
|
||||||
|
@ -104,7 +93,7 @@
|
||||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||||
<master>yarn-cluster</master>
|
<master>yarn-cluster</master>
|
||||||
<mode>cluster</mode>
|
<mode>cluster</mode>
|
||||||
<name>bulkTagging-publication</name>
|
<name>bulkTagging-result</name>
|
||||||
<class>eu.dnetlib.dhp.bulktag.SparkBulkTagJob</class>
|
<class>eu.dnetlib.dhp.bulktag.SparkBulkTagJob</class>
|
||||||
<jar>dhp-enrichment-${projectVersion}.jar</jar>
|
<jar>dhp-enrichment-${projectVersion}.jar</jar>
|
||||||
<spark-opts>
|
<spark-opts>
|
||||||
|
@ -120,14 +109,12 @@
|
||||||
<arg>--sourcePath</arg><arg>${sourcePath}/</arg>
|
<arg>--sourcePath</arg><arg>${sourcePath}/</arg>
|
||||||
<arg>--outputPath</arg><arg>${outputPath}/</arg>
|
<arg>--outputPath</arg><arg>${outputPath}/</arg>
|
||||||
<arg>--pathMap</arg><arg>${pathMap}</arg>
|
<arg>--pathMap</arg><arg>${pathMap}</arg>
|
||||||
<arg>--production</arg><arg>${production}</arg>
|
<arg>--baseURL</arg><arg>${baseURL}</arg>
|
||||||
</spark>
|
</spark>
|
||||||
<ok to="End"/>
|
<ok to="End"/>
|
||||||
<error to="Kill"/>
|
<error to="Kill"/>
|
||||||
</action>
|
</action>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
<end name="End"/>
|
<end name="End"/>
|
||||||
|
|
||||||
</workflow-app>
|
</workflow-app>
|
Loading…
Reference in New Issue