forked from D-Net/dnet-hadoop
[bulktagging] fixed workflow parameters
This commit is contained in:
parent
7c3041b276
commit
3c3bdb8318
|
@ -135,7 +135,7 @@ public class SparkBulkTagJob {
|
|||
ModelSupport.entityTypes
|
||||
.keySet()
|
||||
.parallelStream()
|
||||
.filter(e -> ModelSupport.isResult(e))
|
||||
.filter(ModelSupport::isResult)
|
||||
.forEach(e -> {
|
||||
removeOutputDir(spark, outputPath + e.name());
|
||||
ResultTagger resultTagger = new ResultTagger();
|
||||
|
|
|
@ -1,5 +1,4 @@
|
|||
[
|
||||
|
||||
{
|
||||
"paramName":"s",
|
||||
"paramLongName":"sourcePath",
|
||||
|
@ -12,7 +11,6 @@
|
|||
"paramDescription": "the json path associated to each selection field",
|
||||
"paramRequired": true
|
||||
},
|
||||
|
||||
{
|
||||
"paramName": "out",
|
||||
"paramLongName": "outputPath",
|
||||
|
@ -25,19 +23,16 @@
|
|||
"paramDescription": "true if the spark session is managed, false otherwise",
|
||||
"paramRequired": false
|
||||
},
|
||||
|
||||
{
|
||||
"paramName": "tg",
|
||||
"paramLongName": "taggingConf",
|
||||
"paramDescription": "this parameter is intended for testing purposes only: it provides a possible tagging configuration, as obtained via the XQuery. It is intended to be removed",
|
||||
"paramRequired": false
|
||||
},
|
||||
|
||||
{
|
||||
"paramName": "bu",
|
||||
"paramLongName": "baseURL",
|
||||
"paramDescription": "the base URL of the API to be queried (beta or production)",
|
||||
"paramRequired": false
|
||||
}
|
||||
|
||||
]
|
|
@ -12,21 +12,10 @@
|
|||
<name>outputPath</name>
|
||||
<description>the output path</description>
|
||||
</property>
|
||||
|
||||
|
||||
<property>
|
||||
<name>postgresURL</name>
|
||||
<description>the url of the postgress server to query</description>
|
||||
<name>baseURL</name>
|
||||
<description>the community API base URL</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>postgresUser</name>
|
||||
<description>the username to access the postgres db</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>postgresPassword</name>
|
||||
<description>the postgres password</description>
|
||||
</property>
|
||||
|
||||
</parameters>
|
||||
|
||||
<global>
|
||||
|
@ -104,7 +93,7 @@
|
|||
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||
<master>yarn-cluster</master>
|
||||
<mode>cluster</mode>
|
||||
<name>bulkTagging-publication</name>
|
||||
<name>bulkTagging-result</name>
|
||||
<class>eu.dnetlib.dhp.bulktag.SparkBulkTagJob</class>
|
||||
<jar>dhp-enrichment-${projectVersion}.jar</jar>
|
||||
<spark-opts>
|
||||
|
@ -120,14 +109,12 @@
|
|||
<arg>--sourcePath</arg><arg>${sourcePath}/</arg>
|
||||
<arg>--outputPath</arg><arg>${outputPath}/</arg>
|
||||
<arg>--pathMap</arg><arg>${pathMap}</arg>
|
||||
<arg>--production</arg><arg>${production}</arg>
|
||||
<arg>--baseURL</arg><arg>${baseURL}</arg>
|
||||
</spark>
|
||||
<ok to="End"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
|
||||
|
||||
<end name="End"/>
|
||||
|
||||
</workflow-app>
|
Loading…
Reference in New Issue