[bulktagging] fixed workflow parameters

This commit is contained in:
Claudio Atzori 2023-12-05 09:08:48 +01:00
parent 7c3041b276
commit 3c3bdb8318
3 changed files with 5 additions and 23 deletions

View File

@ -135,7 +135,7 @@ public class SparkBulkTagJob {
ModelSupport.entityTypes
.keySet()
.parallelStream()
.filter(e -> ModelSupport.isResult(e))
.filter(ModelSupport::isResult)
.forEach(e -> {
removeOutputDir(spark, outputPath + e.name());
ResultTagger resultTagger = new ResultTagger();

View File

@ -1,5 +1,4 @@
[
{
"paramName":"s",
"paramLongName":"sourcePath",
@ -12,7 +11,6 @@
"paramDescription": "the json path associated to each selection field",
"paramRequired": true
},
{
"paramName": "out",
"paramLongName": "outputPath",
@ -25,19 +23,16 @@
"paramDescription": "true if the spark session is managed, false otherwise",
"paramRequired": false
},
{
"paramName": "tg",
"paramLongName": "taggingConf",
"paramDescription": "this parameter is intended for testing purposes only. It is a possible tagging configuration obtained via the XQUERY. Intended to be removed",
"paramRequired": false
},
{
"paramName": "bu",
"paramLongName": "baseURL",
"paramDescription": "this parameter is to specify the api to be queried (beta or production)",
"paramRequired": false
}
]

View File

@ -12,21 +12,10 @@
<name>outputPath</name>
<description>the output path</description>
</property>
<property>
<name>postgresURL</name>
<description>the url of the postgress server to query</description>
<name>baseURL</name>
<description>the community API base URL</description>
</property>
<property>
<name>postgresUser</name>
<description>the username to access the postgres db</description>
</property>
<property>
<name>postgresPassword</name>
<description>the postgres password</description>
</property>
</parameters>
<global>
@ -104,7 +93,7 @@
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn-cluster</master>
<mode>cluster</mode>
<name>bulkTagging-publication</name>
<name>bulkTagging-result</name>
<class>eu.dnetlib.dhp.bulktag.SparkBulkTagJob</class>
<jar>dhp-enrichment-${projectVersion}.jar</jar>
<spark-opts>
@ -120,14 +109,12 @@
<arg>--sourcePath</arg><arg>${sourcePath}/</arg>
<arg>--outputPath</arg><arg>${outputPath}/</arg>
<arg>--pathMap</arg><arg>${pathMap}</arg>
<arg>--production</arg><arg>${production}</arg>
<arg>--baseURL</arg><arg>${baseURL}</arg>
</spark>
<ok to="End"/>
<error to="Kill"/>
</action>
<end name="End"/>
</workflow-app>