
[bulktagging] fixed workflow parameters

Claudio Atzori 2023-12-05 09:08:48 +01:00
parent 7c3041b276
commit 3c3bdb8318
3 changed files with 5 additions and 23 deletions

View File

@@ -135,7 +135,7 @@ public class SparkBulkTagJob {
 		ModelSupport.entityTypes
 			.keySet()
 			.parallelStream()
-			.filter(e -> ModelSupport.isResult(e))
+			.filter(ModelSupport::isResult)
 			.forEach(e -> {
 				removeOutputDir(spark, outputPath + e.name());
 				ResultTagger resultTagger = new ResultTagger();

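The only code change in SparkBulkTagJob swaps the lambda `e -> ModelSupport.isResult(e)` for the equivalent method reference `ModelSupport::isResult`. A minimal, self-contained sketch of the same refactoring on a plain Java stream (the EntityType enum and isResult predicate below are stand-ins, not the dnet-hadoop classes):

import java.util.EnumSet;
import java.util.Set;
import java.util.stream.Collectors;

public class MethodReferenceSketch {

	// Stand-in for the ModelSupport entity types; not the dnet-hadoop enum.
	enum EntityType { publication, dataset, software, otherresearchproduct, datasource }

	// Stand-in for ModelSupport.isResult(e).
	static boolean isResult(EntityType e) {
		return e != EntityType.datasource;
	}

	public static void main(String[] args) {
		Set<EntityType> entityTypes = EnumSet.allOf(EntityType.class);

		// Old form: explicit lambda.
		Set<EntityType> viaLambda = entityTypes.stream()
			.filter(e -> isResult(e))
			.collect(Collectors.toSet());

		// New form: method reference, same behaviour with less noise.
		Set<EntityType> viaMethodRef = entityTypes.stream()
			.filter(MethodReferenceSketch::isResult)
			.collect(Collectors.toSet());

		System.out.println(viaLambda.equals(viaMethodRef)); // prints: true
	}
}
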
View File

@@ -1,5 +1,4 @@
 [
   {
     "paramName":"s",
     "paramLongName":"sourcePath",
@@ -12,7 +11,6 @@
     "paramDescription": "the json path associated to each selection field",
     "paramRequired": true
   },
   {
     "paramName": "out",
     "paramLongName": "outputPath",
@@ -25,19 +23,16 @@
     "paramDescription": "true if the spark session is managed, false otherwise",
     "paramRequired": false
   },
   {
     "paramName": "tg",
     "paramLongName": "taggingConf",
     "paramDescription": "this parameter is intended for testing purposes only. It is a possible tagging configuration obtained via the XQUERY. Intended to be removed",
     "paramRequired": false
   },
   {
     "paramName": "bu",
     "paramLongName": "baseURL",
     "paramDescription": "this parameter is to specify the api to be queried (beta or production)",
     "paramRequired": false
   }
 ]

View File

@@ -12,21 +12,10 @@
         <name>outputPath</name>
         <description>the output path</description>
     </property>
     <property>
-        <name>postgresURL</name>
-        <description>the url of the postgress server to query</description>
+        <name>baseURL</name>
+        <description>the community API base URL</description>
     </property>
-    <property>
-        <name>postgresUser</name>
-        <description>the username to access the postgres db</description>
-    </property>
-    <property>
-        <name>postgresPassword</name>
-        <description>the postgres password</description>
-    </property>
 </parameters>
 <global>
@@ -104,7 +93,7 @@
             <spark xmlns="uri:oozie:spark-action:0.2">
                 <master>yarn-cluster</master>
                 <mode>cluster</mode>
-                <name>bulkTagging-publication</name>
+                <name>bulkTagging-result</name>
                 <class>eu.dnetlib.dhp.bulktag.SparkBulkTagJob</class>
                 <jar>dhp-enrichment-${projectVersion}.jar</jar>
                 <spark-opts>
@@ -120,14 +109,12 @@
                 <arg>--sourcePath</arg><arg>${sourcePath}/</arg>
                 <arg>--outputPath</arg><arg>${outputPath}/</arg>
                 <arg>--pathMap</arg><arg>${pathMap}</arg>
-                <arg>--production</arg><arg>${production}</arg>
+                <arg>--baseURL</arg><arg>${baseURL}</arg>
             </spark>
             <ok to="End"/>
             <error to="Kill"/>
         </action>
     <end name="End"/>
</workflow-app>
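
The substance of the workflow change is that the job no longer derives the community API endpoint from a production flag (and no longer needs the postgres connection properties): the workflow now passes the endpoint itself as baseURL. A minimal, hypothetical sketch of that before/after, assuming nothing from dnet-hadoop (the class name, the naive argument parsing, and both URLs below are illustrative placeholders):

import java.util.HashMap;
import java.util.Map;

public class BaseUrlArgSketch {

	// Placeholder endpoints: NOT the real OpenAIRE community API URLs.
	private static final String PRODUCTION_API = "https://example.org/community-api";
	private static final String BETA_API = "https://beta.example.org/community-api";

	public static void main(String[] args) {
		// Naive "--name value" parsing, standing in for the project's parameter parser.
		Map<String, String> parsed = new HashMap<>();
		for (int i = 0; i + 1 < args.length; i += 2) {
			parsed.put(args[i].replaceFirst("^--", ""), args[i + 1]);
		}

		// Old style: a boolean flag chose between two endpoints hard-wired in the job.
		boolean production = Boolean.parseBoolean(parsed.getOrDefault("production", "true"));
		String derivedUrl = production ? PRODUCTION_API : BETA_API;

		// New style: the workflow passes the endpoint itself, so no flag is needed.
		String baseURL = parsed.getOrDefault("baseURL", derivedUrl);

		System.out.println("community API base URL: " + baseURL);
	}
}

Run as, for example, java BaseUrlArgSketch --baseURL https://example.org/community-api. Keeping the environment choice in a workflow property rather than in the job code means the same jar can target beta or production without a code change, which appears to be the point of the parameter swap.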