added to the workflow the download of the topic excel file and one property needed to get the input path of the topic file in the hdfs filesystem

This commit is contained in:
Miriam Baglioni 2020-09-28 12:17:22 +02:00
parent c2abde4d9f
commit 0bf2d0db52
1 changed files with 21 additions and 4 deletions

View File

@ -10,6 +10,10 @@
<description>the url where to get the programme file</description> <description>the url where to get the programme file</description>
</property> </property>
<property>
<name>topicFileURL</name>
<description>the url where to get the topic file</description>
</property>
<property> <property>
<name>outputPath</name> <name>outputPath</name>
<description>path where to store the action set</description> <description>path where to store the action set</description>
@ -33,11 +37,11 @@
<action name="get_project_file"> <action name="get_project_file">
<java> <java>
<main-class>eu.dnetlib.dhp.actionmanager.project.csvutils.ReadCSV</main-class> <main-class>eu.dnetlib.dhp.actionmanager.project.utils.ReadCSV</main-class>
<arg>--hdfsNameNode</arg><arg>${nameNode}</arg> <arg>--hdfsNameNode</arg><arg>${nameNode}</arg>
<arg>--fileURL</arg><arg>${projectFileURL}</arg> <arg>--fileURL</arg><arg>${projectFileURL}</arg>
<arg>--hdfsPath</arg><arg>${workingDir}/projects</arg> <arg>--hdfsPath</arg><arg>${workingDir}/projects</arg>
<arg>--classForName</arg><arg>eu.dnetlib.dhp.actionmanager.project.csvutils.CSVProject</arg> <arg>--classForName</arg><arg>eu.dnetlib.dhp.actionmanager.project.utils.CSVProject</arg>
</java> </java>
<ok to="get_programme_file"/> <ok to="get_programme_file"/>
<error to="Kill"/> <error to="Kill"/>
@ -45,11 +49,23 @@
<action name="get_programme_file"> <action name="get_programme_file">
<java> <java>
<main-class>eu.dnetlib.dhp.actionmanager.project.csvutils.ReadCSV</main-class> <main-class>eu.dnetlib.dhp.actionmanager.project.utils.ReadCSV</main-class>
<arg>--hdfsNameNode</arg><arg>${nameNode}</arg> <arg>--hdfsNameNode</arg><arg>${nameNode}</arg>
<arg>--fileURL</arg><arg>${programmeFileURL}</arg> <arg>--fileURL</arg><arg>${programmeFileURL}</arg>
<arg>--hdfsPath</arg><arg>${workingDir}/programme</arg> <arg>--hdfsPath</arg><arg>${workingDir}/programme</arg>
<arg>--classForName</arg><arg>eu.dnetlib.dhp.actionmanager.project.csvutils.CSVProgramme</arg> <arg>--classForName</arg><arg>eu.dnetlib.dhp.actionmanager.project.utils.CSVProgramme</arg>
</java>
<ok to="get_topic_file"/>
<error to="Kill"/>
</action>
<action name="get_topic_file">
<java>
<main-class>eu.dnetlib.dhp.actionmanager.project.utils.ReadExcel</main-class>
<arg>--hdfsNameNode</arg><arg>${nameNode}</arg>
<arg>--fileURL</arg><arg>${topicFileURL}</arg>
<arg>--hdfsPath</arg><arg>${workingDir}/topic</arg>
<arg>--classForName</arg><arg>eu.dnetlib.dhp.actionmanager.project.utils.EXCELTopic</arg>
</java> </java>
<ok to="read_projects"/> <ok to="read_projects"/>
<error to="Kill"/> <error to="Kill"/>
@ -136,6 +152,7 @@
</spark-opts> </spark-opts>
<arg>--projectPath</arg><arg>${workingDir}/preparedProjects</arg> <arg>--projectPath</arg><arg>${workingDir}/preparedProjects</arg>
<arg>--programmePath</arg><arg>${workingDir}/preparedProgramme</arg> <arg>--programmePath</arg><arg>${workingDir}/preparedProgramme</arg>
<arg>--topicPath</arg><arg>${workingDir}/topic</arg>
<arg>--outputPath</arg><arg>${outputPath}</arg> <arg>--outputPath</arg><arg>${outputPath}</arg>
</spark> </spark>
<ok to="End"/> <ok to="End"/>