forked from D-Net/dnet-hadoop
Added the download of the topic Excel file to the workflow, along with one property needed to get the input path of the topic file in the HDFS filesystem.
This commit is contained in:
parent
c2abde4d9f
commit
0bf2d0db52
|
@ -10,6 +10,10 @@
|
||||||
<description>the url where to get the programme file</description>
|
<description>the url where to get the programme file</description>
|
||||||
</property>
|
</property>
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<name>topicFileURL</name>
|
||||||
|
<description>the url where to get the topic file</description>
|
||||||
|
</property>
|
||||||
<property>
|
<property>
|
||||||
<name>outputPath</name>
|
<name>outputPath</name>
|
||||||
<description>path where to store the action set</description>
|
<description>path where to store the action set</description>
|
||||||
|
@ -33,11 +37,11 @@
|
||||||
|
|
||||||
<action name="get_project_file">
|
<action name="get_project_file">
|
||||||
<java>
|
<java>
|
||||||
<main-class>eu.dnetlib.dhp.actionmanager.project.csvutils.ReadCSV</main-class>
|
<main-class>eu.dnetlib.dhp.actionmanager.project.utils.ReadCSV</main-class>
|
||||||
<arg>--hdfsNameNode</arg><arg>${nameNode}</arg>
|
<arg>--hdfsNameNode</arg><arg>${nameNode}</arg>
|
||||||
<arg>--fileURL</arg><arg>${projectFileURL}</arg>
|
<arg>--fileURL</arg><arg>${projectFileURL}</arg>
|
||||||
<arg>--hdfsPath</arg><arg>${workingDir}/projects</arg>
|
<arg>--hdfsPath</arg><arg>${workingDir}/projects</arg>
|
||||||
<arg>--classForName</arg><arg>eu.dnetlib.dhp.actionmanager.project.csvutils.CSVProject</arg>
|
<arg>--classForName</arg><arg>eu.dnetlib.dhp.actionmanager.project.utils.CSVProject</arg>
|
||||||
</java>
|
</java>
|
||||||
<ok to="get_programme_file"/>
|
<ok to="get_programme_file"/>
|
||||||
<error to="Kill"/>
|
<error to="Kill"/>
|
||||||
|
@ -45,11 +49,23 @@
|
||||||
|
|
||||||
<action name="get_programme_file">
|
<action name="get_programme_file">
|
||||||
<java>
|
<java>
|
||||||
<main-class>eu.dnetlib.dhp.actionmanager.project.csvutils.ReadCSV</main-class>
|
<main-class>eu.dnetlib.dhp.actionmanager.project.utils.ReadCSV</main-class>
|
||||||
<arg>--hdfsNameNode</arg><arg>${nameNode}</arg>
|
<arg>--hdfsNameNode</arg><arg>${nameNode}</arg>
|
||||||
<arg>--fileURL</arg><arg>${programmeFileURL}</arg>
|
<arg>--fileURL</arg><arg>${programmeFileURL}</arg>
|
||||||
<arg>--hdfsPath</arg><arg>${workingDir}/programme</arg>
|
<arg>--hdfsPath</arg><arg>${workingDir}/programme</arg>
|
||||||
<arg>--classForName</arg><arg>eu.dnetlib.dhp.actionmanager.project.csvutils.CSVProgramme</arg>
|
<arg>--classForName</arg><arg>eu.dnetlib.dhp.actionmanager.project.utils.CSVProgramme</arg>
|
||||||
|
</java>
|
||||||
|
<ok to="get_topic_file"/>
|
||||||
|
<error to="Kill"/>
|
||||||
|
</action>
|
||||||
|
|
||||||
|
<action name="get_topic_file">
|
||||||
|
<java>
|
||||||
|
<main-class>eu.dnetlib.dhp.actionmanager.project.utils.ReadExcel</main-class>
|
||||||
|
<arg>--hdfsNameNode</arg><arg>${nameNode}</arg>
|
||||||
|
<arg>--fileURL</arg><arg>${topicFileURL}</arg>
|
||||||
|
<arg>--hdfsPath</arg><arg>${workingDir}/topic</arg>
|
||||||
|
<arg>--classForName</arg><arg>eu.dnetlib.dhp.actionmanager.project.utils.EXCELTopic</arg>
|
||||||
</java>
|
</java>
|
||||||
<ok to="read_projects"/>
|
<ok to="read_projects"/>
|
||||||
<error to="Kill"/>
|
<error to="Kill"/>
|
||||||
|
@ -136,6 +152,7 @@
|
||||||
</spark-opts>
|
</spark-opts>
|
||||||
<arg>--projectPath</arg><arg>${workingDir}/preparedProjects</arg>
|
<arg>--projectPath</arg><arg>${workingDir}/preparedProjects</arg>
|
||||||
<arg>--programmePath</arg><arg>${workingDir}/preparedProgramme</arg>
|
<arg>--programmePath</arg><arg>${workingDir}/preparedProgramme</arg>
|
||||||
|
<arg>--topicPath</arg><arg>${workingDir}/topic</arg>
|
||||||
<arg>--outputPath</arg><arg>${outputPath}</arg>
|
<arg>--outputPath</arg><arg>${outputPath}</arg>
|
||||||
</spark>
|
</spark>
|
||||||
<ok to="End"/>
|
<ok to="End"/>
|
||||||
|
|
Loading…
Reference in New Issue