Created the structure of the Oozie workflow for ORCID

This commit is contained in:
Sandro La Bruzzo 2020-04-20 10:24:57 +02:00
parent 4d0d9de07e
commit eef60bb9f4
3 changed files with 62 additions and 2 deletions

View File

@ -8,11 +8,12 @@ import java.util.Properties;
import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.Path;
import org.apache.log4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
public class OrcidDSManager { public class OrcidDSManager {
private static final Logger logger = Logger.getLogger(OrcidDSManager.class); private static final Logger logger = LoggerFactory.getLogger(OrcidDSManager.class);
private String hdfsServerUri; private String hdfsServerUri;
private String hadoopUsername; private String hadoopUsername;

View File

@ -0,0 +1,18 @@
<!-- Default Oozie job properties supplied alongside a workflow definition. -->
<configuration>
<!-- Value substituted wherever a workflow uses ${jobTracker}. -->
<property>
<name>jobTracker</name>
<value>yarnRM</value>
</property>
<!-- Value substituted wherever a workflow uses ${nameNode}. -->
<property>
<name>nameNode</name>
<value>hdfs://nameservice1</value>
</property>
<!-- Selects the "spark2" share lib for Oozie java actions. -->
<property>
<name>oozie.action.sharelib.for.java</name>
<value>spark2</value>
</property>
<!-- NOTE(review): presumably makes the launcher prefer the application's own jars
     over the cluster-provided ones; confirm against the Oozie/Hadoop docs. -->
<property>
<name>oozie.launcher.mapreduce.user.classpath.first</name>
<value>true</value>
</property>
</configuration>

View File

@ -0,0 +1,41 @@
<!--
  Oozie workflow that prepares the ORCID input directory under ${workingPath}
  and then runs eu.dnetlib.doiboost.orcid.OrcidDSManager as a java action.
  Flow: ResetWorkingPath -> ImportOrcidSummary -> End; any failure goes to Kill.
-->
<workflow-app name="import Orcid into HDFS" xmlns="uri:oozie:workflow:0.5">
    <parameters>
        <property>
            <name>workingPath</name>
            <description>the working dir base path</description>
        </property>
    </parameters>

    <start to="ResetWorkingPath"/>

    <!-- Terminal failure node: reports the error message of the last failed action. -->
    <kill name="Kill">
        <message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
    </kill>

    <!-- Removes any previous ORCID input directory and recreates it empty. -->
    <action name="ResetWorkingPath">
        <fs>
            <delete path='${workingPath}/input/orcid'/>
            <mkdir path='${workingPath}/input/orcid'/>
        </fs>
        <ok to="ImportOrcidSummary"/>
        <error to="Kill"/>
    </action>

    <!-- Runs the OrcidDSManager main class. -->
    <action name="ImportOrcidSummary">
        <java>
            <job-tracker>${jobTracker}</job-tracker>
            <name-node>${nameNode}</name-node>
            <main-class>eu.dnetlib.doiboost.orcid.OrcidDSManager</main-class>
            <!-- TODO(Enrico): replace the placeholder arguments below with the ones
                 OrcidDSManager actually expects. The -t value still points at the
                 Crossref index dump path rather than the ORCID input directory. -->
            <arg>-t</arg><arg>${workingPath}/input/crossref/index_dump</arg>
            <arg>-n</arg><arg>${nameNode}</arg>
        </java>
        <ok to="End"/>
        <error to="Kill"/>
    </action>

    <end name="End"/>
</workflow-app>