postprocessing script correctly run as hive2 action

This commit is contained in:
Claudio Atzori 2020-03-23 17:40:39 +01:00
parent 658d40ccbe
commit 8b0ba3d76a
3 changed files with 13 additions and 7 deletions

View File

@ -19,6 +19,10 @@
<name>hive_metastore_uris</name> <name>hive_metastore_uris</name>
<value>thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083</value> <value>thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083</value>
</property> </property>
<property>
<name>hive_jdbc_url</name>
<value>jdbc:hive2://iis-cdh5-test-m3.ocean.icm.edu.pl:10000</value>
</property>
<property> <property>
<name>hive_db_name</name> <name>hive_db_name</name>
<value>openaire</value> <value>openaire</value>

View File

@ -1,4 +1,6 @@
CREATE view result as DROP VIEW IF EXISTS ${hive_db_name}.result;
CREATE VIEW IF NOT EXISTS result as
select id, dateofcollection, bestaccessright, datainfo, collectedfrom, pid, author, resulttype, language, country, subject, description, dateofacceptance, embargoenddate, resourcetype, context, instance from ${hive_db_name}.publication p select id, dateofcollection, bestaccessright, datainfo, collectedfrom, pid, author, resulttype, language, country, subject, description, dateofacceptance, embargoenddate, resourcetype, context, instance from ${hive_db_name}.publication p
union all union all
select id, dateofcollection, bestaccessright, datainfo, collectedfrom, pid, author, resulttype, language, country, subject, description, dateofacceptance, embargoenddate, resourcetype, context, instance from ${hive_db_name}.dataset d select id, dateofcollection, bestaccessright, datainfo, collectedfrom, pid, author, resulttype, language, country, subject, description, dateofacceptance, embargoenddate, resourcetype, context, instance from ${hive_db_name}.dataset d

View File

@ -38,17 +38,17 @@
</configuration> </configuration>
</global> </global>
<start to="MapGraphIntoDataFrame"/> <start to="MapGraphAsHiveDB"/>
<kill name="Kill"> <kill name="Kill">
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message> <message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
</kill> </kill>
<action name="MapGraphIntoDataFrame"> <action name="MapGraphAsHiveDB">
<spark xmlns="uri:oozie:spark-action:0.2"> <spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master> <master>yarn</master>
<mode>cluster</mode> <mode>cluster</mode>
<name>MapGraphIntoDataFrame</name> <name>MapGraphAsHiveDB</name>
<class>eu.dnetlib.dhp.graph.SparkGraphImporterJob</class> <class>eu.dnetlib.dhp.graph.SparkGraphImporterJob</class>
<jar>dhp-graph-mapper-${projectVersion}.jar</jar> <jar>dhp-graph-mapper-${projectVersion}.jar</jar>
<spark-opts> <spark-opts>
@ -74,11 +74,11 @@
<name-node>${nameNode}</name-node> <name-node>${nameNode}</name-node>
<configuration> <configuration>
<property> <property>
<name>oozie.hive.defaults</name> <name>hive.metastore.uris</name>
<value>hive-site.xml</value> <value>${hive_metastore_uris}</value>
</property> </property>
</configuration> </configuration>
<jdbc-url>jdbc:hive2://iis-cdh5-test-gw.ocean.icm.edu.pl:21050/${hive_db_name}</jdbc-url> <jdbc-url>${hive_jdbc_url}/${hive_db_name}</jdbc-url>
<script>lib/scripts/postprocessing.sql</script> <script>lib/scripts/postprocessing.sql</script>
<param>hive_db_name=${hive_db_name}</param> <param>hive_db_name=${hive_db_name}</param>
</hive2> </hive2>