postprocessing script correctly run as hive2 action

This commit is contained in:
Claudio Atzori 2020-03-23 17:40:39 +01:00
parent 658d40ccbe
commit 8b0ba3d76a
3 changed files with 13 additions and 7 deletions

View File

@ -19,6 +19,10 @@
<name>hive_metastore_uris</name>
<value>thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083</value>
</property>
<property>
<name>hive_jdbc_url</name>
<value>jdbc:hive2://iis-cdh5-test-m3.ocean.icm.edu.pl:10000</value>
</property>
<property>
<name>hive_db_name</name>
<value>openaire</value>

View File

@ -1,4 +1,6 @@
CREATE view result as
DROP VIEW IF EXISTS ${hive_db_name}.result;
CREATE VIEW IF NOT EXISTS result as
select id, dateofcollection, bestaccessright, datainfo, collectedfrom, pid, author, resulttype, language, country, subject, description, dateofacceptance, embargoenddate, resourcetype, context, instance from ${hive_db_name}.publication p
union all
select id, dateofcollection, bestaccessright, datainfo, collectedfrom, pid, author, resulttype, language, country, subject, description, dateofacceptance, embargoenddate, resourcetype, context, instance from ${hive_db_name}.dataset d

View File

@ -38,17 +38,17 @@
</configuration>
</global>
<start to="MapGraphIntoDataFrame"/>
<start to="MapGraphAsHiveDB"/>
<kill name="Kill">
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
</kill>
<action name="MapGraphIntoDataFrame">
<action name="MapGraphAsHiveDB">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master>
<mode>cluster</mode>
<name>MapGraphIntoDataFrame</name>
<name>MapGraphAsHiveDB</name>
<class>eu.dnetlib.dhp.graph.SparkGraphImporterJob</class>
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
<spark-opts>
@ -74,11 +74,11 @@
<name-node>${nameNode}</name-node>
<configuration>
<property>
<name>oozie.hive.defaults</name>
<value>hive-site.xml</value>
<name>hive.metastore.uris</name>
<value>${hive_metastore_uris}</value>
</property>
</configuration>
<jdbc-url>jdbc:hive2://iis-cdh5-test-gw.ocean.icm.edu.pl:21050/${hive_db_name}</jdbc-url>
<jdbc-url>${hive_jdbc_url}/${hive_db_name}</jdbc-url>
<script>lib/scripts/postprocessing.sql</script>
<param>hive_db_name=${hive_db_name}</param>
</hive2>