convert_hive_to_spark_actions #1

Merged
antonis.lempesis merged 20 commits from convert_hive_to_spark_actions into beta 2024-09-23 13:53:29 +02:00
5 changed files with 11 additions and 2 deletions
Showing only changes of commit 0b897f2f66 - Show all commits

View File

@ -20,7 +20,8 @@ WHERE project_tmp.id IN (SELECT pr.id
${stats_db_name}.result r ${stats_db_name}.result r
WHERE pr.result = r.id WHERE pr.result = r.id
AND r.type = 'publication'); /*EOS*/ AND r.type = 'publication'); /*EOS*/
DROP TABLE IF EXISTS ${stats_db_name}.stored purge; /*EOS*/
DROP TABLE IF EXISTS ${stats_db_name}.project purge; /*EOS*/
CREATE TABLE ${stats_db_name}.project stored as parquet as CREATE TABLE ${stats_db_name}.project stored as parquet as
SELECT p.id, SELECT p.id,

View File

@ -965,6 +965,8 @@ select allresults.ri_initiative, result_findable.no_result_findable/allresults.n
from allresults from allresults
join result_findable on result_findable.ri_initiative=allresults.ri_initiative; /*EOS*/ join result_findable on result_findable.ri_initiative=allresults.ri_initiative; /*EOS*/
drop table if exists ${stats_db_name}.indi_pub_publicly_funded purge; /*EOS*/
create table if not exists ${stats_db_name}.indi_pub_publicly_funded stored as parquet as create table if not exists ${stats_db_name}.indi_pub_publicly_funded stored as parquet as
with org_names_pids as with org_names_pids as
(select org.id,name, pid from ${stats_db_name}.organization org (select org.id,name, pid from ${stats_db_name}.organization org

View File

@ -58,6 +58,8 @@ FROM ${openaire_db_name}.software p
LATERAL VIEW explode(p.instance.instancetype) instances AS instancetype LATERAL VIEW explode(p.instance.instancetype) instances AS instancetype
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EOS*/ where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EOS*/
DROP TABLE IF EXISTS ${stats_db_name}.software_concepts purge; /*EOS*/
CREATE TABLE ${stats_db_name}.software_concepts STORED AS PARQUET AS CREATE TABLE ${stats_db_name}.software_concepts STORED AS PARQUET AS
SELECT substr(p.id, 4) as id, case SELECT substr(p.id, 4) as id, case
when contexts.context.id RLIKE '^[^::]+::[^::]+::.+$' then contexts.context.id when contexts.context.id RLIKE '^[^::]+::[^::]+::.+$' then contexts.context.id

View File

@ -50,6 +50,8 @@ FROM ${openaire_db_name}.otherresearchproduct o LATERAL VIEW explode(o.extrainfo
WHERE xpath_string(citation.value, "//citation/id[@type='openaire']/@value") != "" WHERE xpath_string(citation.value, "//citation/id[@type='openaire']/@value") != ""
and o.datainfo.deletedbyinference = false and o.datainfo.invisible=false; /*EOS*/ and o.datainfo.deletedbyinference = false and o.datainfo.invisible=false; /*EOS*/
DROP TABLE IF EXISTS ${stats_db_name}.otherresearchproduct_classifications purge; /*EOS*/
CREATE TABLE ${stats_db_name}.otherresearchproduct_classifications STORED AS PARQUET AS CREATE TABLE ${stats_db_name}.otherresearchproduct_classifications STORED AS PARQUET AS
SELECT substr(p.id, 4) AS id, instancetype.classname AS type SELECT substr(p.id, 4) AS id, instancetype.classname AS type
FROM ${openaire_db_name}.otherresearchproduct p LATERAL VIEW explode(p.instance.instancetype) instances AS instancetype FROM ${openaire_db_name}.otherresearchproduct p LATERAL VIEW explode(p.instance.instancetype) instances AS instancetype
@ -72,7 +74,7 @@ SELECT p.id, CASE WHEN d.id IS NULL THEN 'other' ELSE p.datasource END AS dataso
FROM (SELECT substr(p.id, 4) AS id, substr(instances.instance.hostedby.key, 4) AS datasource FROM (SELECT substr(p.id, 4) AS id, substr(instances.instance.hostedby.key, 4) AS datasource
from ${openaire_db_name}.otherresearchproduct p lateral view explode(p.instance) instances as instance from ${openaire_db_name}.otherresearchproduct p lateral view explode(p.instance) instances as instance
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false) p where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false) p
LEFT OUTER JOIN(SELECT substr(d.id, 4) id LEFT OUTER JOIN (SELECT substr(d.id, 4) id
from ${openaire_db_name}.datasource d from ${openaire_db_name}.datasource d
WHERE d.datainfo.deletedbyinference = false and d.datainfo.invisible=false) d on p.datasource = d.id; /*EOS*/ WHERE d.datainfo.deletedbyinference = false and d.datainfo.invisible=false) d on p.datasource = d.id; /*EOS*/

View File

@ -51,6 +51,8 @@ WHERE d1.datainfo.deletedbyinference = FALSE and d1.datainfo.invisible=false; /*
-- Updating temporary table with everything that is not based on results -> This is done with the following "dual" table. -- Updating temporary table with everything that is not based on results -> This is done with the following "dual" table.
-- Creating a temporary dual table that will be removed after the following insert -- Creating a temporary dual table that will be removed after the following insert
DROP TABLE IF EXISTS ${stats_db_name}.dual purge; /*EOS*/
CREATE TABLE ${stats_db_name}.dual ( dummy CHAR(1)); /*EOS*/ CREATE TABLE ${stats_db_name}.dual ( dummy CHAR(1)); /*EOS*/
INSERT INTO ${stats_db_name}.dual VALUES ('X'); /*EOS*/ INSERT INTO ${stats_db_name}.dual VALUES ('X'); /*EOS*/