convert_hive_to_spark_actions #1

Merged
antonis.lempesis merged 20 commits from convert_hive_to_spark_actions into beta 2024-09-23 13:53:29 +02:00
5 changed files with 11 additions and 2 deletions
Showing only changes of commit 0b897f2f66 - Show all commits

View File

@ -20,7 +20,8 @@ WHERE project_tmp.id IN (SELECT pr.id
${stats_db_name}.result r
WHERE pr.result = r.id
AND r.type = 'publication'); /*EOS*/
-DROP TABLE IF EXISTS ${stats_db_name}.stored purge; /*EOS*/
+DROP TABLE IF EXISTS ${stats_db_name}.project purge; /*EOS*/
CREATE TABLE ${stats_db_name}.project stored as parquet as
SELECT p.id,

View File

@ -965,6 +965,8 @@ select allresults.ri_initiative, result_findable.no_result_findable/allresults.n
from allresults
join result_findable on result_findable.ri_initiative=allresults.ri_initiative; /*EOS*/
drop table if exists ${stats_db_name}.indi_pub_publicly_funded purge; /*EOS*/
create table if not exists ${stats_db_name}.indi_pub_publicly_funded stored as parquet as
with org_names_pids as
(select org.id,name, pid from ${stats_db_name}.organization org

View File

@ -58,6 +58,8 @@ FROM ${openaire_db_name}.software p
LATERAL VIEW explode(p.instance.instancetype) instances AS instancetype
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EOS*/
DROP TABLE IF EXISTS ${stats_db_name}.software_concepts purge; /*EOS*/
CREATE TABLE ${stats_db_name}.software_concepts STORED AS PARQUET AS
SELECT substr(p.id, 4) as id, case
when contexts.context.id RLIKE '^[^::]+::[^::]+::.+$' then contexts.context.id

View File

@ -50,6 +50,8 @@ FROM ${openaire_db_name}.otherresearchproduct o LATERAL VIEW explode(o.extrainfo
WHERE xpath_string(citation.value, "//citation/id[@type='openaire']/@value") != ""
and o.datainfo.deletedbyinference = false and o.datainfo.invisible=false; /*EOS*/
DROP TABLE IF EXISTS ${stats_db_name}.otherresearchproduct_classifications purge; /*EOS*/
CREATE TABLE ${stats_db_name}.otherresearchproduct_classifications STORED AS PARQUET AS
SELECT substr(p.id, 4) AS id, instancetype.classname AS type
FROM ${openaire_db_name}.otherresearchproduct p LATERAL VIEW explode(p.instance.instancetype) instances AS instancetype
@ -72,7 +74,7 @@ SELECT p.id, CASE WHEN d.id IS NULL THEN 'other' ELSE p.datasource END AS dataso
FROM (SELECT substr(p.id, 4) AS id, substr(instances.instance.hostedby.key, 4) AS datasource
from ${openaire_db_name}.otherresearchproduct p lateral view explode(p.instance) instances as instance
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false) p
-LEFT OUTER JOIN(SELECT substr(d.id, 4) id
+LEFT OUTER JOIN (SELECT substr(d.id, 4) id
from ${openaire_db_name}.datasource d
WHERE d.datainfo.deletedbyinference = false and d.datainfo.invisible=false) d on p.datasource = d.id; /*EOS*/

View File

@ -51,6 +51,8 @@ WHERE d1.datainfo.deletedbyinference = FALSE and d1.datainfo.invisible=false; /*
-- Updating temporary table with everything that is not based on results -> This is done with the following "dual" table.
-- Creating a temporary dual table that will be removed after the following insert
-- Drop any existing dual table first: the CREATE TABLE below has no IF NOT EXISTS guard.
-- NOTE(review): the trailing /*EOS*/ looks like an end-of-statement marker for the script runner -- confirm.
DROP TABLE IF EXISTS ${stats_db_name}.dual purge; /*EOS*/
-- Single-column table: dummy CHAR(1).
CREATE TABLE ${stats_db_name}.dual ( dummy CHAR(1)); /*EOS*/
-- Insert one row, with value 'X'.
INSERT INTO ${stats_db_name}.dual VALUES ('X'); /*EOS*/