2020-06-15 18:57:40 +02:00
|
|
|
------------------------------------------------------
------------------------------------------------------
-- Project table/view and Project related tables/views
------------------------------------------------------
------------------------------------------------------
|
2022-03-22 15:16:08 +01:00
|
|
|
-- project_oids: one row per (project, original identifier) pair.
-- Each element of the project originalid collection is exploded into its own
-- row. The project id is stored from its 4th character onwards
-- (presumably dropping the 40| entity prefix that the relation filters in
-- this file match on - TODO confirm).
-- Projects flagged deletedbyinference or invisible are excluded.
CREATE TABLE ${stats_db_name}.project_oids STORED AS PARQUET AS
SELECT
    substr(proj.id, 4)  AS id,
    orig.original_id    AS oid
FROM ${openaire_db_name}.project proj
    LATERAL VIEW explode(proj.originalid) orig AS original_id
WHERE proj.datainfo.deletedbyinference = false
  AND proj.datainfo.invisible = false;
|
|
|
|
|
2022-03-22 15:16:08 +01:00
|
|
|
-- project_organizations: one row per projectOrganization relation whose
-- source identifier starts with 40|.
-- id is the relation source and organization is the relation target, both
-- stored from their 4th character onwards.
-- Relations flagged deletedbyinference or invisible are excluded.
CREATE TABLE ${stats_db_name}.project_organizations STORED AS PARQUET AS
SELECT
    substr(rel.source, 4) AS id,
    substr(rel.target, 4) AS organization
FROM ${openaire_db_name}.relation rel
WHERE rel.reltype = 'projectOrganization'
  AND rel.source LIKE '40|%'
  AND rel.datainfo.deletedbyinference = false
  AND rel.datainfo.invisible = false;
|
2020-06-15 18:57:40 +02:00
|
|
|
|
2022-03-22 15:16:08 +01:00
|
|
|
-- project_results: one row per resultProject relation whose target
-- identifier starts with 40|.
-- Note the direction: id is taken from the relation TARGET and result from
-- the relation SOURCE, both stored from their 4th character onwards.
-- provenance carries the class name of the relation provenance action.
-- Relations flagged deletedbyinference or invisible are excluded.
CREATE TABLE ${stats_db_name}.project_results STORED AS PARQUET AS
SELECT
    substr(rel.target, 4)                      AS id,
    substr(rel.source, 4)                      AS result,
    rel.datainfo.provenanceaction.classname    AS provenance
FROM ${openaire_db_name}.relation rel
WHERE rel.reltype = 'resultProject'
  AND rel.target LIKE '40|%'
  AND rel.datainfo.deletedbyinference = false
  AND rel.datainfo.invisible = false;
|
2020-06-15 18:57:40 +02:00
|
|
|
|
2022-03-22 15:16:08 +01:00
|
|
|
-- project_classification: one row per entry of a project h2020classification
-- collection, keeping the programme code and the three classification levels.
-- Entries without an h2020programme are dropped, as are projects flagged
-- deletedbyinference or invisible.
-- The struct-field expressions are intentionally left without explicit
-- aliases so the column names derived by the engine stay as they were.
CREATE TABLE ${stats_db_name}.project_classification STORED AS PARQUET AS
SELECT
    substr(proj.id, 4) AS id,
    classif.h2020programme.code,
    classif.level1,
    classif.level2,
    classif.level3
FROM ${openaire_db_name}.project proj
    LATERAL VIEW explode(proj.h2020classification) classifications AS classif
WHERE proj.datainfo.deletedbyinference = false
  AND proj.datainfo.invisible = false
  AND classif.h2020programme IS NOT NULL;
|
2021-07-28 15:31:29 +02:00
|
|
|
|
2021-02-14 02:14:24 +01:00
|
|
|
-- project_tmp: working table holding one flattened row per project.
-- Declared as a transactional ORC table bucketed on id into 100 buckets.
-- Column groups, in declaration order:
--   identity      : id, acronym, title
--   funding       : funder, funding_lvl0, funding_lvl1, funding_lvl2
--   contract      : ec39, type
--   timing        : startdate, enddate (kept as strings), start_year,
--                   end_year, duration
--   publications  : haspubs, numpubs, daysforlastpub, delayedpubs
--   call and cost : callidentifier, code, totalcost
-- NOTE(review): totalcost is a FLOAT - if it holds monetary amounts a DECIMAL
-- would avoid rounding error, but the type is kept to preserve the schema.
CREATE TABLE ${stats_db_name}.project_tmp
(
    id              STRING,
    acronym         STRING,
    title           STRING,
    funder          STRING,
    funding_lvl0    STRING,
    funding_lvl1    STRING,
    funding_lvl2    STRING,
    ec39            STRING,
    type            STRING,
    startdate       STRING,
    enddate         STRING,
    start_year      INT,
    end_year        INT,
    duration        INT,
    haspubs         STRING,
    numpubs         INT,
    daysforlastpub  INT,
    delayedpubs     INT,
    callidentifier  STRING,
    code            STRING,
    totalcost       FLOAT
)
CLUSTERED BY (id) INTO 100 BUCKETS
STORED AS ORC
TBLPROPERTIES ('transactional' = 'true');
|
2020-06-15 18:57:40 +02:00
|
|
|
|
2021-02-14 02:14:24 +01:00
|
|
|
-- Populate project_tmp with one row per project that is neither
-- deletedbyinference nor invisible.
-- The select list is positional and must stay in the exact column order of
-- the project_tmp declaration.
-- Funder name and funding level names are read with XPath from the value of
-- the FIRST fundingtree element only.
-- start_year and end_year are the year parts of the start and end dates, and
-- duration is months_between(end, start) cast to INT.
-- The publication columns are filled with constants here (haspubs = no, all
-- counters = 0) - presumably they are updated by a later step, verify against
-- the rest of the workflow.
INSERT INTO ${stats_db_name}.project_tmp
SELECT
    substr(proj.id, 4)                                                   AS id,
    proj.acronym.value                                                   AS acronym,
    proj.title.value                                                     AS title,
    xpath_string(proj.fundingtree[0].value, '//funder/name')             AS funder,
    xpath_string(proj.fundingtree[0].value, '//funding_level_0/name')    AS funding_lvl0,
    xpath_string(proj.fundingtree[0].value, '//funding_level_1/name')    AS funding_lvl1,
    xpath_string(proj.fundingtree[0].value, '//funding_level_2/name')    AS funding_lvl2,
    proj.ecsc39.value                                                    AS ec39,
    proj.contracttype.classname                                          AS type,
    proj.startdate.value                                                 AS startdate,
    proj.enddate.value                                                   AS enddate,
    year(proj.startdate.value)                                           AS start_year,
    year(proj.enddate.value)                                             AS end_year,
    CAST(months_between(proj.enddate.value, proj.startdate.value) AS INT) AS duration,
    'no'                                                                 AS haspubs,
    0                                                                    AS numpubs,
    0                                                                    AS daysforlastpub,
    0                                                                    AS delayedpubs,
    proj.callidentifier.value                                            AS callidentifier,
    proj.code.value                                                      AS code,
    proj.totalcost                                                       AS totalcost
FROM ${openaire_db_name}.project proj
WHERE proj.datainfo.deletedbyinference = false
  AND proj.datainfo.invisible = false;
|
2020-07-06 14:16:58 +02:00
|
|
|
|
2022-03-22 15:16:08 +01:00
|
|
|
-- funder: distinct (id, name, shortname) triples extracted with XPath from
-- every fundingtree value of every project.
-- NOTE(review): unlike the other project statements in this file, no
-- deletedbyinference / invisible filter is applied here - confirm that
-- funders of hidden or deleted projects are meant to be included.
CREATE TABLE ${stats_db_name}.funder STORED AS PARQUET AS
SELECT DISTINCT
    xpath_string(tree_xml, '//funder/id')        AS id,
    xpath_string(tree_xml, '//funder/name')      AS name,
    xpath_string(tree_xml, '//funder/shortname') AS shortname
FROM ${openaire_db_name}.project proj
    LATERAL VIEW explode(proj.fundingtree.value) trees AS tree_xml;
|