This commit is contained in:
Claudio Atzori 2021-06-16 15:54:18 +02:00
commit 74833d04f1
4 changed files with 32 additions and 23 deletions

View File

@ -41,7 +41,8 @@ SELECT p.id,
CASE WHEN prr2.id IS NULL THEN 0 ELSE prr2.daysForlastPub END AS daysforlastpub, CASE WHEN prr2.id IS NULL THEN 0 ELSE prr2.daysForlastPub END AS daysforlastpub,
CASE WHEN prr2.id IS NULL THEN 0 ELSE prr2.dp END AS delayedpubs, CASE WHEN prr2.id IS NULL THEN 0 ELSE prr2.dp END AS delayedpubs,
p.callidentifier, p.callidentifier,
p.code p.code,
p.totalcost
FROM ${stats_db_name}.project_tmp p FROM ${stats_db_name}.project_tmp p
LEFT JOIN (SELECT pr.id, count(distinct pr.result) AS np LEFT JOIN (SELECT pr.id, count(distinct pr.result) AS np
FROM ${stats_db_name}.project_results pr FROM ${stats_db_name}.project_results pr

View File

@ -30,10 +30,21 @@ from rcount
group by rcount.pid; group by rcount.pid;
create view ${stats_db_name}.rndexpenditure as select * from stats_ext.rndexpediture; create view ${stats_db_name}.rndexpenditure as select * from stats_ext.rndexpediture;
--
-- ANALYZE TABLE ${stats_db_name}.result_projectcount COMPUTE STATISTICS; create table ${stats_db_name}.result_instance stored as parquet as
-- ANALYZE TABLE ${stats_db_name}.result_projectcount COMPUTE STATISTICS FOR COLUMNS; select distinct r.*
-- ANALYZE TABLE ${stats_db_name}.result_fundercount COMPUTE STATISTICS; from (
-- ANALYZE TABLE ${stats_db_name}.result_fundercount COMPUTE STATISTICS FOR COLUMNS; select substr(r.id, 4) as id, inst.accessright.classname as accessright, substr(inst.collectedfrom.key, 4) as collectedfrom,
-- ANALYZE TABLE ${stats_db_name}.project_resultcount COMPUTE STATISTICS; substr(inst.hostedby.key, 4) as hostedby, inst.dateofacceptance.value as dateofacceptance, inst.license.value as license, p.qualifier.classname as pidtype, p.value as pid
-- ANALYZE TABLE ${stats_db_name}.project_resultcount COMPUTE STATISTICS FOR COLUMNS; from ${openaire_db_name}.result r lateral view explode(r.instance) instances as inst lateral view explode(inst.pid) pids as p) r
join ${stats_db_name}.result res on res.id=r.id;
create table ${stats_db_name}.result_apc as
select r.id, r.amount, r.currency
from (
select substr(r.id, 4) as id, inst.processingchargeamount.value as amount, inst.processingchargecurrency.value as currency
from ${openaire_db_name}.result r lateral view explode(r.instance) instances as inst) r
join ${stats_db_name}.result res on res.id=r.id
where r.amount is not null;
create view ${stats_db_name}.issn_gold_oa_dataset as select * from stats_ext.issn_gold_oa_dataset;

View File

@ -16,7 +16,13 @@ create table TARGET.result as
select distinct * from ( select distinct * from (
select * from SOURCE.result r where exists (select 1 from SOURCE.result_projects rp join SOURCE.project p on rp.project=p.id where rp.id=r.id) select * from SOURCE.result r where exists (select 1 from SOURCE.result_projects rp join SOURCE.project p on rp.project=p.id where rp.id=r.id)
union all union all
select * from SOURCE.result r where exists (select 1 from SOURCE.result_concepts rc where rc.id=r.id) ) foo; select * from SOURCE.result r where exists (select 1 from SOURCE.result_concepts rc where rc.id=r.id)
union all
select * from SOURCE.result r where exists (select 1 from SOURCE.result_project rp join SOURCE.project p on p.id=rp.project join SOURCE.project_organizations po on po.id=p.id join SOURCE.organization o on o.id=po.organization where ro.id=r.id and o.name in (
'GEORG-AUGUST-UNIVERSITAT GOTTINGEN STIFTUNG OFFENTLICHEN RECHTS',
'ATHINA-EREVNITIKO KENTRO KAINOTOMIAS STIS TECHNOLOGIES TIS PLIROFORIAS, TON EPIKOINONION KAI TIS GNOSIS',
'Consiglio Nazionale delle Ricerche',
'Universidade do Minho') )) foo;
compute stats TARGET.result; compute stats TARGET.result;
create table TARGET.result_citations as select * from SOURCE.result_citations orig where exists (select 1 from TARGET.result r where r.id=orig.id); create table TARGET.result_citations as select * from SOURCE.result_citations orig where exists (select 1 from TARGET.result r where r.id=orig.id);

View File

@ -39,7 +39,8 @@ CREATE TABLE ${stats_db_name}.project_tmp
daysforlastpub INT, daysforlastpub INT,
delayedpubs INT, delayedpubs INT,
callidentifier STRING, callidentifier STRING,
code STRING code STRING,
totalcost FLOAT
) CLUSTERED BY (id) INTO 100 buckets stored AS orc tblproperties ('transactional' = 'true'); ) CLUSTERED BY (id) INTO 100 buckets stored AS orc tblproperties ('transactional' = 'true');
INSERT INTO ${stats_db_name}.project_tmp INSERT INTO ${stats_db_name}.project_tmp
@ -62,7 +63,8 @@ SELECT substr(p.id, 4) AS id,
0 AS daysforlastpub, 0 AS daysforlastpub,
0 AS delayedpubs, 0 AS delayedpubs,
p.callidentifier.value AS callidentifier, p.callidentifier.value AS callidentifier,
p.code.value AS code p.code.value AS code,
p.totalcost AS totalcost
FROM ${openaire_db_name}.project p FROM ${openaire_db_name}.project p
WHERE p.datainfo.deletedbyinference = false; WHERE p.datainfo.deletedbyinference = false;
@ -70,15 +72,4 @@ create table ${stats_db_name}.funder as
select distinct xpath_string(fund, '//funder/id') as id, select distinct xpath_string(fund, '//funder/id') as id,
xpath_string(fund, '//funder/name') as name, xpath_string(fund, '//funder/name') as name,
xpath_string(fund, '//funder/shortname') as shortname xpath_string(fund, '//funder/shortname') as shortname
from ${openaire_db_name}.project p lateral view explode(p.fundingtree.value) fundingtree as fund; from ${openaire_db_name}.project p lateral view explode(p.fundingtree.value) fundingtree as fund;
-- ANALYZE TABLE ${stats_db_name}.project_oids COMPUTE STATISTICS;
-- ANALYZE TABLE ${stats_db_name}.project_oids COMPUTE STATISTICS FOR COLUMNS;
-- ANALYZE TABLE ${stats_db_name}.project_organizations COMPUTE STATISTICS;
-- ANALYZE TABLE ${stats_db_name}.project_organizations COMPUTE STATISTICS FOR COLUMNS;
-- ANALYZE TABLE ${stats_db_name}.project_results COMPUTE STATISTICS;
-- ANALYZE TABLE ${stats_db_name}.project_results COMPUTE STATISTICS FOR COLUMNS;
-- ANALYZE TABLE ${stats_db_name}.project_tmp COMPUTE STATISTICS;
-- ANALYZE TABLE ${stats_db_name}.project_tmp COMPUTE STATISTICS FOR COLUMNS;
-- ANALYZE TABLE ${stats_db_name}.funder COMPUTE STATISTICS;
-- ANALYZE TABLE ${stats_db_name}.funder COMPUTE STATISTICS FOR COLUMNS;