From d413b24611765245315203f862566aa24e07d973 Mon Sep 17 00:00:00 2001 From: antleb Date: Thu, 10 Jun 2021 02:35:46 +0300 Subject: [PATCH] added instances, orgs for monitor, totalcost for projects, apcs --- .../graph/stats/oozie_app/scripts/step11.sql | 3 ++- .../stats/oozie_app/scripts/step16_6.sql | 25 +++++++++++++------ .../scripts/step20-createMonitorDB.sql | 8 +++++- .../graph/stats/oozie_app/scripts/step6.sql | 19 ++++---------- 4 files changed, 32 insertions(+), 23 deletions(-) diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step11.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step11.sql index d26169fd6..b977302df 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step11.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step11.sql @@ -41,7 +41,8 @@ SELECT p.id, CASE WHEN prr2.id IS NULL THEN 0 ELSE prr2.daysForlastPub END AS daysforlastpub, CASE WHEN prr2.id IS NULL THEN 0 ELSE prr2.dp END AS delayedpubs, p.callidentifier, - p.code + p.code, + p.totalcost FROM ${stats_db_name}.project_tmp p LEFT JOIN (SELECT pr.id, count(distinct pr.result) AS np FROM ${stats_db_name}.project_results pr diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16_6.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16_6.sql index 528aaff52..5280cf3e3 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16_6.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16_6.sql @@ -30,10 +30,21 @@ from rcount group by rcount.pid; create view ${stats_db_name}.rndexpenditure as select * from stats_ext.rndexpediture; --- --- ANALYZE TABLE ${stats_db_name}.result_projectcount COMPUTE STATISTICS; --- ANALYZE TABLE ${stats_db_name}.result_projectcount COMPUTE STATISTICS FOR COLUMNS; --- ANALYZE TABLE ${stats_db_name}.result_fundercount COMPUTE STATISTICS; --- ANALYZE TABLE ${stats_db_name}.result_fundercount COMPUTE STATISTICS FOR COLUMNS; --- ANALYZE TABLE ${stats_db_name}.project_resultcount COMPUTE STATISTICS; --- ANALYZE TABLE ${stats_db_name}.project_resultcount COMPUTE STATISTICS FOR COLUMNS; \ No newline at end of file + +create table ${stats_db_name}.result_instance as +select distinct r.* +from ( + select substr(r.id, 4) as id, inst.accessright.classname as accessright, substr(inst.collectedfrom.key, 4) as collectedfrom, + substr(inst.hostedby.key, 4) as hostedby, inst.dateofacceptance.value as dateofacceptance, inst.license.value as license, p.qualifier.classname as pidtype, p.value as pid + from ${openaire_db_name}.result r lateral view explode(r.instance) instances as inst lateral view explode(inst.pid) pids as p) r +join ${stats_db_name}.result res on res.id=r.id; + +create table ${stats_db_name}.result_apc as +select r.id, r.amount, r.currency +from ( + select substr(r.id, 4) as id, inst.processingchargeamount.value as amount, inst.processingchargecurrency.value as currency + from ${openaire_db_name}.result r lateral view explode(r.instance) instances as inst) r +join ${stats_db_name}.result res on res.id=r.id +where r.amount is not null; + +create view ${stats_db_name}.issn_gold_oa_dataset as select * from stats_ext.issn_gold_oa_dataset; \ No newline at end of file diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql index af5e2a6a4..74aa8536c 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql @@ -16,7 +16,13 @@ create table TARGET.result as select distinct * from ( select * from SOURCE.result r where exists (select 1 from SOURCE.result_projects rp join SOURCE.project p on rp.project=p.id where rp.id=r.id) union all - select * from SOURCE.result r where exists (select 1 from SOURCE.result_concepts rc where rc.id=r.id) ) foo; + select * from SOURCE.result r where exists (select 1 from SOURCE.result_concepts rc where rc.id=r.id) + union all + select * from SOURCE.result r where exists (select 1 from SOURCE.result_project rp join SOURCE.project p on p.id=rp.project join SOURCE.project_organizations po on po.id=p.id join SOURCE.organization o on o.id=po.organization where ro.id=r.id and o.name in ( + 'GEORG-AUGUST-UNIVERSITAT GOTTINGEN STIFTUNG OFFENTLICHEN RECHTS', + 'ATHINA-EREVNITIKO KENTRO KAINOTOMIAS STIS TECHNOLOGIES TIS PLIROFORIAS, TON EPIKOINONION KAI TIS GNOSIS', + 'Consiglio Nazionale delle Ricerche', + 'Universidade do Minho') )) foo; compute stats TARGET.result; create table TARGET.result_citations as select * from SOURCE.result_citations orig where exists (select 1 from TARGET.result r where r.id=orig.id); diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step6.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step6.sql index 23ef03bc9..5d81e97bb 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step6.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step6.sql @@ -39,7 +39,8 @@ CREATE TABLE ${stats_db_name}.project_tmp daysforlastpub INT, delayedpubs INT, callidentifier STRING, - code STRING + code STRING, + totalcost FLOAT ) CLUSTERED BY (id) INTO 100 buckets stored AS orc tblproperties ('transactional' = 'true'); INSERT INTO ${stats_db_name}.project_tmp @@ -62,7 +63,8 @@ SELECT substr(p.id, 4) AS id, 0 AS daysforlastpub, 0 AS delayedpubs, p.callidentifier.value AS callidentifier, - p.code.value AS code + p.code.value AS code, + p.totalcost AS totalcost FROM ${openaire_db_name}.project p WHERE p.datainfo.deletedbyinference = false; @@ -70,15 +72,4 @@ create table ${stats_db_name}.funder as select distinct xpath_string(fund, '//funder/id') as id, xpath_string(fund, '//funder/name') as name, xpath_string(fund, '//funder/shortname') as shortname -from ${openaire_db_name}.project p lateral view explode(p.fundingtree.value) fundingtree as fund; - --- ANALYZE TABLE ${stats_db_name}.project_oids COMPUTE STATISTICS; --- ANALYZE TABLE ${stats_db_name}.project_oids COMPUTE STATISTICS FOR COLUMNS; --- ANALYZE TABLE ${stats_db_name}.project_organizations COMPUTE STATISTICS; --- ANALYZE TABLE ${stats_db_name}.project_organizations COMPUTE STATISTICS FOR COLUMNS; --- ANALYZE TABLE ${stats_db_name}.project_results COMPUTE STATISTICS; --- ANALYZE TABLE ${stats_db_name}.project_results COMPUTE STATISTICS FOR COLUMNS; --- ANALYZE TABLE ${stats_db_name}.project_tmp COMPUTE STATISTICS; --- ANALYZE TABLE ${stats_db_name}.project_tmp COMPUTE STATISTICS FOR COLUMNS; --- ANALYZE TABLE ${stats_db_name}.funder COMPUTE STATISTICS; --- ANALYZE TABLE ${stats_db_name}.funder COMPUTE STATISTICS FOR COLUMNS; \ No newline at end of file +from ${openaire_db_name}.project p lateral view explode(p.fundingtree.value) fundingtree as fund; \ No newline at end of file