compute stats for the used and new impala tables

This commit is contained in:
Antonis Lempesis 2020-07-03 01:14:34 +03:00 committed by Spyros Zoupanos
parent e6f50de6ef
commit dd3d6a6e15
1 changed files with 23 additions and 1 deletions

View File

@ -1,7 +1,19 @@
---------------------------------------------------- ----------------------------------------------------
-- Shortcuts for various definitions in stats db --- -- Shortcuts for various definitions in stats db ---
-- since these statements are executed using Impala,
-- we'll have to compute the stats for the tables we use
---------------------------------------------------- ----------------------------------------------------
-- Compute Impala statistics for the existing tables this script uses.
-- Table names are qualified with ${stats_db_name}, like the CREATE TABLE
-- statements in this file, so the statements do not depend on the session's
-- current database.
COMPUTE STATS ${stats_db_name}.result;
COMPUTE STATS ${stats_db_name}.result_sources;
COMPUTE STATS ${stats_db_name}.datasource;
COMPUTE STATS ${stats_db_name}.result_datasources;
COMPUTE STATS ${stats_db_name}.datasource_sources;
COMPUTE STATS ${stats_db_name}.country;
COMPUTE STATS ${stats_db_name}.result_organization;
COMPUTE STATS ${stats_db_name}.organization;
COMPUTE STATS ${stats_db_name}.datasource_organizations;
-- Peer reviewed: -- Peer reviewed:
-- Results that have been collected from Crossref -- Results that have been collected from Crossref
create table ${stats_db_name}.result_peerreviewed as create table ${stats_db_name}.result_peerreviewed as
@ -18,6 +30,8 @@ select distinct r.id as id, false as peer_reviewed
from ${stats_db_name}.result r from ${stats_db_name}.result r
where r.id not in (select id from peer_reviewed); where r.id not in (select id from peer_reviewed);
-- Compute Impala statistics for the newly created result_peerreviewed table
-- (qualified with the stats database, matching its CREATE TABLE statement).
COMPUTE STATS ${stats_db_name}.result_peerreviewed;
-- Green OA: -- Green OA:
-- OA results that are hosted by an Institutional repository and have NOT been harvested from a DOAJ journal. -- OA results that are hosted by an Institutional repository and have NOT been harvested from a DOAJ journal.
create table ${stats_db_name}.result_greenoa as create table ${stats_db_name}.result_greenoa as
@ -39,6 +53,8 @@ select distinct r.id as id, false as green
from ${stats_db_name}.result r from ${stats_db_name}.result r
where r.id not in (select id from result_green); where r.id not in (select id from result_green);
-- Compute Impala statistics for the newly created result_greenoa table
-- (qualified with the stats database, matching its CREATE TABLE statement).
COMPUTE STATS ${stats_db_name}.result_greenoa;
-- GOLD OA: -- GOLD OA:
-- OA results that have been harvested from a DOAJ journal. -- OA results that have been harvested from a DOAJ journal.
create table ${stats_db_name}.result_gold as create table ${stats_db_name}.result_gold as
@ -57,6 +73,8 @@ select distinct r.id, false as gold
from ${stats_db_name}.result r from ${stats_db_name}.result r
where r.id not in (select id from result_gold); where r.id not in (select id from result_gold);
-- Compute Impala statistics for the newly created result_gold table
-- (qualified with the stats database, matching its CREATE TABLE statement).
COMPUTE STATS ${stats_db_name}.result_gold;
-- shortcut result-country through the organization affiliation -- shortcut result-country through the organization affiliation
create table ${stats_db_name}.result_affiliated_country as create table ${stats_db_name}.result_affiliated_country as
select r.id as id, o.country as country select r.id as id, o.country as country
@ -65,6 +83,8 @@ join ${stats_db_name}.result_organization ro on ro.id=r.id
join ${stats_db_name}.organization o on o.id=ro.organization join ${stats_db_name}.organization o on o.id=ro.organization
where o.country is not null and o.country!=''; where o.country is not null and o.country!='';
-- Compute Impala statistics for the newly created result_affiliated_country
-- table (qualified with the stats database, matching its CREATE TABLE statement).
COMPUTE STATS ${stats_db_name}.result_affiliated_country;
-- shortcut result-country through datasource of deposition -- shortcut result-country through datasource of deposition
create table ${stats_db_name}.result_deposited_country as create table ${stats_db_name}.result_deposited_country as
select r.id as id, o.country as country select r.id as id, o.country as country
@ -74,3 +94,5 @@ join ${stats_db_name}.datasource d on d.id=rd.datasource
join ${stats_db_name}.datasource_organizations dor on dor.id=d.id join ${stats_db_name}.datasource_organizations dor on dor.id=d.id
join ${stats_db_name}.organization o on o.id=dor.organization join ${stats_db_name}.organization o on o.id=dor.organization
where o.country is not null and o.country!=''; where o.country is not null and o.country!='';
-- Compute Impala statistics for the newly created result_deposited_country
-- table (qualified with the stats database, matching its CREATE TABLE statement).
COMPUTE STATS ${stats_db_name}.result_deposited_country;