From dd3d6a6e154be3d270a290f09b3526a544d5189e Mon Sep 17 00:00:00 2001
From: antleb
Date: Fri, 3 Jul 2020 01:14:34 +0300
Subject: [PATCH] compute stats for the used and new impala tables

---
Review notes (text between the "---" line and the diff is not part of the
commit message):
- The patch adds a COMPUTE STATS statement for each table the script reads
  and for each table it creates, because the statements run on Impala.
- COMPUTE STATS targets are qualified with ${stats_db_name}, matching the
  "create table ${stats_db_name}.<table>" statements of the same script, so
  they do not depend on the session's current database.
- NOTE(review): the "index" line still carries the blob ids recorded for the
  unqualified version of the added lines; regenerate the patch if exact blob
  ids matter (e.g. for 3-way apply).

 .../graph/stats/oozie_app/scripts/step16.sql  | 24 ++++++++++++++++++++++++-
 1 file changed, 23 insertions(+), 1 deletion(-)

diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16.sql
index bc10831cc..3d21a9d68 100644
--- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16.sql
+++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16.sql
@@ -1,7 +1,19 @@
 ----------------------------------------------------
 -- Shortcuts for various definitions in stats db ---
+-- since these statements are executed using Impala,
+-- we'll have to compute the stats for the tables we use
 ----------------------------------------------------
 
+COMPUTE STATS ${stats_db_name}.result;
+COMPUTE STATS ${stats_db_name}.result_sources;
+COMPUTE STATS ${stats_db_name}.datasource;
+COMPUTE STATS ${stats_db_name}.result_datasources;
+COMPUTE STATS ${stats_db_name}.datasource_sources;
+COMPUTE STATS ${stats_db_name}.country;
+COMPUTE STATS ${stats_db_name}.result_organization;
+COMPUTE STATS ${stats_db_name}.organization;
+COMPUTE STATS ${stats_db_name}.datasource_organizations;
+
 -- Peer reviewed:
 -- Results that have been collected from Crossref
 create table ${stats_db_name}.result_peerreviewed as
@@ -18,6 +30,8 @@ select distinct r.id as id, false as peer_reviewed
 from ${stats_db_name}.result r
 where r.id not in (select id from peer_reviewed);
 
+COMPUTE STATS ${stats_db_name}.result_peerreviewed;
+
 -- Green OA:
 -- OA results that are hosted by an Institutional repository and have NOT been harvested from a DOAJ journal.
 create table ${stats_db_name}.result_greenoa as
@@ -39,6 +53,8 @@ select distinct r.id as id, false as green
 from ${stats_db_name}.result r
 where r.id not in (select id from result_green);
 
+COMPUTE STATS ${stats_db_name}.result_greenoa;
+
 -- GOLD OA:
 -- OA results that have been harvested from a DOAJ journal.
 create table ${stats_db_name}.result_gold as
@@ -57,6 +73,8 @@ select distinct r.id, false as gold
 from ${stats_db_name}.result r
 where r.id not in (select id from result_gold);
 
+COMPUTE STATS ${stats_db_name}.result_gold;
+
 -- shortcut result-country through the organization affiliation
 create table ${stats_db_name}.result_affiliated_country as
 select r.id as id, o.country as country
@@ -65,6 +83,8 @@ join ${stats_db_name}.result_organization ro on ro.id=r.id
 join ${stats_db_name}.organization o on o.id=ro.organization
 where o.country is not null and o.country!='';
 
+COMPUTE STATS ${stats_db_name}.result_affiliated_country;
+
 -- shortcut result-country through datasource of deposition
 create table ${stats_db_name}.result_deposited_country as
 select r.id as id, o.country as country
@@ -73,4 +93,6 @@ join ${stats_db_name}.result_datasources rd on rd.id=r.id
 join ${stats_db_name}.datasource d on d.id=rd.datasource
 join ${stats_db_name}.datasource_organizations dor on dor.id=d.id
 join ${stats_db_name}.organization o on o.id=dor.organization
-where o.country is not null and o.country!='';
\ No newline at end of file
+where o.country is not null and o.country!='';
+
+COMPUTE STATS ${stats_db_name}.result_deposited_country;
\ No newline at end of file