From 2ed9c5504c465e1b7ce62e5d6e890f899f4c4204 Mon Sep 17 00:00:00 2001 From: Antonis Lempesis Date: Thu, 3 Oct 2024 10:55:56 +0300 Subject: [PATCH] computing stats after every table creation --- .../graph/stats/oozie_app/scripts/step10.sql | 7 ++ .../graph/stats/oozie_app/scripts/step13.sql | 22 +++++- .../graph/stats/oozie_app/scripts/step14.sql | 14 ++++ .../graph/stats/oozie_app/scripts/step15.sql | 12 ++++ .../stats/oozie_app/scripts/step15_5.sql | 68 +++++++++++++------ .../graph/stats/oozie_app/scripts/step2.sql | 16 +++++ .../graph/stats/oozie_app/scripts/step3.sql | 19 +++++- .../graph/stats/oozie_app/scripts/step4.sql | 19 +++++- .../graph/stats/oozie_app/scripts/step5.sql | 19 +++++- .../graph/stats/oozie_app/scripts/step6.sql | 15 +++- .../graph/stats/oozie_app/scripts/step7.sql | 15 ++-- .../graph/stats/oozie_app/scripts/step8.sql | 36 +++++++--- .../graph/stats/oozie_app/scripts/step9.sql | 2 + 13 files changed, 222 insertions(+), 42 deletions(-) diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step10.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step10.sql index 48d8961ff..e1297f5e2 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step10.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step10.sql @@ -37,15 +37,22 @@ FROM ${external_stats_db_name}.licenses_normalized; /*EOS*/ create or replace view ${stats_db_name}.usage_stats as select * from openaire_prod_usage_stats.usage_stats; /*EOS*/ +/* usage_stats is a view: ANALYZE TABLE is not supported on views, so no statistics are computed for it */ + create or replace view ${stats_db_name}.downloads_stats as select * from openaire_prod_usage_stats.downloads_stats; /*EOS*/ +/* downloads_stats is a view: ANALYZE TABLE is not supported on views, so no statistics are computed for it */ + create or replace view ${stats_db_name}.pageviews_stats 
as select * from openaire_prod_usage_stats.pageviews_stats; /*EOS*/ +/* pageviews_stats is a view: ANALYZE TABLE is not supported on views, so no statistics are computed for it */ + create or replace view ${stats_db_name}.views_stats as select * from openaire_prod_usage_stats.views_stats; /*EOS*/ +/* views_stats is a view: ANALYZE TABLE is not supported on views, so no statistics are computed for it */ ------------------------------------------------------------------------------------------------ ------------------------------------------------------------------------------------------------ -- Creation date of the database diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step13.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step13.sql index a590c190e..b0164b038 100755 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step13.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step13.sql @@ -20,6 +20,8 @@ LEFT OUTER JOIN from ${openaire_db_name}.datasource d WHERE d.datainfo.deletedbyinference=false and d.datainfo.invisible = FALSE) d on p.datasource = d.id; /*EOS*/ +ANALYZE TABLE ${stats_db_name}.publication_sources COMPUTE STATISTICS; /*EOS*/ + DROP TABLE IF EXISTS ${stats_db_name}.dataset_sources purge; /*EOS*/ CREATE TABLE IF NOT EXISTS ${stats_db_name}.dataset_sources STORED AS PARQUET as @@ -32,7 +34,9 @@ LEFT OUTER JOIN SELECT substr(d.id, 4) id from ${openaire_db_name}.datasource d WHERE d.datainfo.deletedbyinference=false and d.datainfo.invisible = FALSE) d on p.datasource = d.id; /*EOS*/ - + +ANALYZE TABLE ${stats_db_name}.dataset_sources COMPUTE STATISTICS; /*EOS*/ + DROP TABLE IF EXISTS ${stats_db_name}.software_sources purge; /*EOS*/ CREATE TABLE IF NOT EXISTS ${stats_db_name}.software_sources STORED AS PARQUET as @@ -45,7 +49,9 @@ LEFT OUTER JOIN SELECT substr(d.id, 4) id from 
${openaire_db_name}.datasource d WHERE d.datainfo.deletedbyinference=false and d.datainfo.invisible = FALSE) d on p.datasource = d.id; /*EOS*/ - + +ANALYZE TABLE ${stats_db_name}.software_sources COMPUTE STATISTICS; /*EOS*/ + DROP TABLE IF EXISTS ${stats_db_name}.otherresearchproduct_sources purge; /*EOS*/ CREATE TABLE IF NOT EXISTS ${stats_db_name}.otherresearchproduct_sources STORED AS PARQUET as @@ -59,6 +65,8 @@ LEFT OUTER JOIN from ${openaire_db_name}.datasource d WHERE d.datainfo.deletedbyinference=false and d.datainfo.invisible = FALSE) d on p.datasource = d.id; /*EOS*/ +ANALYZE TABLE ${stats_db_name}.otherresearchproduct_sources COMPUTE STATISTICS; /*EOS*/ + CREATE VIEW IF NOT EXISTS ${stats_db_name}.result_sources AS SELECT * FROM ${stats_db_name}.publication_sources UNION ALL @@ -80,6 +88,8 @@ from ( LATERAL VIEW explode(auth.pid.qualifier.classid) apt as author_pid_type WHERE res.datainfo.deletedbyinference = FALSE and res.datainfo.invisible = FALSE and author_pid_type = 'orcid') as res; /*EOS*/ +ANALYZE TABLE ${stats_db_name}.result_orcid COMPUTE STATISTICS; /*EOS*/ + DROP TABLE IF EXISTS ${stats_db_name}.result_result purge; /*EOS*/ CREATE TABLE IF NOT EXISTS ${stats_db_name}.result_result stored as parquet as @@ -95,6 +105,8 @@ where reltype='resultResult' and r2.resulttype.classname != 'other' and rel.datainfo.deletedbyinference=false and rel.datainfo.invisible = FALSE; /*EOS*/ +ANALYZE TABLE ${stats_db_name}.result_result COMPUTE STATISTICS; /*EOS*/ + DROP TABLE IF EXISTS ${stats_db_name}.result_citations_oc purge; /*EOS*/ CREATE TABLE IF NOT EXISTS ${stats_db_name}.result_citations_oc stored as parquet as @@ -112,6 +124,8 @@ where relClass='Cites' and rel.datainfo.provenanceaction.classid = 'sysimport:cr and rel.datainfo.deletedbyinference=false and rel.datainfo.invisible = FALSE group by substr(target, 4); /*EOS*/ +ANALYZE TABLE ${stats_db_name}.result_citations_oc COMPUTE STATISTICS; /*EOS*/ + DROP TABLE IF EXISTS 
${stats_db_name}.result_references_oc purge; /*EOS*/ CREATE TABLE IF NOT EXISTS ${stats_db_name}.result_references_oc stored as parquet as @@ -127,4 +141,6 @@ where relClass='Cites' and rel.datainfo.provenanceaction.classid = 'sysimport:cr and r1.resulttype.classname != 'other' and r2.resulttype.classname != 'other' and rel.datainfo.deletedbyinference=false and rel.datainfo.invisible = FALSE -group by substr(source, 4); /*EOS*/ \ No newline at end of file +group by substr(source, 4); /*EOS*/ + +ANALYZE TABLE ${stats_db_name}.result_references_oc COMPUTE STATISTICS; /*EOS*/ \ No newline at end of file diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step14.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step14.sql index 9e71b88f5..4cabc983b 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step14.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step14.sql @@ -14,6 +14,8 @@ SELECT /*+ COALESCE(100) */ substr(p.id, 4) as id, licenses.value as type from ${openaire_db_name}.publication p LATERAL VIEW explode(p.instance.license) instances as licenses where licenses.value is not null and licenses.value != '' and p.datainfo.deletedbyinference=false and p.datainfo.invisible = FALSE; /*EOS*/ +ANALYZE TABLE ${stats_db_name}.publication_licenses COMPUTE STATISTICS; /*EOS*/ + DROP TABLE IF EXISTS ${stats_db_name}.dataset_licenses purge; /*EOS*/ CREATE TABLE IF NOT EXISTS ${stats_db_name}.dataset_licenses STORED AS PARQUET AS @@ -21,6 +23,8 @@ SELECT /*+ COALESCE(100) */ substr(p.id, 4) as id, licenses.value as type from ${openaire_db_name}.dataset p LATERAL VIEW explode(p.instance.license) instances as licenses where licenses.value is not null and licenses.value != '' and p.datainfo.deletedbyinference=false and p.datainfo.invisible = FALSE; 
/*EOS*/ +ANALYZE TABLE ${stats_db_name}.dataset_licenses COMPUTE STATISTICS; /*EOS*/ + DROP TABLE IF EXISTS ${stats_db_name}.software_licenses purge; /*EOS*/ CREATE TABLE IF NOT EXISTS ${stats_db_name}.software_licenses STORED AS PARQUET AS @@ -28,6 +32,8 @@ SELECT /*+ COALESCE(100) */ substr(p.id, 4) as id, licenses.value as type from ${openaire_db_name}.software p LATERAL VIEW explode(p.instance.license) instances as licenses where licenses.value is not null and licenses.value != '' and p.datainfo.deletedbyinference=false and p.datainfo.invisible = FALSE; /*EOS*/ +ANALYZE TABLE ${stats_db_name}.software_licenses COMPUTE STATISTICS; /*EOS*/ + DROP TABLE IF EXISTS ${stats_db_name}.otherresearchproduct_licenses purge; /*EOS*/ CREATE TABLE IF NOT EXISTS ${stats_db_name}.otherresearchproduct_licenses STORED AS PARQUET AS @@ -35,6 +41,8 @@ SELECT /*+ COALESCE(100) */ substr(p.id, 4) as id, licenses.value as type from ${openaire_db_name}.otherresearchproduct p LATERAL VIEW explode(p.instance.license) instances as licenses where licenses.value is not null and licenses.value != '' and p.datainfo.deletedbyinference=false and p.datainfo.invisible = FALSE; /*EOS*/ +ANALYZE TABLE ${stats_db_name}.otherresearchproduct_licenses COMPUTE STATISTICS; /*EOS*/ + CREATE VIEW IF NOT EXISTS ${stats_db_name}.result_licenses AS SELECT * FROM ${stats_db_name}.publication_licenses UNION ALL @@ -50,6 +58,8 @@ CREATE TABLE IF NOT EXISTS ${stats_db_name}.organization_pids STORED AS PARQUET select /*+ COALESCE(100) */ substr(o.id, 4) as id, ppid.qualifier.classname as type, ppid.value as pid from ${openaire_db_name}.organization o lateral view explode(o.pid) pids as ppid; /*EOS*/ +ANALYZE TABLE ${stats_db_name}.organization_pids COMPUTE STATISTICS; /*EOS*/ + DROP TABLE IF EXISTS ${stats_db_name}.organization_sources purge; /*EOS*/ CREATE TABLE IF NOT EXISTS ${stats_db_name}.organization_sources STORED AS PARQUET as @@ -62,9 +72,13 @@ FROM ( from ${openaire_db_name}.datasource d WHERE 
d.datainfo.deletedbyinference=false and d.datainfo.invisible = FALSE) d on o.datasource = d.id; /*EOS*/ +ANALYZE TABLE ${stats_db_name}.organization_sources COMPUTE STATISTICS; /*EOS*/ + DROP TABLE IF EXISTS ${stats_db_name}.result_accessroute purge; /*EOS*/ CREATE TABLE IF NOT EXISTS ${stats_db_name}.result_accessroute STORED AS PARQUET as select /*+ COALESCE(100) */ distinct substr(id,4) as id, accessroute from ${openaire_db_name}.result lateral view explode (instance.accessright.openaccessroute) openaccessroute as accessroute WHERE datainfo.deletedbyinference=false and datainfo.invisible = FALSE; /*EOS*/ + +ANALYZE TABLE ${stats_db_name}.result_accessroute COMPUTE STATISTICS; /*EOS*/ \ No newline at end of file diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step15.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step15.sql index 08609afff..b0a86a993 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step15.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step15.sql @@ -26,6 +26,8 @@ from ( left join peer_reviewed on peer_reviewed.id=non_peer_reviewed.id where peer_reviewed.id is null) pr; /*EOS*/ +ANALYZE TABLE ${stats_db_name}.publication_refereed COMPUTE STATISTICS; /*EOS*/ + DROP TABLE IF EXISTS ${stats_db_name}.dataset_refereed purge; /*EOS*/ CREATE TABLE IF NOT EXISTS ${stats_db_name}.dataset_refereed STORED AS PARQUET as with peer_reviewed as ( @@ -44,6 +46,8 @@ from ( left join peer_reviewed on peer_reviewed.id=non_peer_reviewed.id where peer_reviewed.id is null) pr; /*EOS*/ +ANALYZE TABLE ${stats_db_name}.dataset_refereed COMPUTE STATISTICS; /*EOS*/ + DROP TABLE IF EXISTS ${stats_db_name}.software_refereed purge; /*EOS*/ CREATE TABLE IF NOT EXISTS ${stats_db_name}.software_refereed STORED AS PARQUET as with peer_reviewed 
as ( @@ -62,6 +66,8 @@ from ( left join peer_reviewed on peer_reviewed.id=non_peer_reviewed.id where peer_reviewed.id is null) pr; /*EOS*/ +ANALYZE TABLE ${stats_db_name}.software_refereed COMPUTE STATISTICS; /*EOS*/ + DROP TABLE IF EXISTS ${stats_db_name}.otherresearchproduct_refereed purge; /*EOS*/ CREATE TABLE IF NOT EXISTS ${stats_db_name}.otherresearchproduct_refereed STORED AS PARQUET as with peer_reviewed as ( @@ -80,6 +86,8 @@ from ( left join peer_reviewed on peer_reviewed.id=non_peer_reviewed.id where peer_reviewed.id is null) pr; /*EOS*/ +ANALYZE TABLE ${stats_db_name}.otherresearchproduct_refereed COMPUTE STATISTICS; /*EOS*/ + CREATE VIEW IF NOT EXISTS ${stats_db_name}.result_refereed as select * from ${stats_db_name}.publication_refereed union all @@ -97,6 +105,8 @@ cast(measures_ids.unit.value[0] as decimal(6,3)) score_dec, measures_ids.unit.va from ${openaire_db_name}.result lateral view explode(measures) measures as measures_ids where measures_ids.id!='views' and measures_ids.id!='downloads'; /*EOS*/ +ANALYZE TABLE ${stats_db_name}.indi_impact_measures COMPUTE STATISTICS; /*EOS*/ + DROP TABLE IF EXISTS ${stats_db_name}.result_apc_affiliations purge; /*EOS*/ create table if not exists ${stats_db_name}.result_apc_affiliations STORED AS PARQUET as @@ -107,3 +117,5 @@ from ${openaire_db_name}.relation rel join ${openaire_db_name}.organization o on o.id=rel.source join ${openaire_db_name}.result r on r.id=rel.target where rel.subreltype = 'affiliation' and rel.datainfo.deletedbyinference = false and size(rel.properties)>0; /*EOS*/ + +ANALYZE TABLE ${stats_db_name}.result_apc_affiliations COMPUTE STATISTICS; /*EOS*/ \ No newline at end of file diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step15_5.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step15_5.sql index d61b4d2ef..aaa6eb528 100644 --- 
a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step15_5.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step15_5.sql @@ -6,36 +6,56 @@ set mapred.job.queue.name=analytics; /*EOS*/ DROP TABLE IF EXISTS ${stats_db_name}.result_projectcount purge; /*EOS*/ create table if not exists ${stats_db_name}.result_projectcount STORED AS PARQUET as -select /*+ COALESCE(100) */ r.id, count(distinct p.id) as count +select /*+ COALESCE(100) */ r.id, count(distinct rp.project) as count from ${stats_db_name}.result r left outer join ${stats_db_name}.result_projects rp on rp.id=r.id -left outer join ${stats_db_name}.project p on p.id=rp.project group by r.id; /*EOS*/ -DROP TABLE IF EXISTS ${stats_db_name}.result_fundercount purge; /*EOS*/ - -create table if not exists ${stats_db_name}.result_fundercount STORED AS PARQUET as -select /*+ COALESCE(100) */ r.id, count(distinct p.funder) as count -from ${stats_db_name}.result r -left outer join ${stats_db_name}.result_projects rp on rp.id=r.id -left outer join ${stats_db_name}.project p on p.id=rp.project -group by r.id; /*EOS*/ +ANALYZE TABLE ${stats_db_name}.result_projectcount COMPUTE STATISTICS; /*EOS*/ DROP TABLE IF EXISTS ${stats_db_name}.project_resultcount purge; /*EOS*/ +create table if not exists ${stats_db_name}.project_res stored as parquet as +select distinct r.id as res, r.type, p.id as pid +from ${stats_db_name}.project p +left outer join ${stats_db_name}.result_projects rp on rp.project=p.id +left outer join ${stats_db_name}.result r on r.id=rp.id; /*EOS*/ + +ANALYZE TABLE ${stats_db_name}.project_res COMPUTE STATISTICS; /*EOS*/ + + create table if not exists ${stats_db_name}.project_resultcount STORED AS PARQUET as -with rcount as ( - select p.id as pid, count(distinct r.id) as `count`, r.type as type - from ${stats_db_name}.project p - left outer join ${stats_db_name}.result_projects rp on rp.project=p.id - 
left outer join ${stats_db_name}.result r on r.id=rp.id - group by r.type, p.id ) -select /*+ COALESCE(100) */ rcount.pid, sum(case when rcount.type='publication' then rcount.count else 0 end) as publications, - sum(case when rcount.type='dataset' then rcount.count else 0 end) as datasets, - sum(case when rcount.type='software' then rcount.count else 0 end) as software, - sum(case when rcount.type='other' then rcount.count else 0 end) as other -from rcount -group by rcount.pid; /*EOS*/ +select pid, + sum(case when rp.type='publication' then 1 else 0 end) as publications, + sum(case when rp.type='dataset' then 1 else 0 end) as datasets, + sum(case when rp.type='software' then 1 else 0 end) as software, + sum(case when rp.type='other' then 1 else 0 end) as other +from ${stats_db_name}.project_res rp +group by pid; /*EOS*/ + +ANALYZE TABLE ${stats_db_name}.project_resultcount COMPUTE STATISTICS; /*EOS*/ + +drop table ${stats_db_name}.project_res; /*EOS*/ + +DROP TABLE IF EXISTS ${stats_db_name}.result_fundercount purge; /*EOS*/ +drop table if exists ${stats_db_name}.result_funder purge; /*EOS*/ + +create table if not exists ${stats_db_name}.result_funder stored as parquet as +select distinct rp.id, p.funder +from ${stats_db_name}.result_projects rp +join ${stats_db_name}.project p on p.id=rp.project; /*EOS*/ + +ANALYZE TABLE ${stats_db_name}.result_funder COMPUTE STATISTICS; /*EOS*/ + +create table if not exists ${stats_db_name}.result_fundercount STORED AS PARQUET as +select /*+ COALESCE(100) */ r.id, count(rf.funder) as count +from ${stats_db_name}.result r +left outer join ${stats_db_name}.result_funder rf on rf.id=r.id +group by r.id; /*EOS*/ + +ANALYZE TABLE ${stats_db_name}.result_fundercount COMPUTE STATISTICS; /*EOS*/ + +drop table ${stats_db_name}.result_funder; /*EOS*/ create or replace view ${stats_db_name}.rndexpenditure as select * from stats_ext.rndexpediture; /*EOS*/ create or replace view ${stats_db_name}.rndgdpexpenditure as select * from 
stats_ext.rndgdpexpenditure; /*EOS*/ @@ -55,6 +75,8 @@ from ( from ${openaire_db_name}.result r lateral view explode(r.instance) instances as inst lateral view outer explode(inst.pid) pids as p) r join ${stats_db_name}.result res on res.id=r.id; /*EOS*/ +ANALYZE TABLE ${stats_db_name}.result_instance COMPUTE STATISTICS; /*EOS*/ + DROP TABLE IF EXISTS ${stats_db_name}.result_apc purge; /*EOS*/ create table if not exists ${stats_db_name}.result_apc STORED AS PARQUET as @@ -65,4 +87,6 @@ from ( join ${stats_db_name}.result res on res.id=r.id where r.amount is not null; /*EOS*/ +ANALYZE TABLE ${stats_db_name}.result_apc COMPUTE STATISTICS; /*EOS*/ + create or replace view ${stats_db_name}.issn_gold_oa_dataset as select * from ${external_stats_db_name}.issn_gold_oa_dataset; /*EOS*/ \ No newline at end of file diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step2.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step2.sql index 0abec2358..e09aa1362 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step2.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step2.sql @@ -41,6 +41,7 @@ select /*+ COALESCE(100) */ from ${openaire_db_name}.publication pub left outer join pub_delayed on pub.id=pub_delayed.pub_id where pub.datainfo.deletedbyinference = false and pub.datainfo.invisible = false; /*EOS*/ +ANALYZE TABLE ${stats_db_name}.publication COMPUTE STATISTICS; /*EOS*/ DROP TABLE IF EXISTS ${stats_db_name}.publication_classifications purge; /*EOS*/ @@ -50,6 +51,7 @@ SELECT /*+ COALESCE(100) */ substr(p.id, 4) as id, instancetype.classname as typ from ${openaire_db_name}.publication p LATERAL VIEW explode(p.instance.instancetype) instances as instancetype where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EOS*/ +ANALYZE 
TABLE ${stats_db_name}.publication_classifications COMPUTE STATISTICS; /*EOS*/ DROP TABLE IF EXISTS ${stats_db_name}.publication_concepts purge; /*EOS*/ @@ -62,6 +64,8 @@ from ${openaire_db_name}.publication p LATERAL VIEW explode(p.context) contexts as context where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EOS*/ +ANALYZE TABLE ${stats_db_name}.publication_concepts COMPUTE STATISTICS; /*EOS*/ + DROP TABLE IF EXISTS ${stats_db_name}.publication_datasources purge; /*EOS*/ CREATE TABLE ${stats_db_name}.publication_datasources STORED AS PARQUET as @@ -75,6 +79,8 @@ FROM ( from ${openaire_db_name}.datasource d WHERE d.datainfo.deletedbyinference = false and d.datainfo.invisible=false) d on p.datasource = d.id; /*EOS*/ +ANALYZE TABLE ${stats_db_name}.publication_datasources COMPUTE STATISTICS; /*EOS*/ + DROP TABLE IF EXISTS ${stats_db_name}.publication_languages purge; /*EOS*/ CREATE TABLE ${stats_db_name}.publication_languages STORED AS PARQUET AS @@ -82,6 +88,8 @@ select /*+ COALESCE(100) */ substr(p.id, 4) as id, p.language.classname as langu FROM ${openaire_db_name}.publication p where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EOS*/ +ANALYZE TABLE ${stats_db_name}.publication_languages COMPUTE STATISTICS; /*EOS*/ + DROP TABLE IF EXISTS ${stats_db_name}.publication_oids purge; /*EOS*/ CREATE TABLE ${stats_db_name}.publication_oids STORED AS PARQUET AS @@ -90,6 +98,8 @@ FROM ${openaire_db_name}.publication p LATERAL VIEW explode(p.originalid) oids AS ids where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EOS*/ +ANALYZE TABLE ${stats_db_name}.publication_oids COMPUTE STATISTICS; /*EOS*/ + DROP TABLE IF EXISTS ${stats_db_name}.publication_pids purge; /*EOS*/ CREATE TABLE ${stats_db_name}.publication_pids STORED AS PARQUET AS @@ -98,6 +108,8 @@ FROM ${openaire_db_name}.publication p LATERAL VIEW explode(p.pid) pids AS ppid where p.datainfo.deletedbyinference = false and 
p.datainfo.invisible=false; /*EOS*/ +ANALYZE TABLE ${stats_db_name}.publication_pids COMPUTE STATISTICS; /*EOS*/ + DROP TABLE IF EXISTS ${stats_db_name}.publication_topics purge; /*EOS*/ CREATE TABLE ${stats_db_name}.publication_topics STORED AS PARQUET as @@ -106,6 +118,8 @@ FROM ${openaire_db_name}.publication p LATERAL VIEW explode(p.subject) subjects AS subject where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EOS*/ +ANALYZE TABLE ${stats_db_name}.publication_topics COMPUTE STATISTICS; /*EOS*/ + DROP TABLE IF EXISTS ${stats_db_name}.publication_citations purge; /*EOS*/ CREATE TABLE ${stats_db_name}.publication_citations STORED AS PARQUET AS @@ -114,3 +128,5 @@ FROM ${openaire_db_name}.publication p lateral view explode(p.extrainfo) citations AS citation WHERE xpath_string(citation.value, "//citation/id[@type='openaire']/@value") != "" and p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EOS*/ + +ANALYZE TABLE ${stats_db_name}.publication_citations COMPUTE STATISTICS; /*EOS*/ \ No newline at end of file diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step3.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step3.sql index 0e1e02b12..dbe73d276 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step3.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step3.sql @@ -42,6 +42,7 @@ from ${openaire_db_name}.dataset datast left outer join datast_delayed on datast.id=datast_delayed.datast_id where datast.datainfo.deletedbyinference = false and datast.datainfo.invisible = false; /*EOS*/ +ANALYZE TABLE ${stats_db_name}.dataset COMPUTE STATISTICS; /*EOS*/ DROP TABLE IF EXISTS ${stats_db_name}.dataset_citations purge; /*EOS*/ @@ -52,6 +53,8 @@ FROM ${openaire_db_name}.dataset d WHERE 
xpath_string(citation.value, "//citation/id[@type='openaire']/@value") != "" and d.datainfo.deletedbyinference = false and d.datainfo.invisible=false; /*EOS*/ +ANALYZE TABLE ${stats_db_name}.dataset_citations COMPUTE STATISTICS; /*EOS*/ + DROP TABLE IF EXISTS ${stats_db_name}.dataset_classifications purge; /*EOS*/ CREATE TABLE ${stats_db_name}.dataset_classifications STORED AS PARQUET AS @@ -60,6 +63,8 @@ FROM ${openaire_db_name}.dataset p LATERAL VIEW explode(p.instance.instancetype) instances AS instancetype where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EOS*/ +ANALYZE TABLE ${stats_db_name}.dataset_classifications COMPUTE STATISTICS; /*EOS*/ + DROP TABLE IF EXISTS ${stats_db_name}.dataset_concepts purge; /*EOS*/ CREATE TABLE ${stats_db_name}.dataset_concepts STORED AS PARQUET AS @@ -71,6 +76,8 @@ from ${openaire_db_name}.dataset p LATERAL VIEW explode(p.context) contexts as context where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EOS*/ +ANALYZE TABLE ${stats_db_name}.dataset_concepts COMPUTE STATISTICS; /*EOS*/ + DROP TABLE IF EXISTS ${stats_db_name}.dataset_datasources purge; /*EOS*/ CREATE TABLE ${stats_db_name}.dataset_datasources STORED AS PARQUET AS @@ -85,6 +92,8 @@ FROM ( FROM ${openaire_db_name}.datasource d WHERE d.datainfo.deletedbyinference = false and d.datainfo.invisible=false) d ON p.datasource = d.id; /*EOS*/ +ANALYZE TABLE ${stats_db_name}.dataset_datasources COMPUTE STATISTICS; /*EOS*/ + DROP TABLE IF EXISTS ${stats_db_name}.dataset_languages purge; /*EOS*/ CREATE TABLE ${stats_db_name}.dataset_languages STORED AS PARQUET AS @@ -92,6 +101,8 @@ SELECT /*+ COALESCE(100) */ substr(p.id, 4) AS id, p.language.classname AS langu FROM ${openaire_db_name}.dataset p where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EOS*/ +ANALYZE TABLE ${stats_db_name}.dataset_languages COMPUTE STATISTICS; /*EOS*/ + DROP TABLE IF EXISTS ${stats_db_name}.dataset_oids purge; /*EOS*/ 
CREATE TABLE ${stats_db_name}.dataset_oids STORED AS PARQUET AS @@ -100,6 +111,8 @@ FROM ${openaire_db_name}.dataset p LATERAL VIEW explode(p.originalid) oids AS ids where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EOS*/ +ANALYZE TABLE ${stats_db_name}.dataset_oids COMPUTE STATISTICS; /*EOS*/ + DROP TABLE IF EXISTS ${stats_db_name}.dataset_pids purge; /*EOS*/ CREATE TABLE ${stats_db_name}.dataset_pids STORED AS PARQUET AS @@ -108,10 +121,14 @@ FROM ${openaire_db_name}.dataset p LATERAL VIEW explode(p.pid) pids AS ppid where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EOS*/ +ANALYZE TABLE ${stats_db_name}.dataset_pids COMPUTE STATISTICS; /*EOS*/ + DROP TABLE IF EXISTS ${stats_db_name}.dataset_topics purge; /*EOS*/ CREATE TABLE ${stats_db_name}.dataset_topics STORED AS PARQUET AS SELECT /*+ COALESCE(100) */ substr(p.id, 4) AS id, subjects.subject.qualifier.classname AS type, subjects.subject.value AS topic FROM ${openaire_db_name}.dataset p LATERAL VIEW explode(p.subject) subjects AS subject -where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EOS*/ \ No newline at end of file +where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EOS*/ + +ANALYZE TABLE ${stats_db_name}.dataset_topics COMPUTE STATISTICS; /*EOS*/ \ No newline at end of file diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step4.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step4.sql index 0ccb17fcc..3df21a6ad 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step4.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step4.sql @@ -42,6 +42,7 @@ from ${openaire_db_name}.software soft left outer join soft_delayed on soft.id=soft_delayed.soft_id where 
soft.datainfo.deletedbyinference = false and soft.datainfo.invisible = false; /*EOS*/ +ANALYZE TABLE ${stats_db_name}.software COMPUTE STATISTICS; /*EOS*/ DROP TABLE IF EXISTS ${stats_db_name}.software_citations purge; /*EOS*/ @@ -52,6 +53,8 @@ FROM ${openaire_db_name}.software s where xpath_string(citation.value, "//citation/id[@type='openaire']/@value") != "" and s.datainfo.deletedbyinference = false and s.datainfo.invisible=false; /*EOS*/ +ANALYZE TABLE ${stats_db_name}.software_citations COMPUTE STATISTICS; /*EOS*/ + DROP TABLE IF EXISTS ${stats_db_name}.software_classifications purge; /*EOS*/ CREATE TABLE ${stats_db_name}.software_classifications STORED AS PARQUET AS @@ -60,6 +63,8 @@ FROM ${openaire_db_name}.software p LATERAL VIEW explode(p.instance.instancetype) instances AS instancetype where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EOS*/ +ANALYZE TABLE ${stats_db_name}.software_classifications COMPUTE STATISTICS; /*EOS*/ + DROP TABLE IF EXISTS ${stats_db_name}.software_concepts purge; /*EOS*/ CREATE TABLE ${stats_db_name}.software_concepts STORED AS PARQUET AS @@ -71,6 +76,8 @@ FROM ${openaire_db_name}.software p LATERAL VIEW explode(p.context) contexts AS context where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EOS*/ +ANALYZE TABLE ${stats_db_name}.software_concepts COMPUTE STATISTICS; /*EOS*/ + DROP TABLE IF EXISTS ${stats_db_name}.software_datasources purge; /*EOS*/ CREATE TABLE ${stats_db_name}.software_datasources STORED AS PARQUET AS @@ -85,6 +92,8 @@ FROM ( FROM ${openaire_db_name}.datasource d WHERE d.datainfo.deletedbyinference = false and d.datainfo.invisible=false) d ON p.datasource = d.id; /*EOS*/ +ANALYZE TABLE ${stats_db_name}.software_datasources COMPUTE STATISTICS; /*EOS*/ + DROP TABLE IF EXISTS ${stats_db_name}.software_languages purge; /*EOS*/ CREATE TABLE ${stats_db_name}.software_languages STORED AS PARQUET AS @@ -92,6 +101,8 @@ select /*+ COALESCE(100) */ substr(p.id, 4) AS 
id, p.language.classname AS langu FROM ${openaire_db_name}.software p where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EOS*/ +ANALYZE TABLE ${stats_db_name}.software_languages COMPUTE STATISTICS; /*EOS*/ + DROP TABLE IF EXISTS ${stats_db_name}.software_oids purge; /*EOS*/ CREATE TABLE ${stats_db_name}.software_oids STORED AS PARQUET AS @@ -100,6 +111,8 @@ FROM ${openaire_db_name}.software p LATERAL VIEW explode(p.originalid) oids AS ids where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EOS*/ +ANALYZE TABLE ${stats_db_name}.software_oids COMPUTE STATISTICS; /*EOS*/ + DROP TABLE IF EXISTS ${stats_db_name}.software_pids purge; /*EOS*/ CREATE TABLE ${stats_db_name}.software_pids STORED AS PARQUET AS @@ -108,10 +121,14 @@ FROM ${openaire_db_name}.software p LATERAL VIEW explode(p.pid) pids AS ppid where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EOS*/ +ANALYZE TABLE ${stats_db_name}.software_pids COMPUTE STATISTICS; /*EOS*/ + DROP TABLE IF EXISTS ${stats_db_name}.software_topics purge; /*EOS*/ CREATE TABLE ${stats_db_name}.software_topics STORED AS PARQUET AS SELECT /*+ COALESCE(100) */ substr(p.id, 4) AS id, subjects.subject.qualifier.classname AS type, subjects.subject.value AS topic FROM ${openaire_db_name}.software p LATERAL VIEW explode(p.subject) subjects AS subject -where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EOS*/ \ No newline at end of file +where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EOS*/ + +ANALYZE TABLE ${stats_db_name}.software_topics COMPUTE STATISTICS; /*EOS*/ \ No newline at end of file diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step5.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step5.sql index cd7834d84..78fecb4d7 100755 --- 
a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step5.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step5.sql @@ -42,6 +42,7 @@ from ${openaire_db_name}.otherresearchproduct other left outer join other_delayed on other.id=other_delayed.other_id where other.datainfo.deletedbyinference = false and other.datainfo.invisible = false; /*EOS*/ +ANALYZE TABLE ${stats_db_name}.otherresearchproduct COMPUTE STATISTICS; /*EOS*/ -- Otherresearchproduct_citations DROP TABLE IF EXISTS ${stats_db_name}.otherresearchproduct_citations purge; /*EOS*/ @@ -52,6 +53,8 @@ FROM ${openaire_db_name}.otherresearchproduct o LATERAL VIEW explode(o.extrainfo WHERE xpath_string(citation.value, "//citation/id[@type='openaire']/@value") != "" and o.datainfo.deletedbyinference = false and o.datainfo.invisible=false; /*EOS*/ +ANALYZE TABLE ${stats_db_name}.otherresearchproduct_citations COMPUTE STATISTICS; /*EOS*/ + DROP TABLE IF EXISTS ${stats_db_name}.otherresearchproduct_classifications purge; /*EOS*/ CREATE TABLE ${stats_db_name}.otherresearchproduct_classifications STORED AS PARQUET AS @@ -59,6 +62,8 @@ SELECT /*+ COALESCE(100) */ substr(p.id, 4) AS id, instancetype.classname AS typ FROM ${openaire_db_name}.otherresearchproduct p LATERAL VIEW explode(p.instance.instancetype) instances AS instancetype where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EOS*/ +ANALYZE TABLE ${stats_db_name}.otherresearchproduct_classifications COMPUTE STATISTICS; /*EOS*/ + DROP TABLE IF EXISTS ${stats_db_name}.otherresearchproduct_concepts purge; /*EOS*/ CREATE TABLE ${stats_db_name}.otherresearchproduct_concepts STORED AS PARQUET AS @@ -69,6 +74,8 @@ SELECT /*+ COALESCE(100) */ substr(p.id, 4) as id, case FROM ${openaire_db_name}.otherresearchproduct p LATERAL VIEW explode(p.context) contexts AS context where p.datainfo.deletedbyinference = false and 
p.datainfo.invisible=false; /*EOS*/ +ANALYZE TABLE ${stats_db_name}.otherresearchproduct_concepts COMPUTE STATISTICS; /*EOS*/ + DROP TABLE IF EXISTS ${stats_db_name}.otherresearchproduct_datasources purge; /*EOS*/ CREATE TABLE ${stats_db_name}.otherresearchproduct_datasources STORED AS PARQUET AS @@ -80,6 +87,8 @@ FROM (SELECT substr(p.id, 4) AS id, substr(instances.instance.hostedby.key, 4) A from ${openaire_db_name}.datasource d WHERE d.datainfo.deletedbyinference = false and d.datainfo.invisible=false) d on p.datasource = d.id; /*EOS*/ +ANALYZE TABLE ${stats_db_name}.otherresearchproduct_datasources COMPUTE STATISTICS; /*EOS*/ + DROP TABLE IF EXISTS ${stats_db_name}.otherresearchproduct_languages purge; /*EOS*/ CREATE TABLE ${stats_db_name}.otherresearchproduct_languages STORED AS PARQUET AS @@ -87,6 +96,8 @@ SELECT /*+ COALESCE(100) */ substr(p.id, 4) AS id, p.language.classname AS langu FROM ${openaire_db_name}.otherresearchproduct p where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EOS*/ +ANALYZE TABLE ${stats_db_name}.otherresearchproduct_languages COMPUTE STATISTICS; /*EOS*/ + DROP TABLE IF EXISTS ${stats_db_name}.otherresearchproduct_oids purge; /*EOS*/ CREATE TABLE ${stats_db_name}.otherresearchproduct_oids STORED AS PARQUET AS @@ -94,6 +105,8 @@ SELECT /*+ COALESCE(100) */ substr(p.id, 4) AS id, oids.ids AS oid FROM ${openaire_db_name}.otherresearchproduct p LATERAL VIEW explode(p.originalid) oids AS ids where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EOS*/ +ANALYZE TABLE ${stats_db_name}.otherresearchproduct_oids COMPUTE STATISTICS; /*EOS*/ + DROP TABLE IF EXISTS ${stats_db_name}.otherresearchproduct_pids purge; /*EOS*/ CREATE TABLE ${stats_db_name}.otherresearchproduct_pids STORED AS PARQUET AS @@ -101,9 +114,13 @@ SELECT /*+ COALESCE(100) */ substr(p.id, 4) AS id, ppid.qualifier.classname AS t FROM ${openaire_db_name}.otherresearchproduct p LATERAL VIEW explode(p.pid) pids AS ppid where 
p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EOS*/ +ANALYZE TABLE ${stats_db_name}.otherresearchproduct_pids COMPUTE STATISTICS; /*EOS*/ + DROP TABLE IF EXISTS ${stats_db_name}.otherresearchproduct_topics purge; /*EOS*/ CREATE TABLE ${stats_db_name}.otherresearchproduct_topics STORED AS PARQUET AS SELECT /*+ COALESCE(100) */ substr(p.id, 4) AS id, subjects.subject.qualifier.classname AS type, subjects.subject.value AS topic FROM ${openaire_db_name}.otherresearchproduct p LATERAL VIEW explode(p.subject) subjects AS subject -where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EOS*/ \ No newline at end of file +where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; /*EOS*/ + +ANALYZE TABLE ${stats_db_name}.otherresearchproduct_topics COMPUTE STATISTICS; /*EOS*/ \ No newline at end of file diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step6.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step6.sql index d261c96e2..382a3ee96 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step6.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step6.sql @@ -12,6 +12,8 @@ SELECT /*+ COALESCE(100) */ substr(p.id, 4) AS id, oids.ids AS oid FROM ${openaire_db_name}.project p LATERAL VIEW explode(p.originalid) oids AS ids where p.datainfo.deletedbyinference=false and p.datainfo.invisible=false; /*EOS*/ +ANALYZE TABLE ${stats_db_name}.project_oids COMPUTE STATISTICS; /*EOS*/ + DROP TABLE IF EXISTS ${stats_db_name}.project_organizations purge; /*EOS*/ CREATE TABLE ${stats_db_name}.project_organizations STORED AS PARQUET AS @@ -20,6 +22,8 @@ from ${openaire_db_name}.relation r WHERE r.reltype = 'projectOrganization' and r.source like '40|%' and r.datainfo.deletedbyinference = false 
and r.datainfo.invisible=false; /*EOS*/ +ANALYZE TABLE ${stats_db_name}.project_organizations COMPUTE STATISTICS; /*EOS*/ + DROP TABLE IF EXISTS ${stats_db_name}.project_results purge; /*EOS*/ CREATE TABLE ${stats_db_name}.project_results STORED AS PARQUET AS @@ -28,6 +32,8 @@ FROM ${openaire_db_name}.relation r WHERE r.reltype = 'resultProject' and r.target like '40|%' and r.datainfo.deletedbyinference = false and r.datainfo.invisible=false; /*EOS*/ +ANALYZE TABLE ${stats_db_name}.project_results COMPUTE STATISTICS; /*EOS*/ + DROP TABLE IF EXISTS ${stats_db_name}.project_classification purge; /*EOS*/ create table ${stats_db_name}.project_classification STORED AS PARQUET as @@ -36,6 +42,8 @@ from ${openaire_db_name}.project p lateral view explode(p.h2020classification) classifs as class where p.datainfo.deletedbyinference=false and p.datainfo.invisible=false and class.h2020programme is not null; /*EOS*/ +ANALYZE TABLE ${stats_db_name}.project_classification COMPUTE STATISTICS; /*EOS*/ + DROP TABLE IF EXISTS ${stats_db_name}.project purge; /*EOS*/ CREATE TABLE ${stats_db_name}.project stored as parquet as @@ -99,6 +107,7 @@ left outer join num_pubs_pr on num_pubs_pr.pr_id = p.id left outer join num_pub_delayed npd on npd.pr_id=p.id where p.datainfo.deletedbyinference = false and p.datainfo.invisible = false; /*EOS*/ +ANALYZE TABLE ${stats_db_name}.project COMPUTE STATISTICS; /*EOS*/ DROP TABLE IF EXISTS ${stats_db_name}.funder purge; /*EOS*/ @@ -109,6 +118,8 @@ select /*+ COALESCE(100) */ distinct xpath_string(fund, '//funder/id') as xpath_string(fundingtree[0].value, '//funder/jurisdiction') as country from ${openaire_db_name}.project p lateral view explode(p.fundingtree.value) fundingtree as fund; /*EOS*/ +ANALYZE TABLE ${stats_db_name}.funder COMPUTE STATISTICS; /*EOS*/ + DROP TABLE IF EXISTS ${stats_db_name}.project_organization_contribution purge; /*EOS*/ CREATE TABLE ${stats_db_name}.project_organization_contribution STORED AS PARQUET AS @@ -117,4 +128,6 @@ 
properties[0].value contribution, properties[1].value currency from ${openaire_db_name}.relation r LATERAL VIEW explode (r.properties) properties where properties[0].key='contribution' and r.reltype = 'projectOrganization' and r.source like '40|%' -and properties[0].value>0.0 and r.datainfo.deletedbyinference = false and r.datainfo.invisible=false; /*EOS*/ \ No newline at end of file +and properties[0].value>0.0 and r.datainfo.deletedbyinference = false and r.datainfo.invisible=false; /*EOS*/ + +ANALYZE TABLE ${stats_db_name}.project_organization_contribution COMPUTE STATISTICS; /*EOS*/ \ No newline at end of file diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step7.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step7.sql index bffd59ef1..7c296cae7 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step7.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step7.sql @@ -131,6 +131,8 @@ DROP TABLE IF EXISTS ${stats_db_name}.result_fos_base_tmp purge; /*EOS*/ create table ${stats_db_name}.result_fos_base_tmp stored as parquet as select /*+ COALESCE(100) */ id, topic from ${stats_db_name}.result_topics where type='Fields of Science and Technology classification'; /*EOS*/ +ANALYZE TABLE ${stats_db_name}.result_fos_base_tmp COMPUTE STATISTICS; /*EOS*/ + DROP TABLE IF EXISTS ${stats_db_name}.result_fos purge; /*EOS*/ create table ${stats_db_name}.result_fos stored as parquet as @@ -145,8 +147,9 @@ from lvl1 join lvl3 on lvl3.id=lvl1.id and substr(lvl3.topic, 1, 4)=substr(lvl2.topic, 1, 4) join lvl4 on lvl4.id=lvl1.id and substr(lvl4.topic, 1, 6)=substr(lvl3.topic, 1, 6); /*EOS*/ -DROP TABLE ${stats_db_name}.result_fos_base_tmp purge; /*EOS*/ +ANALYZE TABLE ${stats_db_name}.result_fos COMPUTE STATISTICS; /*EOS*/ +DROP TABLE 
${stats_db_name}.result_fos_base_tmp purge; /*EOS*/ DROP TABLE IF EXISTS ${stats_db_name}.result_organization purge; /*EOS*/ @@ -157,10 +160,12 @@ WHERE r.reltype = 'resultOrganization' and r.target like '50|%' and r.datainfo.deletedbyinference = false and r.datainfo.invisible=false; /*EOS*/ +ANALYZE TABLE ${stats_db_name}.result_organization COMPUTE STATISTICS; /*EOS*/ + DROP TABLE IF EXISTS ${stats_db_name}.result_projects purge; /*EOS*/ CREATE TABLE ${stats_db_name}.result_projects STORED AS PARQUET AS -select /*+ COALESCE(100) */ pr.result AS id, pr.id AS project, datediff(p.enddate, p.startdate) AS daysfromend, pr.provenance as provenance -FROM ${stats_db_name}.result r - JOIN ${stats_db_name}.project_results pr ON r.id = pr.result - JOIN ${stats_db_name}.project p ON p.id = pr.id; /*EOS*/ +select /*+ COALESCE(100) */ pr.result AS id, pr.id AS project, pr.provenance +FROM ${stats_db_name}.project_results pr; /*EOS*/ + +ANALYZE TABLE ${stats_db_name}.result_projects COMPUTE STATISTICS; /*EOS*/ \ No newline at end of file diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step8.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step8.sql index 98225af14..90eb28315 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step8.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step8.sql @@ -9,13 +9,21 @@ set mapred.job.queue.name=analytics; /*EOS*/ ------------------------------------------------------------ DROP TABLE IF EXISTS ${stats_db_name}.datasource purge; /*EOS*/ +create table ${stats_db_name}.harested_datasources stored as parquet as +select distinct inst.hostedby.key as d_id +from ${openaire_db_name}.result lateral view outer explode (instance) insts as inst; /*EOS*/ + +ANALYZE TABLE ${stats_db_name}.harested_datasources COMPUTE 
STATISTICS; /*EOS*/ + +create table ${stats_db_name}.piwik_datasource stored as parquet as +select id, split(originalidd, '\\:')[1] as piwik_id +from ${openaire_db_name}.datasource + lateral view explode(originalid) temp as originalidd +where originalidd like "piwik:%"; /*EOS*/ + +ANALYZE TABLE ${stats_db_name}.piwik_datasource COMPUTE STATISTICS; /*EOS*/ + CREATE TABLE ${stats_db_name}.datasource stored as parquet as -with piwik_datasource as ( - select id, split(originalidd, '\\:')[1] as piwik_id - from ${openaire_db_name}.datasource - lateral view explode(originalid) temp as originalidd - where originalidd like "piwik:%" -) select /*+ COALESCE(100) */ substr(dtrce.id, 4) as id, case when dtrce.officialname.value='Unknown Repository' then 'Other' else dtrce.officialname.value end as name, @@ -31,10 +39,14 @@ select /*+ COALESCE(100) */ dtrce.journal.issnprinted as issn_printed, dtrce.journal.issnonline as issn_online from ${openaire_db_name}.datasource dtrce - left outer join (select inst.hostedby.key as d_id from ${openaire_db_name}.result lateral view outer explode (instance) insts as inst) res on res.d_id=dtrce.id - left outer join piwik_datasource piwik_d on piwik_d.id=dtrce.id +left outer join ${stats_db_name}.harested_datasources res on res.d_id=dtrce.id +left outer join ${stats_db_name}.piwik_datasource piwik_d on piwik_d.id=dtrce.id where dtrce.datainfo.deletedbyinference = false and dtrce.datainfo.invisible = false; /*EOS*/ +ANALYZE TABLE ${stats_db_name}.datasource COMPUTE STATISTICS; /*EOS*/ + +DROP TABLE IF EXISTS ${stats_db_name}.harested_datasources purge; /*EOS*/ +DROP TABLE IF EXISTS ${stats_db_name}.piwik_datasource purge; /*EOS*/ DROP TABLE IF EXISTS ${stats_db_name}.datasource_languages purge; /*EOS*/ @@ -43,6 +55,8 @@ SELECT /*+ COALESCE(100) */ substr(d.id, 4) AS id, langs.languages AS language FROM ${openaire_db_name}.datasource d LATERAL VIEW explode(d.odlanguages.value) langs AS languages where d.datainfo.deletedbyinference=false and d.datainfo.invisible=false; /*EOS*/ 
+ANALYZE TABLE ${stats_db_name}.datasource_languages COMPUTE STATISTICS; /*EOS*/ + DROP TABLE IF EXISTS ${stats_db_name}.datasource_oids purge; /*EOS*/ CREATE TABLE ${stats_db_name}.datasource_oids STORED AS PARQUET AS @@ -50,6 +64,8 @@ SELECT /*+ COALESCE(100) */ substr(d.id, 4) AS id, oids.ids AS oid FROM ${openaire_db_name}.datasource d LATERAL VIEW explode(d.originalid) oids AS ids where d.datainfo.deletedbyinference=false and d.datainfo.invisible=false; /*EOS*/ +ANALYZE TABLE ${stats_db_name}.datasource_oids COMPUTE STATISTICS; /*EOS*/ + DROP TABLE IF EXISTS ${stats_db_name}.datasource_organizations purge; /*EOS*/ CREATE TABLE ${stats_db_name}.datasource_organizations STORED AS PARQUET AS @@ -57,6 +73,8 @@ SELECT /*+ COALESCE(100) */ substr(r.target, 4) AS id, substr(r.source, 4) AS or FROM ${openaire_db_name}.relation r WHERE r.reltype = 'datasourceOrganization' and r.datainfo.deletedbyinference = false and r.source like '20|%' and r.datainfo.invisible=false; /*EOS*/ +ANALYZE TABLE ${stats_db_name}.datasource_organizations COMPUTE STATISTICS; /*EOS*/ + -- datasource sources: -- where the datasource info have been collected from. 
DROP TABLE IF EXISTS ${stats_db_name}.datasource_sources purge; /*EOS*/ @@ -66,6 +84,8 @@ select /*+ COALESCE(100) */ substr(d.id, 4) as id, substr(cf.key, 4) as datasour from ${openaire_db_name}.datasource d lateral view explode(d.collectedfrom) cfrom as cf where d.datainfo.deletedbyinference = false and d.datainfo.invisible=false; /*EOS*/ +ANALYZE TABLE ${stats_db_name}.datasource_sources COMPUTE STATISTICS; /*EOS*/ + CREATE OR REPLACE VIEW ${stats_db_name}.datasource_results AS SELECT datasource AS id, id AS result FROM ${stats_db_name}.result_datasources; /*EOS*/ diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step9.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step9.sql index f504a5c12..6ec127427 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step9.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step9.sql @@ -15,6 +15,8 @@ SELECT /*+ COALESCE(100) */ substr(o.id, 4) as id, FROM ${openaire_db_name}.organization o WHERE o.datainfo.deletedbyinference = FALSE and o.datainfo.invisible = FALSE; /*EOS*/ +ANALYZE TABLE ${stats_db_name}.organization COMPUTE STATISTICS; /*EOS*/ + CREATE OR REPLACE VIEW ${stats_db_name}.organization_datasources AS SELECT organization AS id, id AS datasource FROM ${stats_db_name}.datasource_organizations; /*EOS*/