From f3c179658a8d978796e31d6095b947e44665b2ef Mon Sep 17 00:00:00 2001
From: antleb
Date: Mon, 30 Sep 2024 17:12:21 +0300
Subject: [PATCH] datasource table creation split in steps

step8.sql: build the list of datasource ids referenced by result
instances (hostedby.key) and the piwik id mapping as two helper parquet
tables before creating the datasource table, join against those tables
instead of an inline subquery and a CTE, and drop the helpers afterwards.

buildIrishMonitorDB.sql: join the funder filter against
TARGET.irish_funders instead of a hard-coded, dated database name, and
terminate the file with a newline.

---
Review notes (this area is ignored when the patch is applied):

* The helper table was spelled "harested_datasources"; it is now
  "harvested_datasources" in all four places (pre-drop, create, join,
  cleanup).
* The cleanup statements at the end of the datasource step now use
  "DROP TABLE IF EXISTS ... purge", matching the other drops in the script.
* NOTE(review): line breaks, indentation and blank-line placement were
  reconstructed from a whitespace-collapsed copy of this patch. Hunk line
  counts agree with the @@ headers, but the leading whitespace of context
  and removed lines is a best guess -- apply with
  "git apply --ignore-whitespace" or regenerate from the working tree.
* NOTE(review): the post-image blob id on the step8.sql "index" line
  predates the two changes above.

 .../oozie_app/scripts/buildIrishMonitorDB.sql |  4 ++--
 .../graph/stats/oozie_app/scripts/step8.sql   | 24 ++++++++++++-------
 2 files changed, 18 insertions(+), 10 deletions(-)

diff --git a/dhp-workflows/dhp-stats-monitor-irish/src/main/resources/eu/dnetlib/dhp/oa/graph/stats-monitor-irish/oozie_app/scripts/buildIrishMonitorDB.sql b/dhp-workflows/dhp-stats-monitor-irish/src/main/resources/eu/dnetlib/dhp/oa/graph/stats-monitor-irish/oozie_app/scripts/buildIrishMonitorDB.sql
index 3f0922020..acad90f63 100644
--- a/dhp-workflows/dhp-stats-monitor-irish/src/main/resources/eu/dnetlib/dhp/oa/graph/stats-monitor-irish/oozie_app/scripts/buildIrishMonitorDB.sql
+++ b/dhp-workflows/dhp-stats-monitor-irish/src/main/resources/eu/dnetlib/dhp/oa/graph/stats-monitor-irish/oozie_app/scripts/buildIrishMonitorDB.sql
@@ -32,7 +32,7 @@ select distinct * from (
     from SOURCE.result r
     join SOURCE.result_projects rp on rp.id=r.id
     join SOURCE.project p on p.id=rp.project
-    join openaire_prod_stats_monitor_ie_20231226b.irish_funders irf on irf.funder=p.funder
+    join TARGET.irish_funders irf on irf.funder=p.funder
     union all
     select r.*
     from SOURCE.result r
@@ -238,4 +238,4 @@ create table TARGET.indi_pub_publicly_funded stored as parquet as select * from
 create table TARGET.indi_result_oa_with_license stored as parquet as select * from SOURCE.indi_result_oa_with_license orig where exists (select 1 from TARGET.result r where r.id=orig.id);
 create table TARGET.indi_result_oa_without_license stored as parquet as select * from SOURCE.indi_result_oa_without_license orig where exists (select 1 from TARGET.result r where r.id=orig.id);
 
-create table TARGET.indi_result_under_transformative stored as parquet as select * from SOURCE.indi_result_under_transformative orig where exists (select 1 from TARGET.result r where r.id=orig.id);
\ No newline at end of file
+create table TARGET.indi_result_under_transformative stored as parquet as select * from SOURCE.indi_result_under_transformative orig where exists (select 1 from TARGET.result r where r.id=orig.id);
diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step8.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step8.sql
index 98225af14..7504eba18 100644
--- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step8.sql
+++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step8.sql
@@ -8,14 +8,20 @@ set mapred.job.queue.name=analytics; /*EOS*/
 ------------------------------------------------------------
 ------------------------------------------------------------
 DROP TABLE IF EXISTS ${stats_db_name}.datasource purge; /*EOS*/
+DROP TABLE IF EXISTS ${stats_db_name}.harvested_datasources purge; /*EOS*/
+DROP TABLE IF EXISTS ${stats_db_name}.piwik_datasource purge; /*EOS*/
+
+create table ${stats_db_name}.harvested_datasources stored as parquet as
+select distinct inst.hostedby.key as d_id
+from ${openaire_db_name}.result lateral view outer explode (instance) insts as inst; /*EOS*/
+
+create table ${stats_db_name}.piwik_datasource stored as parquet as
+select id, split(originalidd, '\\:')[1] as piwik_id
+from ${openaire_db_name}.datasource
+    lateral view explode(originalid) temp as originalidd
+where originalidd like "piwik:%"; /*EOS*/
 
 CREATE TABLE ${stats_db_name}.datasource stored as parquet as
-with piwik_datasource as (
-    select id, split(originalidd, '\\:')[1] as piwik_id
-    from ${openaire_db_name}.datasource
-        lateral view explode(originalid) temp as originalidd
-    where originalidd like "piwik:%"
-)
 select /*+ COALESCE(100) */
     substr(dtrce.id, 4) as id,
     case when dtrce.officialname.value='Unknown Repository' then 'Other' else dtrce.officialname.value end as name,
@@ -31,10 +37,12 @@ select /*+ COALESCE(100) */
     dtrce.journal.issnprinted as issn_printed,
     dtrce.journal.issnonline as issn_online
 from ${openaire_db_name}.datasource dtrce
-    left outer join (select inst.hostedby.key as d_id from ${openaire_db_name}.result lateral view outer explode (instance) insts as inst) res on res.d_id=dtrce.id
-    left outer join piwik_datasource piwik_d on piwik_d.id=dtrce.id
+left outer join ${stats_db_name}.harvested_datasources res on res.d_id=dtrce.id
+left outer join ${stats_db_name}.piwik_datasource piwik_d on piwik_d.id=dtrce.id
 where dtrce.datainfo.deletedbyinference = false
   and dtrce.datainfo.invisible = false; /*EOS*/
+DROP TABLE IF EXISTS ${stats_db_name}.harvested_datasources purge; /*EOS*/
+DROP TABLE IF EXISTS ${stats_db_name}.piwik_datasource purge; /*EOS*/
 
 
 DROP TABLE IF EXISTS ${stats_db_name}.datasource_languages purge; /*EOS*/