Merge pull request 'datasource table creation split in steps' (#489) from antonis.lempesis/dnet-hadoop:beta into beta
Reviewed-on: #489
This commit is contained in:
commit
5734b80861
|
@ -32,7 +32,7 @@ select distinct * from (
|
||||||
from SOURCE.result r
|
from SOURCE.result r
|
||||||
join SOURCE.result_projects rp on rp.id=r.id
|
join SOURCE.result_projects rp on rp.id=r.id
|
||||||
join SOURCE.project p on p.id=rp.project
|
join SOURCE.project p on p.id=rp.project
|
||||||
join openaire_prod_stats_monitor_ie_20231226b.irish_funders irf on irf.funder=p.funder
|
join TARGET.irish_funders irf on irf.funder=p.funder
|
||||||
union all
|
union all
|
||||||
select r.*
|
select r.*
|
||||||
from SOURCE.result r
|
from SOURCE.result r
|
||||||
|
|
|
@ -8,14 +8,20 @@ set mapred.job.queue.name=analytics; /*EOS*/
|
||||||
------------------------------------------------------------
|
------------------------------------------------------------
|
||||||
------------------------------------------------------------
|
------------------------------------------------------------
|
||||||
DROP TABLE IF EXISTS ${stats_db_name}.datasource purge; /*EOS*/
|
DROP TABLE IF EXISTS ${stats_db_name}.datasource purge; /*EOS*/
|
||||||
|
DROP TABLE IF EXISTS ${stats_db_name}.harested_datasources purge; /*EOS*/
|
||||||
|
DROP TABLE IF EXISTS ${stats_db_name}.piwik_datasource purge; /*EOS*/
|
||||||
|
|
||||||
CREATE TABLE ${stats_db_name}.datasource stored as parquet as
|
create table ${stats_db_name}.harested_datasources stored as parquet as
|
||||||
with piwik_datasource as (
|
select distinct inst.hostedby.key as d_id
|
||||||
|
from ${openaire_db_name}.result lateral view outer explode (instance) insts as inst; /*EOS*/
|
||||||
|
|
||||||
|
create table ${stats_db_name}.piwik_datasource stored as parquet as
|
||||||
select id, split(originalidd, '\\:')[1] as piwik_id
|
select id, split(originalidd, '\\:')[1] as piwik_id
|
||||||
from ${openaire_db_name}.datasource
|
from ${openaire_db_name}.datasource
|
||||||
lateral view explode(originalid) temp as originalidd
|
lateral view explode(originalid) temp as originalidd
|
||||||
where originalidd like "piwik:%"
|
where originalidd like "piwik:%"; /*EOS*/
|
||||||
)
|
|
||||||
|
CREATE TABLE ${stats_db_name}.datasource stored as parquet as
|
||||||
select /*+ COALESCE(100) */
|
select /*+ COALESCE(100) */
|
||||||
substr(dtrce.id, 4) as id,
|
substr(dtrce.id, 4) as id,
|
||||||
case when dtrce.officialname.value='Unknown Repository' then 'Other' else dtrce.officialname.value end as name,
|
case when dtrce.officialname.value='Unknown Repository' then 'Other' else dtrce.officialname.value end as name,
|
||||||
|
@ -31,10 +37,12 @@ select /*+ COALESCE(100) */
|
||||||
dtrce.journal.issnprinted as issn_printed,
|
dtrce.journal.issnprinted as issn_printed,
|
||||||
dtrce.journal.issnonline as issn_online
|
dtrce.journal.issnonline as issn_online
|
||||||
from ${openaire_db_name}.datasource dtrce
|
from ${openaire_db_name}.datasource dtrce
|
||||||
left outer join (select inst.hostedby.key as d_id from ${openaire_db_name}.result lateral view outer explode (instance) insts as inst) res on res.d_id=dtrce.id
|
left outer join ${stats_db_name}.harested_datasources res on res.d_id=dtrce.id
|
||||||
left outer join piwik_datasource piwik_d on piwik_d.id=dtrce.id
|
left outer join ${stats_db_name}.piwik_datasource piwik_d on piwik_d.id=dtrce.id
|
||||||
where dtrce.datainfo.deletedbyinference = false and dtrce.datainfo.invisible = false; /*EOS*/
|
where dtrce.datainfo.deletedbyinference = false and dtrce.datainfo.invisible = false; /*EOS*/
|
||||||
|
|
||||||
|
-- Clean up the intermediate table that was only needed for the join that builds the datasource table.
-- IF EXISTS keeps a re-run from failing here, and PURGE skips the trash, as the drop at the start of this step does.
DROP TABLE IF EXISTS ${stats_db_name}.harested_datasources purge; /*EOS*/
|
||||||
|
-- Clean up the intermediate piwik id table that was only needed for the join that builds the datasource table.
-- IF EXISTS keeps a re-run from failing here, and PURGE skips the trash, as the drop at the start of this step does.
DROP TABLE IF EXISTS ${stats_db_name}.piwik_datasource purge; /*EOS*/
|
||||||
|
|
||||||
DROP TABLE IF EXISTS ${stats_db_name}.datasource_languages purge; /*EOS*/
|
DROP TABLE IF EXISTS ${stats_db_name}.datasource_languages purge; /*EOS*/
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue