[Stats wf] #372, #405 to production #406

Merged
claudio.atzori merged 28 commits from antonis.lempesis/dnet-hadoop:beta into master 2024-03-26 12:18:27 +01:00
1 changed files with 61 additions and 89 deletions
Showing only changes of commit f74c7e8689 - Show all commits

View File

@ -7,104 +7,76 @@
------------------------------------------------------
DROP TABLE IF EXISTS ${stats_db_name}.publication_refereed purge;
--CREATE TABLE IF NOT EXISTS ${stats_db_name}.publication_refereed STORED AS PARQUET as
--select substr(r.id, 4) as id, inst.refereed.classname as refereed
--from ${openaire_db_name}.publication r lateral view explode(r.instance) instances as inst
--where r.datainfo.deletedbyinference=false and r.datainfo.invisible = FALSE;
CREATE TABLE IF NOT EXISTS ${stats_db_name}.publication_refereed STORED AS PARQUET as
with peer_reviewed as
(select distinct substr(r.id, 4) as id, inst.refereed.classname as refereed
from ${openaire_db_name}.publication r lateral
view explode(r.instance) instances as inst
where r.datainfo.deletedbyinference=false and r.datainfo.invisible = FALSE
and inst.refereed.classname='peerReviewed'),
non_peer_reviewed as
(select distinct substr(r.id, 4) as id, inst.refereed.classname as refereed
from ${openaire_db_name}.publication r lateral
view explode(r.instance) instances as inst
where r.datainfo.deletedbyinference=false and r.datainfo.invisible = FALSE
and inst.refereed.classname='nonPeerReviewed')
select peer_reviewed.* from peer_reviewed
union all
select non_peer_reviewed.* from non_peer_reviewed
left join peer_reviewed on peer_reviewed.id=non_peer_reviewed.id
where peer_reviewed.id is null;
with peer_reviewed as (
select distinct substr(r.id, 4) as id, inst.refereed.classname as refereed
from ${openaire_db_name}.publication r lateral view explode(r.instance) instances as inst
where r.datainfo.deletedbyinference=false and r.datainfo.invisible = FALSE and inst.refereed.classname='peerReviewed'),
non_peer_reviewed as (
select distinct substr(r.id, 4) as id, inst.refereed.classname as refereed
from ${openaire_db_name}.publication r lateral view explode(r.instance) instances as inst
where r.datainfo.deletedbyinference=false and r.datainfo.invisible = FALSE and inst.refereed.classname='nonPeerReviewed')
select distinct *
from (
select peer_reviewed.* from peer_reviewed
union all
select non_peer_reviewed.* from non_peer_reviewed
left join peer_reviewed on peer_reviewed.id=non_peer_reviewed.id
where peer_reviewed.id is null) pr;
DROP TABLE IF EXISTS ${stats_db_name}.dataset_refereed purge;
--CREATE TABLE IF NOT EXISTS ${stats_db_name}.dataset_refereed STORED AS PARQUET as
--select substr(r.id, 4) as id, inst.refereed.classname as refereed
--from ${openaire_db_name}.dataset r lateral view explode(r.instance) instances as inst
--where r.datainfo.deletedbyinference=false and r.datainfo.invisible = FALSE;
CREATE TABLE IF NOT EXISTS ${stats_db_name}.dataset_refereed STORED AS PARQUET as
with peer_reviewed as
(select distinct substr(r.id, 4) as id, inst.refereed.classname as refereed
from ${openaire_db_name}.dataset r lateral
view explode(r.instance) instances as inst
where r.datainfo.deletedbyinference=false and r.datainfo.invisible = FALSE
and inst.refereed.classname='peerReviewed'),
non_peer_reviewed as
(select distinct substr(r.id, 4) as id, inst.refereed.classname as refereed
from ${openaire_db_name}.dataset r lateral
view explode(r.instance) instances as inst
where r.datainfo.deletedbyinference=false and r.datainfo.invisible = FALSE
and inst.refereed.classname='nonPeerReviewed')
select peer_reviewed.* from peer_reviewed
union all
select non_peer_reviewed.* from non_peer_reviewed
left join peer_reviewed on peer_reviewed.id=non_peer_reviewed.id
where peer_reviewed.id is null;
with peer_reviewed as (
select distinct substr(r.id, 4) as id, inst.refereed.classname as refereed
from ${openaire_db_name}.dataset r lateral view explode(r.instance) instances as inst
where r.datainfo.deletedbyinference=false and r.datainfo.invisible = FALSE and inst.refereed.classname='peerReviewed'),
non_peer_reviewed as (
select distinct substr(r.id, 4) as id, inst.refereed.classname as refereed
from ${openaire_db_name}.dataset r lateral view explode(r.instance) instances as inst
where r.datainfo.deletedbyinference=false and r.datainfo.invisible = FALSE and inst.refereed.classname='nonPeerReviewed')
select distinct *
from (
select peer_reviewed.* from peer_reviewed
union all
select non_peer_reviewed.* from non_peer_reviewed
left join peer_reviewed on peer_reviewed.id=non_peer_reviewed.id
where peer_reviewed.id is null) pr;
DROP TABLE IF EXISTS ${stats_db_name}.software_refereed purge;
--CREATE TABLE IF NOT EXISTS ${stats_db_name}.software_refereed STORED AS PARQUET as
--select substr(r.id, 4) as id, inst.refereed.classname as refereed
--from ${openaire_db_name}.software r lateral view explode(r.instance) instances as inst
--where r.datainfo.deletedbyinference=false and r.datainfo.invisible = FALSE;
CREATE TABLE IF NOT EXISTS ${stats_db_name}.software_refereed STORED AS PARQUET as
with peer_reviewed as
(select distinct substr(r.id, 4) as id, inst.refereed.classname as refereed
from ${openaire_db_name}.software r lateral
view explode(r.instance) instances as inst
where r.datainfo.deletedbyinference=false and r.datainfo.invisible = FALSE
and inst.refereed.classname='peerReviewed'),
non_peer_reviewed as
(select distinct substr(r.id, 4) as id, inst.refereed.classname as refereed
from ${openaire_db_name}.software r lateral
view explode(r.instance) instances as inst
where r.datainfo.deletedbyinference=false and r.datainfo.invisible = FALSE
and inst.refereed.classname='nonPeerReviewed')
select peer_reviewed.* from peer_reviewed
union all
select non_peer_reviewed.* from non_peer_reviewed
left join peer_reviewed on peer_reviewed.id=non_peer_reviewed.id
where peer_reviewed.id is null;
with peer_reviewed as (
select distinct substr(r.id, 4) as id, inst.refereed.classname as refereed
from ${openaire_db_name}.software r lateral view explode(r.instance) instances as inst
where r.datainfo.deletedbyinference=false and r.datainfo.invisible = FALSE and inst.refereed.classname='peerReviewed'),
non_peer_reviewed as (
select distinct substr(r.id, 4) as id, inst.refereed.classname as refereed
from ${openaire_db_name}.software r lateral view explode(r.instance) instances as inst
where r.datainfo.deletedbyinference=false and r.datainfo.invisible = FALSE and inst.refereed.classname='nonPeerReviewed')
select distinct *
from (
select peer_reviewed.* from peer_reviewed
union all
select non_peer_reviewed.* from non_peer_reviewed
left join peer_reviewed on peer_reviewed.id=non_peer_reviewed.id
where peer_reviewed.id is null) pr;
DROP TABLE IF EXISTS ${stats_db_name}.otherresearchproduct_refereed purge;
--CREATE TABLE IF NOT EXISTS ${stats_db_name}.otherresearchproduct_refereed STORED AS PARQUET as
--select substr(r.id, 4) as id, inst.refereed.classname as refereed
--from ${openaire_db_name}.otherresearchproduct r lateral view explode(r.instance) instances as inst
--where r.datainfo.deletedbyinference=false and r.datainfo.invisible = FALSE;
CREATE TABLE IF NOT EXISTS ${stats_db_name}.otherresearchproduct_refereed STORED AS PARQUET as
with peer_reviewed as
(select distinct substr(r.id, 4) as id, inst.refereed.classname as refereed
from ${openaire_db_name}.otherresearchproduct r lateral
view explode(r.instance) instances as inst
where r.datainfo.deletedbyinference=false and r.datainfo.invisible = FALSE
and inst.refereed.classname='peerReviewed'),
non_peer_reviewed as
(select distinct substr(r.id, 4) as id, inst.refereed.classname as refereed
from ${openaire_db_name}.otherresearchproduct r lateral
view explode(r.instance) instances as inst
where r.datainfo.deletedbyinference=false and r.datainfo.invisible = FALSE
and inst.refereed.classname='nonPeerReviewed')
select peer_reviewed.* from peer_reviewed
union all
select non_peer_reviewed.* from non_peer_reviewed
left join peer_reviewed on peer_reviewed.id=non_peer_reviewed.id
where peer_reviewed.id is null;
with peer_reviewed as (
select distinct substr(r.id, 4) as id, inst.refereed.classname as refereed
from ${openaire_db_name}.otherresearchproduct r lateral view explode(r.instance) instances as inst
where r.datainfo.deletedbyinference=false and r.datainfo.invisible = FALSE and inst.refereed.classname='peerReviewed'),
non_peer_reviewed as (
select distinct substr(r.id, 4) as id, inst.refereed.classname as refereed
from ${openaire_db_name}.otherresearchproduct r lateral view explode(r.instance) instances as inst
where r.datainfo.deletedbyinference=false and r.datainfo.invisible = FALSE and inst.refereed.classname='nonPeerReviewed')
select distinct *
from (
select peer_reviewed.* from peer_reviewed
union all
select non_peer_reviewed.* from non_peer_reviewed
left join peer_reviewed on peer_reviewed.id=non_peer_reviewed.id
where peer_reviewed.id is null) pr;
CREATE VIEW IF NOT EXISTS ${stats_db_name}.result_refereed as
select * from ${stats_db_name}.publication_refereed
@ -132,4 +104,4 @@ rel.properties[1].value apc_currency
from ${openaire_db_name}.relation rel
join ${openaire_db_name}.organization o on o.id=rel.source
join ${openaire_db_name}.result r on r.id=rel.target
where rel.subreltype = 'affiliation' and rel.datainfo.deletedbyinference = false and size(rel.properties)>0;
where rel.subreltype = 'affiliation' and rel.datainfo.deletedbyinference = false and size(rel.properties)>0;