[Stats wf] #372, #405 to production #406

Merged
claudio.atzori merged 28 commits from antonis.lempesis/dnet-hadoop:beta into master 2024-03-26 12:18:27 +01:00
1 changed file with 61 additions and 89 deletions
Showing only changes of commit f74c7e8689 - Show all commits

View File

@ -7,104 +7,76 @@
------------------------------------------------------ ------------------------------------------------------
DROP TABLE IF EXISTS ${stats_db_name}.publication_refereed purge; DROP TABLE IF EXISTS ${stats_db_name}.publication_refereed purge;
--CREATE TABLE IF NOT EXISTS ${stats_db_name}.publication_refereed STORED AS PARQUET as
--select substr(r.id, 4) as id, inst.refereed.classname as refereed
--from ${openaire_db_name}.publication r lateral view explode(r.instance) instances as inst
--where r.datainfo.deletedbyinference=false and r.datainfo.invisible = FALSE;
CREATE TABLE IF NOT EXISTS ${stats_db_name}.publication_refereed STORED AS PARQUET as CREATE TABLE IF NOT EXISTS ${stats_db_name}.publication_refereed STORED AS PARQUET as
with peer_reviewed as with peer_reviewed as (
(select distinct substr(r.id, 4) as id, inst.refereed.classname as refereed select distinct substr(r.id, 4) as id, inst.refereed.classname as refereed
from ${openaire_db_name}.publication r lateral from ${openaire_db_name}.publication r lateral view explode(r.instance) instances as inst
view explode(r.instance) instances as inst where r.datainfo.deletedbyinference=false and r.datainfo.invisible = FALSE and inst.refereed.classname='peerReviewed'),
where r.datainfo.deletedbyinference=false and r.datainfo.invisible = FALSE non_peer_reviewed as (
and inst.refereed.classname='peerReviewed'), select distinct substr(r.id, 4) as id, inst.refereed.classname as refereed
non_peer_reviewed as from ${openaire_db_name}.publication r lateral view explode(r.instance) instances as inst
(select distinct substr(r.id, 4) as id, inst.refereed.classname as refereed where r.datainfo.deletedbyinference=false and r.datainfo.invisible = FALSE and inst.refereed.classname='nonPeerReviewed')
from ${openaire_db_name}.publication r lateral select distinct *
view explode(r.instance) instances as inst from (
where r.datainfo.deletedbyinference=false and r.datainfo.invisible = FALSE
and inst.refereed.classname='nonPeerReviewed')
select peer_reviewed.* from peer_reviewed select peer_reviewed.* from peer_reviewed
union all union all
select non_peer_reviewed.* from non_peer_reviewed select non_peer_reviewed.* from non_peer_reviewed
left join peer_reviewed on peer_reviewed.id=non_peer_reviewed.id left join peer_reviewed on peer_reviewed.id=non_peer_reviewed.id
where peer_reviewed.id is null; where peer_reviewed.id is null) pr;
DROP TABLE IF EXISTS ${stats_db_name}.dataset_refereed purge; DROP TABLE IF EXISTS ${stats_db_name}.dataset_refereed purge;
--CREATE TABLE IF NOT EXISTS ${stats_db_name}.dataset_refereed STORED AS PARQUET as
--select substr(r.id, 4) as id, inst.refereed.classname as refereed
--from ${openaire_db_name}.dataset r lateral view explode(r.instance) instances as inst
--where r.datainfo.deletedbyinference=false and r.datainfo.invisible = FALSE;
CREATE TABLE IF NOT EXISTS ${stats_db_name}.dataset_refereed STORED AS PARQUET as CREATE TABLE IF NOT EXISTS ${stats_db_name}.dataset_refereed STORED AS PARQUET as
with peer_reviewed as with peer_reviewed as (
(select distinct substr(r.id, 4) as id, inst.refereed.classname as refereed select distinct substr(r.id, 4) as id, inst.refereed.classname as refereed
from ${openaire_db_name}.dataset r lateral from ${openaire_db_name}.dataset r lateral view explode(r.instance) instances as inst
view explode(r.instance) instances as inst where r.datainfo.deletedbyinference=false and r.datainfo.invisible = FALSE and inst.refereed.classname='peerReviewed'),
where r.datainfo.deletedbyinference=false and r.datainfo.invisible = FALSE non_peer_reviewed as (
and inst.refereed.classname='peerReviewed'), select distinct substr(r.id, 4) as id, inst.refereed.classname as refereed
non_peer_reviewed as from ${openaire_db_name}.dataset r lateral view explode(r.instance) instances as inst
(select distinct substr(r.id, 4) as id, inst.refereed.classname as refereed where r.datainfo.deletedbyinference=false and r.datainfo.invisible = FALSE and inst.refereed.classname='nonPeerReviewed')
from ${openaire_db_name}.dataset r lateral select distinct *
view explode(r.instance) instances as inst from (
where r.datainfo.deletedbyinference=false and r.datainfo.invisible = FALSE
and inst.refereed.classname='nonPeerReviewed')
select peer_reviewed.* from peer_reviewed select peer_reviewed.* from peer_reviewed
union all union all
select non_peer_reviewed.* from non_peer_reviewed select non_peer_reviewed.* from non_peer_reviewed
left join peer_reviewed on peer_reviewed.id=non_peer_reviewed.id left join peer_reviewed on peer_reviewed.id=non_peer_reviewed.id
where peer_reviewed.id is null; where peer_reviewed.id is null) pr;
DROP TABLE IF EXISTS ${stats_db_name}.software_refereed purge; DROP TABLE IF EXISTS ${stats_db_name}.software_refereed purge;
--CREATE TABLE IF NOT EXISTS ${stats_db_name}.software_refereed STORED AS PARQUET as
--select substr(r.id, 4) as id, inst.refereed.classname as refereed
--from ${openaire_db_name}.software r lateral view explode(r.instance) instances as inst
--where r.datainfo.deletedbyinference=false and r.datainfo.invisible = FALSE;
CREATE TABLE IF NOT EXISTS ${stats_db_name}.software_refereed STORED AS PARQUET as CREATE TABLE IF NOT EXISTS ${stats_db_name}.software_refereed STORED AS PARQUET as
with peer_reviewed as with peer_reviewed as (
(select distinct substr(r.id, 4) as id, inst.refereed.classname as refereed select distinct substr(r.id, 4) as id, inst.refereed.classname as refereed
from ${openaire_db_name}.software r lateral from ${openaire_db_name}.software r lateral view explode(r.instance) instances as inst
view explode(r.instance) instances as inst where r.datainfo.deletedbyinference=false and r.datainfo.invisible = FALSE and inst.refereed.classname='peerReviewed'),
where r.datainfo.deletedbyinference=false and r.datainfo.invisible = FALSE non_peer_reviewed as (
and inst.refereed.classname='peerReviewed'), select distinct substr(r.id, 4) as id, inst.refereed.classname as refereed
non_peer_reviewed as from ${openaire_db_name}.software r lateral view explode(r.instance) instances as inst
(select distinct substr(r.id, 4) as id, inst.refereed.classname as refereed where r.datainfo.deletedbyinference=false and r.datainfo.invisible = FALSE and inst.refereed.classname='nonPeerReviewed')
from ${openaire_db_name}.software r lateral select distinct *
view explode(r.instance) instances as inst from (
where r.datainfo.deletedbyinference=false and r.datainfo.invisible = FALSE
and inst.refereed.classname='nonPeerReviewed')
select peer_reviewed.* from peer_reviewed select peer_reviewed.* from peer_reviewed
union all union all
select non_peer_reviewed.* from non_peer_reviewed select non_peer_reviewed.* from non_peer_reviewed
left join peer_reviewed on peer_reviewed.id=non_peer_reviewed.id left join peer_reviewed on peer_reviewed.id=non_peer_reviewed.id
where peer_reviewed.id is null; where peer_reviewed.id is null) pr;
DROP TABLE IF EXISTS ${stats_db_name}.otherresearchproduct_refereed purge; DROP TABLE IF EXISTS ${stats_db_name}.otherresearchproduct_refereed purge;
--CREATE TABLE IF NOT EXISTS ${stats_db_name}.otherresearchproduct_refereed STORED AS PARQUET as
--select substr(r.id, 4) as id, inst.refereed.classname as refereed
--from ${openaire_db_name}.otherresearchproduct r lateral view explode(r.instance) instances as inst
--where r.datainfo.deletedbyinference=false and r.datainfo.invisible = FALSE;
CREATE TABLE IF NOT EXISTS ${stats_db_name}.otherresearchproduct_refereed STORED AS PARQUET as CREATE TABLE IF NOT EXISTS ${stats_db_name}.otherresearchproduct_refereed STORED AS PARQUET as
with peer_reviewed as with peer_reviewed as (
(select distinct substr(r.id, 4) as id, inst.refereed.classname as refereed select distinct substr(r.id, 4) as id, inst.refereed.classname as refereed
from ${openaire_db_name}.otherresearchproduct r lateral from ${openaire_db_name}.otherresearchproduct r lateral view explode(r.instance) instances as inst
view explode(r.instance) instances as inst where r.datainfo.deletedbyinference=false and r.datainfo.invisible = FALSE and inst.refereed.classname='peerReviewed'),
where r.datainfo.deletedbyinference=false and r.datainfo.invisible = FALSE non_peer_reviewed as (
and inst.refereed.classname='peerReviewed'), select distinct substr(r.id, 4) as id, inst.refereed.classname as refereed
non_peer_reviewed as from ${openaire_db_name}.otherresearchproduct r lateral view explode(r.instance) instances as inst
(select distinct substr(r.id, 4) as id, inst.refereed.classname as refereed where r.datainfo.deletedbyinference=false and r.datainfo.invisible = FALSE and inst.refereed.classname='nonPeerReviewed')
from ${openaire_db_name}.otherresearchproduct r lateral select distinct *
view explode(r.instance) instances as inst from (
where r.datainfo.deletedbyinference=false and r.datainfo.invisible = FALSE
and inst.refereed.classname='nonPeerReviewed')
select peer_reviewed.* from peer_reviewed select peer_reviewed.* from peer_reviewed
union all union all
select non_peer_reviewed.* from non_peer_reviewed select non_peer_reviewed.* from non_peer_reviewed
left join peer_reviewed on peer_reviewed.id=non_peer_reviewed.id left join peer_reviewed on peer_reviewed.id=non_peer_reviewed.id
where peer_reviewed.id is null; where peer_reviewed.id is null) pr;
CREATE VIEW IF NOT EXISTS ${stats_db_name}.result_refereed as CREATE VIEW IF NOT EXISTS ${stats_db_name}.result_refereed as
select * from ${stats_db_name}.publication_refereed select * from ${stats_db_name}.publication_refereed