selecting distinct peer_reviewed

This commit is contained in:
Antonis Lempesis 2024-03-12 02:13:04 +02:00
parent 3c79720342
commit f74c7e8689
1 changed file with 61 additions and 89 deletions

View File

@@ -7,104 +7,76 @@
------------------------------------------------------ ------------------------------------------------------
-- publication_refereed: (id, refereed) rows derived from the exploded
-- instances of each publication that is neither deleted by inference nor
-- invisible. The id is the source id starting at its 4th character.
-- A publication with a peerReviewed instance is emitted as peerReviewed.
-- A publication with a nonPeerReviewed instance is emitted as nonPeerReviewed
-- only when no peerReviewed row exists for the same id (anti-join below).
-- Instances carrying any other refereed classname produce no row.
DROP TABLE IF EXISTS ${stats_db_name}.publication_refereed PURGE;

CREATE TABLE IF NOT EXISTS ${stats_db_name}.publication_refereed STORED AS PARQUET AS
WITH peer_reviewed AS (
    SELECT DISTINCT
        substr(res.id, 4)       AS id,
        item.refereed.classname AS refereed
    FROM ${openaire_db_name}.publication res
    LATERAL VIEW explode(res.instance) exploded AS item
    WHERE res.datainfo.deletedbyinference = FALSE
      AND res.datainfo.invisible = FALSE
      AND item.refereed.classname = 'peerReviewed'
),
non_peer_reviewed AS (
    SELECT DISTINCT
        substr(res.id, 4)       AS id,
        item.refereed.classname AS refereed
    FROM ${openaire_db_name}.publication res
    LATERAL VIEW explode(res.instance) exploded AS item
    WHERE res.datainfo.deletedbyinference = FALSE
      AND res.datainfo.invisible = FALSE
      AND item.refereed.classname = 'nonPeerReviewed'
)
SELECT DISTINCT
    pr.id,
    pr.refereed
FROM (
    SELECT
        p.id,
        p.refereed
    FROM peer_reviewed p
    UNION ALL
    SELECT
        n.id,
        n.refereed
    FROM non_peer_reviewed n
    LEFT JOIN peer_reviewed p
        ON p.id = n.id
    WHERE p.id IS NULL
) pr;
-- dataset_refereed: (id, refereed) rows derived from the exploded instances
-- of each dataset that is neither deleted by inference nor invisible.
-- The id is the source id starting at its 4th character.
-- A dataset with a peerReviewed instance is emitted as peerReviewed.
-- A dataset with a nonPeerReviewed instance is emitted as nonPeerReviewed
-- only when no peerReviewed row exists for the same id (anti-join below).
-- Instances carrying any other refereed classname produce no row.
DROP TABLE IF EXISTS ${stats_db_name}.dataset_refereed PURGE;

CREATE TABLE IF NOT EXISTS ${stats_db_name}.dataset_refereed STORED AS PARQUET AS
WITH peer_reviewed AS (
    SELECT DISTINCT
        substr(res.id, 4)       AS id,
        item.refereed.classname AS refereed
    FROM ${openaire_db_name}.dataset res
    LATERAL VIEW explode(res.instance) exploded AS item
    WHERE res.datainfo.deletedbyinference = FALSE
      AND res.datainfo.invisible = FALSE
      AND item.refereed.classname = 'peerReviewed'
),
non_peer_reviewed AS (
    SELECT DISTINCT
        substr(res.id, 4)       AS id,
        item.refereed.classname AS refereed
    FROM ${openaire_db_name}.dataset res
    LATERAL VIEW explode(res.instance) exploded AS item
    WHERE res.datainfo.deletedbyinference = FALSE
      AND res.datainfo.invisible = FALSE
      AND item.refereed.classname = 'nonPeerReviewed'
)
SELECT DISTINCT
    pr.id,
    pr.refereed
FROM (
    SELECT
        p.id,
        p.refereed
    FROM peer_reviewed p
    UNION ALL
    SELECT
        n.id,
        n.refereed
    FROM non_peer_reviewed n
    LEFT JOIN peer_reviewed p
        ON p.id = n.id
    WHERE p.id IS NULL
) pr;
-- software_refereed: (id, refereed) rows derived from the exploded instances
-- of each software result that is neither deleted by inference nor invisible.
-- The id is the source id starting at its 4th character.
-- A software result with a peerReviewed instance is emitted as peerReviewed.
-- One with a nonPeerReviewed instance is emitted as nonPeerReviewed only when
-- no peerReviewed row exists for the same id (anti-join below).
-- Instances carrying any other refereed classname produce no row.
DROP TABLE IF EXISTS ${stats_db_name}.software_refereed PURGE;

CREATE TABLE IF NOT EXISTS ${stats_db_name}.software_refereed STORED AS PARQUET AS
WITH peer_reviewed AS (
    SELECT DISTINCT
        substr(res.id, 4)       AS id,
        item.refereed.classname AS refereed
    FROM ${openaire_db_name}.software res
    LATERAL VIEW explode(res.instance) exploded AS item
    WHERE res.datainfo.deletedbyinference = FALSE
      AND res.datainfo.invisible = FALSE
      AND item.refereed.classname = 'peerReviewed'
),
non_peer_reviewed AS (
    SELECT DISTINCT
        substr(res.id, 4)       AS id,
        item.refereed.classname AS refereed
    FROM ${openaire_db_name}.software res
    LATERAL VIEW explode(res.instance) exploded AS item
    WHERE res.datainfo.deletedbyinference = FALSE
      AND res.datainfo.invisible = FALSE
      AND item.refereed.classname = 'nonPeerReviewed'
)
SELECT DISTINCT
    pr.id,
    pr.refereed
FROM (
    SELECT
        p.id,
        p.refereed
    FROM peer_reviewed p
    UNION ALL
    SELECT
        n.id,
        n.refereed
    FROM non_peer_reviewed n
    LEFT JOIN peer_reviewed p
        ON p.id = n.id
    WHERE p.id IS NULL
) pr;
-- otherresearchproduct_refereed: (id, refereed) rows derived from the exploded
-- instances of each other research product that is neither deleted by
-- inference nor invisible. The id is the source id starting at its 4th
-- character.
-- A product with a peerReviewed instance is emitted as peerReviewed.
-- One with a nonPeerReviewed instance is emitted as nonPeerReviewed only when
-- no peerReviewed row exists for the same id (anti-join below).
-- Instances carrying any other refereed classname produce no row.
DROP TABLE IF EXISTS ${stats_db_name}.otherresearchproduct_refereed PURGE;

CREATE TABLE IF NOT EXISTS ${stats_db_name}.otherresearchproduct_refereed STORED AS PARQUET AS
WITH peer_reviewed AS (
    SELECT DISTINCT
        substr(res.id, 4)       AS id,
        item.refereed.classname AS refereed
    FROM ${openaire_db_name}.otherresearchproduct res
    LATERAL VIEW explode(res.instance) exploded AS item
    WHERE res.datainfo.deletedbyinference = FALSE
      AND res.datainfo.invisible = FALSE
      AND item.refereed.classname = 'peerReviewed'
),
non_peer_reviewed AS (
    SELECT DISTINCT
        substr(res.id, 4)       AS id,
        item.refereed.classname AS refereed
    FROM ${openaire_db_name}.otherresearchproduct res
    LATERAL VIEW explode(res.instance) exploded AS item
    WHERE res.datainfo.deletedbyinference = FALSE
      AND res.datainfo.invisible = FALSE
      AND item.refereed.classname = 'nonPeerReviewed'
)
SELECT DISTINCT
    pr.id,
    pr.refereed
FROM (
    SELECT
        p.id,
        p.refereed
    FROM peer_reviewed p
    UNION ALL
    SELECT
        n.id,
        n.refereed
    FROM non_peer_reviewed n
    LEFT JOIN peer_reviewed p
        ON p.id = n.id
    WHERE p.id IS NULL
) pr;
CREATE VIEW IF NOT EXISTS ${stats_db_name}.result_refereed as CREATE VIEW IF NOT EXISTS ${stats_db_name}.result_refereed as
select * from ${stats_db_name}.publication_refereed select * from ${stats_db_name}.publication_refereed
@@ -132,4 +104,4 @@ rel.properties[1].value apc_currency
from ${openaire_db_name}.relation rel from ${openaire_db_name}.relation rel
join ${openaire_db_name}.organization o on o.id=rel.source join ${openaire_db_name}.organization o on o.id=rel.source
join ${openaire_db_name}.result r on r.id=rel.target join ${openaire_db_name}.result r on r.id=rel.target
where rel.subreltype = 'affiliation' and rel.datainfo.deletedbyinference = false and size(rel.properties)>0; where rel.subreltype = 'affiliation' and rel.datainfo.deletedbyinference = false and size(rel.properties)>0;