From f74c7e8689f2a05b5ec9e85324a705438a3f683f Mon Sep 17 00:00:00 2001
From: antleb
Date: Tue, 12 Mar 2024 02:13:04 +0200
Subject: [PATCH] selecting distinct peer_reviewed

---
Review note (this commentary area is ignored by git am / git apply):
the line layout of this patch was restored from a copy in which every
newline had been collapsed into a single space. Record boundaries were
cross-checked against the hunk ranges (104/76 and 4/4 lines) and the
diffstat (61 insertions, 89 deletions). The four-space indent on added
lines, the absence of trailing whitespace and the position of blank
context lines are assumptions -- verify against commit f74c7e86 before
applying.

 .../graph/stats/oozie_app/scripts/step15.sql | 150 +++++++-----------
 1 file changed, 61 insertions(+), 89 deletions(-)

diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step15.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step15.sql
index 343438cf4..ce6b6cc2f 100644
--- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step15.sql
+++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step15.sql
@@ -7,104 +7,76 @@
 
 ------------------------------------------------------
 DROP TABLE IF EXISTS ${stats_db_name}.publication_refereed purge;
-
---CREATE TABLE IF NOT EXISTS ${stats_db_name}.publication_refereed STORED AS PARQUET as
---select substr(r.id, 4) as id, inst.refereed.classname as refereed
---from ${openaire_db_name}.publication r lateral view explode(r.instance) instances as inst
---where r.datainfo.deletedbyinference=false and r.datainfo.invisible = FALSE;
 CREATE TABLE IF NOT EXISTS ${stats_db_name}.publication_refereed STORED AS PARQUET as
-with peer_reviewed as
-(select distinct substr(r.id, 4) as id, inst.refereed.classname as refereed
-from ${openaire_db_name}.publication r lateral
-view explode(r.instance) instances as inst
-where r.datainfo.deletedbyinference=false and r.datainfo.invisible = FALSE
-and inst.refereed.classname='peerReviewed'),
-non_peer_reviewed as
-(select distinct substr(r.id, 4) as id, inst.refereed.classname as refereed
-from ${openaire_db_name}.publication r lateral
-view explode(r.instance) instances as inst
-where r.datainfo.deletedbyinference=false and r.datainfo.invisible = FALSE
-and inst.refereed.classname='nonPeerReviewed')
-select peer_reviewed.* from peer_reviewed
-union all
-select non_peer_reviewed.* from non_peer_reviewed
-left join peer_reviewed on peer_reviewed.id=non_peer_reviewed.id
-where peer_reviewed.id is null;
+with peer_reviewed as (
+    select distinct substr(r.id, 4) as id, inst.refereed.classname as refereed
+    from ${openaire_db_name}.publication r lateral view explode(r.instance) instances as inst
+    where r.datainfo.deletedbyinference=false and r.datainfo.invisible = FALSE and inst.refereed.classname='peerReviewed'),
+non_peer_reviewed as (
+    select distinct substr(r.id, 4) as id, inst.refereed.classname as refereed
+    from ${openaire_db_name}.publication r lateral view explode(r.instance) instances as inst
+    where r.datainfo.deletedbyinference=false and r.datainfo.invisible = FALSE and inst.refereed.classname='nonPeerReviewed')
+select distinct *
+from (
+    select peer_reviewed.* from peer_reviewed
+    union all
+    select non_peer_reviewed.* from non_peer_reviewed
+    left join peer_reviewed on peer_reviewed.id=non_peer_reviewed.id
+    where peer_reviewed.id is null) pr;
 
 DROP TABLE IF EXISTS ${stats_db_name}.dataset_refereed purge;
-
---CREATE TABLE IF NOT EXISTS ${stats_db_name}.dataset_refereed STORED AS PARQUET as
---select substr(r.id, 4) as id, inst.refereed.classname as refereed
---from ${openaire_db_name}.dataset r lateral view explode(r.instance) instances as inst
---where r.datainfo.deletedbyinference=false and r.datainfo.invisible = FALSE;
 CREATE TABLE IF NOT EXISTS ${stats_db_name}.dataset_refereed STORED AS PARQUET as
-with peer_reviewed as
-(select distinct substr(r.id, 4) as id, inst.refereed.classname as refereed
-from ${openaire_db_name}.dataset r lateral
-view explode(r.instance) instances as inst
-where r.datainfo.deletedbyinference=false and r.datainfo.invisible = FALSE
-and inst.refereed.classname='peerReviewed'),
-non_peer_reviewed as
-(select distinct substr(r.id, 4) as id, inst.refereed.classname as refereed
-from ${openaire_db_name}.dataset r lateral
-view explode(r.instance) instances as inst
-where r.datainfo.deletedbyinference=false and r.datainfo.invisible = FALSE
-and inst.refereed.classname='nonPeerReviewed')
-select peer_reviewed.* from peer_reviewed
-union all
-select non_peer_reviewed.* from non_peer_reviewed
-left join peer_reviewed on peer_reviewed.id=non_peer_reviewed.id
-where peer_reviewed.id is null;
+with peer_reviewed as (
+    select distinct substr(r.id, 4) as id, inst.refereed.classname as refereed
+    from ${openaire_db_name}.dataset r lateral view explode(r.instance) instances as inst
+    where r.datainfo.deletedbyinference=false and r.datainfo.invisible = FALSE and inst.refereed.classname='peerReviewed'),
+non_peer_reviewed as (
+    select distinct substr(r.id, 4) as id, inst.refereed.classname as refereed
+    from ${openaire_db_name}.dataset r lateral view explode(r.instance) instances as inst
+    where r.datainfo.deletedbyinference=false and r.datainfo.invisible = FALSE and inst.refereed.classname='nonPeerReviewed')
+select distinct *
+from (
+    select peer_reviewed.* from peer_reviewed
+    union all
+    select non_peer_reviewed.* from non_peer_reviewed
+    left join peer_reviewed on peer_reviewed.id=non_peer_reviewed.id
+    where peer_reviewed.id is null) pr;
 
 DROP TABLE IF EXISTS ${stats_db_name}.software_refereed purge;
-
---CREATE TABLE IF NOT EXISTS ${stats_db_name}.software_refereed STORED AS PARQUET as
---select substr(r.id, 4) as id, inst.refereed.classname as refereed
---from ${openaire_db_name}.software r lateral view explode(r.instance) instances as inst
---where r.datainfo.deletedbyinference=false and r.datainfo.invisible = FALSE;
 CREATE TABLE IF NOT EXISTS ${stats_db_name}.software_refereed STORED AS PARQUET as
-with peer_reviewed as
-(select distinct substr(r.id, 4) as id, inst.refereed.classname as refereed
-from ${openaire_db_name}.software r lateral
-view explode(r.instance) instances as inst
-where r.datainfo.deletedbyinference=false and r.datainfo.invisible = FALSE
-and inst.refereed.classname='peerReviewed'),
-non_peer_reviewed as
-(select distinct substr(r.id, 4) as id, inst.refereed.classname as refereed
-from ${openaire_db_name}.software r lateral
-view explode(r.instance) instances as inst
-where r.datainfo.deletedbyinference=false and r.datainfo.invisible = FALSE
-and inst.refereed.classname='nonPeerReviewed')
-select peer_reviewed.* from peer_reviewed
-union all
-select non_peer_reviewed.* from non_peer_reviewed
-left join peer_reviewed on peer_reviewed.id=non_peer_reviewed.id
-where peer_reviewed.id is null;
+with peer_reviewed as (
+    select distinct substr(r.id, 4) as id, inst.refereed.classname as refereed
+    from ${openaire_db_name}.software r lateral view explode(r.instance) instances as inst
+    where r.datainfo.deletedbyinference=false and r.datainfo.invisible = FALSE and inst.refereed.classname='peerReviewed'),
+non_peer_reviewed as (
+    select distinct substr(r.id, 4) as id, inst.refereed.classname as refereed
+    from ${openaire_db_name}.software r lateral view explode(r.instance) instances as inst
+    where r.datainfo.deletedbyinference=false and r.datainfo.invisible = FALSE and inst.refereed.classname='nonPeerReviewed')
+select distinct *
+from (
+    select peer_reviewed.* from peer_reviewed
+    union all
+    select non_peer_reviewed.* from non_peer_reviewed
+    left join peer_reviewed on peer_reviewed.id=non_peer_reviewed.id
+    where peer_reviewed.id is null) pr;
 
 DROP TABLE IF EXISTS ${stats_db_name}.otherresearchproduct_refereed purge;
-
---CREATE TABLE IF NOT EXISTS ${stats_db_name}.otherresearchproduct_refereed STORED AS PARQUET as
---select substr(r.id, 4) as id, inst.refereed.classname as refereed
---from ${openaire_db_name}.otherresearchproduct r lateral view explode(r.instance) instances as inst
---where r.datainfo.deletedbyinference=false and r.datainfo.invisible = FALSE;
 CREATE TABLE IF NOT EXISTS ${stats_db_name}.otherresearchproduct_refereed STORED AS PARQUET as
-with peer_reviewed as
-(select distinct substr(r.id, 4) as id, inst.refereed.classname as refereed
-from ${openaire_db_name}.otherresearchproduct r lateral
-view explode(r.instance) instances as inst
-where r.datainfo.deletedbyinference=false and r.datainfo.invisible = FALSE
-and inst.refereed.classname='peerReviewed'),
-non_peer_reviewed as
-(select distinct substr(r.id, 4) as id, inst.refereed.classname as refereed
-from ${openaire_db_name}.otherresearchproduct r lateral
-view explode(r.instance) instances as inst
-where r.datainfo.deletedbyinference=false and r.datainfo.invisible = FALSE
-and inst.refereed.classname='nonPeerReviewed')
-select peer_reviewed.* from peer_reviewed
-union all
-select non_peer_reviewed.* from non_peer_reviewed
-left join peer_reviewed on peer_reviewed.id=non_peer_reviewed.id
-where peer_reviewed.id is null;
+with peer_reviewed as (
+    select distinct substr(r.id, 4) as id, inst.refereed.classname as refereed
+    from ${openaire_db_name}.otherresearchproduct r lateral view explode(r.instance) instances as inst
+    where r.datainfo.deletedbyinference=false and r.datainfo.invisible = FALSE and inst.refereed.classname='peerReviewed'),
+non_peer_reviewed as (
+    select distinct substr(r.id, 4) as id, inst.refereed.classname as refereed
+    from ${openaire_db_name}.otherresearchproduct r lateral view explode(r.instance) instances as inst
+    where r.datainfo.deletedbyinference=false and r.datainfo.invisible = FALSE and inst.refereed.classname='nonPeerReviewed')
+select distinct *
+from (
+    select peer_reviewed.* from peer_reviewed
+    union all
+    select non_peer_reviewed.* from non_peer_reviewed
+    left join peer_reviewed on peer_reviewed.id=non_peer_reviewed.id
+    where peer_reviewed.id is null) pr;
 
 CREATE VIEW IF NOT EXISTS ${stats_db_name}.result_refereed as
 select * from ${stats_db_name}.publication_refereed
@@ -132,4 +104,4 @@ rel.properties[1].value apc_currency
 from ${openaire_db_name}.relation rel
 join ${openaire_db_name}.organization o on o.id=rel.source
 join ${openaire_db_name}.result r on r.id=rel.target
-where rel.subreltype = 'affiliation' and rel.datainfo.deletedbyinference = false and size(rel.properties)>0;
+where rel.subreltype = 'affiliation' and rel.datainfo.deletedbyinference = false and size(rel.properties)>0;
\ No newline at end of file