Changes to indicators

Fixes to the open access colour indicators:
- indi_pub_green_oa
- indi_pub_gold_oa
- indi_pub_hybrid
- indi_pub_bronze_oa
- indi_pub_diamond
This commit is contained in:
dimitrispie 2023-12-01 13:38:19 +02:00
parent a94a54a2d0
commit 76594ded23
7 changed files with 162 additions and 34 deletions

View File

@ -1,6 +1,18 @@
-- Sprint 1 ---- -- Sprint 1 ----
drop table if exists ${stats_db_name}.indi_pub_green_oa purge; drop table if exists ${stats_db_name}.indi_pub_green_oa purge;
--create table if not exists ${stats_db_name}.indi_pub_green_oa stored as parquet as
--select distinct p.id, coalesce(green_oa, 0) as green_oa
--from ${stats_db_name}.publication p
-- left outer join (
-- select p.id, 1 as green_oa
-- from ${stats_db_name}.publication p
-- join ${stats_db_name}.result_instance ri on ri.id = p.id
-- join ${stats_db_name}.datasource on datasource.id = ri.hostedby
-- where datasource.type like '%Repository%'
-- and (ri.accessright = 'Open Access'
-- or ri.accessright = 'Embargo' or ri.accessright = 'Open Source')) tmp
-- on p.id= tmp.id;
-- Builds indi_pub_green_oa: one row per publication id with green_oa = 1 when the
-- left-joined subquery matched and 0 otherwise (coalesce in the outer select).
-- The visible subquery filter keeps instances hosted by a datasource whose type
-- contains 'Repository' and whose access right is 'Open Access', 'Embargo' or
-- 'Open Source'; the revised (right-hand) side additionally excludes the datasource
-- named 'Other'.
-- NOTE(review): the opening lines of the subquery are hidden by the hunk boundary
-- below, so the join that introduces the `ri` alias is not visible in this view.
create table if not exists ${stats_db_name}.indi_pub_green_oa stored as parquet as create table if not exists ${stats_db_name}.indi_pub_green_oa stored as parquet as
select distinct p.id, coalesce(green_oa, 0) as green_oa select distinct p.id, coalesce(green_oa, 0) as green_oa
from ${stats_db_name}.publication p from ${stats_db_name}.publication p
@ -11,7 +23,7 @@ from ${stats_db_name}.publication p
join ${stats_db_name}.datasource on datasource.id = ri.hostedby join ${stats_db_name}.datasource on datasource.id = ri.hostedby
where datasource.type like '%Repository%' where datasource.type like '%Repository%'
and (ri.accessright = 'Open Access' and (ri.accessright = 'Open Access'
or ri.accessright = 'Embargo' or ri.accessright = 'Open Source')) tmp or ri.accessright = 'Embargo' or ri.accessright = 'Open Source') and datasource.name!='Other') tmp
on p.id= tmp.id; on p.id= tmp.id;
drop table if exists ${stats_db_name}.indi_pub_grey_lit purge; drop table if exists ${stats_db_name}.indi_pub_grey_lit purge;
@ -183,11 +195,20 @@ drop table if exists ${stats_db_name}.tmp purge;
---- Sprint 4 ---- ---- Sprint 4 ----
drop table if exists ${stats_db_name}.indi_pub_diamond purge; drop table if exists ${stats_db_name}.indi_pub_diamond purge;
--create table if not exists ${stats_db_name}.indi_pub_diamond stored as parquet as
--select distinct pd.id, coalesce(in_diamond_journal, 0) as in_diamond_journal
--from ${stats_db_name}.publication_datasources pd
-- left outer join (
-- select pd.id, 1 as in_diamond_journal from ${stats_db_name}.publication_datasources pd
-- join ${stats_db_name}.datasource d on d.id=pd.datasource
-- join STATS_EXT.plan_s_jn ps where (ps.issn_print=d.issn_printed and ps.issn_online=d.issn_online)
-- and (ps.journal_is_in_doaj=true or ps.journal_is_oa=true) and ps.has_apc=false) tmp
-- on pd.id=tmp.id;
create table if not exists ${stats_db_name}.indi_pub_diamond stored as parquet as create table if not exists ${stats_db_name}.indi_pub_diamond stored as parquet as
select distinct pd.id, coalesce(in_diamond_journal, 0) as in_diamond_journal select distinct pd.id, coalesce(in_diamond_journal, 0) as in_diamond_journal
from ${stats_db_name}.publication_datasources pd from ${stats_db_name}.publication_datasources pd
left outer join ( left outer join (select pd.id, 1 as in_diamond_journal from ${stats_db_name}.publication_datasources pd
select pd.id, 1 as in_diamond_journal from ${stats_db_name}.publication_datasources pd
join ${stats_db_name}.datasource d on d.id=pd.datasource join ${stats_db_name}.datasource d on d.id=pd.datasource
join STATS_EXT.plan_s_jn ps where (ps.issn_print=d.issn_printed and ps.issn_online=d.issn_online) join STATS_EXT.plan_s_jn ps where (ps.issn_print=d.issn_printed and ps.issn_online=d.issn_online)
and (ps.journal_is_in_doaj=true or ps.journal_is_oa=true) and ps.has_apc=false) tmp and (ps.journal_is_in_doaj=true or ps.journal_is_oa=true) and ps.has_apc=false) tmp
@ -312,28 +333,55 @@ drop table if exists ${stats_db_name}.indi_pub_gold_oa purge;
-- JOIN gold_oa on issn.issn = gold_oa.issn) tmp -- JOIN gold_oa on issn.issn = gold_oa.issn) tmp
-- on pd.id=tmp.id; -- on pd.id=tmp.id;
--create table if not exists ${stats_db_name}.indi_pub_gold_oa stored as parquet as
--with gold_oa as (
--SELECT issn,issn_l from stats_ext.issn_gold_oa_dataset_v5),
--issn AS (SELECT * FROM
--(SELECT id,issn_printed as issn FROM ${stats_db_name}.datasource
--WHERE issn_printed IS NOT NULL
--UNION ALL
--SELECT id, issn_online as issn FROM ${stats_db_name}.datasource
--WHERE issn_online IS NOT NULL or id like '%doajarticles%') as issn
--WHERE LENGTH(issn) > 7),
--alljournals AS(select issn, issn_l from stats_ext.alljournals
--where journal_is_in_doaj=true or journal_is_oa=true)
--SELECT DISTINCT pd.id, coalesce(is_gold, 0) as is_gold
--FROM ${stats_db_name}.publication_datasources pd
--left outer join (
--select pd.id, 1 as is_gold FROM ${stats_db_name}.publication_datasources pd
--JOIN issn on issn.id=pd.datasource
--JOIN gold_oa on issn.issn = gold_oa.issn
--join alljournals on issn.issn=alljournals.issn
--left outer join ${stats_db_name}.result_instance ri on ri.id=pd.id
--and ri.accessright!='Closed Access' and ri.accessright_uw='gold') tmp
--on pd.id=tmp.id;
-- Builds indi_pub_gold_oa: one row per id found in publication_datasources, with
-- is_gold = 1 when the left-joined subquery matched and 0 otherwise.
-- Revised (right-hand) query:
--   gold_oa : distinct ISSN values taken from both the issn and issn_l columns of
--             stats_ext.issn_gold_oa_dataset_v5 and of the stats_ext.alljournals rows
--             where journal_is_in_doaj or journal_is_oa is true.
--   dd      : distinct (datasource id, issn) pairs for datasources whose id contains
--             'doajarticles' or whose printed/online ISSN appears in gold_oa.
-- A publication is flagged when one of its datasources is in dd AND it has a
-- result_accessroute row with accessroute = 'gold'.
-- NOTE(review): the `where ra.accessroute = 'gold'` filter discards the NULL-extended
-- rows produced by the `left outer join`, so that join effectively acts as an inner join.
create table if not exists ${stats_db_name}.indi_pub_gold_oa stored as parquet as create table if not exists ${stats_db_name}.indi_pub_gold_oa stored as parquet as
with gold_oa as ( with gold_oa as (
SELECT issn,issn_l from stats_ext.issn_gold_oa_dataset_v5), select distinct issn from (
issn AS (SELECT * FROM SELECT issn_l as issn from stats_ext.issn_gold_oa_dataset_v5
(SELECT id,issn_printed as issn FROM ${stats_db_name}.datasource
WHERE issn_printed IS NOT NULL
UNION ALL UNION ALL
SELECT id, issn_online as issn FROM ${stats_db_name}.datasource SELECT issn as issn from stats_ext.issn_gold_oa_dataset_v5
WHERE issn_online IS NOT NULL or id like '%doajarticles%') as issn UNION ALL
WHERE LENGTH(issn) > 7), select issn from stats_ext.alljournals where journal_is_in_doaj=true or journal_is_oa=true
alljournals AS(select issn, issn_l from stats_ext.alljournals UNION ALL
where journal_is_in_doaj=true or journal_is_oa=true) select issn_l as issn from stats_ext.alljournals where journal_is_in_doaj=true or journal_is_oa=true) foo),
dd as (
select distinct * from (
select id, issn_printed as issn from ${stats_db_name}.datasource d where d.id like '%doajarticles%'
UNION ALL
select id, issn_online as issn from ${stats_db_name}.datasource d where d.id like '%doajarticles%'
UNION ALL
select id, issn_printed as issn from ${stats_db_name}.datasource d join gold_oa on gold_oa.issn=d.issn_printed
UNION ALL
select id, issn_online as issn from ${stats_db_name}.datasource d join gold_oa on gold_oa.issn=d.issn_online) foo
)
SELECT DISTINCT pd.id, coalesce(is_gold, 0) as is_gold SELECT DISTINCT pd.id, coalesce(is_gold, 0) as is_gold
FROM ${stats_db_name}.publication_datasources pd FROM ${stats_db_name}.publication_datasources pd
left outer join ( left outer join (
select pd.id, 1 as is_gold FROM ${stats_db_name}.publication_datasources pd select pd.id, 1 as is_gold
JOIN issn on issn.id=pd.datasource FROM ${stats_db_name}.publication_datasources pd
JOIN gold_oa on issn.issn = gold_oa.issn join dd on dd.id=pd.datasource
join alljournals on issn.issn=alljournals.issn left outer join ${stats_db_name}.result_accessroute ra on ra.id = pd.id where ra.accessroute = 'gold') tmp on tmp.id=pd.id;
left outer join ${stats_db_name}.result_instance ri on ri.id=pd.id
and ri.accessright!='Closed Access' and ri.accessright_uw='gold') tmp
on pd.id=tmp.id;
drop table if exists ${stats_db_name}.indi_pub_hybrid_oa_with_cc purge; drop table if exists ${stats_db_name}.indi_pub_hybrid_oa_with_cc purge;
@ -421,15 +469,26 @@ drop table if exists ${stats_db_name}.indi_pub_hybrid purge;
-- where (gold_oa.journal_is_in_doaj=false or gold_oa.journal_is_oa=false))tmp -- where (gold_oa.journal_is_in_doaj=false or gold_oa.journal_is_oa=false))tmp
-- on pd.id=tmp.id; -- on pd.id=tmp.id;
--create table if not exists ${stats_db_name}.indi_pub_hybrid stored as parquet as
--select distinct pd.id,coalesce(is_hybrid,0) is_hybrid from ${stats_db_name}.publication_datasources pd
--left outer join (select pd.id, 1 as is_hybrid from ${stats_db_name}.publication_datasources pd
--join ${stats_db_name}.datasource d on pd.datasource=d.id
--join ${stats_db_name}.result_instance ri on ri.id=pd.id
--join ${stats_db_name}.indi_pub_gold_oa indi_gold on indi_gold.id=pd.id
--join ${stats_db_name}.result_accessroute ra on ra.id=pd.id
--where d.type like '%Journal%' and ri.accessright!='Closed Access' and (ri.accessright_uw!='gold'
--or indi_gold.is_gold=0) and (ra.accessroute='hybrid' or ri.license is not null)) tmp
--on pd.id=tmp.id;
-- Builds indi_pub_hybrid: one row per id with is_hybrid = 1 when the left-joined
-- subquery matched and 0 otherwise.
-- The revised (right-hand) query reads ids from publication instead of
-- publication_datasources and resolves the datasource through result_instance.hostedby.
-- A publication is flagged when indi_pub_gold_oa marks it as not gold (is_gold=0) and
-- either (a) it has an instance hosted by a datasource whose type contains 'Journal',
-- with an access right that is neither 'Closed Access' nor 'Restricted' and a non-null
-- license, or (b) it has a result_accessroute row with accessroute = 'hybrid'.
-- Reads indi_pub_gold_oa, so that table must already exist when this statement runs.
-- NOTE(review): result_accessroute is inner-joined, so a publication without any
-- result_accessroute row cannot be flagged through branch (a) alone - confirm intended.
create table if not exists ${stats_db_name}.indi_pub_hybrid stored as parquet as create table if not exists ${stats_db_name}.indi_pub_hybrid stored as parquet as
select distinct pd.id,coalesce(is_hybrid,0) is_hybrid from ${stats_db_name}.publication_datasources pd select distinct pd.id,coalesce(is_hybrid,0) is_hybrid from ${stats_db_name}.publication pd
left outer join (select pd.id, 1 as is_hybrid from ${stats_db_name}.publication_datasources pd left outer join (select pd.id, 1 as is_hybrid from ${stats_db_name}.publication pd
join ${stats_db_name}.datasource d on pd.datasource=d.id
join ${stats_db_name}.result_instance ri on ri.id=pd.id join ${stats_db_name}.result_instance ri on ri.id=pd.id
join ${stats_db_name}.indi_pub_gold_oa indi_gold on indi_gold.id=pd.id join ${stats_db_name}.indi_pub_gold_oa indi_gold on indi_gold.id=pd.id
join ${stats_db_name}.result_accessroute ra on ra.id=pd.id join ${stats_db_name}.result_accessroute ra on ra.id=pd.id
where d.type like '%Journal%' and ri.accessright!='Closed Access' and (ri.accessright_uw!='gold' join ${stats_db_name}.datasource d on d.id=ri.hostedby
or indi_gold.is_gold=0) and (ra.accessroute='hybrid' or ri.license is not null)) tmp where indi_gold.is_gold=0 and ((d.type like '%Journal%' and ri.accessright!='Closed Access' and ri.accessright!='Restricted' and ri.license is not null) or
ra.accessroute='hybrid'))tmp
on pd.id=tmp.id; on pd.id=tmp.id;
drop table if exists ${stats_db_name}.indi_org_fairness purge; drop table if exists ${stats_db_name}.indi_org_fairness purge;
@ -814,14 +873,16 @@ drop table if exists ${stats_db_name}.indi_pub_bronze_oa purge;
--and ri.accessright='Open Access') tmp on tmp.id=p.id; --and ri.accessright='Open Access') tmp on tmp.id=p.id;
-- Builds indi_pub_bronze_oa: one row per id with is_bronze_oa = 1 when the left-joined
-- subquery matched and 0 otherwise.
-- The revised (right-hand) query reads ids from publication instead of
-- publication_datasources, resolves the datasource through result_instance.hostedby and
-- additionally joins indi_pub_hybrid.
-- A publication is flagged when it is neither gold (is_gold=0) nor hybrid (is_hybrid=0)
-- and either (a) it has an instance hosted by a datasource whose type contains 'Journal',
-- with an access right that is neither 'Closed Access' nor 'Restricted' and a null
-- license, or (b) it has a result_accessroute row with accessroute = 'bronze'.
-- Reads indi_pub_gold_oa and indi_pub_hybrid, so both must already exist.
-- NOTE(review): result_accessroute is inner-joined, so branch (a) cannot fire for a
-- publication that has no result_accessroute row - confirm intended.
-- NOTE(review): unlike the other indicator statements shown here, this one is a plain
-- `create table` without `if not exists`.
create table ${stats_db_name}.indi_pub_bronze_oa stored as parquet as create table ${stats_db_name}.indi_pub_bronze_oa stored as parquet as
select distinct pd.id,coalesce(is_bronze_oa,0) is_bronze_oa from ${stats_db_name}.publication_datasources pd select distinct pd.id,coalesce(is_bronze_oa,0) is_bronze_oa from ${stats_db_name}.publication pd
left outer join (select pd.id, 1 as is_bronze_oa from ${stats_db_name}.publication_datasources pd left outer join (select pd.id, 1 as is_bronze_oa from ${stats_db_name}.publication pd
join ${stats_db_name}.datasource d on pd.datasource=d.id
join ${stats_db_name}.result_instance ri on ri.id=pd.id join ${stats_db_name}.result_instance ri on ri.id=pd.id
join ${stats_db_name}.indi_pub_gold_oa indi_gold on indi_gold.id=pd.id join ${stats_db_name}.indi_pub_gold_oa indi_gold on indi_gold.id=pd.id
join ${stats_db_name}.indi_pub_hybrid indi_hybrid on indi_hybrid.id=pd.id
join ${stats_db_name}.result_accessroute ra on ra.id=pd.id join ${stats_db_name}.result_accessroute ra on ra.id=pd.id
where d.type like '%Journal%' and ri.accessright!='Closed Access' and (ri.accessright_uw!='gold' join ${stats_db_name}.datasource d on d.id=ri.hostedby
or indi_gold.is_gold=0) and (ra.accessroute='bronze' or ri.license is null)) tmp where indi_gold.is_gold=0 and indi_hybrid.is_hybrid=0
and ((d.type like '%Journal%' and ri.accessright!='Closed Access'
and ri.accessright!='Restricted' and ri.license is null) or ra.accessroute='bronze')) tmp
on pd.id=tmp.id; on pd.id=tmp.id;
CREATE TEMPORARY TABLE ${stats_db_name}.project_year_result_year as CREATE TEMPORARY TABLE ${stats_db_name}.project_year_result_year as

View File

@ -64,6 +64,8 @@ create table TARGET.result_accessroute stored as parquet as select * from SOURCE
create table TARGET.result_orcid stored as parquet as select * from SOURCE.result_orcid orig where exists (select 1 from TARGET.result r where r.id=orig.id); create table TARGET.result_orcid stored as parquet as select * from SOURCE.result_orcid orig where exists (select 1 from TARGET.result r where r.id=orig.id);
-- Copies into TARGET.result_instance every SOURCE.result_instance row whose id also exists in TARGET.result.
create table TARGET.result_instance stored as parquet as select * from SOURCE.result_instance orig where exists (select 1 from TARGET.result r where r.id=orig.id);
create view TARGET.foo1 as select * from SOURCE.result_result rr where rr.source in (select id from TARGET.result); create view TARGET.foo1 as select * from SOURCE.result_result rr where rr.source in (select id from TARGET.result);
create view TARGET.foo2 as select * from SOURCE.result_result rr where rr.target in (select id from TARGET.result); create view TARGET.foo2 as select * from SOURCE.result_result rr where rr.target in (select id from TARGET.result);
create table TARGET.result_result STORED AS PARQUET as select distinct * from (select * from TARGET.foo1 union all select * from TARGET.foo2) foufou; create table TARGET.result_result STORED AS PARQUET as select distinct * from (select * from TARGET.foo1 union all select * from TARGET.foo2) foufou;

View File

@ -248,6 +248,8 @@ create table TARGET.indi_impact_measures stored as parquet as select * from SOUR
create table TARGET.indi_pub_interdisciplinarity stored as parquet as select * from SOURCE.indi_pub_interdisciplinarity orig where exists (select 1 from TARGET.result r where r.id=orig.id); create table TARGET.indi_pub_interdisciplinarity stored as parquet as select * from SOURCE.indi_pub_interdisciplinarity orig where exists (select 1 from TARGET.result r where r.id=orig.id);
create table TARGET.result_apc_affiliations stored as parquet as select * from SOURCE.result_apc_affiliations orig where exists (select 1 from TARGET.result r where r.id=orig.id); create table TARGET.result_apc_affiliations stored as parquet as select * from SOURCE.result_apc_affiliations orig where exists (select 1 from TARGET.result r where r.id=orig.id);
-- Copy into TARGET the SOURCE.result_instance and SOURCE.result_orcid rows whose id also exists in TARGET.result.
create table TARGET.result_instance stored as parquet as select * from SOURCE.result_instance orig where exists (select 1 from TARGET.result r where r.id=orig.id);
create table TARGET.result_orcid stored as parquet as select * from SOURCE.result_orcid orig where exists (select 1 from TARGET.result r where r.id=orig.id);
create table TARGET.indi_is_project_result_after stored as parquet as select * from SOURCE.indi_is_project_result_after orig where exists (select 1 from TARGET.result r where r.id=orig.result_id); create table TARGET.indi_is_project_result_after stored as parquet as select * from SOURCE.indi_is_project_result_after orig where exists (select 1 from TARGET.result r where r.id=orig.result_id);
create view TARGET.indi_is_funder_plan_s as select * from SOURCE.indi_is_funder_plan_s; create view TARGET.indi_is_funder_plan_s as select * from SOURCE.indi_is_funder_plan_s;

View File

@ -5,6 +5,8 @@
------------------------------------------------------ ------------------------------------------------------
-- Dataset temporary table supporting updates -- Dataset temporary table supporting updates
DROP TABLE IF EXISTS ${stats_db_name}.dataset_tmp purge;
CREATE TABLE ${stats_db_name}.dataset_tmp CREATE TABLE ${stats_db_name}.dataset_tmp
( (
id STRING, id STRING,
@ -40,6 +42,8 @@ SELECT substr(d.id, 4) AS id,
FROM ${openaire_db_name}.dataset d FROM ${openaire_db_name}.dataset d
WHERE d.datainfo.deletedbyinference = FALSE and d.datainfo.invisible=false; WHERE d.datainfo.deletedbyinference = FALSE and d.datainfo.invisible=false;
DROP TABLE IF EXISTS ${stats_db_name}.dataset_citations purge;
CREATE TABLE ${stats_db_name}.dataset_citations STORED AS PARQUET AS CREATE TABLE ${stats_db_name}.dataset_citations STORED AS PARQUET AS
SELECT substr(d.id, 4) AS id, xpath_string(citation.value, "//citation/id[@type='openaire']/@value") AS cites SELECT substr(d.id, 4) AS id, xpath_string(citation.value, "//citation/id[@type='openaire']/@value") AS cites
FROM ${openaire_db_name}.dataset d FROM ${openaire_db_name}.dataset d
@ -47,12 +51,16 @@ FROM ${openaire_db_name}.dataset d
WHERE xpath_string(citation.value, "//citation/id[@type='openaire']/@value") != "" WHERE xpath_string(citation.value, "//citation/id[@type='openaire']/@value") != ""
and d.datainfo.deletedbyinference = false and d.datainfo.invisible=false; and d.datainfo.deletedbyinference = false and d.datainfo.invisible=false;
-- Drops any previous dataset_classifications table (the DROP is the line added by this
-- change), then rebuilds it with one row per exploded element of p.instance.instancetype:
-- the dataset id from its 4th character onward and the instance type class name.
-- Records flagged deletedbyinference or invisible are excluded.
DROP TABLE IF EXISTS ${stats_db_name}.dataset_classifications purge;
CREATE TABLE ${stats_db_name}.dataset_classifications STORED AS PARQUET AS CREATE TABLE ${stats_db_name}.dataset_classifications STORED AS PARQUET AS
SELECT substr(p.id, 4) AS id, instancetype.classname AS type SELECT substr(p.id, 4) AS id, instancetype.classname AS type
FROM ${openaire_db_name}.dataset p FROM ${openaire_db_name}.dataset p
LATERAL VIEW explode(p.instance.instancetype) instances AS instancetype LATERAL VIEW explode(p.instance.instancetype) instances AS instancetype
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false;
DROP TABLE IF EXISTS ${stats_db_name}.dataset_concepts purge;
CREATE TABLE ${stats_db_name}.dataset_concepts STORED AS PARQUET AS CREATE TABLE ${stats_db_name}.dataset_concepts STORED AS PARQUET AS
SELECT substr(p.id, 4) as id, case SELECT substr(p.id, 4) as id, case
when contexts.context.id RLIKE '^[^::]+::[^::]+::.+$' then contexts.context.id when contexts.context.id RLIKE '^[^::]+::[^::]+::.+$' then contexts.context.id
@ -62,6 +70,8 @@ from ${openaire_db_name}.dataset p
LATERAL VIEW explode(p.context) contexts as context LATERAL VIEW explode(p.context) contexts as context
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false;
DROP TABLE IF EXISTS ${stats_db_name}.dataset_datasources purge;
CREATE TABLE ${stats_db_name}.dataset_datasources STORED AS PARQUET AS CREATE TABLE ${stats_db_name}.dataset_datasources STORED AS PARQUET AS
SELECT p.id, case when d.id IS NULL THEN 'other' ELSE p.datasource END AS datasource SELECT p.id, case when d.id IS NULL THEN 'other' ELSE p.datasource END AS datasource
FROM ( FROM (
@ -74,23 +84,31 @@ FROM (
FROM ${openaire_db_name}.datasource d FROM ${openaire_db_name}.datasource d
WHERE d.datainfo.deletedbyinference = false and d.datainfo.invisible=false) d ON p.datasource = d.id; WHERE d.datainfo.deletedbyinference = false and d.datainfo.invisible=false) d ON p.datasource = d.id;
-- Drops any previous dataset_languages table (the DROP is the line added by this change),
-- then rebuilds it with one row per dataset: the id from its 4th character onward and
-- the language class name. Records flagged deletedbyinference or invisible are excluded.
DROP TABLE IF EXISTS ${stats_db_name}.dataset_languages purge;
CREATE TABLE ${stats_db_name}.dataset_languages STORED AS PARQUET AS CREATE TABLE ${stats_db_name}.dataset_languages STORED AS PARQUET AS
SELECT substr(p.id, 4) AS id, p.language.classname AS language SELECT substr(p.id, 4) AS id, p.language.classname AS language
FROM ${openaire_db_name}.dataset p FROM ${openaire_db_name}.dataset p
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false;
-- Drops any previous dataset_oids table (the DROP is the line added by this change),
-- then rebuilds it with one row per exploded element of p.originalid: the dataset id
-- from its 4th character onward and the original id value.
-- Records flagged deletedbyinference or invisible are excluded.
DROP TABLE IF EXISTS ${stats_db_name}.dataset_oids purge;
CREATE TABLE ${stats_db_name}.dataset_oids STORED AS PARQUET AS CREATE TABLE ${stats_db_name}.dataset_oids STORED AS PARQUET AS
SELECT substr(p.id, 4) AS id, oids.ids AS oid SELECT substr(p.id, 4) AS id, oids.ids AS oid
FROM ${openaire_db_name}.dataset p FROM ${openaire_db_name}.dataset p
LATERAL VIEW explode(p.originalid) oids AS ids LATERAL VIEW explode(p.originalid) oids AS ids
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false;
-- Drops any previous dataset_pids table (the DROP is the line added by this change),
-- then rebuilds it with one row per exploded element of p.pid: the dataset id from its
-- 4th character onward, the pid qualifier class name as type, and the pid value.
-- Records flagged deletedbyinference or invisible are excluded.
DROP TABLE IF EXISTS ${stats_db_name}.dataset_pids purge;
CREATE TABLE ${stats_db_name}.dataset_pids STORED AS PARQUET AS CREATE TABLE ${stats_db_name}.dataset_pids STORED AS PARQUET AS
SELECT substr(p.id, 4) AS id, ppid.qualifier.classname AS type, ppid.value AS pid SELECT substr(p.id, 4) AS id, ppid.qualifier.classname AS type, ppid.value AS pid
FROM ${openaire_db_name}.dataset p FROM ${openaire_db_name}.dataset p
LATERAL VIEW explode(p.pid) pids AS ppid LATERAL VIEW explode(p.pid) pids AS ppid
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false;
DROP TABLE IF EXISTS ${stats_db_name}.dataset_topics purge;
CREATE TABLE ${stats_db_name}.dataset_topics STORED AS PARQUET AS CREATE TABLE ${stats_db_name}.dataset_topics STORED AS PARQUET AS
SELECT substr(p.id, 4) AS id, subjects.subject.qualifier.classname AS type, subjects.subject.value AS topic SELECT substr(p.id, 4) AS id, subjects.subject.qualifier.classname AS type, subjects.subject.value AS topic
FROM ${openaire_db_name}.dataset p FROM ${openaire_db_name}.dataset p

View File

@ -5,6 +5,7 @@
-------------------------------------------------------- --------------------------------------------------------
-- Software temporary table supporting updates -- Software temporary table supporting updates
DROP TABLE IF EXISTS ${stats_db_name}.software_tmp purge;
CREATE TABLE ${stats_db_name}.software_tmp CREATE TABLE ${stats_db_name}.software_tmp
( (
id STRING, id STRING,
@ -40,6 +41,8 @@ SELECT substr(s.id, 4) as id,
from ${openaire_db_name}.software s from ${openaire_db_name}.software s
where s.datainfo.deletedbyinference = false and s.datainfo.invisible=false; where s.datainfo.deletedbyinference = false and s.datainfo.invisible=false;
DROP TABLE IF EXISTS ${stats_db_name}.software_citations purge;
CREATE TABLE ${stats_db_name}.software_citations STORED AS PARQUET AS CREATE TABLE ${stats_db_name}.software_citations STORED AS PARQUET AS
SELECT substr(s.id, 4) as id, xpath_string(citation.value, "//citation/id[@type='openaire']/@value") AS cites SELECT substr(s.id, 4) as id, xpath_string(citation.value, "//citation/id[@type='openaire']/@value") AS cites
FROM ${openaire_db_name}.software s FROM ${openaire_db_name}.software s
@ -47,6 +50,8 @@ FROM ${openaire_db_name}.software s
where xpath_string(citation.value, "//citation/id[@type='openaire']/@value") != "" where xpath_string(citation.value, "//citation/id[@type='openaire']/@value") != ""
and s.datainfo.deletedbyinference = false and s.datainfo.invisible=false; and s.datainfo.deletedbyinference = false and s.datainfo.invisible=false;
DROP TABLE IF EXISTS ${stats_db_name}.software_classifications purge;
CREATE TABLE ${stats_db_name}.software_classifications STORED AS PARQUET AS CREATE TABLE ${stats_db_name}.software_classifications STORED AS PARQUET AS
SELECT substr(p.id, 4) AS id, instancetype.classname AS type SELECT substr(p.id, 4) AS id, instancetype.classname AS type
FROM ${openaire_db_name}.software p FROM ${openaire_db_name}.software p
@ -62,6 +67,8 @@ FROM ${openaire_db_name}.software p
LATERAL VIEW explode(p.context) contexts AS context LATERAL VIEW explode(p.context) contexts AS context
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false;
DROP TABLE IF EXISTS ${stats_db_name}.software_datasources purge;
CREATE TABLE ${stats_db_name}.software_datasources STORED AS PARQUET AS CREATE TABLE ${stats_db_name}.software_datasources STORED AS PARQUET AS
SELECT p.id, CASE WHEN d.id IS NULL THEN 'other' ELSE p.datasource end as datasource SELECT p.id, CASE WHEN d.id IS NULL THEN 'other' ELSE p.datasource end as datasource
FROM ( FROM (
@ -74,23 +81,31 @@ FROM (
FROM ${openaire_db_name}.datasource d FROM ${openaire_db_name}.datasource d
WHERE d.datainfo.deletedbyinference = false and d.datainfo.invisible=false) d ON p.datasource = d.id; WHERE d.datainfo.deletedbyinference = false and d.datainfo.invisible=false) d ON p.datasource = d.id;
-- Drops any previous software_languages table (the DROP is the line added by this change),
-- then rebuilds it with one row per software record: the id from its 4th character onward
-- and the language class name. Records flagged deletedbyinference or invisible are excluded.
DROP TABLE IF EXISTS ${stats_db_name}.software_languages purge;
CREATE TABLE ${stats_db_name}.software_languages STORED AS PARQUET AS CREATE TABLE ${stats_db_name}.software_languages STORED AS PARQUET AS
select substr(p.id, 4) AS id, p.language.classname AS language select substr(p.id, 4) AS id, p.language.classname AS language
FROM ${openaire_db_name}.software p FROM ${openaire_db_name}.software p
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false;
-- Drops any previous software_oids table (the DROP is the line added by this change),
-- then rebuilds it with one row per exploded element of p.originalid: the software id
-- from its 4th character onward and the original id value.
-- Records flagged deletedbyinference or invisible are excluded.
DROP TABLE IF EXISTS ${stats_db_name}.software_oids purge;
CREATE TABLE ${stats_db_name}.software_oids STORED AS PARQUET AS CREATE TABLE ${stats_db_name}.software_oids STORED AS PARQUET AS
SELECT substr(p.id, 4) AS id, oids.ids AS oid SELECT substr(p.id, 4) AS id, oids.ids AS oid
FROM ${openaire_db_name}.software p FROM ${openaire_db_name}.software p
LATERAL VIEW explode(p.originalid) oids AS ids LATERAL VIEW explode(p.originalid) oids AS ids
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false;
-- Drops any previous software_pids table (the DROP is the line added by this change),
-- then rebuilds it with one row per exploded element of p.pid: the software id from its
-- 4th character onward, the pid qualifier class name as type, and the pid value.
-- Records flagged deletedbyinference or invisible are excluded.
DROP TABLE IF EXISTS ${stats_db_name}.software_pids purge;
CREATE TABLE ${stats_db_name}.software_pids STORED AS PARQUET AS CREATE TABLE ${stats_db_name}.software_pids STORED AS PARQUET AS
SELECT substr(p.id, 4) AS id, ppid.qualifier.classname AS type, ppid.value AS pid SELECT substr(p.id, 4) AS id, ppid.qualifier.classname AS type, ppid.value AS pid
FROM ${openaire_db_name}.software p FROM ${openaire_db_name}.software p
LATERAL VIEW explode(p.pid) pids AS ppid LATERAL VIEW explode(p.pid) pids AS ppid
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false;
DROP TABLE IF EXISTS ${stats_db_name}.software_topics purge;
CREATE TABLE ${stats_db_name}.software_topics STORED AS PARQUET AS CREATE TABLE ${stats_db_name}.software_topics STORED AS PARQUET AS
SELECT substr(p.id, 4) AS id, subjects.subject.qualifier.classname AS type, subjects.subject.value AS topic SELECT substr(p.id, 4) AS id, subjects.subject.qualifier.classname AS type, subjects.subject.value AS topic
FROM ${openaire_db_name}.software p FROM ${openaire_db_name}.software p

View File

@ -5,6 +5,8 @@
-------------------------------------------------------------------------------- --------------------------------------------------------------------------------
-- Otherresearchproduct temporary table supporting updates -- Otherresearchproduct temporary table supporting updates
DROP TABLE IF EXISTS ${stats_db_name}.otherresearchproduct_tmp purge;
CREATE TABLE ${stats_db_name}.otherresearchproduct_tmp CREATE TABLE ${stats_db_name}.otherresearchproduct_tmp
( (
id STRING, id STRING,
@ -40,6 +42,8 @@ FROM ${openaire_db_name}.otherresearchproduct o
WHERE o.datainfo.deletedbyinference = FALSE and o.datainfo.invisible=false; WHERE o.datainfo.deletedbyinference = FALSE and o.datainfo.invisible=false;
-- Otherresearchproduct_citations -- Otherresearchproduct_citations
DROP TABLE IF EXISTS ${stats_db_name}.otherresearchproduct_citations purge;
CREATE TABLE ${stats_db_name}.otherresearchproduct_citations STORED AS PARQUET AS CREATE TABLE ${stats_db_name}.otherresearchproduct_citations STORED AS PARQUET AS
SELECT substr(o.id, 4) AS id, xpath_string(citation.value, "//citation/id[@type='openaire']/@value") AS cites SELECT substr(o.id, 4) AS id, xpath_string(citation.value, "//citation/id[@type='openaire']/@value") AS cites
FROM ${openaire_db_name}.otherresearchproduct o LATERAL VIEW explode(o.extrainfo) citations AS citation FROM ${openaire_db_name}.otherresearchproduct o LATERAL VIEW explode(o.extrainfo) citations AS citation
@ -51,6 +55,8 @@ SELECT substr(p.id, 4) AS id, instancetype.classname AS type
FROM ${openaire_db_name}.otherresearchproduct p LATERAL VIEW explode(p.instance.instancetype) instances AS instancetype FROM ${openaire_db_name}.otherresearchproduct p LATERAL VIEW explode(p.instance.instancetype) instances AS instancetype
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false;
DROP TABLE IF EXISTS ${stats_db_name}.otherresearchproduct_concepts purge;
CREATE TABLE ${stats_db_name}.otherresearchproduct_concepts STORED AS PARQUET AS CREATE TABLE ${stats_db_name}.otherresearchproduct_concepts STORED AS PARQUET AS
SELECT substr(p.id, 4) as id, case SELECT substr(p.id, 4) as id, case
when contexts.context.id RLIKE '^[^::]+::[^::]+::.+$' then contexts.context.id when contexts.context.id RLIKE '^[^::]+::[^::]+::.+$' then contexts.context.id
@ -59,6 +65,8 @@ SELECT substr(p.id, 4) as id, case
FROM ${openaire_db_name}.otherresearchproduct p LATERAL VIEW explode(p.context) contexts AS context FROM ${openaire_db_name}.otherresearchproduct p LATERAL VIEW explode(p.context) contexts AS context
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false;
DROP TABLE IF EXISTS ${stats_db_name}.otherresearchproduct_datasources purge;
CREATE TABLE ${stats_db_name}.otherresearchproduct_datasources STORED AS PARQUET AS CREATE TABLE ${stats_db_name}.otherresearchproduct_datasources STORED AS PARQUET AS
SELECT p.id, CASE WHEN d.id IS NULL THEN 'other' ELSE p.datasource END AS datasource SELECT p.id, CASE WHEN d.id IS NULL THEN 'other' ELSE p.datasource END AS datasource
FROM (SELECT substr(p.id, 4) AS id, substr(instances.instance.hostedby.key, 4) AS datasource FROM (SELECT substr(p.id, 4) AS id, substr(instances.instance.hostedby.key, 4) AS datasource
@ -68,21 +76,29 @@ FROM (SELECT substr(p.id, 4) AS id, substr(instances.instance.hostedby.key, 4) A
from ${openaire_db_name}.datasource d from ${openaire_db_name}.datasource d
WHERE d.datainfo.deletedbyinference = false and d.datainfo.invisible=false) d on p.datasource = d.id; WHERE d.datainfo.deletedbyinference = false and d.datainfo.invisible=false) d on p.datasource = d.id;
-- Drops any previous otherresearchproduct_languages table (the DROP is the line added by
-- this change), then rebuilds it with one row per record: the id from its 4th character
-- onward and the language class name.
-- Records flagged deletedbyinference or invisible are excluded.
DROP TABLE IF EXISTS ${stats_db_name}.otherresearchproduct_languages purge;
CREATE TABLE ${stats_db_name}.otherresearchproduct_languages STORED AS PARQUET AS CREATE TABLE ${stats_db_name}.otherresearchproduct_languages STORED AS PARQUET AS
SELECT substr(p.id, 4) AS id, p.language.classname AS language SELECT substr(p.id, 4) AS id, p.language.classname AS language
FROM ${openaire_db_name}.otherresearchproduct p FROM ${openaire_db_name}.otherresearchproduct p
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false;
-- Drops any previous otherresearchproduct_oids table (the DROP is the line added by this
-- change), then rebuilds it with one row per exploded element of p.originalid: the record
-- id from its 4th character onward and the original id value.
-- Records flagged deletedbyinference or invisible are excluded.
DROP TABLE IF EXISTS ${stats_db_name}.otherresearchproduct_oids purge;
CREATE TABLE ${stats_db_name}.otherresearchproduct_oids STORED AS PARQUET AS CREATE TABLE ${stats_db_name}.otherresearchproduct_oids STORED AS PARQUET AS
SELECT substr(p.id, 4) AS id, oids.ids AS oid SELECT substr(p.id, 4) AS id, oids.ids AS oid
FROM ${openaire_db_name}.otherresearchproduct p LATERAL VIEW explode(p.originalid) oids AS ids FROM ${openaire_db_name}.otherresearchproduct p LATERAL VIEW explode(p.originalid) oids AS ids
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false;
-- Drops any previous otherresearchproduct_pids table (the DROP is the line added by this
-- change), then rebuilds it with one row per exploded element of p.pid: the record id from
-- its 4th character onward, the pid qualifier class name as type, and the pid value.
-- Records flagged deletedbyinference or invisible are excluded.
DROP TABLE IF EXISTS ${stats_db_name}.otherresearchproduct_pids purge;
CREATE TABLE ${stats_db_name}.otherresearchproduct_pids STORED AS PARQUET AS CREATE TABLE ${stats_db_name}.otherresearchproduct_pids STORED AS PARQUET AS
SELECT substr(p.id, 4) AS id, ppid.qualifier.classname AS type, ppid.value AS pid SELECT substr(p.id, 4) AS id, ppid.qualifier.classname AS type, ppid.value AS pid
FROM ${openaire_db_name}.otherresearchproduct p LATERAL VIEW explode(p.pid) pids AS ppid FROM ${openaire_db_name}.otherresearchproduct p LATERAL VIEW explode(p.pid) pids AS ppid
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false;
DROP TABLE IF EXISTS ${stats_db_name}.otherresearchproduct_topics purge;
CREATE TABLE ${stats_db_name}.otherresearchproduct_topics STORED AS PARQUET AS CREATE TABLE ${stats_db_name}.otherresearchproduct_topics STORED AS PARQUET AS
SELECT substr(p.id, 4) AS id, subjects.subject.qualifier.classname AS type, subjects.subject.value AS topic SELECT substr(p.id, 4) AS id, subjects.subject.qualifier.classname AS type, subjects.subject.value AS topic
FROM ${openaire_db_name}.otherresearchproduct p LATERAL VIEW explode(p.subject) subjects AS subject FROM ${openaire_db_name}.otherresearchproduct p LATERAL VIEW explode(p.subject) subjects AS subject

View File

@ -3,29 +3,39 @@
-- Project table/view and Project related tables/views
------------------------------------------------------
------------------------------------------------------
-- Rebuild project_oids: one row per (project, original id).
-- p.originalid is exploded so each array element becomes its own row.
-- substr(p.id, 4) keeps the id from its 4th character onwards; project ids
-- elsewhere in this script are matched with LIKE '40|%', so this presumably
-- strips that three-character prefix.
-- Records flagged deletedbyinference or invisible are excluded.
DROP TABLE IF EXISTS ${stats_db_name}.project_oids purge;
CREATE TABLE ${stats_db_name}.project_oids STORED AS PARQUET AS
SELECT substr(p.id, 4) AS id, oids.ids AS oid
FROM ${openaire_db_name}.project p
    LATERAL VIEW explode(p.originalid) oids AS ids
WHERE p.datainfo.deletedbyinference = false
  AND p.datainfo.invisible = false;
-- Rebuild project_organizations: (project id, organization id) pairs taken from
-- 'projectOrganization' relations whose source starts with '40|'.
-- substr(..., 4) drops the first three characters of both source and target ids.
-- Relations flagged deletedbyinference or invisible are excluded.
DROP TABLE IF EXISTS ${stats_db_name}.project_organizations purge;
CREATE TABLE ${stats_db_name}.project_organizations STORED AS PARQUET AS
SELECT substr(r.source, 4) AS id, substr(r.target, 4) AS organization
FROM ${openaire_db_name}.relation r
WHERE r.reltype = 'projectOrganization'
  AND r.source like '40|%'
  AND r.datainfo.deletedbyinference = false
  AND r.datainfo.invisible = false;
-- Rebuild project_results: links projects to results using 'resultProject'
-- relations whose target starts with '40|'.
-- Note the direction: the relation target becomes the project id and the
-- relation source becomes the result id; the provenance action classname is
-- carried along as provenance.
-- Relations flagged deletedbyinference or invisible are excluded.
DROP TABLE IF EXISTS ${stats_db_name}.project_results purge;
CREATE TABLE ${stats_db_name}.project_results STORED AS PARQUET AS
SELECT substr(r.target, 4) AS id,
       substr(r.source, 4) AS result,
       r.datainfo.provenanceaction.classname AS provenance
FROM ${openaire_db_name}.relation r
WHERE r.reltype = 'resultProject'
  AND r.target like '40|%'
  AND r.datainfo.deletedbyinference = false
  AND r.datainfo.invisible = false;
-- Rebuild project_classification: one row per (project, h2020classification entry),
-- exposing the programme code and the level1..level3 fields of each entry.
-- Entries whose h2020programme is null are dropped, as are projects flagged
-- deletedbyinference or invisible.
DROP TABLE IF EXISTS ${stats_db_name}.project_classification purge;
create table ${stats_db_name}.project_classification STORED AS PARQUET as
select substr(p.id, 4) as id, class.h2020programme.code, class.level1, class.level2, class.level3
from ${openaire_db_name}.project p
    lateral view explode(p.h2020classification) classifs as class
where p.datainfo.deletedbyinference = false
  and p.datainfo.invisible = false
  and class.h2020programme is not null;
DROP TABLE IF EXISTS ${stats_db_name}.project_tmp purge;
CREATE TABLE ${stats_db_name}.project_tmp CREATE TABLE ${stats_db_name}.project_tmp
( (
id STRING, id STRING,
@ -80,12 +90,16 @@ SELECT substr(p.id, 4) AS id,
FROM ${openaire_db_name}.project p FROM ${openaire_db_name}.project p
WHERE p.datainfo.deletedbyinference = false and p.datainfo.invisible=false; WHERE p.datainfo.deletedbyinference = false and p.datainfo.invisible=false;
-- Rebuild funder: distinct (id, name, shortname) triples extracted with XPath
-- from every funding-tree XML value attached to a project.
-- p.fundingtree.value is exploded so each XML string is parsed on its own row.
-- NOTE(review): unlike the neighbouring project tables, no deletedbyinference /
-- invisible filter is applied here -- confirm this is intentional.
DROP TABLE IF EXISTS ${stats_db_name}.funder purge;
create table ${stats_db_name}.funder STORED AS PARQUET as
select distinct xpath_string(fund, '//funder/id')        as id,
                xpath_string(fund, '//funder/name')      as name,
                xpath_string(fund, '//funder/shortname') as shortname
from ${openaire_db_name}.project p
    lateral view explode(p.fundingtree.value) fundingtree as fund;
DROP TABLE IF EXISTS ${stats_db_name}.project_organization_contribution purge;
CREATE TABLE ${stats_db_name}.project_organization_contribution STORED AS PARQUET AS CREATE TABLE ${stats_db_name}.project_organization_contribution STORED AS PARQUET AS
SELECT distinct substr(r.source, 4) AS project, substr(r.target, 4) AS organization, SELECT distinct substr(r.source, 4) AS project, substr(r.target, 4) AS organization,
properties[0].value contribution, properties[1].value currency properties[0].value contribution, properties[1].value currency