1
0
Fork 0

Changes to indicators

Fixes on open access colours indicators
- indi_pub_green_oa
- indi_pub_gold_oa
- indi_pub_hybrid
- indi_pub_bronze_oa
- indi_pub_diamond
This commit is contained in:
dimitrispie 2023-12-01 13:38:19 +02:00
parent a94a54a2d0
commit 76594ded23
7 changed files with 162 additions and 34 deletions

View File

@ -1,6 +1,18 @@
-- Sprint 1 ----
drop table if exists ${stats_db_name}.indi_pub_green_oa purge;
--create table if not exists ${stats_db_name}.indi_pub_green_oa stored as parquet as
--select distinct p.id, coalesce(green_oa, 0) as green_oa
--from ${stats_db_name}.publication p
-- left outer join (
-- select p.id, 1 as green_oa
-- from ${stats_db_name}.publication p
-- join ${stats_db_name}.result_instance ri on ri.id = p.id
-- join ${stats_db_name}.datasource on datasource.id = ri.hostedby
-- where datasource.type like '%Repository%'
-- and (ri.accessright = 'Open Access'
-- or ri.accessright = 'Embargo' or ri.accessright = 'Open Source')) tmp
-- on p.id= tmp.id;
create table if not exists ${stats_db_name}.indi_pub_green_oa stored as parquet as
select distinct p.id, coalesce(green_oa, 0) as green_oa
from ${stats_db_name}.publication p
@ -11,7 +23,7 @@ from ${stats_db_name}.publication p
join ${stats_db_name}.datasource on datasource.id = ri.hostedby
where datasource.type like '%Repository%'
and (ri.accessright = 'Open Access'
or ri.accessright = 'Embargo' or ri.accessright = 'Open Source')) tmp
or ri.accessright = 'Embargo' or ri.accessright = 'Open Source') and datasource.name!='Other') tmp
on p.id= tmp.id;
drop table if exists ${stats_db_name}.indi_pub_grey_lit purge;
@ -183,15 +195,24 @@ drop table if exists ${stats_db_name}.tmp purge;
---- Sprint 4 ----
drop table if exists ${stats_db_name}.indi_pub_diamond purge;
--create table if not exists ${stats_db_name}.indi_pub_diamond stored as parquet as
--select distinct pd.id, coalesce(in_diamond_journal, 0) as in_diamond_journal
--from ${stats_db_name}.publication_datasources pd
-- left outer join (
-- select pd.id, 1 as in_diamond_journal from ${stats_db_name}.publication_datasources pd
-- join ${stats_db_name}.datasource d on d.id=pd.datasource
-- join STATS_EXT.plan_s_jn ps where (ps.issn_print=d.issn_printed and ps.issn_online=d.issn_online)
-- and (ps.journal_is_in_doaj=true or ps.journal_is_oa=true) and ps.has_apc=false) tmp
-- on pd.id=tmp.id;
create table if not exists ${stats_db_name}.indi_pub_diamond stored as parquet as
select distinct pd.id, coalesce(in_diamond_journal, 0) as in_diamond_journal
from ${stats_db_name}.publication_datasources pd
left outer join (
select pd.id, 1 as in_diamond_journal from ${stats_db_name}.publication_datasources pd
join ${stats_db_name}.datasource d on d.id=pd.datasource
join STATS_EXT.plan_s_jn ps where (ps.issn_print=d.issn_printed and ps.issn_online=d.issn_online)
and (ps.journal_is_in_doaj=true or ps.journal_is_oa=true) and ps.has_apc=false) tmp
on pd.id=tmp.id;
left outer join (select pd.id, 1 as in_diamond_journal from ${stats_db_name}.publication_datasources pd
join ${stats_db_name}.datasource d on d.id=pd.datasource
join STATS_EXT.plan_s_jn ps where (ps.issn_print=d.issn_printed and ps.issn_online=d.issn_online)
and (ps.journal_is_in_doaj=true or ps.journal_is_oa=true) and ps.has_apc=false) tmp
on pd.id=tmp.id;
drop table if exists ${stats_db_name}.indi_pub_in_transformative purge;
@ -312,28 +333,55 @@ drop table if exists ${stats_db_name}.indi_pub_gold_oa purge;
-- JOIN gold_oa on issn.issn = gold_oa.issn) tmp
-- on pd.id=tmp.id;
--create table if not exists ${stats_db_name}.indi_pub_gold_oa stored as parquet as
--with gold_oa as (
--SELECT issn,issn_l from stats_ext.issn_gold_oa_dataset_v5),
--issn AS (SELECT * FROM
--(SELECT id,issn_printed as issn FROM ${stats_db_name}.datasource
--WHERE issn_printed IS NOT NULL
--UNION ALL
--SELECT id, issn_online as issn FROM ${stats_db_name}.datasource
--WHERE issn_online IS NOT NULL or id like '%doajarticles%') as issn
--WHERE LENGTH(issn) > 7),
--alljournals AS(select issn, issn_l from stats_ext.alljournals
--where journal_is_in_doaj=true or journal_is_oa=true)
--SELECT DISTINCT pd.id, coalesce(is_gold, 0) as is_gold
--FROM ${stats_db_name}.publication_datasources pd
--left outer join (
--select pd.id, 1 as is_gold FROM ${stats_db_name}.publication_datasources pd
--JOIN issn on issn.id=pd.datasource
--JOIN gold_oa on issn.issn = gold_oa.issn
--join alljournals on issn.issn=alljournals.issn
--left outer join ${stats_db_name}.result_instance ri on ri.id=pd.id
--and ri.accessright!='Closed Access' and ri.accessright_uw='gold') tmp
--on pd.id=tmp.id;
create table if not exists ${stats_db_name}.indi_pub_gold_oa stored as parquet as
with gold_oa as (
SELECT issn,issn_l from stats_ext.issn_gold_oa_dataset_v5),
issn AS (SELECT * FROM
(SELECT id,issn_printed as issn FROM ${stats_db_name}.datasource
WHERE issn_printed IS NOT NULL
UNION ALL
SELECT id, issn_online as issn FROM ${stats_db_name}.datasource
WHERE issn_online IS NOT NULL or id like '%doajarticles%') as issn
WHERE LENGTH(issn) > 7),
alljournals AS(select issn, issn_l from stats_ext.alljournals
where journal_is_in_doaj=true or journal_is_oa=true)
select distinct issn from (
SELECT issn_l as issn from stats_ext.issn_gold_oa_dataset_v5
UNION ALL
SELECT issn as issn from stats_ext.issn_gold_oa_dataset_v5
UNION ALL
select issn from stats_ext.alljournals where journal_is_in_doaj=true or journal_is_oa=true
UNION ALL
select issn_l as issn from stats_ext.alljournals where journal_is_in_doaj=true or journal_is_oa=true) foo),
dd as (
select distinct * from (
select id, issn_printed as issn from ${stats_db_name}.datasource d where d.id like '%doajarticles%'
UNION ALL
select id, issn_online as issn from ${stats_db_name}.datasource d where d.id like '%doajarticles%'
UNION ALL
select id, issn_printed as issn from ${stats_db_name}.datasource d join gold_oa on gold_oa.issn=d.issn_printed
UNION ALL
select id, issn_online as issn from ${stats_db_name}.datasource d join gold_oa on gold_oa.issn=d.issn_online) foo
)
SELECT DISTINCT pd.id, coalesce(is_gold, 0) as is_gold
FROM ${stats_db_name}.publication_datasources pd
left outer join (
select pd.id, 1 as is_gold FROM ${stats_db_name}.publication_datasources pd
JOIN issn on issn.id=pd.datasource
JOIN gold_oa on issn.issn = gold_oa.issn
join alljournals on issn.issn=alljournals.issn
left outer join ${stats_db_name}.result_instance ri on ri.id=pd.id
and ri.accessright!='Closed Access' and ri.accessright_uw='gold') tmp
on pd.id=tmp.id;
select pd.id, 1 as is_gold
FROM ${stats_db_name}.publication_datasources pd
join dd on dd.id=pd.datasource
left outer join ${stats_db_name}.result_accessroute ra on ra.id = pd.id where ra.accessroute = 'gold') tmp on tmp.id=pd.id;
drop table if exists ${stats_db_name}.indi_pub_hybrid_oa_with_cc purge;
@ -421,15 +469,26 @@ drop table if exists ${stats_db_name}.indi_pub_hybrid purge;
-- where (gold_oa.journal_is_in_doaj=false or gold_oa.journal_is_oa=false))tmp
-- on pd.id=tmp.id;
--create table if not exists ${stats_db_name}.indi_pub_hybrid stored as parquet as
--select distinct pd.id,coalesce(is_hybrid,0) is_hybrid from ${stats_db_name}.publication_datasources pd
--left outer join (select pd.id, 1 as is_hybrid from ${stats_db_name}.publication_datasources pd
--join ${stats_db_name}.datasource d on pd.datasource=d.id
--join ${stats_db_name}.result_instance ri on ri.id=pd.id
--join ${stats_db_name}.indi_pub_gold_oa indi_gold on indi_gold.id=pd.id
--join ${stats_db_name}.result_accessroute ra on ra.id=pd.id
--where d.type like '%Journal%' and ri.accessright!='Closed Access' and (ri.accessright_uw!='gold'
--or indi_gold.is_gold=0) and (ra.accessroute='hybrid' or ri.license is not null)) tmp
--on pd.id=tmp.id;
create table if not exists ${stats_db_name}.indi_pub_hybrid stored as parquet as
select distinct pd.id,coalesce(is_hybrid,0) is_hybrid from ${stats_db_name}.publication_datasources pd
left outer join (select pd.id, 1 as is_hybrid from ${stats_db_name}.publication_datasources pd
join ${stats_db_name}.datasource d on pd.datasource=d.id
select distinct pd.id,coalesce(is_hybrid,0) is_hybrid from ${stats_db_name}.publication pd
left outer join (select pd.id, 1 as is_hybrid from ${stats_db_name}.publication pd
join ${stats_db_name}.result_instance ri on ri.id=pd.id
join ${stats_db_name}.indi_pub_gold_oa indi_gold on indi_gold.id=pd.id
join ${stats_db_name}.result_accessroute ra on ra.id=pd.id
where d.type like '%Journal%' and ri.accessright!='Closed Access' and (ri.accessright_uw!='gold'
or indi_gold.is_gold=0) and (ra.accessroute='hybrid' or ri.license is not null)) tmp
join ${stats_db_name}.datasource d on d.id=ri.hostedby
where indi_gold.is_gold=0 and ((d.type like '%Journal%' and ri.accessright!='Closed Access' and ri.accessright!='Restricted' and ri.license is not null) or
ra.accessroute='hybrid'))tmp
on pd.id=tmp.id;
drop table if exists ${stats_db_name}.indi_org_fairness purge;
@ -814,14 +873,16 @@ drop table if exists ${stats_db_name}.indi_pub_bronze_oa purge;
--and ri.accessright='Open Access') tmp on tmp.id=p.id;
create table ${stats_db_name}.indi_pub_bronze_oa stored as parquet as
select distinct pd.id,coalesce(is_bronze_oa,0) is_bronze_oa from ${stats_db_name}.publication_datasources pd
left outer join (select pd.id, 1 as is_bronze_oa from ${stats_db_name}.publication_datasources pd
join ${stats_db_name}.datasource d on pd.datasource=d.id
select distinct pd.id,coalesce(is_bronze_oa,0) is_bronze_oa from ${stats_db_name}.publication pd
left outer join (select pd.id, 1 as is_bronze_oa from ${stats_db_name}.publication pd
join ${stats_db_name}.result_instance ri on ri.id=pd.id
join ${stats_db_name}.indi_pub_gold_oa indi_gold on indi_gold.id=pd.id
join ${stats_db_name}.indi_pub_hybrid indi_hybrid on indi_hybrid.id=pd.id
join ${stats_db_name}.result_accessroute ra on ra.id=pd.id
where d.type like '%Journal%' and ri.accessright!='Closed Access' and (ri.accessright_uw!='gold'
or indi_gold.is_gold=0) and (ra.accessroute='bronze' or ri.license is null)) tmp
join ${stats_db_name}.datasource d on d.id=ri.hostedby
where indi_gold.is_gold=0 and indi_hybrid.is_hybrid=0
and ((d.type like '%Journal%' and ri.accessright!='Closed Access'
and ri.accessright!='Restricted' and ri.license is null) or ra.accessroute='bronze')) tmp
on pd.id=tmp.id;
CREATE TEMPORARY TABLE ${stats_db_name}.project_year_result_year as

View File

@ -64,6 +64,8 @@ create table TARGET.result_accessroute stored as parquet as select * from SOURCE
create table TARGET.result_orcid stored as parquet as select * from SOURCE.result_orcid orig where exists (select 1 from TARGET.result r where r.id=orig.id);
create table TARGET.result_instance stored as parquet as select * from SOURCE.result_instance orig where exists (select 1 from TARGET.result r where r.id=orig.id);
create view TARGET.foo1 as select * from SOURCE.result_result rr where rr.source in (select id from TARGET.result);
create view TARGET.foo2 as select * from SOURCE.result_result rr where rr.target in (select id from TARGET.result);
create table TARGET.result_result STORED AS PARQUET as select distinct * from (select * from TARGET.foo1 union all select * from TARGET.foo2) foufou;

View File

@ -248,6 +248,8 @@ create table TARGET.indi_impact_measures stored as parquet as select * from SOUR
create table TARGET.indi_pub_interdisciplinarity stored as parquet as select * from SOURCE.indi_pub_interdisciplinarity orig where exists (select 1 from TARGET.result r where r.id=orig.id);
create table TARGET.result_apc_affiliations stored as parquet as select * from SOURCE.result_apc_affiliations orig where exists (select 1 from TARGET.result r where r.id=orig.id);
create table TARGET.result_instance stored as parquet as select * from SOURCE.result_instance orig where exists (select 1 from TARGET.result r where r.id=orig.id);
create table TARGET.result_orcid stored as parquet as select * from SOURCE.result_orcid orig where exists (select 1 from TARGET.result r where r.id=orig.id);
create table TARGET.indi_is_project_result_after stored as parquet as select * from SOURCE.indi_is_project_result_after orig where exists (select 1 from TARGET.result r where r.id=orig.result_id);
create view TARGET.indi_is_funder_plan_s as select * from SOURCE.indi_is_funder_plan_s;

View File

@ -5,6 +5,8 @@
------------------------------------------------------
-- Dataset temporary table supporting updates
DROP TABLE IF EXISTS ${stats_db_name}.dataset_tmp purge;
CREATE TABLE ${stats_db_name}.dataset_tmp
(
id STRING,
@ -40,6 +42,8 @@ SELECT substr(d.id, 4) AS id,
FROM ${openaire_db_name}.dataset d
WHERE d.datainfo.deletedbyinference = FALSE and d.datainfo.invisible=false;
DROP TABLE IF EXISTS ${stats_db_name}.dataset_citations purge;
CREATE TABLE ${stats_db_name}.dataset_citations STORED AS PARQUET AS
SELECT substr(d.id, 4) AS id, xpath_string(citation.value, "//citation/id[@type='openaire']/@value") AS cites
FROM ${openaire_db_name}.dataset d
@ -47,12 +51,16 @@ FROM ${openaire_db_name}.dataset d
WHERE xpath_string(citation.value, "//citation/id[@type='openaire']/@value") != ""
and d.datainfo.deletedbyinference = false and d.datainfo.invisible=false;
DROP TABLE IF EXISTS ${stats_db_name}.dataset_classifications purge;
CREATE TABLE ${stats_db_name}.dataset_classifications STORED AS PARQUET AS
SELECT substr(p.id, 4) AS id, instancetype.classname AS type
FROM ${openaire_db_name}.dataset p
LATERAL VIEW explode(p.instance.instancetype) instances AS instancetype
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false;
DROP TABLE IF EXISTS ${stats_db_name}.dataset_concepts purge;
CREATE TABLE ${stats_db_name}.dataset_concepts STORED AS PARQUET AS
SELECT substr(p.id, 4) as id, case
when contexts.context.id RLIKE '^[^::]+::[^::]+::.+$' then contexts.context.id
@ -62,6 +70,8 @@ from ${openaire_db_name}.dataset p
LATERAL VIEW explode(p.context) contexts as context
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false;
DROP TABLE IF EXISTS ${stats_db_name}.dataset_datasources purge;
CREATE TABLE ${stats_db_name}.dataset_datasources STORED AS PARQUET AS
SELECT p.id, case when d.id IS NULL THEN 'other' ELSE p.datasource END AS datasource
FROM (
@ -74,23 +84,31 @@ FROM (
FROM ${openaire_db_name}.datasource d
WHERE d.datainfo.deletedbyinference = false and d.datainfo.invisible=false) d ON p.datasource = d.id;
DROP TABLE IF EXISTS ${stats_db_name}.dataset_languages purge;
CREATE TABLE ${stats_db_name}.dataset_languages STORED AS PARQUET AS
SELECT substr(p.id, 4) AS id, p.language.classname AS language
FROM ${openaire_db_name}.dataset p
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false;
DROP TABLE IF EXISTS ${stats_db_name}.dataset_oids purge;
CREATE TABLE ${stats_db_name}.dataset_oids STORED AS PARQUET AS
SELECT substr(p.id, 4) AS id, oids.ids AS oid
FROM ${openaire_db_name}.dataset p
LATERAL VIEW explode(p.originalid) oids AS ids
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false;
DROP TABLE IF EXISTS ${stats_db_name}.dataset_pids purge;
CREATE TABLE ${stats_db_name}.dataset_pids STORED AS PARQUET AS
SELECT substr(p.id, 4) AS id, ppid.qualifier.classname AS type, ppid.value AS pid
FROM ${openaire_db_name}.dataset p
LATERAL VIEW explode(p.pid) pids AS ppid
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false;
DROP TABLE IF EXISTS ${stats_db_name}.dataset_topics purge;
CREATE TABLE ${stats_db_name}.dataset_topics STORED AS PARQUET AS
SELECT substr(p.id, 4) AS id, subjects.subject.qualifier.classname AS type, subjects.subject.value AS topic
FROM ${openaire_db_name}.dataset p

View File

@ -5,6 +5,7 @@
--------------------------------------------------------
-- Software temporary table supporting updates
DROP TABLE IF EXISTS ${stats_db_name}.software_tmp purge;
CREATE TABLE ${stats_db_name}.software_tmp
(
id STRING,
@ -40,6 +41,8 @@ SELECT substr(s.id, 4) as id,
from ${openaire_db_name}.software s
where s.datainfo.deletedbyinference = false and s.datainfo.invisible=false;
DROP TABLE IF EXISTS ${stats_db_name}.software_citations purge;
CREATE TABLE ${stats_db_name}.software_citations STORED AS PARQUET AS
SELECT substr(s.id, 4) as id, xpath_string(citation.value, "//citation/id[@type='openaire']/@value") AS cites
FROM ${openaire_db_name}.software s
@ -47,6 +50,8 @@ FROM ${openaire_db_name}.software s
where xpath_string(citation.value, "//citation/id[@type='openaire']/@value") != ""
and s.datainfo.deletedbyinference = false and s.datainfo.invisible=false;
DROP TABLE IF EXISTS ${stats_db_name}.software_classifications purge;
CREATE TABLE ${stats_db_name}.software_classifications STORED AS PARQUET AS
SELECT substr(p.id, 4) AS id, instancetype.classname AS type
FROM ${openaire_db_name}.software p
@ -62,6 +67,8 @@ FROM ${openaire_db_name}.software p
LATERAL VIEW explode(p.context) contexts AS context
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false;
DROP TABLE IF EXISTS ${stats_db_name}.software_datasources purge;
CREATE TABLE ${stats_db_name}.software_datasources STORED AS PARQUET AS
SELECT p.id, CASE WHEN d.id IS NULL THEN 'other' ELSE p.datasource end as datasource
FROM (
@ -74,23 +81,31 @@ FROM (
FROM ${openaire_db_name}.datasource d
WHERE d.datainfo.deletedbyinference = false and d.datainfo.invisible=false) d ON p.datasource = d.id;
DROP TABLE IF EXISTS ${stats_db_name}.software_languages purge;
CREATE TABLE ${stats_db_name}.software_languages STORED AS PARQUET AS
select substr(p.id, 4) AS id, p.language.classname AS language
FROM ${openaire_db_name}.software p
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false;
DROP TABLE IF EXISTS ${stats_db_name}.software_oids purge;
CREATE TABLE ${stats_db_name}.software_oids STORED AS PARQUET AS
SELECT substr(p.id, 4) AS id, oids.ids AS oid
FROM ${openaire_db_name}.software p
LATERAL VIEW explode(p.originalid) oids AS ids
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false;
DROP TABLE IF EXISTS ${stats_db_name}.software_pids purge;
CREATE TABLE ${stats_db_name}.software_pids STORED AS PARQUET AS
SELECT substr(p.id, 4) AS id, ppid.qualifier.classname AS type, ppid.value AS pid
FROM ${openaire_db_name}.software p
LATERAL VIEW explode(p.pid) pids AS ppid
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false;
DROP TABLE IF EXISTS ${stats_db_name}.software_topics purge;
CREATE TABLE ${stats_db_name}.software_topics STORED AS PARQUET AS
SELECT substr(p.id, 4) AS id, subjects.subject.qualifier.classname AS type, subjects.subject.value AS topic
FROM ${openaire_db_name}.software p

View File

@ -5,6 +5,8 @@
--------------------------------------------------------------------------------
-- Otherresearchproduct temporary table supporting updates
DROP TABLE IF EXISTS ${stats_db_name}.otherresearchproduct_tmp purge;
CREATE TABLE ${stats_db_name}.otherresearchproduct_tmp
(
id STRING,
@ -40,6 +42,8 @@ FROM ${openaire_db_name}.otherresearchproduct o
WHERE o.datainfo.deletedbyinference = FALSE and o.datainfo.invisible=false;
-- Otherresearchproduct_citations
DROP TABLE IF EXISTS ${stats_db_name}.otherresearchproduct_citations purge;
CREATE TABLE ${stats_db_name}.otherresearchproduct_citations STORED AS PARQUET AS
SELECT substr(o.id, 4) AS id, xpath_string(citation.value, "//citation/id[@type='openaire']/@value") AS cites
FROM ${openaire_db_name}.otherresearchproduct o LATERAL VIEW explode(o.extrainfo) citations AS citation
@ -51,6 +55,8 @@ SELECT substr(p.id, 4) AS id, instancetype.classname AS type
FROM ${openaire_db_name}.otherresearchproduct p LATERAL VIEW explode(p.instance.instancetype) instances AS instancetype
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false;
DROP TABLE IF EXISTS ${stats_db_name}.otherresearchproduct_concepts purge;
CREATE TABLE ${stats_db_name}.otherresearchproduct_concepts STORED AS PARQUET AS
SELECT substr(p.id, 4) as id, case
when contexts.context.id RLIKE '^[^::]+::[^::]+::.+$' then contexts.context.id
@ -59,6 +65,8 @@ SELECT substr(p.id, 4) as id, case
FROM ${openaire_db_name}.otherresearchproduct p LATERAL VIEW explode(p.context) contexts AS context
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false;
DROP TABLE IF EXISTS ${stats_db_name}.otherresearchproduct_datasources purge;
CREATE TABLE ${stats_db_name}.otherresearchproduct_datasources STORED AS PARQUET AS
SELECT p.id, CASE WHEN d.id IS NULL THEN 'other' ELSE p.datasource END AS datasource
FROM (SELECT substr(p.id, 4) AS id, substr(instances.instance.hostedby.key, 4) AS datasource
@ -68,21 +76,29 @@ FROM (SELECT substr(p.id, 4) AS id, substr(instances.instance.hostedby.key, 4) A
from ${openaire_db_name}.datasource d
WHERE d.datainfo.deletedbyinference = false and d.datainfo.invisible=false) d on p.datasource = d.id;
DROP TABLE IF EXISTS ${stats_db_name}.otherresearchproduct_languages purge;
CREATE TABLE ${stats_db_name}.otherresearchproduct_languages STORED AS PARQUET AS
SELECT substr(p.id, 4) AS id, p.language.classname AS language
FROM ${openaire_db_name}.otherresearchproduct p
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false;
DROP TABLE IF EXISTS ${stats_db_name}.otherresearchproduct_oids purge;
CREATE TABLE ${stats_db_name}.otherresearchproduct_oids STORED AS PARQUET AS
SELECT substr(p.id, 4) AS id, oids.ids AS oid
FROM ${openaire_db_name}.otherresearchproduct p LATERAL VIEW explode(p.originalid) oids AS ids
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false;
DROP TABLE IF EXISTS ${stats_db_name}.otherresearchproduct_pids purge;
CREATE TABLE ${stats_db_name}.otherresearchproduct_pids STORED AS PARQUET AS
SELECT substr(p.id, 4) AS id, ppid.qualifier.classname AS type, ppid.value AS pid
FROM ${openaire_db_name}.otherresearchproduct p LATERAL VIEW explode(p.pid) pids AS ppid
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false;
DROP TABLE IF EXISTS ${stats_db_name}.otherresearchproduct_topics purge;
CREATE TABLE ${stats_db_name}.otherresearchproduct_topics STORED AS PARQUET AS
SELECT substr(p.id, 4) AS id, subjects.subject.qualifier.classname AS type, subjects.subject.value AS topic
FROM ${openaire_db_name}.otherresearchproduct p LATERAL VIEW explode(p.subject) subjects AS subject

View File

@ -3,29 +3,39 @@
-- Project table/view and Project related tables/views
------------------------------------------------------
------------------------------------------------------
DROP TABLE IF EXISTS ${stats_db_name}.project_oids purge;
CREATE TABLE ${stats_db_name}.project_oids STORED AS PARQUET AS
SELECT substr(p.id, 4) AS id, oids.ids AS oid
FROM ${openaire_db_name}.project p LATERAL VIEW explode(p.originalid) oids AS ids
where p.datainfo.deletedbyinference=false and p.datainfo.invisible=false;
DROP TABLE IF EXISTS ${stats_db_name}.project_organizations purge;
CREATE TABLE ${stats_db_name}.project_organizations STORED AS PARQUET AS
SELECT substr(r.source, 4) AS id, substr(r.target, 4) AS organization
from ${openaire_db_name}.relation r
WHERE r.reltype = 'projectOrganization' and r.source like '40|%'
and r.datainfo.deletedbyinference = false and r.datainfo.invisible=false;
DROP TABLE IF EXISTS ${stats_db_name}.project_results purge;
CREATE TABLE ${stats_db_name}.project_results STORED AS PARQUET AS
SELECT substr(r.target, 4) AS id, substr(r.source, 4) AS result, r.datainfo.provenanceaction.classname as provenance
FROM ${openaire_db_name}.relation r
WHERE r.reltype = 'resultProject' and r.target like '40|%'
and r.datainfo.deletedbyinference = false and r.datainfo.invisible=false;
DROP TABLE IF EXISTS ${stats_db_name}.project_classification purge;
create table ${stats_db_name}.project_classification STORED AS PARQUET as
select substr(p.id, 4) as id, class.h2020programme.code, class.level1, class.level2, class.level3
from ${openaire_db_name}.project p
lateral view explode(p.h2020classification) classifs as class
where p.datainfo.deletedbyinference=false and p.datainfo.invisible=false and class.h2020programme is not null;
DROP TABLE IF EXISTS ${stats_db_name}.project_tmp purge;
CREATE TABLE ${stats_db_name}.project_tmp
(
id STRING,
@ -80,12 +90,16 @@ SELECT substr(p.id, 4) AS id,
FROM ${openaire_db_name}.project p
WHERE p.datainfo.deletedbyinference = false and p.datainfo.invisible=false;
DROP TABLE IF EXISTS ${stats_db_name}.funder purge;
create table ${stats_db_name}.funder STORED AS PARQUET as
select distinct xpath_string(fund, '//funder/id') as id,
xpath_string(fund, '//funder/name') as name,
xpath_string(fund, '//funder/shortname') as shortname
from ${openaire_db_name}.project p lateral view explode(p.fundingtree.value) fundingtree as fund;
DROP TABLE IF EXISTS ${stats_db_name}.project_organization_contribution purge;
CREATE TABLE ${stats_db_name}.project_organization_contribution STORED AS PARQUET AS
SELECT distinct substr(r.source, 4) AS project, substr(r.target, 4) AS organization,
properties[0].value contribution, properties[1].value currency