forked from D-Net/dnet-hadoop
Changes to indicators
Fixes to the open-access colour indicators: indi_pub_green_oa, indi_pub_gold_oa, indi_pub_hybrid, indi_pub_bronze_oa, indi_pub_diamond
This commit is contained in:
parent
a94a54a2d0
commit
76594ded23
|
@ -1,6 +1,18 @@
|
|||
-- Sprint 1 ----
|
||||
drop table if exists ${stats_db_name}.indi_pub_green_oa purge;
|
||||
|
||||
--create table if not exists ${stats_db_name}.indi_pub_green_oa stored as parquet as
|
||||
--select distinct p.id, coalesce(green_oa, 0) as green_oa
|
||||
--from ${stats_db_name}.publication p
|
||||
-- left outer join (
|
||||
-- select p.id, 1 as green_oa
|
||||
-- from ${stats_db_name}.publication p
|
||||
-- join ${stats_db_name}.result_instance ri on ri.id = p.id
|
||||
-- join ${stats_db_name}.datasource on datasource.id = ri.hostedby
|
||||
-- where datasource.type like '%Repository%'
|
||||
-- and (ri.accessright = 'Open Access'
|
||||
-- or ri.accessright = 'Embargo' or ri.accessright = 'Open Source')) tmp
|
||||
-- on p.id= tmp.id;
|
||||
create table if not exists ${stats_db_name}.indi_pub_green_oa stored as parquet as
|
||||
select distinct p.id, coalesce(green_oa, 0) as green_oa
|
||||
from ${stats_db_name}.publication p
|
||||
|
@ -11,7 +23,7 @@ from ${stats_db_name}.publication p
|
|||
join ${stats_db_name}.datasource on datasource.id = ri.hostedby
|
||||
where datasource.type like '%Repository%'
|
||||
and (ri.accessright = 'Open Access'
|
||||
or ri.accessright = 'Embargo' or ri.accessright = 'Open Source')) tmp
|
||||
or ri.accessright = 'Embargo' or ri.accessright = 'Open Source') and datasource.name!='Other') tmp
|
||||
on p.id= tmp.id;
|
||||
|
||||
drop table if exists ${stats_db_name}.indi_pub_grey_lit purge;
|
||||
|
@ -183,11 +195,20 @@ drop table if exists ${stats_db_name}.tmp purge;
|
|||
---- Sprint 4 ----
|
||||
drop table if exists ${stats_db_name}.indi_pub_diamond purge;
|
||||
|
||||
--create table if not exists ${stats_db_name}.indi_pub_diamond stored as parquet as
|
||||
--select distinct pd.id, coalesce(in_diamond_journal, 0) as in_diamond_journal
|
||||
--from ${stats_db_name}.publication_datasources pd
|
||||
-- left outer join (
|
||||
-- select pd.id, 1 as in_diamond_journal from ${stats_db_name}.publication_datasources pd
|
||||
-- join ${stats_db_name}.datasource d on d.id=pd.datasource
|
||||
-- join STATS_EXT.plan_s_jn ps where (ps.issn_print=d.issn_printed and ps.issn_online=d.issn_online)
|
||||
-- and (ps.journal_is_in_doaj=true or ps.journal_is_oa=true) and ps.has_apc=false) tmp
|
||||
-- on pd.id=tmp.id;
|
||||
|
||||
create table if not exists ${stats_db_name}.indi_pub_diamond stored as parquet as
|
||||
select distinct pd.id, coalesce(in_diamond_journal, 0) as in_diamond_journal
|
||||
from ${stats_db_name}.publication_datasources pd
|
||||
left outer join (
|
||||
select pd.id, 1 as in_diamond_journal from ${stats_db_name}.publication_datasources pd
|
||||
left outer join (select pd.id, 1 as in_diamond_journal from ${stats_db_name}.publication_datasources pd
|
||||
join ${stats_db_name}.datasource d on d.id=pd.datasource
|
||||
join STATS_EXT.plan_s_jn ps where (ps.issn_print=d.issn_printed and ps.issn_online=d.issn_online)
|
||||
and (ps.journal_is_in_doaj=true or ps.journal_is_oa=true) and ps.has_apc=false) tmp
|
||||
|
@ -312,28 +333,55 @@ drop table if exists ${stats_db_name}.indi_pub_gold_oa purge;
|
|||
-- JOIN gold_oa on issn.issn = gold_oa.issn) tmp
|
||||
-- on pd.id=tmp.id;
|
||||
|
||||
--create table if not exists ${stats_db_name}.indi_pub_gold_oa stored as parquet as
|
||||
--with gold_oa as (
|
||||
--SELECT issn,issn_l from stats_ext.issn_gold_oa_dataset_v5),
|
||||
--issn AS (SELECT * FROM
|
||||
--(SELECT id,issn_printed as issn FROM ${stats_db_name}.datasource
|
||||
--WHERE issn_printed IS NOT NULL
|
||||
--UNION ALL
|
||||
--SELECT id, issn_online as issn FROM ${stats_db_name}.datasource
|
||||
--WHERE issn_online IS NOT NULL or id like '%doajarticles%') as issn
|
||||
--WHERE LENGTH(issn) > 7),
|
||||
--alljournals AS(select issn, issn_l from stats_ext.alljournals
|
||||
--where journal_is_in_doaj=true or journal_is_oa=true)
|
||||
--SELECT DISTINCT pd.id, coalesce(is_gold, 0) as is_gold
|
||||
--FROM ${stats_db_name}.publication_datasources pd
|
||||
--left outer join (
|
||||
--select pd.id, 1 as is_gold FROM ${stats_db_name}.publication_datasources pd
|
||||
--JOIN issn on issn.id=pd.datasource
|
||||
--JOIN gold_oa on issn.issn = gold_oa.issn
|
||||
--join alljournals on issn.issn=alljournals.issn
|
||||
--left outer join ${stats_db_name}.result_instance ri on ri.id=pd.id
|
||||
--and ri.accessright!='Closed Access' and ri.accessright_uw='gold') tmp
|
||||
--on pd.id=tmp.id;
|
||||
-- indi_pub_gold_oa: one row per distinct id of publication_datasources.
-- is_gold = 1 when the publication is linked to a datasource listed in `dd` AND has a
-- result_accessroute row with accessroute = 'gold'; otherwise is_gold = 0.
--   gold_oa : distinct ISSNs (issn and issn_l alike) from stats_ext.issn_gold_oa_dataset_v5 and
--             from the stats_ext.alljournals rows flagged journal_is_in_doaj or journal_is_oa.
--   dd      : datasources whose id contains 'doajarticles', plus datasources whose printed or
--             online ISSN appears in gold_oa.
-- The join to result_accessroute is an inner join: the accessroute = 'gold' filter discards
-- unmatched rows anyway, so the previous "left outer join ... where" behaved as an inner join.
create table if not exists ${stats_db_name}.indi_pub_gold_oa stored as parquet as
with gold_oa as (
    select distinct issn from (
        SELECT issn_l as issn from stats_ext.issn_gold_oa_dataset_v5
        UNION ALL
        SELECT issn as issn from stats_ext.issn_gold_oa_dataset_v5
        UNION ALL
        select issn from stats_ext.alljournals where journal_is_in_doaj=true or journal_is_oa=true
        UNION ALL
        select issn_l as issn from stats_ext.alljournals where journal_is_in_doaj=true or journal_is_oa=true) foo),
dd as (
    select distinct * from (
        select d.id, d.issn_printed as issn from ${stats_db_name}.datasource d where d.id like '%doajarticles%'
        UNION ALL
        select d.id, d.issn_online as issn from ${stats_db_name}.datasource d where d.id like '%doajarticles%'
        UNION ALL
        select d.id, d.issn_printed as issn from ${stats_db_name}.datasource d join gold_oa on gold_oa.issn=d.issn_printed
        UNION ALL
        select d.id, d.issn_online as issn from ${stats_db_name}.datasource d join gold_oa on gold_oa.issn=d.issn_online) foo
)
SELECT DISTINCT pd.id, coalesce(is_gold, 0) as is_gold
FROM ${stats_db_name}.publication_datasources pd
left outer join (
    select pd.id, 1 as is_gold
    FROM ${stats_db_name}.publication_datasources pd
    join dd on dd.id=pd.datasource
    join ${stats_db_name}.result_accessroute ra on ra.id = pd.id
    where ra.accessroute = 'gold') tmp on tmp.id=pd.id;
|
||||
|
||||
drop table if exists ${stats_db_name}.indi_pub_hybrid_oa_with_cc purge;
|
||||
|
||||
|
@ -421,15 +469,26 @@ drop table if exists ${stats_db_name}.indi_pub_hybrid purge;
|
|||
-- where (gold_oa.journal_is_in_doaj=false or gold_oa.journal_is_oa=false))tmp
|
||||
-- on pd.id=tmp.id;
|
||||
|
||||
--create table if not exists ${stats_db_name}.indi_pub_hybrid stored as parquet as
|
||||
--select distinct pd.id,coalesce(is_hybrid,0) is_hybrid from ${stats_db_name}.publication_datasources pd
|
||||
--left outer join (select pd.id, 1 as is_hybrid from ${stats_db_name}.publication_datasources pd
|
||||
--join ${stats_db_name}.datasource d on pd.datasource=d.id
|
||||
--join ${stats_db_name}.result_instance ri on ri.id=pd.id
|
||||
--join ${stats_db_name}.indi_pub_gold_oa indi_gold on indi_gold.id=pd.id
|
||||
--join ${stats_db_name}.result_accessroute ra on ra.id=pd.id
|
||||
--where d.type like '%Journal%' and ri.accessright!='Closed Access' and (ri.accessright_uw!='gold'
|
||||
--or indi_gold.is_gold=0) and (ra.accessroute='hybrid' or ri.license is not null)) tmp
|
||||
--on pd.id=tmp.id;
|
||||
|
||||
-- indi_pub_hybrid: one row per distinct publication id.
-- is_hybrid = 1 when the publication is not gold (indi_pub_gold_oa.is_gold = 0) and either
--   (a) one of its instances is hosted by a datasource whose type contains 'Journal', the
--       instance access right is neither 'Closed Access' nor 'Restricted', and it has a license, or
--   (b) it has a result_accessroute row with accessroute = 'hybrid';
-- otherwise is_hybrid = 0.
-- Depends on indi_pub_gold_oa having been built first.
-- NOTE(review): result_accessroute is inner-joined, so a publication with no accessroute row
-- can never match, even through branch (a) -- confirm this is intended.
create table if not exists ${stats_db_name}.indi_pub_hybrid stored as parquet as
select distinct pd.id, coalesce(is_hybrid, 0) is_hybrid
from ${stats_db_name}.publication pd
left outer join (
    select pd.id, 1 as is_hybrid
    from ${stats_db_name}.publication pd
    join ${stats_db_name}.result_instance ri on ri.id=pd.id
    join ${stats_db_name}.indi_pub_gold_oa indi_gold on indi_gold.id=pd.id
    join ${stats_db_name}.result_accessroute ra on ra.id=pd.id
    join ${stats_db_name}.datasource d on d.id=ri.hostedby
    where indi_gold.is_gold=0
      and ((d.type like '%Journal%'
            and ri.accessright!='Closed Access'
            and ri.accessright!='Restricted'
            and ri.license is not null)
           or ra.accessroute='hybrid')) tmp
on pd.id=tmp.id;
|
||||
|
||||
drop table if exists ${stats_db_name}.indi_org_fairness purge;
|
||||
|
@ -814,14 +873,16 @@ drop table if exists ${stats_db_name}.indi_pub_bronze_oa purge;
|
|||
--and ri.accessright='Open Access') tmp on tmp.id=p.id;
|
||||
|
||||
-- indi_pub_bronze_oa: one row per distinct publication id.
-- is_bronze_oa = 1 when the publication is neither gold (indi_pub_gold_oa.is_gold = 0) nor
-- hybrid (indi_pub_hybrid.is_hybrid = 0) and either
--   (a) one of its instances is hosted by a datasource whose type contains 'Journal', the
--       instance access right is neither 'Closed Access' nor 'Restricted', and it has no license, or
--   (b) it has a result_accessroute row with accessroute = 'bronze';
-- otherwise is_bronze_oa = 0.
-- Depends on indi_pub_gold_oa and indi_pub_hybrid having been built first.
-- NOTE(review): result_accessroute is inner-joined, so a publication with no accessroute row
-- can never match, even through branch (a) -- confirm this is intended.
create table ${stats_db_name}.indi_pub_bronze_oa stored as parquet as
select distinct pd.id, coalesce(is_bronze_oa, 0) is_bronze_oa
from ${stats_db_name}.publication pd
left outer join (
    select pd.id, 1 as is_bronze_oa
    from ${stats_db_name}.publication pd
    join ${stats_db_name}.result_instance ri on ri.id=pd.id
    join ${stats_db_name}.indi_pub_gold_oa indi_gold on indi_gold.id=pd.id
    join ${stats_db_name}.indi_pub_hybrid indi_hybrid on indi_hybrid.id=pd.id
    join ${stats_db_name}.result_accessroute ra on ra.id=pd.id
    join ${stats_db_name}.datasource d on d.id=ri.hostedby
    where indi_gold.is_gold=0 and indi_hybrid.is_hybrid=0
      and ((d.type like '%Journal%'
            and ri.accessright!='Closed Access'
            and ri.accessright!='Restricted'
            and ri.license is null)
           or ra.accessroute='bronze')) tmp
on pd.id=tmp.id;
|
||||
|
||||
CREATE TEMPORARY TABLE ${stats_db_name}.project_year_result_year as
|
||||
|
|
|
@ -64,6 +64,8 @@ create table TARGET.result_accessroute stored as parquet as select * from SOURCE
|
|||
|
||||
-- Copy result_orcid and result_instance from SOURCE, keeping only rows whose id exists in TARGET.result.
CREATE TABLE TARGET.result_orcid STORED AS PARQUET AS SELECT * FROM SOURCE.result_orcid src WHERE EXISTS (SELECT 1 FROM TARGET.result kept WHERE kept.id = src.id);
CREATE TABLE TARGET.result_instance STORED AS PARQUET AS SELECT * FROM SOURCE.result_instance src WHERE EXISTS (SELECT 1 FROM TARGET.result kept WHERE kept.id = src.id);
|
||||
|
||||
-- foo1 / foo2: SOURCE.result_result rows whose source (foo1) or target (foo2) id is present in TARGET.result.
CREATE VIEW TARGET.foo1 AS SELECT * FROM SOURCE.result_result rel WHERE rel.source IN (SELECT id FROM TARGET.result);
CREATE VIEW TARGET.foo2 AS SELECT * FROM SOURCE.result_result rel WHERE rel.target IN (SELECT id FROM TARGET.result);
-- result_result: de-duplicated union of foo1 and foo2.
CREATE TABLE TARGET.result_result STORED AS PARQUET AS SELECT DISTINCT * FROM (SELECT * FROM TARGET.foo1 UNION ALL SELECT * FROM TARGET.foo2) foufou;
|
||||
|
|
|
@ -248,6 +248,8 @@ create table TARGET.indi_impact_measures stored as parquet as select * from SOUR
|
|||
-- Copy each table from SOURCE, keeping only rows whose id exists in TARGET.result.
CREATE TABLE TARGET.indi_pub_interdisciplinarity STORED AS PARQUET AS SELECT * FROM SOURCE.indi_pub_interdisciplinarity src WHERE EXISTS (SELECT 1 FROM TARGET.result kept WHERE kept.id = src.id);
CREATE TABLE TARGET.result_apc_affiliations STORED AS PARQUET AS SELECT * FROM SOURCE.result_apc_affiliations src WHERE EXISTS (SELECT 1 FROM TARGET.result kept WHERE kept.id = src.id);
CREATE TABLE TARGET.result_instance STORED AS PARQUET AS SELECT * FROM SOURCE.result_instance src WHERE EXISTS (SELECT 1 FROM TARGET.result kept WHERE kept.id = src.id);
CREATE TABLE TARGET.result_orcid STORED AS PARQUET AS SELECT * FROM SOURCE.result_orcid src WHERE EXISTS (SELECT 1 FROM TARGET.result kept WHERE kept.id = src.id);
|
||||
|
||||
-- indi_is_project_result_after: SOURCE rows whose result_id exists in TARGET.result.
CREATE TABLE TARGET.indi_is_project_result_after STORED AS PARQUET AS SELECT * FROM SOURCE.indi_is_project_result_after src WHERE EXISTS (SELECT 1 FROM TARGET.result kept WHERE kept.id = src.result_id);
-- indi_is_funder_plan_s: unfiltered view over the SOURCE table of the same name.
CREATE VIEW TARGET.indi_is_funder_plan_s AS SELECT * FROM SOURCE.indi_is_funder_plan_s;
|
||||
|
|
|
@ -5,6 +5,8 @@
|
|||
------------------------------------------------------
|
||||
|
||||
-- Dataset temporary table supporting updates
|
||||
DROP TABLE IF EXISTS ${stats_db_name}.dataset_tmp purge;
|
||||
|
||||
CREATE TABLE ${stats_db_name}.dataset_tmp
|
||||
(
|
||||
id STRING,
|
||||
|
@ -40,6 +42,8 @@ SELECT substr(d.id, 4) AS id,
|
|||
FROM ${openaire_db_name}.dataset d
|
||||
WHERE d.datainfo.deletedbyinference = FALSE and d.datainfo.invisible=false;
|
||||
|
||||
DROP TABLE IF EXISTS ${stats_db_name}.dataset_citations purge;
|
||||
|
||||
CREATE TABLE ${stats_db_name}.dataset_citations STORED AS PARQUET AS
|
||||
SELECT substr(d.id, 4) AS id, xpath_string(citation.value, "//citation/id[@type='openaire']/@value") AS cites
|
||||
FROM ${openaire_db_name}.dataset d
|
||||
|
@ -47,12 +51,16 @@ FROM ${openaire_db_name}.dataset d
|
|||
WHERE xpath_string(citation.value, "//citation/id[@type='openaire']/@value") != ""
|
||||
and d.datainfo.deletedbyinference = false and d.datainfo.invisible=false;
|
||||
|
||||
-- Rebuild dataset_classifications: one row per element of instance.instancetype for every
-- dataset not flagged deletedbyinference or invisible. The id is emitted without its first
-- three characters (presumably the entity-type prefix -- confirm).
DROP TABLE IF EXISTS ${stats_db_name}.dataset_classifications PURGE;

CREATE TABLE ${stats_db_name}.dataset_classifications STORED AS PARQUET AS
SELECT
    substr(ds.id, 4)       AS id,
    instancetype.classname AS type
FROM ${openaire_db_name}.dataset ds
LATERAL VIEW explode(ds.instance.instancetype) instances AS instancetype
WHERE ds.datainfo.deletedbyinference = false
  AND ds.datainfo.invisible = false;
|
||||
|
||||
DROP TABLE IF EXISTS ${stats_db_name}.dataset_concepts purge;
|
||||
|
||||
CREATE TABLE ${stats_db_name}.dataset_concepts STORED AS PARQUET AS
|
||||
SELECT substr(p.id, 4) as id, case
|
||||
when contexts.context.id RLIKE '^[^::]+::[^::]+::.+$' then contexts.context.id
|
||||
|
@ -62,6 +70,8 @@ from ${openaire_db_name}.dataset p
|
|||
LATERAL VIEW explode(p.context) contexts as context
|
||||
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false;
|
||||
|
||||
DROP TABLE IF EXISTS ${stats_db_name}.dataset_datasources purge;
|
||||
|
||||
CREATE TABLE ${stats_db_name}.dataset_datasources STORED AS PARQUET AS
|
||||
SELECT p.id, case when d.id IS NULL THEN 'other' ELSE p.datasource END AS datasource
|
||||
FROM (
|
||||
|
@ -74,23 +84,31 @@ FROM (
|
|||
FROM ${openaire_db_name}.datasource d
|
||||
WHERE d.datainfo.deletedbyinference = false and d.datainfo.invisible=false) d ON p.datasource = d.id;
|
||||
|
||||
-- Rebuild dataset_languages: the language class name of every dataset not flagged
-- deletedbyinference or invisible. The id is emitted without its first three characters.
DROP TABLE IF EXISTS ${stats_db_name}.dataset_languages PURGE;

CREATE TABLE ${stats_db_name}.dataset_languages STORED AS PARQUET AS
SELECT
    substr(ds.id, 4)      AS id,
    ds.language.classname AS language
FROM ${openaire_db_name}.dataset ds
WHERE ds.datainfo.deletedbyinference = false
  AND ds.datainfo.invisible = false;
|
||||
|
||||
-- Rebuild dataset_oids: one row per element of originalid for every dataset not flagged
-- deletedbyinference or invisible. The id is emitted without its first three characters.
DROP TABLE IF EXISTS ${stats_db_name}.dataset_oids PURGE;

CREATE TABLE ${stats_db_name}.dataset_oids STORED AS PARQUET AS
SELECT
    substr(ds.id, 4) AS id,
    oids.ids         AS oid
FROM ${openaire_db_name}.dataset ds
LATERAL VIEW explode(ds.originalid) oids AS ids
WHERE ds.datainfo.deletedbyinference = false
  AND ds.datainfo.invisible = false;
|
||||
|
||||
-- Rebuild dataset_pids: one row per element of pid (qualifier class name and value) for every
-- dataset not flagged deletedbyinference or invisible. The id is emitted without its first
-- three characters.
DROP TABLE IF EXISTS ${stats_db_name}.dataset_pids PURGE;

CREATE TABLE ${stats_db_name}.dataset_pids STORED AS PARQUET AS
SELECT
    substr(ds.id, 4)         AS id,
    ppid.qualifier.classname AS type,
    ppid.value               AS pid
FROM ${openaire_db_name}.dataset ds
LATERAL VIEW explode(ds.pid) pids AS ppid
WHERE ds.datainfo.deletedbyinference = false
  AND ds.datainfo.invisible = false;
|
||||
|
||||
DROP TABLE IF EXISTS ${stats_db_name}.dataset_topics purge;
|
||||
|
||||
CREATE TABLE ${stats_db_name}.dataset_topics STORED AS PARQUET AS
|
||||
SELECT substr(p.id, 4) AS id, subjects.subject.qualifier.classname AS type, subjects.subject.value AS topic
|
||||
FROM ${openaire_db_name}.dataset p
|
||||
|
|
|
@ -5,6 +5,7 @@
|
|||
--------------------------------------------------------
|
||||
|
||||
-- Software temporary table supporting updates
|
||||
DROP TABLE IF EXISTS ${stats_db_name}.software_tmp purge;
|
||||
CREATE TABLE ${stats_db_name}.software_tmp
|
||||
(
|
||||
id STRING,
|
||||
|
@ -40,6 +41,8 @@ SELECT substr(s.id, 4) as id,
|
|||
from ${openaire_db_name}.software s
|
||||
where s.datainfo.deletedbyinference = false and s.datainfo.invisible=false;
|
||||
|
||||
DROP TABLE IF EXISTS ${stats_db_name}.software_citations purge;
|
||||
|
||||
CREATE TABLE ${stats_db_name}.software_citations STORED AS PARQUET AS
|
||||
SELECT substr(s.id, 4) as id, xpath_string(citation.value, "//citation/id[@type='openaire']/@value") AS cites
|
||||
FROM ${openaire_db_name}.software s
|
||||
|
@ -47,6 +50,8 @@ FROM ${openaire_db_name}.software s
|
|||
where xpath_string(citation.value, "//citation/id[@type='openaire']/@value") != ""
|
||||
and s.datainfo.deletedbyinference = false and s.datainfo.invisible=false;
|
||||
|
||||
DROP TABLE IF EXISTS ${stats_db_name}.software_classifications purge;
|
||||
|
||||
CREATE TABLE ${stats_db_name}.software_classifications STORED AS PARQUET AS
|
||||
SELECT substr(p.id, 4) AS id, instancetype.classname AS type
|
||||
FROM ${openaire_db_name}.software p
|
||||
|
@ -62,6 +67,8 @@ FROM ${openaire_db_name}.software p
|
|||
LATERAL VIEW explode(p.context) contexts AS context
|
||||
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false;
|
||||
|
||||
DROP TABLE IF EXISTS ${stats_db_name}.software_datasources purge;
|
||||
|
||||
CREATE TABLE ${stats_db_name}.software_datasources STORED AS PARQUET AS
|
||||
SELECT p.id, CASE WHEN d.id IS NULL THEN 'other' ELSE p.datasource end as datasource
|
||||
FROM (
|
||||
|
@ -74,23 +81,31 @@ FROM (
|
|||
FROM ${openaire_db_name}.datasource d
|
||||
WHERE d.datainfo.deletedbyinference = false and d.datainfo.invisible=false) d ON p.datasource = d.id;
|
||||
|
||||
-- Rebuild software_languages: the language class name of every software record not flagged
-- deletedbyinference or invisible. The id is emitted without its first three characters.
DROP TABLE IF EXISTS ${stats_db_name}.software_languages PURGE;

CREATE TABLE ${stats_db_name}.software_languages STORED AS PARQUET AS
SELECT
    substr(sw.id, 4)      AS id,
    sw.language.classname AS language
FROM ${openaire_db_name}.software sw
WHERE sw.datainfo.deletedbyinference = false
  AND sw.datainfo.invisible = false;
|
||||
|
||||
-- Rebuild software_oids: one row per element of originalid for every software record not
-- flagged deletedbyinference or invisible. The id is emitted without its first three characters.
DROP TABLE IF EXISTS ${stats_db_name}.software_oids PURGE;

CREATE TABLE ${stats_db_name}.software_oids STORED AS PARQUET AS
SELECT
    substr(sw.id, 4) AS id,
    oids.ids         AS oid
FROM ${openaire_db_name}.software sw
LATERAL VIEW explode(sw.originalid) oids AS ids
WHERE sw.datainfo.deletedbyinference = false
  AND sw.datainfo.invisible = false;
|
||||
|
||||
-- Rebuild software_pids: one row per element of pid (qualifier class name and value) for every
-- software record not flagged deletedbyinference or invisible. The id is emitted without its
-- first three characters.
DROP TABLE IF EXISTS ${stats_db_name}.software_pids PURGE;

CREATE TABLE ${stats_db_name}.software_pids STORED AS PARQUET AS
SELECT
    substr(sw.id, 4)         AS id,
    ppid.qualifier.classname AS type,
    ppid.value               AS pid
FROM ${openaire_db_name}.software sw
LATERAL VIEW explode(sw.pid) pids AS ppid
WHERE sw.datainfo.deletedbyinference = false
  AND sw.datainfo.invisible = false;
|
||||
|
||||
DROP TABLE IF EXISTS ${stats_db_name}.software_topics purge;
|
||||
|
||||
CREATE TABLE ${stats_db_name}.software_topics STORED AS PARQUET AS
|
||||
SELECT substr(p.id, 4) AS id, subjects.subject.qualifier.classname AS type, subjects.subject.value AS topic
|
||||
FROM ${openaire_db_name}.software p
|
||||
|
|
|
@ -5,6 +5,8 @@
|
|||
--------------------------------------------------------------------------------
|
||||
|
||||
-- Otherresearchproduct temporary table supporting updates
|
||||
DROP TABLE IF EXISTS ${stats_db_name}.otherresearchproduct_tmp purge;
|
||||
|
||||
CREATE TABLE ${stats_db_name}.otherresearchproduct_tmp
|
||||
(
|
||||
id STRING,
|
||||
|
@ -40,6 +42,8 @@ FROM ${openaire_db_name}.otherresearchproduct o
|
|||
WHERE o.datainfo.deletedbyinference = FALSE and o.datainfo.invisible=false;
|
||||
|
||||
-- Otherresearchproduct_citations
|
||||
DROP TABLE IF EXISTS ${stats_db_name}.otherresearchproduct_citations purge;
|
||||
|
||||
CREATE TABLE ${stats_db_name}.otherresearchproduct_citations STORED AS PARQUET AS
|
||||
SELECT substr(o.id, 4) AS id, xpath_string(citation.value, "//citation/id[@type='openaire']/@value") AS cites
|
||||
FROM ${openaire_db_name}.otherresearchproduct o LATERAL VIEW explode(o.extrainfo) citations AS citation
|
||||
|
@ -51,6 +55,8 @@ SELECT substr(p.id, 4) AS id, instancetype.classname AS type
|
|||
FROM ${openaire_db_name}.otherresearchproduct p LATERAL VIEW explode(p.instance.instancetype) instances AS instancetype
|
||||
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false;
|
||||
|
||||
DROP TABLE IF EXISTS ${stats_db_name}.otherresearchproduct_concepts purge;
|
||||
|
||||
CREATE TABLE ${stats_db_name}.otherresearchproduct_concepts STORED AS PARQUET AS
|
||||
SELECT substr(p.id, 4) as id, case
|
||||
when contexts.context.id RLIKE '^[^::]+::[^::]+::.+$' then contexts.context.id
|
||||
|
@ -59,6 +65,8 @@ SELECT substr(p.id, 4) as id, case
|
|||
FROM ${openaire_db_name}.otherresearchproduct p LATERAL VIEW explode(p.context) contexts AS context
|
||||
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false;
|
||||
|
||||
DROP TABLE IF EXISTS ${stats_db_name}.otherresearchproduct_datasources purge;
|
||||
|
||||
CREATE TABLE ${stats_db_name}.otherresearchproduct_datasources STORED AS PARQUET AS
|
||||
SELECT p.id, CASE WHEN d.id IS NULL THEN 'other' ELSE p.datasource END AS datasource
|
||||
FROM (SELECT substr(p.id, 4) AS id, substr(instances.instance.hostedby.key, 4) AS datasource
|
||||
|
@ -68,21 +76,29 @@ FROM (SELECT substr(p.id, 4) AS id, substr(instances.instance.hostedby.key, 4) A
|
|||
from ${openaire_db_name}.datasource d
|
||||
WHERE d.datainfo.deletedbyinference = false and d.datainfo.invisible=false) d on p.datasource = d.id;
|
||||
|
||||
-- Rebuild otherresearchproduct_languages: the language class name of every otherresearchproduct
-- not flagged deletedbyinference or invisible. The id is emitted without its first three characters.
DROP TABLE IF EXISTS ${stats_db_name}.otherresearchproduct_languages PURGE;

CREATE TABLE ${stats_db_name}.otherresearchproduct_languages STORED AS PARQUET AS
SELECT
    substr(orp.id, 4)      AS id,
    orp.language.classname AS language
FROM ${openaire_db_name}.otherresearchproduct orp
WHERE orp.datainfo.deletedbyinference = false
  AND orp.datainfo.invisible = false;
|
||||
|
||||
-- Rebuild otherresearchproduct_oids: one row per element of originalid for every
-- otherresearchproduct not flagged deletedbyinference or invisible. The id is emitted without
-- its first three characters.
DROP TABLE IF EXISTS ${stats_db_name}.otherresearchproduct_oids PURGE;

CREATE TABLE ${stats_db_name}.otherresearchproduct_oids STORED AS PARQUET AS
SELECT
    substr(orp.id, 4) AS id,
    oids.ids          AS oid
FROM ${openaire_db_name}.otherresearchproduct orp
LATERAL VIEW explode(orp.originalid) oids AS ids
WHERE orp.datainfo.deletedbyinference = false
  AND orp.datainfo.invisible = false;
|
||||
|
||||
-- Rebuild otherresearchproduct_pids: one row per element of pid (qualifier class name and value)
-- for every otherresearchproduct not flagged deletedbyinference or invisible. The id is emitted
-- without its first three characters.
DROP TABLE IF EXISTS ${stats_db_name}.otherresearchproduct_pids PURGE;

CREATE TABLE ${stats_db_name}.otherresearchproduct_pids STORED AS PARQUET AS
SELECT
    substr(orp.id, 4)        AS id,
    ppid.qualifier.classname AS type,
    ppid.value               AS pid
FROM ${openaire_db_name}.otherresearchproduct orp
LATERAL VIEW explode(orp.pid) pids AS ppid
WHERE orp.datainfo.deletedbyinference = false
  AND orp.datainfo.invisible = false;
|
||||
|
||||
DROP TABLE IF EXISTS ${stats_db_name}.otherresearchproduct_topics purge;
|
||||
|
||||
CREATE TABLE ${stats_db_name}.otherresearchproduct_topics STORED AS PARQUET AS
|
||||
SELECT substr(p.id, 4) AS id, subjects.subject.qualifier.classname AS type, subjects.subject.value AS topic
|
||||
FROM ${openaire_db_name}.otherresearchproduct p LATERAL VIEW explode(p.subject) subjects AS subject
|
||||
|
|
|
@ -3,29 +3,39 @@
|
|||
-- Project table/view and Project related tables/views
|
||||
------------------------------------------------------
|
||||
------------------------------------------------------
|
||||
-- Rebuild project_oids: one row per element of originalid for every project not flagged
-- deletedbyinference or invisible. The id is emitted without its first three characters.
DROP TABLE IF EXISTS ${stats_db_name}.project_oids PURGE;

CREATE TABLE ${stats_db_name}.project_oids STORED AS PARQUET AS
SELECT
    substr(prj.id, 4) AS id,
    oids.ids          AS oid
FROM ${openaire_db_name}.project prj
LATERAL VIEW explode(prj.originalid) oids AS ids
WHERE prj.datainfo.deletedbyinference = false
  AND prj.datainfo.invisible = false;
|
||||
|
||||
-- Rebuild project_organizations from 'projectOrganization' relations whose source starts
-- with '40|' and that are not flagged deletedbyinference or invisible. Source becomes id and
-- target becomes organization, each without its first three characters.
DROP TABLE IF EXISTS ${stats_db_name}.project_organizations PURGE;

CREATE TABLE ${stats_db_name}.project_organizations STORED AS PARQUET AS
SELECT
    substr(rel.source, 4) AS id,
    substr(rel.target, 4) AS organization
FROM ${openaire_db_name}.relation rel
WHERE rel.reltype = 'projectOrganization'
  AND rel.source LIKE '40|%'
  AND rel.datainfo.deletedbyinference = false
  AND rel.datainfo.invisible = false;
|
||||
|
||||
-- Rebuild project_results from 'resultProject' relations whose target starts with '40|' and
-- that are not flagged deletedbyinference or invisible. Target becomes id and source becomes
-- result, each without its first three characters; provenance is the class name of the
-- relation's provenance action.
DROP TABLE IF EXISTS ${stats_db_name}.project_results PURGE;

CREATE TABLE ${stats_db_name}.project_results STORED AS PARQUET AS
SELECT
    substr(rel.target, 4)                       AS id,
    substr(rel.source, 4)                       AS result,
    rel.datainfo.provenanceaction.classname     AS provenance
FROM ${openaire_db_name}.relation rel
WHERE rel.reltype = 'resultProject'
  AND rel.target LIKE '40|%'
  AND rel.datainfo.deletedbyinference = false
  AND rel.datainfo.invisible = false;
|
||||
|
||||
-- Rebuild project_classification: one row per element of h2020classification that has a
-- non-null h2020programme, for every project not flagged deletedbyinference or invisible.
-- The id is emitted without its first three characters.
DROP TABLE IF EXISTS ${stats_db_name}.project_classification PURGE;

CREATE TABLE ${stats_db_name}.project_classification STORED AS PARQUET AS
SELECT
    substr(prj.id, 4) AS id,
    class.h2020programme.code,
    class.level1,
    class.level2,
    class.level3
FROM ${openaire_db_name}.project prj
LATERAL VIEW explode(prj.h2020classification) classifs AS class
WHERE prj.datainfo.deletedbyinference = false
  AND prj.datainfo.invisible = false
  AND class.h2020programme IS NOT NULL;
|
||||
|
||||
DROP TABLE IF EXISTS ${stats_db_name}.project_tmp purge;
|
||||
|
||||
CREATE TABLE ${stats_db_name}.project_tmp
|
||||
(
|
||||
id STRING,
|
||||
|
@ -80,12 +90,16 @@ SELECT substr(p.id, 4) AS id,
|
|||
FROM ${openaire_db_name}.project p
|
||||
WHERE p.datainfo.deletedbyinference = false and p.datainfo.invisible=false;
|
||||
|
||||
-- Rebuild funder: distinct (id, name, shortname) triples extracted by XPath from every
-- element of project.fundingtree.value.
-- NOTE(review): unlike the neighbouring project tables, no deletedbyinference / invisible
-- filter is applied here -- confirm whether that is intentional.
DROP TABLE IF EXISTS ${stats_db_name}.funder PURGE;

CREATE TABLE ${stats_db_name}.funder STORED AS PARQUET AS
SELECT DISTINCT
    xpath_string(fund, '//funder/id')        AS id,
    xpath_string(fund, '//funder/name')      AS name,
    xpath_string(fund, '//funder/shortname') AS shortname
FROM ${openaire_db_name}.project prj
LATERAL VIEW explode(prj.fundingtree.value) fundingtree AS fund;
|
||||
|
||||
DROP TABLE IF EXISTS ${stats_db_name}.project_organization_contribution purge;
|
||||
|
||||
CREATE TABLE ${stats_db_name}.project_organization_contribution STORED AS PARQUET AS
|
||||
SELECT distinct substr(r.source, 4) AS project, substr(r.target, 4) AS organization,
|
||||
properties[0].value contribution, properties[1].value currency
|
||||
|
|
Loading…
Reference in New Issue