forked from D-Net/dnet-hadoop
Merge pull request '[stats wf] various fixes, organization ids for inst. dashboard' (#205) from antonis.lempesis/dnet-hadoop:beta into beta
Reviewed-on: D-Net/dnet-hadoop#205
This commit is contained in:
commit
9fa3dd78fe
|
@ -49,8 +49,5 @@ select * from openaire_prod_usage_stats.views_stats;
|
|||
-- Creation date of the database
|
||||
------------------------------------------------------------------------------------------------
|
||||
------------------------------------------------------------------------------------------------
|
||||
create table ${stats_db_name}.creation_date as
|
||||
create table ${stats_db_name}.creation_date STORED AS PARQUET as
|
||||
select date_format(current_date(), 'dd-MM-yyyy') as date;
|
||||
--
|
||||
-- ANALYZE TABLE ${stats_db_name}.creation_date COMPUTE STATISTICS;
|
||||
-- ANALYZE TABLE ${stats_db_name}.creation_date COMPUTE STATISTICS FOR COLUMNS;
|
|
@ -113,7 +113,4 @@ FROM ${stats_db_name}.result_projects,
|
|||
${stats_db_name}.project
|
||||
WHERE result_projects.id = result.id
|
||||
AND result.type = 'publication'
|
||||
AND project.id = result_projects.project;
|
||||
|
||||
-- ANALYZE TABLE ${stats_db_name}.project COMPUTE STATISTICS;
|
||||
-- ANALYZE TABLE ${stats_db_name}.project COMPUTE STATISTICS FOR COLUMNS;
|
||||
AND project.id = result_projects.project;
|
|
@ -5,7 +5,7 @@
|
|||
-- Sources related tables/views
|
||||
------------------------------------------------------
|
||||
------------------------------------------------------
|
||||
CREATE TABLE IF NOT EXISTS ${stats_db_name}.publication_sources as
|
||||
CREATE TABLE IF NOT EXISTS ${stats_db_name}.publication_sources STORED AS PARQUET as
|
||||
SELECT p.id, case when d.id is null then 'other' else p.datasource end as datasource
|
||||
FROM (
|
||||
SELECT substr(p.id, 4) as id, substr(datasource, 4) as datasource
|
||||
|
@ -16,7 +16,7 @@ LEFT OUTER JOIN
|
|||
from ${openaire_db_name}.datasource d
|
||||
WHERE d.datainfo.deletedbyinference=false and d.datainfo.invisible = FALSE) d on p.datasource = d.id;
|
||||
|
||||
CREATE TABLE IF NOT EXISTS ${stats_db_name}.dataset_sources as
|
||||
CREATE TABLE IF NOT EXISTS ${stats_db_name}.dataset_sources STORED AS PARQUET as
|
||||
SELECT p.id, case when d.id is null then 'other' else p.datasource end as datasource
|
||||
FROM (
|
||||
SELECT substr(p.id, 4) as id, substr(datasource, 4) as datasource
|
||||
|
@ -27,7 +27,7 @@ LEFT OUTER JOIN
|
|||
from ${openaire_db_name}.datasource d
|
||||
WHERE d.datainfo.deletedbyinference=false and d.datainfo.invisible = FALSE) d on p.datasource = d.id;
|
||||
|
||||
CREATE TABLE IF NOT EXISTS ${stats_db_name}.software_sources as
|
||||
CREATE TABLE IF NOT EXISTS ${stats_db_name}.software_sources STORED AS PARQUET as
|
||||
SELECT p.id, case when d.id is null then 'other' else p.datasource end as datasource
|
||||
FROM (
|
||||
SELECT substr(p.id, 4) as id, substr(datasource, 4) as datasource
|
||||
|
@ -38,7 +38,7 @@ LEFT OUTER JOIN
|
|||
from ${openaire_db_name}.datasource d
|
||||
WHERE d.datainfo.deletedbyinference=false and d.datainfo.invisible = FALSE) d on p.datasource = d.id;
|
||||
|
||||
CREATE TABLE IF NOT EXISTS ${stats_db_name}.otherresearchproduct_sources as
|
||||
CREATE TABLE IF NOT EXISTS ${stats_db_name}.otherresearchproduct_sources STORED AS PARQUET as
|
||||
SELECT p.id, case when d.id is null then 'other' else p.datasource end as datasource
|
||||
FROM (
|
||||
SELECT substr(p.id, 4) as id, substr(datasource, 4) as datasource
|
||||
|
@ -59,7 +59,7 @@ UNION ALL
|
|||
SELECT * FROM ${stats_db_name}.otherresearchproduct_sources;
|
||||
|
||||
|
||||
create table ${stats_db_name}.result_orcid as
|
||||
create table ${stats_db_name}.result_orcid STORED AS PARQUET as
|
||||
select distinct res.id, regexp_replace(res.orcid, 'http://orcid.org/' ,'') as orcid
|
||||
from (
|
||||
SELECT substr(res.id, 4) as id, auth_pid.value as orcid
|
||||
|
|
|
@ -5,22 +5,22 @@
|
|||
-- Licences related tables/views
|
||||
------------------------------------------------------
|
||||
------------------------------------------------------
|
||||
CREATE TABLE IF NOT EXISTS ${stats_db_name}.publication_licenses AS
|
||||
CREATE TABLE IF NOT EXISTS ${stats_db_name}.publication_licenses STORED AS PARQUET AS
|
||||
SELECT substr(p.id, 4) as id, licenses.value as type
|
||||
from ${openaire_db_name}.publication p LATERAL VIEW explode(p.instance.license) instances as licenses
|
||||
where licenses.value is not null and licenses.value != '' and p.datainfo.deletedbyinference=false and p.datainfo.invisible = FALSE;
|
||||
|
||||
CREATE TABLE IF NOT EXISTS ${stats_db_name}.dataset_licenses AS
|
||||
CREATE TABLE IF NOT EXISTS ${stats_db_name}.dataset_licenses STORED AS PARQUET AS
|
||||
SELECT substr(p.id, 4) as id, licenses.value as type
|
||||
from ${openaire_db_name}.dataset p LATERAL VIEW explode(p.instance.license) instances as licenses
|
||||
where licenses.value is not null and licenses.value != '' and p.datainfo.deletedbyinference=false and p.datainfo.invisible = FALSE;
|
||||
|
||||
CREATE TABLE IF NOT EXISTS ${stats_db_name}.software_licenses AS
|
||||
CREATE TABLE IF NOT EXISTS ${stats_db_name}.software_licenses STORED AS PARQUET AS
|
||||
SELECT substr(p.id, 4) as id, licenses.value as type
|
||||
from ${openaire_db_name}.software p LATERAL VIEW explode(p.instance.license) instances as licenses
|
||||
where licenses.value is not null and licenses.value != '' and p.datainfo.deletedbyinference=false and p.datainfo.invisible = FALSE;
|
||||
|
||||
CREATE TABLE IF NOT EXISTS ${stats_db_name}.otherresearchproduct_licenses AS
|
||||
CREATE TABLE IF NOT EXISTS ${stats_db_name}.otherresearchproduct_licenses STORED AS PARQUET AS
|
||||
SELECT substr(p.id, 4) as id, licenses.value as type
|
||||
from ${openaire_db_name}.otherresearchproduct p LATERAL VIEW explode(p.instance.license) instances as licenses
|
||||
where licenses.value is not null and licenses.value != '' and p.datainfo.deletedbyinference=false and p.datainfo.invisible = FALSE;
|
||||
|
@ -34,11 +34,11 @@ SELECT * FROM ${stats_db_name}.software_licenses
|
|||
UNION ALL
|
||||
SELECT * FROM ${stats_db_name}.otherresearchproduct_licenses;
|
||||
|
||||
CREATE TABLE IF NOT EXISTS ${stats_db_name}.organization_pids AS
|
||||
CREATE TABLE IF NOT EXISTS ${stats_db_name}.organization_pids STORED AS PARQUET AS
|
||||
select substr(o.id, 4) as id, ppid.qualifier.classname as type, ppid.value as pid
|
||||
from ${openaire_db_name}.organization o lateral view explode(o.pid) pids as ppid;
|
||||
|
||||
CREATE TABLE IF NOT EXISTS ${stats_db_name}.organization_sources as
|
||||
CREATE TABLE IF NOT EXISTS ${stats_db_name}.organization_sources STORED AS PARQUET as
|
||||
SELECT o.id, case when d.id is null then 'other' else o.datasource end as datasource
|
||||
FROM (
|
||||
SELECT substr(o.id, 4) as id, substr(instances.instance.key, 4) as datasource
|
||||
|
@ -46,17 +46,4 @@ FROM (
|
|||
LEFT OUTER JOIN (
|
||||
SELECT substr(d.id, 4) id
|
||||
from ${openaire_db_name}.datasource d
|
||||
WHERE d.datainfo.deletedbyinference=false and d.datainfo.invisible = FALSE) d on o.datasource = d.id;
|
||||
|
||||
-- ANALYZE TABLE ${stats_db_name}.publication_licenses COMPUTE STATISTICS;
|
||||
-- ANALYZE TABLE ${stats_db_name}.publication_licenses COMPUTE STATISTICS FOR COLUMNS;
|
||||
-- ANALYZE TABLE ${stats_db_name}.dataset_licenses COMPUTE STATISTICS;
|
||||
-- ANALYZE TABLE ${stats_db_name}.dataset_licenses COMPUTE STATISTICS FOR COLUMNS;
|
||||
-- ANALYZE TABLE ${stats_db_name}.software_licenses COMPUTE STATISTICS;
|
||||
-- ANALYZE TABLE ${stats_db_name}.software_licenses COMPUTE STATISTICS FOR COLUMNS;
|
||||
-- ANALYZE TABLE ${stats_db_name}.otherresearchproduct_licenses COMPUTE STATISTICS;
|
||||
-- ANALYZE TABLE ${stats_db_name}.otherresearchproduct_licenses COMPUTE STATISTICS FOR COLUMNS;
|
||||
-- ANALYZE TABLE ${stats_db_name}.organization_pids COMPUTE STATISTICS;
|
||||
-- ANALYZE TABLE ${stats_db_name}.organization_pids COMPUTE STATISTICS FOR COLUMNS;
|
||||
-- ANALYZE TABLE ${stats_db_name}.organization_sources COMPUTE STATISTICS;
|
||||
-- ANALYZE TABLE ${stats_db_name}.organization_sources COMPUTE STATISTICS FOR COLUMNS;
|
||||
WHERE d.datainfo.deletedbyinference=false and d.datainfo.invisible = FALSE) d on o.datasource = d.id;
|
|
@ -6,22 +6,22 @@
|
|||
------------------------------------------------------
|
||||
------------------------------------------------------
|
||||
|
||||
CREATE TABLE IF NOT EXISTS ${stats_db_name}.publication_refereed as
|
||||
CREATE TABLE IF NOT EXISTS ${stats_db_name}.publication_refereed STORED AS PARQUET as
|
||||
select substr(r.id, 4) as id, inst.refereed.classname as refereed
|
||||
from ${openaire_db_name}.publication r lateral view explode(r.instance) instances as inst
|
||||
where r.datainfo.deletedbyinference=false and r.datainfo.invisible = FALSE;
|
||||
|
||||
CREATE TABLE IF NOT EXISTS ${stats_db_name}.dataset_refereed as
|
||||
CREATE TABLE IF NOT EXISTS ${stats_db_name}.dataset_refereed STORED AS PARQUET as
|
||||
select substr(r.id, 4) as id, inst.refereed.classname as refereed
|
||||
from ${openaire_db_name}.dataset r lateral view explode(r.instance) instances as inst
|
||||
where r.datainfo.deletedbyinference=false and r.datainfo.invisible = FALSE;
|
||||
|
||||
CREATE TABLE IF NOT EXISTS ${stats_db_name}.software_refereed as
|
||||
CREATE TABLE IF NOT EXISTS ${stats_db_name}.software_refereed STORED AS PARQUET as
|
||||
select substr(r.id, 4) as id, inst.refereed.classname as refereed
|
||||
from ${openaire_db_name}.software r lateral view explode(r.instance) instances as inst
|
||||
where r.datainfo.deletedbyinference=false and r.datainfo.invisible = FALSE;
|
||||
|
||||
CREATE TABLE IF NOT EXISTS ${stats_db_name}.otherresearchproduct_refereed as
|
||||
CREATE TABLE IF NOT EXISTS ${stats_db_name}.otherresearchproduct_refereed STORED AS PARQUET as
|
||||
select substr(r.id, 4) as id, inst.refereed.classname as refereed
|
||||
from ${openaire_db_name}.otherresearchproduct r lateral view explode(r.instance) instances as inst
|
||||
where r.datainfo.deletedbyinference=false and r.datainfo.invisible = FALSE;
|
||||
|
|
|
@ -1,21 +1,21 @@
|
|||
-------------------------------------------
|
||||
--- Extra tables, mostly used by indicators
|
||||
|
||||
create table ${stats_db_name}.result_projectcount as
|
||||
create table ${stats_db_name}.result_projectcount STORED AS PARQUET as
|
||||
select r.id, count(distinct p.id) as count
|
||||
from ${stats_db_name}.result r
|
||||
left outer join ${stats_db_name}.result_projects rp on rp.id=r.id
|
||||
left outer join ${stats_db_name}.project p on p.id=rp.project
|
||||
group by r.id;
|
||||
|
||||
create table ${stats_db_name}.result_fundercount as
|
||||
create table ${stats_db_name}.result_fundercount STORED AS PARQUET as
|
||||
select r.id, count(distinct p.funder) as count
|
||||
from ${stats_db_name}.result r
|
||||
left outer join ${stats_db_name}.result_projects rp on rp.id=r.id
|
||||
left outer join ${stats_db_name}.project p on p.id=rp.project
|
||||
group by r.id;
|
||||
|
||||
create table ${stats_db_name}.project_resultcount as
|
||||
create table ${stats_db_name}.project_resultcount STORED AS PARQUET as
|
||||
with rcount as (
|
||||
select p.id as pid, count(distinct r.id) as `count`, r.type as type
|
||||
from ${stats_db_name}.project p
|
||||
|
|
|
@ -265,19 +265,19 @@ left outer join (
|
|||
|
||||
create table indi_org_openess stored as parquet as
|
||||
WITH datasets_oa as (
|
||||
SELECT ro.organization, count(dg.id) no_oadatasets FROM indi_datasets_gold_oa_new dg
|
||||
SELECT ro.organization, count(dg.id) no_oadatasets FROM indi_datasets_gold_oa dg
|
||||
join openaire_prod_stats.result_organization ro on dg.id=ro.id
|
||||
join openaire_prod_stats.dataset ds on dg.id=ds.id
|
||||
WHERE dg.is_gold=1
|
||||
group by ro.organization),
|
||||
software_oa as (
|
||||
SELECT ro.organization, count(dg.id) no_oasoftware FROM indi_software_gold_oa_new dg
|
||||
SELECT ro.organization, count(dg.id) no_oasoftware FROM indi_software_gold_oa dg
|
||||
join openaire_prod_stats.result_organization ro on dg.id=ro.id
|
||||
join openaire_prod_stats.software ds on dg.id=ds.id
|
||||
WHERE dg.is_gold=1
|
||||
group by ro.organization),
|
||||
pubs_oa as (
|
||||
SELECT ro.organization, count(dg.id) no_oapubs FROM indi_pub_gold_oa_new dg
|
||||
SELECT ro.organization, count(dg.id) no_oapubs FROM indi_pub_gold_oa dg
|
||||
join openaire_prod_stats.result_organization ro on dg.id=ro.id
|
||||
join openaire_prod_stats.publication ds on dg.id=ds.id
|
||||
where dg.is_gold=1
|
||||
|
|
|
@ -3,20 +3,20 @@
|
|||
----------------------------------------------------
|
||||
|
||||
-- Peer reviewed:
|
||||
create table ${stats_db_name}.result_peerreviewed as
|
||||
create table ${stats_db_name}.result_peerreviewed STORED AS PARQUET as
|
||||
select r.id as id, case when doi.doi_from_crossref=1 and grey.grey_lit=0 then true else false end as peer_reviewed
|
||||
from ${stats_db_name}.result r
|
||||
left outer join ${stats_db_name}.indi_pub_doi_from_crossref doi on doi.id=r.id
|
||||
left outer join ${stats_db_name}.indi_pub_grey_lit grey on grey.id=r.id;
|
||||
|
||||
-- Green OA:
|
||||
create table ${stats_db_name}.result_greenoa as
|
||||
create table ${stats_db_name}.result_greenoa STORED AS PARQUET as
|
||||
select r.id, case when green.green_oa=1 then true else false end as green
|
||||
from ${stats_db_name}.result r
|
||||
left outer join ${stats_db_name}.indi_pub_green_oa green on green.id=r.id;
|
||||
|
||||
-- GOLD OA:
|
||||
create table ${stats_db_name}.result_gold as
|
||||
create table ${stats_db_name}.result_gold STORED AS PARQUET as
|
||||
select r.id, case when gold.is_gold=1 then true else false end as gold
|
||||
from ${stats_db_name}.result r
|
||||
left outer join ${stats_db_name}.indi_pub_gold_oa gold on gold.id=r.id;
|
|
@ -40,13 +40,13 @@ SELECT substr(p.id, 4) as id,
|
|||
from ${openaire_db_name}.publication p
|
||||
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false;
|
||||
|
||||
CREATE TABLE ${stats_db_name}.publication_classifications AS
|
||||
CREATE TABLE ${stats_db_name}.publication_classifications STORED AS PARQUET AS
|
||||
SELECT substr(p.id, 4) as id, instancetype.classname as type
|
||||
from ${openaire_db_name}.publication p
|
||||
LATERAL VIEW explode(p.instance.instancetype) instances as instancetype
|
||||
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false;
|
||||
|
||||
CREATE TABLE ${stats_db_name}.publication_concepts AS
|
||||
CREATE TABLE ${stats_db_name}.publication_concepts STORED AS PARQUET AS
|
||||
SELECT substr(p.id, 4) as id, case
|
||||
when contexts.context.id RLIKE '^[^::]+::[^::]+::.+$' then contexts.context.id
|
||||
when contexts.context.id RLIKE '^[^::]+::[^::]+$' then concat(contexts.context.id, '::other')
|
||||
|
@ -55,7 +55,7 @@ from ${openaire_db_name}.publication p
|
|||
LATERAL VIEW explode(p.context) contexts as context
|
||||
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false;
|
||||
|
||||
CREATE TABLE ${stats_db_name}.publication_datasources as
|
||||
CREATE TABLE ${stats_db_name}.publication_datasources STORED AS PARQUET as
|
||||
SELECT p.id, case when d.id is null then 'other' else p.datasource end as datasource
|
||||
FROM (
|
||||
SELECT substr(p.id, 4) as id, substr(instances.instance.hostedby.key, 4) as datasource
|
||||
|
@ -66,30 +66,30 @@ FROM (
|
|||
from ${openaire_db_name}.datasource d
|
||||
WHERE d.datainfo.deletedbyinference = false and d.datainfo.invisible=false) d on p.datasource = d.id;
|
||||
|
||||
CREATE TABLE ${stats_db_name}.publication_languages AS
|
||||
CREATE TABLE ${stats_db_name}.publication_languages STORED AS PARQUET AS
|
||||
select substr(p.id, 4) as id, p.language.classname as language
|
||||
FROM ${openaire_db_name}.publication p
|
||||
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false;
|
||||
|
||||
CREATE TABLE ${stats_db_name}.publication_oids AS
|
||||
CREATE TABLE ${stats_db_name}.publication_oids STORED AS PARQUET AS
|
||||
SELECT substr(p.id, 4) AS id, oids.ids AS oid
|
||||
FROM ${openaire_db_name}.publication p
|
||||
LATERAL VIEW explode(p.originalid) oids AS ids
|
||||
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false;
|
||||
|
||||
CREATE TABLE ${stats_db_name}.publication_pids AS
|
||||
CREATE TABLE ${stats_db_name}.publication_pids STORED AS PARQUET AS
|
||||
SELECT substr(p.id, 4) AS id, ppid.qualifier.classname AS type, ppid.value as pid
|
||||
FROM ${openaire_db_name}.publication p
|
||||
LATERAL VIEW explode(p.pid) pids AS ppid
|
||||
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false;
|
||||
|
||||
CREATE TABLE ${stats_db_name}.publication_topics as
|
||||
CREATE TABLE ${stats_db_name}.publication_topics STORED AS PARQUET as
|
||||
select substr(p.id, 4) AS id, subjects.subject.qualifier.classname AS TYPE, subjects.subject.value AS topic
|
||||
FROM ${openaire_db_name}.publication p
|
||||
LATERAL VIEW explode(p.subject) subjects AS subject
|
||||
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false;
|
||||
|
||||
CREATE TABLE ${stats_db_name}.publication_citations AS
|
||||
CREATE TABLE ${stats_db_name}.publication_citations STORED AS PARQUET AS
|
||||
SELECT substr(p.id, 4) AS id, xpath_string(citation.value, "//citation/id[@type='openaire']/@value") AS cites
|
||||
FROM ${openaire_db_name}.publication p
|
||||
lateral view explode(p.extrainfo) citations AS citation
|
||||
|
|
|
@ -22,7 +22,16 @@ create table TARGET.result stored as parquet as
|
|||
'openorgs____::b84450f9864182c67b8611b5593f4250',
|
||||
'openorgs____::d41cf6bd4ab1b1362a44397e0b95c975',
|
||||
'openorgs____::eadc8da90a546e98c03f896661a2e4d4',
|
||||
'openorgs____::d2a09b9d5eabb10c95f9470e172d05d2') )) foo;
|
||||
'openorgs____::d2a09b9d5eabb10c95f9470e172d05d2',
|
||||
'openorgs____::d169c7407dd417152596908d48c11460',
|
||||
'openorgs____::1ec924b1759bb16d0a02f2dad8689b21',
|
||||
'openorgs____::2fb1e47b4612688d9de9169d579939a7',
|
||||
'openorgs____::759d59f05d77188faee99b7493b46805',
|
||||
'openorgs____::cad284878801b9465fa51a95b1d779db',
|
||||
'openorgs____::eadc8da90a546e98c03f896661a2e4d4',
|
||||
'openorgs____::c0286313e36479eff8676dba9b724b40'
|
||||
-- ,'openorgs____::c80a8243a5e5c620d7931c88d93bf17a' -- Paris Diderot
|
||||
) )) foo;
|
||||
compute stats TARGET.result;
|
||||
|
||||
create table TARGET.result_citations stored as parquet as select * from SOURCE.result_citations orig where exists (select 1 from TARGET.result r where r.id=orig.id);
|
||||
|
@ -52,7 +61,7 @@ compute stats TARGET.result_languages;
|
|||
create table TARGET.result_licenses stored as parquet as select * from SOURCE.result_licenses orig where exists (select 1 from TARGET.result r where r.id=orig.id);
|
||||
compute stats TARGET.result_licenses;
|
||||
|
||||
create table TARGET.licenses_normalized as select * from SOURCE.licenses_normalized;
|
||||
create table TARGET.licenses_normalized STORED AS PARQUET as select * from SOURCE.licenses_normalized;
|
||||
|
||||
create table TARGET.result_oids stored as parquet as select * from SOURCE.result_oids orig where exists (select 1 from TARGET.result r where r.id=orig.id);
|
||||
compute stats TARGET.result_oids;
|
||||
|
@ -83,7 +92,7 @@ compute stats TARGET.result_topics;
|
|||
|
||||
create view TARGET.foo1 as select * from SOURCE.result_result rr where rr.source in (select id from TARGET.result);
|
||||
create view TARGET.foo2 as select * from SOURCE.result_result rr where rr.target in (select id from TARGET.result);
|
||||
create table TARGET.result_result as select distinct * from (select * from TARGET.foo1 union all select * from TARGET.foo2) foufou;
|
||||
create table TARGET.result_result STORED AS PARQUET as select distinct * from (select * from TARGET.foo1 union all select * from TARGET.foo2) foufou;
|
||||
drop view TARGET.foo1;
|
||||
drop view TARGET.foo2;
|
||||
compute stats TARGET.result_result;
|
||||
|
@ -157,13 +166,13 @@ create view TARGET.indi_org_openess as select * from SOURCE.indi_org_openess;
|
|||
create table TARGET.indi_pub_hybrid_oa_with_cc stored as parquet as select * from SOURCE.indi_pub_hybrid_oa_with_cc orig where exists (select 1 from TARGET.result r where r.id=orig.id);
|
||||
compute stats TARGET.indi_pub_hybrid_oa_with_cc;
|
||||
|
||||
create table TARGET.indi_pub_downloads stored as parquet as select * from SOURCE.indi_pub_downloads orig where exists (select 1 from TARGET.result r where r.id=orig.id);
|
||||
create table TARGET.indi_pub_downloads stored as parquet as select * from SOURCE.indi_pub_downloads orig where exists (select 1 from TARGET.result r where r.id=orig.result_id);
|
||||
compute stats TARGET.indi_pub_downloads;
|
||||
create table TARGET.indi_pub_downloads_datasource stored as parquet as select * from SOURCE.indi_pub_downloads_datasource orig where exists (select 1 from TARGET.result r where r.id=orig.id);
|
||||
create table TARGET.indi_pub_downloads_datasource stored as parquet as select * from SOURCE.indi_pub_downloads_datasource orig where exists (select 1 from TARGET.result r where r.id=orig.result_id);
|
||||
compute stats TARGET.indi_pub_downloads_datasource;
|
||||
create table TARGET.indi_pub_downloads_year stored as parquet as select * from SOURCE.indi_pub_downloads_year orig where exists (select 1 from TARGET.result r where r.id=orig.id);
|
||||
create table TARGET.indi_pub_downloads_year stored as parquet as select * from SOURCE.indi_pub_downloads_year orig where exists (select 1 from TARGET.result r where r.id=orig.result_id);
|
||||
compute stats TARGET.indi_pub_downloads_year;
|
||||
create table TARGET.indi_pub_downloads_datasource_year stored as parquet as select * from SOURCE.indi_pub_downloads_datasource_year orig where exists (select 1 from TARGET.result r where r.id=orig.id);
|
||||
create table TARGET.indi_pub_downloads_datasource_year stored as parquet as select * from SOURCE.indi_pub_downloads_datasource_year orig where exists (select 1 from TARGET.result r where r.id=orig.result_id);
|
||||
compute stats TARGET.indi_pub_downloads_datasource_year;
|
||||
|
||||
--denorm
|
||||
|
|
|
@ -40,20 +40,20 @@ SELECT substr(d.id, 4) AS id,
|
|||
FROM ${openaire_db_name}.dataset d
|
||||
WHERE d.datainfo.deletedbyinference = FALSE and d.datainfo.invisible=false;
|
||||
|
||||
CREATE TABLE ${stats_db_name}.dataset_citations AS
|
||||
CREATE TABLE ${stats_db_name}.dataset_citations STORED AS PARQUET AS
|
||||
SELECT substr(d.id, 4) AS id, xpath_string(citation.value, "//citation/id[@type='openaire']/@value") AS cites
|
||||
FROM ${openaire_db_name}.dataset d
|
||||
LATERAL VIEW explode(d.extrainfo) citations AS citation
|
||||
WHERE xpath_string(citation.value, "//citation/id[@type='openaire']/@value") != ""
|
||||
and d.datainfo.deletedbyinference = false and d.datainfo.invisible=false;
|
||||
|
||||
CREATE TABLE ${stats_db_name}.dataset_classifications AS
|
||||
CREATE TABLE ${stats_db_name}.dataset_classifications STORED AS PARQUET AS
|
||||
SELECT substr(p.id, 4) AS id, instancetype.classname AS type
|
||||
FROM ${openaire_db_name}.dataset p
|
||||
LATERAL VIEW explode(p.instance.instancetype) instances AS instancetype
|
||||
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false;
|
||||
|
||||
CREATE TABLE ${stats_db_name}.dataset_concepts AS
|
||||
CREATE TABLE ${stats_db_name}.dataset_concepts STORED AS PARQUET AS
|
||||
SELECT substr(p.id, 4) as id, case
|
||||
when contexts.context.id RLIKE '^[^::]+::[^::]+::.+$' then contexts.context.id
|
||||
when contexts.context.id RLIKE '^[^::]+::[^::]+$' then concat(contexts.context.id, '::other')
|
||||
|
@ -62,7 +62,7 @@ from ${openaire_db_name}.dataset p
|
|||
LATERAL VIEW explode(p.context) contexts as context
|
||||
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false;
|
||||
|
||||
CREATE TABLE ${stats_db_name}.dataset_datasources AS
|
||||
CREATE TABLE ${stats_db_name}.dataset_datasources STORED AS PARQUET AS
|
||||
SELECT p.id, case when d.id IS NULL THEN 'other' ELSE p.datasource END AS datasource
|
||||
FROM (
|
||||
SELECT substr(p.id, 4) as id, substr(instances.instance.hostedby.key, 4) AS datasource
|
||||
|
@ -74,24 +74,24 @@ FROM (
|
|||
FROM ${openaire_db_name}.datasource d
|
||||
WHERE d.datainfo.deletedbyinference = false and d.datainfo.invisible=false) d ON p.datasource = d.id;
|
||||
|
||||
CREATE TABLE ${stats_db_name}.dataset_languages AS
|
||||
CREATE TABLE ${stats_db_name}.dataset_languages STORED AS PARQUET AS
|
||||
SELECT substr(p.id, 4) AS id, p.language.classname AS language
|
||||
FROM ${openaire_db_name}.dataset p
|
||||
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false;
|
||||
|
||||
CREATE TABLE ${stats_db_name}.dataset_oids AS
|
||||
CREATE TABLE ${stats_db_name}.dataset_oids STORED AS PARQUET AS
|
||||
SELECT substr(p.id, 4) AS id, oids.ids AS oid
|
||||
FROM ${openaire_db_name}.dataset p
|
||||
LATERAL VIEW explode(p.originalid) oids AS ids
|
||||
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false;
|
||||
|
||||
CREATE TABLE ${stats_db_name}.dataset_pids AS
|
||||
CREATE TABLE ${stats_db_name}.dataset_pids STORED AS PARQUET AS
|
||||
SELECT substr(p.id, 4) AS id, ppid.qualifier.classname AS type, ppid.value AS pid
|
||||
FROM ${openaire_db_name}.dataset p
|
||||
LATERAL VIEW explode(p.pid) pids AS ppid
|
||||
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false;
|
||||
|
||||
CREATE TABLE ${stats_db_name}.dataset_topics AS
|
||||
CREATE TABLE ${stats_db_name}.dataset_topics STORED AS PARQUET AS
|
||||
SELECT substr(p.id, 4) AS id, subjects.subject.qualifier.classname AS type, subjects.subject.value AS topic
|
||||
FROM ${openaire_db_name}.dataset p
|
||||
LATERAL VIEW explode(p.subject) subjects AS subject
|
||||
|
|
|
@ -40,20 +40,20 @@ SELECT substr(s.id, 4) as id,
|
|||
from ${openaire_db_name}.software s
|
||||
where s.datainfo.deletedbyinference = false and s.datainfo.invisible=false;
|
||||
|
||||
CREATE TABLE ${stats_db_name}.software_citations AS
|
||||
CREATE TABLE ${stats_db_name}.software_citations STORED AS PARQUET AS
|
||||
SELECT substr(s.id, 4) as id, xpath_string(citation.value, "//citation/id[@type='openaire']/@value") AS cites
|
||||
FROM ${openaire_db_name}.software s
|
||||
LATERAL VIEW explode(s.extrainfo) citations as citation
|
||||
where xpath_string(citation.value, "//citation/id[@type='openaire']/@value") != ""
|
||||
and s.datainfo.deletedbyinference = false and s.datainfo.invisible=false;
|
||||
|
||||
CREATE TABLE ${stats_db_name}.software_classifications AS
|
||||
CREATE TABLE ${stats_db_name}.software_classifications STORED AS PARQUET AS
|
||||
SELECT substr(p.id, 4) AS id, instancetype.classname AS type
|
||||
FROM ${openaire_db_name}.software p
|
||||
LATERAL VIEW explode(p.instance.instancetype) instances AS instancetype
|
||||
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false;
|
||||
|
||||
CREATE TABLE ${stats_db_name}.software_concepts AS
|
||||
CREATE TABLE ${stats_db_name}.software_concepts STORED AS PARQUET AS
|
||||
SELECT substr(p.id, 4) as id, case
|
||||
when contexts.context.id RLIKE '^[^::]+::[^::]+::.+$' then contexts.context.id
|
||||
when contexts.context.id RLIKE '^[^::]+::[^::]+$' then concat(contexts.context.id, '::other')
|
||||
|
@ -62,7 +62,7 @@ FROM ${openaire_db_name}.software p
|
|||
LATERAL VIEW explode(p.context) contexts AS context
|
||||
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false;
|
||||
|
||||
CREATE TABLE ${stats_db_name}.software_datasources AS
|
||||
CREATE TABLE ${stats_db_name}.software_datasources STORED AS PARQUET AS
|
||||
SELECT p.id, CASE WHEN d.id IS NULL THEN 'other' ELSE p.datasource end as datasource
|
||||
FROM (
|
||||
SELECT substr(p.id, 4) AS id, substr(instances.instance.hostedby.key, 4) AS datasource
|
||||
|
@ -74,24 +74,24 @@ FROM (
|
|||
FROM ${openaire_db_name}.datasource d
|
||||
WHERE d.datainfo.deletedbyinference = false and d.datainfo.invisible=false) d ON p.datasource = d.id;
|
||||
|
||||
CREATE TABLE ${stats_db_name}.software_languages AS
|
||||
CREATE TABLE ${stats_db_name}.software_languages STORED AS PARQUET AS
|
||||
select substr(p.id, 4) AS id, p.language.classname AS language
|
||||
FROM ${openaire_db_name}.software p
|
||||
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false;
|
||||
|
||||
CREATE TABLE ${stats_db_name}.software_oids AS
|
||||
CREATE TABLE ${stats_db_name}.software_oids STORED AS PARQUET AS
|
||||
SELECT substr(p.id, 4) AS id, oids.ids AS oid
|
||||
FROM ${openaire_db_name}.software p
|
||||
LATERAL VIEW explode(p.originalid) oids AS ids
|
||||
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false;
|
||||
|
||||
CREATE TABLE ${stats_db_name}.software_pids AS
|
||||
CREATE TABLE ${stats_db_name}.software_pids STORED AS PARQUET AS
|
||||
SELECT substr(p.id, 4) AS id, ppid.qualifier.classname AS type, ppid.value AS pid
|
||||
FROM ${openaire_db_name}.software p
|
||||
LATERAL VIEW explode(p.pid) pids AS ppid
|
||||
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false;
|
||||
|
||||
CREATE TABLE ${stats_db_name}.software_topics AS
|
||||
CREATE TABLE ${stats_db_name}.software_topics STORED AS PARQUET AS
|
||||
SELECT substr(p.id, 4) AS id, subjects.subject.qualifier.classname AS type, subjects.subject.value AS topic
|
||||
FROM ${openaire_db_name}.software p
|
||||
LATERAL VIEW explode(p.subject) subjects AS subject
|
||||
|
|
|
@ -40,18 +40,18 @@ FROM ${openaire_db_name}.otherresearchproduct o
|
|||
WHERE o.datainfo.deletedbyinference = FALSE and o.datainfo.invisible=false;
|
||||
|
||||
-- Otherresearchproduct_citations
|
||||
CREATE TABLE ${stats_db_name}.otherresearchproduct_citations AS
|
||||
CREATE TABLE ${stats_db_name}.otherresearchproduct_citations STORED AS PARQUET AS
|
||||
SELECT substr(o.id, 4) AS id, xpath_string(citation.value, "//citation/id[@type='openaire']/@value") AS cites
|
||||
FROM ${openaire_db_name}.otherresearchproduct o LATERAL VIEW explode(o.extrainfo) citations AS citation
|
||||
WHERE xpath_string(citation.value, "//citation/id[@type='openaire']/@value") != ""
|
||||
and o.datainfo.deletedbyinference = false and o.datainfo.invisible=false;
|
||||
|
||||
CREATE TABLE ${stats_db_name}.otherresearchproduct_classifications AS
|
||||
CREATE TABLE ${stats_db_name}.otherresearchproduct_classifications STORED AS PARQUET AS
|
||||
SELECT substr(p.id, 4) AS id, instancetype.classname AS type
|
||||
FROM ${openaire_db_name}.otherresearchproduct p LATERAL VIEW explode(p.instance.instancetype) instances AS instancetype
|
||||
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false;
|
||||
|
||||
CREATE TABLE ${stats_db_name}.otherresearchproduct_concepts AS
|
||||
CREATE TABLE ${stats_db_name}.otherresearchproduct_concepts STORED AS PARQUET AS
|
||||
SELECT substr(p.id, 4) as id, case
|
||||
when contexts.context.id RLIKE '^[^::]+::[^::]+::.+$' then contexts.context.id
|
||||
when contexts.context.id RLIKE '^[^::]+::[^::]+$' then concat(contexts.context.id, '::other')
|
||||
|
@ -59,7 +59,7 @@ SELECT substr(p.id, 4) as id, case
|
|||
FROM ${openaire_db_name}.otherresearchproduct p LATERAL VIEW explode(p.context) contexts AS context
|
||||
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false;
|
||||
|
||||
CREATE TABLE ${stats_db_name}.otherresearchproduct_datasources AS
|
||||
CREATE TABLE ${stats_db_name}.otherresearchproduct_datasources STORED AS PARQUET AS
|
||||
SELECT p.id, CASE WHEN d.id IS NULL THEN 'other' ELSE p.datasource END AS datasource
|
||||
FROM (SELECT substr(p.id, 4) AS id, substr(instances.instance.hostedby.key, 4) AS datasource
|
||||
from ${openaire_db_name}.otherresearchproduct p lateral view explode(p.instance) instances as instance
|
||||
|
@ -68,22 +68,22 @@ FROM (SELECT substr(p.id, 4) AS id, substr(instances.instance.hostedby.key, 4) A
|
|||
from ${openaire_db_name}.datasource d
|
||||
WHERE d.datainfo.deletedbyinference = false and d.datainfo.invisible=false) d on p.datasource = d.id;
|
||||
|
||||
CREATE TABLE ${stats_db_name}.otherresearchproduct_languages AS
|
||||
CREATE TABLE ${stats_db_name}.otherresearchproduct_languages STORED AS PARQUET AS
|
||||
SELECT substr(p.id, 4) AS id, p.language.classname AS language
|
||||
FROM ${openaire_db_name}.otherresearchproduct p
|
||||
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false;
|
||||
|
||||
CREATE TABLE ${stats_db_name}.otherresearchproduct_oids AS
|
||||
CREATE TABLE ${stats_db_name}.otherresearchproduct_oids STORED AS PARQUET AS
|
||||
SELECT substr(p.id, 4) AS id, oids.ids AS oid
|
||||
FROM ${openaire_db_name}.otherresearchproduct p LATERAL VIEW explode(p.originalid) oids AS ids
|
||||
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false;
|
||||
|
||||
CREATE TABLE ${stats_db_name}.otherresearchproduct_pids AS
|
||||
CREATE TABLE ${stats_db_name}.otherresearchproduct_pids STORED AS PARQUET AS
|
||||
SELECT substr(p.id, 4) AS id, ppid.qualifier.classname AS type, ppid.value AS pid
|
||||
FROM ${openaire_db_name}.otherresearchproduct p LATERAL VIEW explode(p.pid) pids AS ppid
|
||||
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false;
|
||||
|
||||
CREATE TABLE ${stats_db_name}.otherresearchproduct_topics AS
|
||||
CREATE TABLE ${stats_db_name}.otherresearchproduct_topics STORED AS PARQUET AS
|
||||
SELECT substr(p.id, 4) AS id, subjects.subject.qualifier.classname AS type, subjects.subject.value AS topic
|
||||
FROM ${openaire_db_name}.otherresearchproduct p LATERAL VIEW explode(p.subject) subjects AS subject
|
||||
where p.datainfo.deletedbyinference = false and p.datainfo.invisible=false;
|
|
@ -3,24 +3,24 @@
|
|||
-- Project table/view and Project related tables/views
|
||||
------------------------------------------------------
|
||||
------------------------------------------------------
|
||||
CREATE TABLE ${stats_db_name}.project_oids AS
|
||||
CREATE TABLE ${stats_db_name}.project_oids STORED AS PARQUET AS
|
||||
SELECT substr(p.id, 4) AS id, oids.ids AS oid
|
||||
FROM ${openaire_db_name}.project p LATERAL VIEW explode(p.originalid) oids AS ids
|
||||
where p.datainfo.deletedbyinference=false and p.datainfo.invisible=false;
|
||||
|
||||
CREATE TABLE ${stats_db_name}.project_organizations AS
|
||||
CREATE TABLE ${stats_db_name}.project_organizations STORED AS PARQUET AS
|
||||
SELECT substr(r.source, 4) AS id, substr(r.target, 4) AS organization
|
||||
from ${openaire_db_name}.relation r
|
||||
WHERE r.reltype = 'projectOrganization'
|
||||
and r.datainfo.deletedbyinference = false and r.datainfo.invisible=false;
|
||||
|
||||
CREATE TABLE ${stats_db_name}.project_results AS
|
||||
CREATE TABLE ${stats_db_name}.project_results STORED AS PARQUET AS
|
||||
SELECT substr(r.target, 4) AS id, substr(r.source, 4) AS result, r.datainfo.provenanceaction.classname as provenance
|
||||
FROM ${openaire_db_name}.relation r
|
||||
WHERE r.reltype = 'resultProject'
|
||||
and r.datainfo.deletedbyinference = false and r.datainfo.invisible=false;
|
||||
|
||||
create table ${stats_db_name}.project_classification as
|
||||
create table ${stats_db_name}.project_classification STORED AS PARQUET as
|
||||
select substr(p.id, 4) as id, class.h2020programme.code, class.level1, class.level2, class.level3
|
||||
from ${openaire_db_name}.project p
|
||||
lateral view explode(p.h2020classification) classifs as class
|
||||
|
@ -76,7 +76,7 @@ SELECT substr(p.id, 4) AS id,
|
|||
FROM ${openaire_db_name}.project p
|
||||
WHERE p.datainfo.deletedbyinference = false and p.datainfo.invisible=false;
|
||||
|
||||
create table ${stats_db_name}.funder as
|
||||
create table ${stats_db_name}.funder STORED AS PARQUET as
|
||||
select distinct xpath_string(fund, '//funder/id') as id,
|
||||
xpath_string(fund, '//funder/name') as name,
|
||||
xpath_string(fund, '//funder/shortname') as shortname
|
||||
|
|
|
@ -123,13 +123,13 @@ UNION ALL
|
|||
SELECT *
|
||||
FROM ${stats_db_name}.otherresearchproduct_topics;
|
||||
|
||||
CREATE TABLE ${stats_db_name}.result_organization AS
|
||||
CREATE TABLE ${stats_db_name}.result_organization STORED AS PARQUET AS
|
||||
SELECT substr(r.target, 4) AS id, substr(r.source, 4) AS organization
|
||||
FROM ${openaire_db_name}.relation r
|
||||
WHERE r.reltype = 'resultOrganization'
|
||||
and r.datainfo.deletedbyinference = false and r.datainfo.invisible=false;
|
||||
|
||||
CREATE TABLE ${stats_db_name}.result_projects AS
|
||||
CREATE TABLE ${stats_db_name}.result_projects STORED AS PARQUET AS
|
||||
select pr.result AS id, pr.id AS project, datediff(p.enddate, p.startdate) AS daysfromend, pr.provenance as provenance
|
||||
FROM ${stats_db_name}.result r
|
||||
JOIN ${stats_db_name}.project_results pr ON r.id = pr.result
|
||||
|
|
|
@ -80,24 +80,24 @@ UPDATE ${stats_db_name}.datasource_tmp
|
|||
SET yearofvalidation=null
|
||||
WHERE yearofvalidation = '-1';
|
||||
|
||||
CREATE TABLE ${stats_db_name}.datasource_languages AS
|
||||
CREATE TABLE ${stats_db_name}.datasource_languages STORED AS PARQUET AS
|
||||
SELECT substr(d.id, 4) AS id, langs.languages AS language
|
||||
FROM ${openaire_db_name}.datasource d LATERAL VIEW explode(d.odlanguages.value) langs AS languages
|
||||
where d.datainfo.deletedbyinference=false and d.datainfo.invisible=false;
|
||||
|
||||
CREATE TABLE ${stats_db_name}.datasource_oids AS
|
||||
CREATE TABLE ${stats_db_name}.datasource_oids STORED AS PARQUET AS
|
||||
SELECT substr(d.id, 4) AS id, oids.ids AS oid
|
||||
FROM ${openaire_db_name}.datasource d LATERAL VIEW explode(d.originalid) oids AS ids
|
||||
where d.datainfo.deletedbyinference=false and d.datainfo.invisible=false;
|
||||
|
||||
CREATE TABLE ${stats_db_name}.datasource_organizations AS
|
||||
CREATE TABLE ${stats_db_name}.datasource_organizations STORED AS PARQUET AS
|
||||
SELECT substr(r.target, 4) AS id, substr(r.source, 4) AS organization
|
||||
FROM ${openaire_db_name}.relation r
|
||||
WHERE r.reltype = 'datasourceOrganization' and r.datainfo.deletedbyinference = false and r.datainfo.invisible=false;
|
||||
|
||||
-- datasource sources:
|
||||
-- where the datasource info have been collected from.
|
||||
create table if not exists ${stats_db_name}.datasource_sources AS
|
||||
create table if not exists ${stats_db_name}.datasource_sources STORED AS PARQUET AS
|
||||
select substr(d.id, 4) as id, substr(cf.key, 4) as datasource
|
||||
from ${openaire_db_name}.datasource d lateral view explode(d.collectedfrom) cfrom as cf
|
||||
where d.datainfo.deletedbyinference = false and d.datainfo.invisible=false;
|
||||
|
|
|
@ -3,7 +3,7 @@
|
|||
-- Organization table/view and Organization related tables/views
|
||||
----------------------------------------------------------------
|
||||
----------------------------------------------------------------
|
||||
CREATE TABLE IF NOT EXISTS ${stats_db_name}.organization AS
|
||||
CREATE TABLE IF NOT EXISTS ${stats_db_name}.organization STORED AS PARQUET AS
|
||||
SELECT substr(o.id, 4) as id,
|
||||
o.legalname.value as name,
|
||||
o.legalshortname.value as legalshortname,
|
||||
|
@ -11,13 +11,10 @@ SELECT substr(o.id, 4) as id,
|
|||
FROM ${openaire_db_name}.organization o
|
||||
WHERE o.datainfo.deletedbyinference = FALSE and o.datainfo.invisible = FALSE;
|
||||
|
||||
CREATE OR REPLACE VIEW ${stats_db_name}.organization_datasources AS
|
||||
CREATE OR REPLACE VIEW ${stats_db_name}.organization_datasources STORED AS PARQUET AS
|
||||
SELECT organization AS id, id AS datasource
|
||||
FROM ${stats_db_name}.datasource_organizations;
|
||||
|
||||
CREATE OR REPLACE VIEW ${stats_db_name}.organization_projects AS
|
||||
CREATE OR REPLACE VIEW ${stats_db_name}.organization_projects STORED AS PARQUET AS
|
||||
SELECT id AS project, organization as id
|
||||
FROM ${stats_db_name}.project_organizations;
|
||||
|
||||
-- ANALYZE TABLE ${stats_db_name}.organization COMPUTE STATISTICS;
|
||||
-- ANALYZE TABLE ${stats_db_name}.organization COMPUTE STATISTICS FOR COLUMNS;
|
||||
FROM ${stats_db_name}.project_organizations;
|
Loading…
Reference in New Issue