This commit is contained in:
Antonis Lempesis 2021-11-26 15:40:40 +02:00
parent cb3adb90f4
commit 12749a0a77
16 changed files with 88 additions and 112 deletions

View File

@ -30,18 +30,14 @@ hdfs dfs -copyFromLocal concepts.csv ${TMP}
hdfs dfs -chmod -R 777 ${TMP} hdfs dfs -chmod -R 777 ${TMP}
echo "Creating and populating impala tables" echo "Creating and populating impala tables"
impala-shell -q "invalidate metadata" hive -e "create table ${TARGET_DB}.context (id string, name string) row format delimited fields terminated by ','"
impala-shell -d ${TARGET_DB} -q "invalidate metadata" hive -e "create table ${TARGET_DB}.category (context string, id string, name string) row format delimited fields terminated by ','"
impala-shell -q "create table ${TARGET_DB}.context (id string, name string) row format delimited fields terminated by ','" hive -e "create table ${TARGET_DB}.concept (category string, id string, name string) row format delimited fields terminated by ','"
impala-shell -q "create table ${TARGET_DB}.category (context string, id string, name string) row format delimited fields terminated by ','" hive -e "load data inpath '${TMP}/contexts.csv' into table ${TARGET_DB}.context"
impala-shell -q "create table ${TARGET_DB}.concept (category string, id string, name string) row format delimited fields terminated by ','" hive -e "load data inpath '${TMP}/categories.csv' into table ${TARGET_DB}.category"
impala-shell -d ${TARGET_DB} -q "invalidate metadata" hive -e "load data inpath '${TMP}/concepts.csv' into table ${TARGET_DB}.concept"
impala-shell -q "load data inpath '${TMP}/contexts.csv' into table ${TARGET_DB}.context"
impala-shell -q "load data inpath '${TMP}/categories.csv' into table ${TARGET_DB}.category"
impala-shell -q "load data inpath '${TMP}/concepts.csv' into table ${TARGET_DB}.concept"
echo "Cleaning up" echo "Cleaning up"
hdfs dfs -rm -f -r -skipTrash ${TMP}
rm concepts.csv rm concepts.csv
rm categories.csv rm categories.csv
rm contexts.csv rm contexts.csv

View File

@ -10,9 +10,7 @@ export SOURCE=$1
export SHADOW=$2 export SHADOW=$2
echo "Updating shadow database" echo "Updating shadow database"
impala-shell -q "invalidate metadata" hive --database ${SOURCE} -e "show tables" | grep -v WARN | sed "s/^\(.*\)/analyze table ${SOURCE}.\1 compute statistics;/" | impala-shell -c -f -
impala-shell -d ${SOURCE} -q "invalidate metadata"
impala-shell -d ${SOURCE} -q "show tables" --delimited | sed "s/^\(.*\)/compute stats ${SOURCE}.\1;/" | impala-shell -c -f -
impala-shell -q "create database if not exists ${SHADOW}" impala-shell -q "create database if not exists ${SHADOW}"
impala-shell -d ${SHADOW} -q "show tables" --delimited | sed "s/^/drop view if exists ${SHADOW}./" | sed "s/$/;/" | impala-shell -c -f - impala-shell -d ${SHADOW} -q "show tables" --delimited | sed "s/^/drop view if exists ${SHADOW}./" | sed "s/$/;/" | impala-shell -c -f -
impala-shell -d ${SOURCE} -q "show tables" --delimited | sed "s/\(.*\)/create view ${SHADOW}.\1 as select * from ${SOURCE}.\1;/" | impala-shell -c -f - impala-shell -d ${SOURCE} -q "show tables" --delimited | sed "s/\(.*\)/create view ${SHADOW}.\1 as select * from ${SOURCE}.\1;/" | impala-shell -c -f -

View File

@ -4,29 +4,28 @@
------------------------------------------------------------------------------------------------ ------------------------------------------------------------------------------------------------
------------------------------------------------------------------------------------------------ ------------------------------------------------------------------------------------------------
CREATE OR REPLACE VIEW ${stats_db_name}.fundref AS CREATE OR REPLACE VIEW ${stats_db_name}.fundref AS
SELECT * SELECT * FROM ${external_stats_db_name}.fundref;
FROM ${external_stats_db_name}.fundref;
CREATE OR REPLACE VIEW ${stats_db_name}.country AS CREATE OR REPLACE VIEW ${stats_db_name}.country AS
SELECT * SELECT * FROM ${external_stats_db_name}.country;
FROM ${external_stats_db_name}.country;
CREATE OR REPLACE VIEW ${stats_db_name}.countrygdp AS CREATE OR REPLACE VIEW ${stats_db_name}.countrygdp AS
SELECT * SELECT * FROM ${external_stats_db_name}.countrygdp;
FROM ${external_stats_db_name}.countrygdp;
CREATE OR REPLACE VIEW ${stats_db_name}.roarmap AS CREATE OR REPLACE VIEW ${stats_db_name}.roarmap AS
SELECT * SELECT * FROM ${external_stats_db_name}.roarmap;
FROM ${external_stats_db_name}.roarmap;
CREATE OR REPLACE VIEW ${stats_db_name}.rndexpediture AS CREATE OR REPLACE VIEW ${stats_db_name}.rndexpediture AS
SELECT * SELECT * FROM ${external_stats_db_name}.rndexpediture;
FROM ${external_stats_db_name}.rndexpediture;
CREATE OR REPLACE VIEW ${stats_db_name}.licenses_normalized AS CREATE OR REPLACE VIEW ${stats_db_name}.licenses_normalized AS
SELECT * SELECT * FROM ${external_stats_db_name}.licenses_normalized;
FROM ${external_stats_db_name}.licenses_normalized;
-- Correctly spelled alias for the external table, which is named rndexpediture (sic).
-- Uses CREATE OR REPLACE and ${external_stats_db_name}, like the other external-stats views
-- in this script, so the statement is re-runnable and follows the configured database.
-- NOTE(review): assumes ${external_stats_db_name} resolves to the previously hard-coded stats_ext -- confirm.
CREATE OR REPLACE VIEW ${stats_db_name}.rndexpenditure AS
SELECT * FROM ${external_stats_db_name}.rndexpediture;
-- Exposes the external issn_gold_oa_dataset table inside the stats database.
-- Uses CREATE OR REPLACE and ${external_stats_db_name}, like the other external-stats views
-- in this script, so the statement is re-runnable and follows the configured database.
-- NOTE(review): assumes ${external_stats_db_name} resolves to the previously hard-coded stats_ext -- confirm.
CREATE OR REPLACE VIEW ${stats_db_name}.issn_gold_oa_dataset AS
SELECT * FROM ${external_stats_db_name}.issn_gold_oa_dataset;
------------------------------------------------------------------------------------------------ ------------------------------------------------------------------------------------------------
------------------------------------------------------------------------------------------------ ------------------------------------------------------------------------------------------------

View File

@ -102,7 +102,7 @@ WHERE otherresearchproduct_tmp.id IN (SELECT distinct r.id
AND pr.id = p.id AND pr.id = p.id
AND to_date(r.date) - to_date(p.enddate) > 0); AND to_date(r.date) - to_date(p.enddate) > 0);
-- A view stores no data, so Hive's CREATE VIEW grammar has no STORED AS clause;
-- a storage format can only be given on CREATE TABLE statements.
CREATE OR REPLACE VIEW ${stats_db_name}.project_results_publication AS CREATE OR REPLACE VIEW ${stats_db_name}.project_results_publication AS
SELECT result_projects.id AS result, SELECT result_projects.id AS result,
result_projects.project AS project_results, result_projects.project AS project_results,
result.date as resultdate, result.date as resultdate,

View File

@ -5,7 +5,7 @@
-- Sources related tables/views -- Sources related tables/views
------------------------------------------------------ ------------------------------------------------------
------------------------------------------------------ ------------------------------------------------------
CREATE TABLE IF NOT EXISTS ${stats_db_name}.publication_sources as CREATE TABLE IF NOT EXISTS ${stats_db_name}.publication_sources STORED AS PARQUET as
SELECT p.id, case when d.id is null then 'other' else p.datasource end as datasource SELECT p.id, case when d.id is null then 'other' else p.datasource end as datasource
FROM ( FROM (
SELECT substr(p.id, 4) as id, substr(datasource, 4) as datasource SELECT substr(p.id, 4) as id, substr(datasource, 4) as datasource
@ -16,7 +16,7 @@ LEFT OUTER JOIN
from ${openaire_db_name}.datasource d from ${openaire_db_name}.datasource d
WHERE d.datainfo.deletedbyinference=false) d on p.datasource = d.id; WHERE d.datainfo.deletedbyinference=false) d on p.datasource = d.id;
CREATE TABLE IF NOT EXISTS ${stats_db_name}.dataset_sources as CREATE TABLE IF NOT EXISTS ${stats_db_name}.dataset_sources STORED AS PARQUET as
SELECT p.id, case when d.id is null then 'other' else p.datasource end as datasource SELECT p.id, case when d.id is null then 'other' else p.datasource end as datasource
FROM ( FROM (
SELECT substr(p.id, 4) as id, substr(datasource, 4) as datasource SELECT substr(p.id, 4) as id, substr(datasource, 4) as datasource
@ -27,7 +27,7 @@ LEFT OUTER JOIN
from ${openaire_db_name}.datasource d from ${openaire_db_name}.datasource d
WHERE d.datainfo.deletedbyinference=false) d on p.datasource = d.id; WHERE d.datainfo.deletedbyinference=false) d on p.datasource = d.id;
CREATE TABLE IF NOT EXISTS ${stats_db_name}.software_sources as CREATE TABLE IF NOT EXISTS ${stats_db_name}.software_sources STORED AS PARQUET as
SELECT p.id, case when d.id is null then 'other' else p.datasource end as datasource SELECT p.id, case when d.id is null then 'other' else p.datasource end as datasource
FROM ( FROM (
SELECT substr(p.id, 4) as id, substr(datasource, 4) as datasource SELECT substr(p.id, 4) as id, substr(datasource, 4) as datasource
@ -38,7 +38,7 @@ LEFT OUTER JOIN
from ${openaire_db_name}.datasource d from ${openaire_db_name}.datasource d
WHERE d.datainfo.deletedbyinference=false) d on p.datasource = d.id; WHERE d.datainfo.deletedbyinference=false) d on p.datasource = d.id;
CREATE TABLE IF NOT EXISTS ${stats_db_name}.otherresearchproduct_sources as CREATE TABLE IF NOT EXISTS ${stats_db_name}.otherresearchproduct_sources STORED AS PARQUET as
SELECT p.id, case when d.id is null then 'other' else p.datasource end as datasource SELECT p.id, case when d.id is null then 'other' else p.datasource end as datasource
FROM ( FROM (
SELECT substr(p.id, 4) as id, substr(datasource, 4) as datasource SELECT substr(p.id, 4) as id, substr(datasource, 4) as datasource
@ -59,7 +59,7 @@ UNION ALL
SELECT * FROM ${stats_db_name}.otherresearchproduct_sources; SELECT * FROM ${stats_db_name}.otherresearchproduct_sources;
create table ${stats_db_name}.result_orcid as create table ${stats_db_name}.result_orcid STORED AS PARQUET as
select distinct res.id, regexp_replace(res.orcid, 'http://orcid.org/' ,'') as orcid select distinct res.id, regexp_replace(res.orcid, 'http://orcid.org/' ,'') as orcid
from ( from (
SELECT substr(res.id, 4) as id, auth_pid.value as orcid SELECT substr(res.id, 4) as id, auth_pid.value as orcid

View File

@ -5,27 +5,27 @@
-- Licences related tables/views -- Licences related tables/views
------------------------------------------------------ ------------------------------------------------------
------------------------------------------------------ ------------------------------------------------------
CREATE TABLE IF NOT EXISTS ${stats_db_name}.publication_licenses AS CREATE TABLE IF NOT EXISTS ${stats_db_name}.publication_licenses STORED AS PARQUET AS
SELECT substr(p.id, 4) as id, licenses.value as type SELECT substr(p.id, 4) as id, licenses.value as type
from ${openaire_db_name}.publication p LATERAL VIEW explode(p.instance.license) instances as licenses from ${openaire_db_name}.publication p LATERAL VIEW explode(p.instance.license) instances as licenses
where licenses.value is not null and licenses.value != '' and p.datainfo.deletedbyinference=false; where licenses.value is not null and licenses.value != '' and p.datainfo.deletedbyinference=false;
CREATE TABLE IF NOT EXISTS ${stats_db_name}.dataset_licenses AS CREATE TABLE IF NOT EXISTS ${stats_db_name}.dataset_licenses STORED AS PARQUET AS
SELECT substr(p.id, 4) as id, licenses.value as type SELECT substr(p.id, 4) as id, licenses.value as type
from ${openaire_db_name}.dataset p LATERAL VIEW explode(p.instance.license) instances as licenses from ${openaire_db_name}.dataset p LATERAL VIEW explode(p.instance.license) instances as licenses
where licenses.value is not null and licenses.value != '' and p.datainfo.deletedbyinference=false; where licenses.value is not null and licenses.value != '' and p.datainfo.deletedbyinference=false;
CREATE TABLE IF NOT EXISTS ${stats_db_name}.software_licenses AS CREATE TABLE IF NOT EXISTS ${stats_db_name}.software_licenses STORED AS PARQUET AS
SELECT substr(p.id, 4) as id, licenses.value as type SELECT substr(p.id, 4) as id, licenses.value as type
from ${openaire_db_name}.software p LATERAL VIEW explode(p.instance.license) instances as licenses from ${openaire_db_name}.software p LATERAL VIEW explode(p.instance.license) instances as licenses
where licenses.value is not null and licenses.value != '' and p.datainfo.deletedbyinference=false; where licenses.value is not null and licenses.value != '' and p.datainfo.deletedbyinference=false;
CREATE TABLE IF NOT EXISTS ${stats_db_name}.otherresearchproduct_licenses AS CREATE TABLE IF NOT EXISTS ${stats_db_name}.otherresearchproduct_licenses STORED AS PARQUET AS
SELECT substr(p.id, 4) as id, licenses.value as type SELECT substr(p.id, 4) as id, licenses.value as type
from ${openaire_db_name}.otherresearchproduct p LATERAL VIEW explode(p.instance.license) instances as licenses from ${openaire_db_name}.otherresearchproduct p LATERAL VIEW explode(p.instance.license) instances as licenses
where licenses.value is not null and licenses.value != '' and p.datainfo.deletedbyinference=false; where licenses.value is not null and licenses.value != '' and p.datainfo.deletedbyinference=false;
-- result_licenses is a view (a UNION ALL over the *_licenses tables), so it takes no
-- STORED AS clause: Hive's CREATE VIEW grammar does not accept a storage format.
CREATE VIEW IF NOT EXISTS ${stats_db_name}.result_licenses AS CREATE VIEW IF NOT EXISTS ${stats_db_name}.result_licenses AS
SELECT * FROM ${stats_db_name}.publication_licenses SELECT * FROM ${stats_db_name}.publication_licenses
UNION ALL UNION ALL
SELECT * FROM ${stats_db_name}.dataset_licenses SELECT * FROM ${stats_db_name}.dataset_licenses
@ -34,11 +34,11 @@ SELECT * FROM ${stats_db_name}.software_licenses
UNION ALL UNION ALL
SELECT * FROM ${stats_db_name}.otherresearchproduct_licenses; SELECT * FROM ${stats_db_name}.otherresearchproduct_licenses;
CREATE TABLE IF NOT EXISTS ${stats_db_name}.organization_pids AS CREATE TABLE IF NOT EXISTS ${stats_db_name}.organization_pids STORED AS PARQUET AS
select substr(o.id, 4) as id, ppid.qualifier.classname as type, ppid.value as pid select substr(o.id, 4) as id, ppid.qualifier.classname as type, ppid.value as pid
from ${openaire_db_name}.organization o lateral view explode(o.pid) pids as ppid; from ${openaire_db_name}.organization o lateral view explode(o.pid) pids as ppid;
CREATE TABLE IF NOT EXISTS ${stats_db_name}.organization_sources as CREATE TABLE IF NOT EXISTS ${stats_db_name}.organization_sources STORED AS PARQUET as
SELECT o.id, case when d.id is null then 'other' else o.datasource end as datasource SELECT o.id, case when d.id is null then 'other' else o.datasource end as datasource
FROM ( FROM (
SELECT substr(o.id, 4) as id, substr(instances.instance.key, 4) as datasource SELECT substr(o.id, 4) as id, substr(instances.instance.key, 4) as datasource
@ -46,17 +46,4 @@ FROM (
LEFT OUTER JOIN ( LEFT OUTER JOIN (
SELECT substr(d.id, 4) id SELECT substr(d.id, 4) id
from ${openaire_db_name}.datasource d from ${openaire_db_name}.datasource d
WHERE d.datainfo.deletedbyinference=false) d on o.datasource = d.id; WHERE d.datainfo.deletedbyinference=false) d on o.datasource = d.id;
-- ANALYZE TABLE ${stats_db_name}.publication_licenses COMPUTE STATISTICS;
-- ANALYZE TABLE ${stats_db_name}.publication_licenses COMPUTE STATISTICS FOR COLUMNS;
-- ANALYZE TABLE ${stats_db_name}.dataset_licenses COMPUTE STATISTICS;
-- ANALYZE TABLE ${stats_db_name}.dataset_licenses COMPUTE STATISTICS FOR COLUMNS;
-- ANALYZE TABLE ${stats_db_name}.software_licenses COMPUTE STATISTICS;
-- ANALYZE TABLE ${stats_db_name}.software_licenses COMPUTE STATISTICS FOR COLUMNS;
-- ANALYZE TABLE ${stats_db_name}.otherresearchproduct_licenses COMPUTE STATISTICS;
-- ANALYZE TABLE ${stats_db_name}.otherresearchproduct_licenses COMPUTE STATISTICS FOR COLUMNS;
-- ANALYZE TABLE ${stats_db_name}.organization_pids COMPUTE STATISTICS;
-- ANALYZE TABLE ${stats_db_name}.organization_pids COMPUTE STATISTICS FOR COLUMNS;
-- ANALYZE TABLE ${stats_db_name}.organization_sources COMPUTE STATISTICS;
-- ANALYZE TABLE ${stats_db_name}.organization_sources COMPUTE STATISTICS FOR COLUMNS;

View File

@ -6,27 +6,27 @@
------------------------------------------------------ ------------------------------------------------------
------------------------------------------------------ ------------------------------------------------------
CREATE TABLE IF NOT EXISTS ${stats_db_name}.publication_refereed as CREATE TABLE IF NOT EXISTS ${stats_db_name}.publication_refereed STORED AS PARQUET as
select substr(r.id, 4) as id, inst.refereed.classname as refereed select substr(r.id, 4) as id, inst.refereed.classname as refereed
from ${openaire_db_name}.publication r lateral view explode(r.instance) instances as inst from ${openaire_db_name}.publication r lateral view explode(r.instance) instances as inst
where r.datainfo.deletedbyinference=false; where r.datainfo.deletedbyinference=false;
CREATE TABLE IF NOT EXISTS ${stats_db_name}.dataset_refereed as CREATE TABLE IF NOT EXISTS ${stats_db_name}.dataset_refereed STORED AS PARQUET as
select substr(r.id, 4) as id, inst.refereed.classname as refereed select substr(r.id, 4) as id, inst.refereed.classname as refereed
from ${openaire_db_name}.dataset r lateral view explode(r.instance) instances as inst from ${openaire_db_name}.dataset r lateral view explode(r.instance) instances as inst
where r.datainfo.deletedbyinference=false; where r.datainfo.deletedbyinference=false;
CREATE TABLE IF NOT EXISTS ${stats_db_name}.software_refereed as CREATE TABLE IF NOT EXISTS ${stats_db_name}.software_refereed STORED AS PARQUET as
select substr(r.id, 4) as id, inst.refereed.classname as refereed select substr(r.id, 4) as id, inst.refereed.classname as refereed
from ${openaire_db_name}.software r lateral view explode(r.instance) instances as inst from ${openaire_db_name}.software r lateral view explode(r.instance) instances as inst
where r.datainfo.deletedbyinference=false; where r.datainfo.deletedbyinference=false;
CREATE TABLE IF NOT EXISTS ${stats_db_name}.otherresearchproduct_refereed as CREATE TABLE IF NOT EXISTS ${stats_db_name}.otherresearchproduct_refereed STORED AS PARQUET as
select substr(r.id, 4) as id, inst.refereed.classname as refereed select substr(r.id, 4) as id, inst.refereed.classname as refereed
from ${openaire_db_name}.otherresearchproduct r lateral view explode(r.instance) instances as inst from ${openaire_db_name}.otherresearchproduct r lateral view explode(r.instance) instances as inst
where r.datainfo.deletedbyinference=false; where r.datainfo.deletedbyinference=false;
-- result_refereed is a view (a union over the *_refereed tables), so it takes no
-- STORED AS clause: Hive's CREATE VIEW grammar does not accept a storage format.
CREATE VIEW IF NOT EXISTS ${stats_db_name}.result_refereed as CREATE VIEW IF NOT EXISTS ${stats_db_name}.result_refereed as
select * from ${stats_db_name}.publication_refereed select * from ${stats_db_name}.publication_refereed
union all union all
select * from ${stats_db_name}.dataset_refereed select * from ${stats_db_name}.dataset_refereed

View File

@ -1,21 +1,21 @@
------------------------------------------- -------------------------------------------
--- Extra tables, mostly used by indicators --- Extra tables, mostly used by indicators
create table ${stats_db_name}.result_projectcount as create table ${stats_db_name}.result_projectcount STORED AS PARQUET as
select r.id, count(distinct p.id) as count select r.id, count(distinct p.id) as count
from ${stats_db_name}.result r from ${stats_db_name}.result r
left outer join ${stats_db_name}.result_projects rp on rp.id=r.id left outer join ${stats_db_name}.result_projects rp on rp.id=r.id
left outer join ${stats_db_name}.project p on p.id=rp.project left outer join ${stats_db_name}.project p on p.id=rp.project
group by r.id; group by r.id;
create table ${stats_db_name}.result_fundercount as create table ${stats_db_name}.result_fundercount STORED AS PARQUET as
select r.id, count(distinct p.funder) as count select r.id, count(distinct p.funder) as count
from ${stats_db_name}.result r from ${stats_db_name}.result r
left outer join ${stats_db_name}.result_projects rp on rp.id=r.id left outer join ${stats_db_name}.result_projects rp on rp.id=r.id
left outer join ${stats_db_name}.project p on p.id=rp.project left outer join ${stats_db_name}.project p on p.id=rp.project
group by r.id; group by r.id;
create table ${stats_db_name}.project_resultcount as create table ${stats_db_name}.project_resultcount STORED AS PARQUET as
with rcount as ( with rcount as (
select p.id as pid, count(distinct r.id) as `count`, r.type as type select p.id as pid, count(distinct r.id) as `count`, r.type as type
from ${stats_db_name}.project p from ${stats_db_name}.project p
@ -29,8 +29,6 @@ select rcount.pid, sum(case when rcount.type='publication' then rcount.count els
from rcount from rcount
group by rcount.pid; group by rcount.pid;
create view ${stats_db_name}.rndexpenditure as select * from stats_ext.rndexpediture;
create table ${stats_db_name}.result_instance stored as parquet as create table ${stats_db_name}.result_instance stored as parquet as
select distinct r.* select distinct r.*
from ( from (
@ -39,12 +37,10 @@ from (
from ${openaire_db_name}.result r lateral view explode(r.instance) instances as inst lateral view explode(inst.pid) pids as p) r from ${openaire_db_name}.result r lateral view explode(r.instance) instances as inst lateral view explode(inst.pid) pids as p) r
join ${stats_db_name}.result res on res.id=r.id; join ${stats_db_name}.result res on res.id=r.id;
create table ${stats_db_name}.result_apc as create table ${stats_db_name}.result_apc STORED AS PARQUET as
select r.id, r.amount, r.currency select r.id, r.amount, r.currency
from ( from (
select substr(r.id, 4) as id, inst.processingchargeamount.value as amount, inst.processingchargecurrency.value as currency select substr(r.id, 4) as id, inst.processingchargeamount.value as amount, inst.processingchargecurrency.value as currency
from ${openaire_db_name}.result r lateral view explode(r.instance) instances as inst) r from ${openaire_db_name}.result r lateral view explode(r.instance) instances as inst) r
join ${stats_db_name}.result res on res.id=r.id join ${stats_db_name}.result res on res.id=r.id
where r.amount is not null; where r.amount is not null;
create view ${stats_db_name}.issn_gold_oa_dataset as select * from stats_ext.issn_gold_oa_dataset;

View File

@ -3,20 +3,20 @@
---------------------------------------------------- ----------------------------------------------------
-- Peer reviewed: -- Peer reviewed:
create table ${stats_db_name}.result_peerreviewed as create table ${stats_db_name}.result_peerreviewed stored as parquet as
select r.id as id, case when doi.doi_from_crossref=1 and grey.grey_lit=0 then true else false end as peer_reviewed select r.id as id, case when doi.doi_from_crossref=1 and grey.grey_lit=0 then true else false end as peer_reviewed
from ${stats_db_name}.result r from ${stats_db_name}.result r
left outer join ${stats_db_name}.indi_pub_doi_from_crossref doi on doi.id=r.id left outer join ${stats_db_name}.indi_pub_doi_from_crossref doi on doi.id=r.id
left outer join ${stats_db_name}.indi_pub_grey_lit grey on grey.id=r.id; left outer join ${stats_db_name}.indi_pub_grey_lit grey on grey.id=r.id;
-- Green OA: -- Green OA:
create table ${stats_db_name}.result_greenoa as create table ${stats_db_name}.result_greenoa stored as parquet as
select r.id, case when green.green_oa=1 then true else false end as green select r.id, case when green.green_oa=1 then true else false end as green
from ${stats_db_name}.result r from ${stats_db_name}.result r
left outer join ${stats_db_name}.indi_pub_green_oa green on green.id=r.id; left outer join ${stats_db_name}.indi_pub_green_oa green on green.id=r.id;
-- GOLD OA: -- GOLD OA:
create table ${stats_db_name}.result_gold as create table ${stats_db_name}.result_gold stored as parquet as
select r.id, case when gold.gold_oa=1 then true else false end as gold select r.id, case when gold.gold_oa=1 then true else false end as gold
from ${stats_db_name}.result r from ${stats_db_name}.result r
left outer join ${stats_db_name}.indi_pub_gold_oa gold on gold.id=r.id; left outer join ${stats_db_name}.indi_pub_gold_oa gold on gold.id=r.id;

View File

@ -40,13 +40,13 @@ SELECT substr(p.id, 4) as id,
from ${openaire_db_name}.publication p from ${openaire_db_name}.publication p
where p.datainfo.deletedbyinference = false; where p.datainfo.deletedbyinference = false;
CREATE TABLE ${stats_db_name}.publication_classifications AS CREATE TABLE ${stats_db_name}.publication_classifications STORED AS PARQUET AS
SELECT substr(p.id, 4) as id, instancetype.classname as type SELECT substr(p.id, 4) as id, instancetype.classname as type
from ${openaire_db_name}.publication p from ${openaire_db_name}.publication p
LATERAL VIEW explode(p.instance.instancetype) instances as instancetype LATERAL VIEW explode(p.instance.instancetype) instances as instancetype
where p.datainfo.deletedbyinference = false; where p.datainfo.deletedbyinference = false;
CREATE TABLE ${stats_db_name}.publication_concepts AS CREATE TABLE ${stats_db_name}.publication_concepts STORED AS PARQUET AS
SELECT substr(p.id, 4) as id, case SELECT substr(p.id, 4) as id, case
when contexts.context.id RLIKE '^[^::]+::[^::]+::.+$' then contexts.context.id when contexts.context.id RLIKE '^[^::]+::[^::]+::.+$' then contexts.context.id
when contexts.context.id RLIKE '^[^::]+::[^::]+$' then concat(contexts.context.id, '::other') when contexts.context.id RLIKE '^[^::]+::[^::]+$' then concat(contexts.context.id, '::other')
@ -55,7 +55,7 @@ from ${openaire_db_name}.publication p
LATERAL VIEW explode(p.context) contexts as context LATERAL VIEW explode(p.context) contexts as context
where p.datainfo.deletedbyinference = false; where p.datainfo.deletedbyinference = false;
CREATE TABLE ${stats_db_name}.publication_datasources as CREATE TABLE ${stats_db_name}.publication_datasources STORED AS PARQUET as
SELECT p.id, case when d.id is null then 'other' else p.datasource end as datasource SELECT p.id, case when d.id is null then 'other' else p.datasource end as datasource
FROM ( FROM (
SELECT substr(p.id, 4) as id, substr(instances.instance.hostedby.key, 4) as datasource SELECT substr(p.id, 4) as id, substr(instances.instance.hostedby.key, 4) as datasource
@ -66,30 +66,30 @@ FROM (
from ${openaire_db_name}.datasource d from ${openaire_db_name}.datasource d
WHERE d.datainfo.deletedbyinference = false) d on p.datasource = d.id; WHERE d.datainfo.deletedbyinference = false) d on p.datasource = d.id;
CREATE TABLE ${stats_db_name}.publication_languages AS CREATE TABLE ${stats_db_name}.publication_languages STORED AS PARQUET AS
select substr(p.id, 4) as id, p.language.classname as language select substr(p.id, 4) as id, p.language.classname as language
FROM ${openaire_db_name}.publication p FROM ${openaire_db_name}.publication p
where p.datainfo.deletedbyinference = false; where p.datainfo.deletedbyinference = false;
CREATE TABLE ${stats_db_name}.publication_oids AS CREATE TABLE ${stats_db_name}.publication_oids STORED AS PARQUET AS
SELECT substr(p.id, 4) AS id, oids.ids AS oid SELECT substr(p.id, 4) AS id, oids.ids AS oid
FROM ${openaire_db_name}.publication p FROM ${openaire_db_name}.publication p
LATERAL VIEW explode(p.originalid) oids AS ids LATERAL VIEW explode(p.originalid) oids AS ids
where p.datainfo.deletedbyinference = false; where p.datainfo.deletedbyinference = false;
CREATE TABLE ${stats_db_name}.publication_pids AS CREATE TABLE ${stats_db_name}.publication_pids STORED AS PARQUET AS
SELECT substr(p.id, 4) AS id, ppid.qualifier.classname AS type, ppid.value as pid SELECT substr(p.id, 4) AS id, ppid.qualifier.classname AS type, ppid.value as pid
FROM ${openaire_db_name}.publication p FROM ${openaire_db_name}.publication p
LATERAL VIEW explode(p.pid) pids AS ppid LATERAL VIEW explode(p.pid) pids AS ppid
where p.datainfo.deletedbyinference = false; where p.datainfo.deletedbyinference = false;
CREATE TABLE ${stats_db_name}.publication_topics as CREATE TABLE ${stats_db_name}.publication_topics STORED AS PARQUET as
select substr(p.id, 4) AS id, subjects.subject.qualifier.classname AS TYPE, subjects.subject.value AS topic select substr(p.id, 4) AS id, subjects.subject.qualifier.classname AS TYPE, subjects.subject.value AS topic
FROM ${openaire_db_name}.publication p FROM ${openaire_db_name}.publication p
LATERAL VIEW explode(p.subject) subjects AS subject LATERAL VIEW explode(p.subject) subjects AS subject
where p.datainfo.deletedbyinference = false; where p.datainfo.deletedbyinference = false;
CREATE TABLE ${stats_db_name}.publication_citations AS CREATE TABLE ${stats_db_name}.publication_citations STORED AS PARQUET AS
SELECT substr(p.id, 4) AS id, xpath_string(citation.value, "//citation/id[@type='openaire']/@value") AS cites SELECT substr(p.id, 4) AS id, xpath_string(citation.value, "//citation/id[@type='openaire']/@value") AS cites
FROM ${openaire_db_name}.publication p FROM ${openaire_db_name}.publication p
lateral view explode(p.extrainfo) citations AS citation lateral view explode(p.extrainfo) citations AS citation

View File

@ -40,20 +40,20 @@ SELECT substr(d.id, 4) AS id,
FROM ${openaire_db_name}.dataset d FROM ${openaire_db_name}.dataset d
WHERE d.datainfo.deletedbyinference = FALSE; WHERE d.datainfo.deletedbyinference = FALSE;
CREATE TABLE ${stats_db_name}.dataset_citations AS CREATE TABLE ${stats_db_name}.dataset_citations STORED AS PARQUET AS
SELECT substr(d.id, 4) AS id, xpath_string(citation.value, "//citation/id[@type='openaire']/@value") AS cites SELECT substr(d.id, 4) AS id, xpath_string(citation.value, "//citation/id[@type='openaire']/@value") AS cites
FROM ${openaire_db_name}.dataset d FROM ${openaire_db_name}.dataset d
LATERAL VIEW explode(d.extrainfo) citations AS citation LATERAL VIEW explode(d.extrainfo) citations AS citation
WHERE xpath_string(citation.value, "//citation/id[@type='openaire']/@value") != "" WHERE xpath_string(citation.value, "//citation/id[@type='openaire']/@value") != ""
and d.datainfo.deletedbyinference = false; and d.datainfo.deletedbyinference = false;
CREATE TABLE ${stats_db_name}.dataset_classifications AS CREATE TABLE ${stats_db_name}.dataset_classifications STORED AS PARQUET AS
SELECT substr(p.id, 4) AS id, instancetype.classname AS type SELECT substr(p.id, 4) AS id, instancetype.classname AS type
FROM ${openaire_db_name}.dataset p FROM ${openaire_db_name}.dataset p
LATERAL VIEW explode(p.instance.instancetype) instances AS instancetype LATERAL VIEW explode(p.instance.instancetype) instances AS instancetype
where p.datainfo.deletedbyinference = false; where p.datainfo.deletedbyinference = false;
CREATE TABLE ${stats_db_name}.dataset_concepts AS CREATE TABLE ${stats_db_name}.dataset_concepts STORED AS PARQUET AS
SELECT substr(p.id, 4) as id, case SELECT substr(p.id, 4) as id, case
when contexts.context.id RLIKE '^[^::]+::[^::]+::.+$' then contexts.context.id when contexts.context.id RLIKE '^[^::]+::[^::]+::.+$' then contexts.context.id
when contexts.context.id RLIKE '^[^::]+::[^::]+$' then concat(contexts.context.id, '::other') when contexts.context.id RLIKE '^[^::]+::[^::]+$' then concat(contexts.context.id, '::other')
@ -62,7 +62,7 @@ from ${openaire_db_name}.dataset p
LATERAL VIEW explode(p.context) contexts as context LATERAL VIEW explode(p.context) contexts as context
where p.datainfo.deletedbyinference = false; where p.datainfo.deletedbyinference = false;
CREATE TABLE ${stats_db_name}.dataset_datasources AS CREATE TABLE ${stats_db_name}.dataset_datasources STORED AS PARQUET AS
SELECT p.id, case when d.id IS NULL THEN 'other' ELSE p.datasource END AS datasource SELECT p.id, case when d.id IS NULL THEN 'other' ELSE p.datasource END AS datasource
FROM ( FROM (
SELECT substr(p.id, 4) as id, substr(instances.instance.hostedby.key, 4) AS datasource SELECT substr(p.id, 4) as id, substr(instances.instance.hostedby.key, 4) AS datasource
@ -74,24 +74,24 @@ FROM (
FROM ${openaire_db_name}.datasource d FROM ${openaire_db_name}.datasource d
WHERE d.datainfo.deletedbyinference = false) d ON p.datasource = d.id; WHERE d.datainfo.deletedbyinference = false) d ON p.datasource = d.id;
-- Each line shows the pre-commit text followed by the post-commit text; the
-- only change is the added STORED AS PARQUET clause on the CREATE TABLE.
-- Builds dataset_languages(id, language): one row per selected dataset, with
-- language read from p.language.classname and id being p.id without its
-- first three characters. Only rows with deletedbyinference = false are kept.
CREATE TABLE ${stats_db_name}.dataset_languages AS CREATE TABLE ${stats_db_name}.dataset_languages STORED AS PARQUET AS
SELECT substr(p.id, 4) AS id, p.language.classname AS language SELECT substr(p.id, 4) AS id, p.language.classname AS language
FROM ${openaire_db_name}.dataset p FROM ${openaire_db_name}.dataset p
where p.datainfo.deletedbyinference = false; where p.datainfo.deletedbyinference = false;
-- Each line shows the pre-commit text followed by the post-commit text; the
-- only change is the added STORED AS PARQUET clause on the CREATE TABLE.
-- Builds dataset_oids(id, oid): one row per element of p.originalid, paired
-- with the dataset id minus its first three characters.
-- Only datasets whose datainfo.deletedbyinference is false are kept.
CREATE TABLE ${stats_db_name}.dataset_oids AS CREATE TABLE ${stats_db_name}.dataset_oids STORED AS PARQUET AS
SELECT substr(p.id, 4) AS id, oids.ids AS oid SELECT substr(p.id, 4) AS id, oids.ids AS oid
FROM ${openaire_db_name}.dataset p FROM ${openaire_db_name}.dataset p
LATERAL VIEW explode(p.originalid) oids AS ids LATERAL VIEW explode(p.originalid) oids AS ids
where p.datainfo.deletedbyinference = false; where p.datainfo.deletedbyinference = false;
-- Each line shows the pre-commit text followed by the post-commit text; the
-- only change is the added STORED AS PARQUET clause on the CREATE TABLE.
-- Builds dataset_pids(id, type, pid): one row per element of p.pid, where
-- type is the element's qualifier.classname and pid is the element's value.
-- Only datasets whose datainfo.deletedbyinference is false are kept.
CREATE TABLE ${stats_db_name}.dataset_pids AS CREATE TABLE ${stats_db_name}.dataset_pids STORED AS PARQUET AS
SELECT substr(p.id, 4) AS id, ppid.qualifier.classname AS type, ppid.value AS pid SELECT substr(p.id, 4) AS id, ppid.qualifier.classname AS type, ppid.value AS pid
FROM ${openaire_db_name}.dataset p FROM ${openaire_db_name}.dataset p
LATERAL VIEW explode(p.pid) pids AS ppid LATERAL VIEW explode(p.pid) pids AS ppid
where p.datainfo.deletedbyinference = false; where p.datainfo.deletedbyinference = false;
CREATE TABLE ${stats_db_name}.dataset_topics AS CREATE TABLE ${stats_db_name}.dataset_topics STORED AS PARQUET AS
SELECT substr(p.id, 4) AS id, subjects.subject.qualifier.classname AS type, subjects.subject.value AS topic SELECT substr(p.id, 4) AS id, subjects.subject.qualifier.classname AS type, subjects.subject.value AS topic
FROM ${openaire_db_name}.dataset p FROM ${openaire_db_name}.dataset p
LATERAL VIEW explode(p.subject) subjects AS subject LATERAL VIEW explode(p.subject) subjects AS subject

View File

@ -40,20 +40,20 @@ SELECT substr(s.id, 4) as id,
from ${openaire_db_name}.software s from ${openaire_db_name}.software s
where s.datainfo.deletedbyinference = false; where s.datainfo.deletedbyinference = false;
-- Each line shows the pre-commit text followed by the post-commit text; the
-- only change is the added STORED AS PARQUET clause on the CREATE TABLE.
-- Builds software_citations(id, cites): for every element of s.extrainfo,
-- cites is the @value attribute of the citation <id type="openaire"> node
-- extracted from the element's XML value via xpath_string.
-- Rows where that extraction yields an empty string are dropped, as are
-- software records whose datainfo.deletedbyinference is not false.
CREATE TABLE ${stats_db_name}.software_citations AS CREATE TABLE ${stats_db_name}.software_citations STORED AS PARQUET AS
SELECT substr(s.id, 4) as id, xpath_string(citation.value, "//citation/id[@type='openaire']/@value") AS cites SELECT substr(s.id, 4) as id, xpath_string(citation.value, "//citation/id[@type='openaire']/@value") AS cites
FROM ${openaire_db_name}.software s FROM ${openaire_db_name}.software s
LATERAL VIEW explode(s.extrainfo) citations as citation LATERAL VIEW explode(s.extrainfo) citations as citation
where xpath_string(citation.value, "//citation/id[@type='openaire']/@value") != "" where xpath_string(citation.value, "//citation/id[@type='openaire']/@value") != ""
and s.datainfo.deletedbyinference = false; and s.datainfo.deletedbyinference = false;
-- Each line shows the pre-commit text followed by the post-commit text; the
-- only change is the added STORED AS PARQUET clause on the CREATE TABLE.
-- Builds software_classifications(id, type): one row per element of
-- p.instance.instancetype, with type taken from the element's classname.
-- substr(p.id, 4) drops the first three characters of the software id.
-- Only records whose datainfo.deletedbyinference is false are kept.
CREATE TABLE ${stats_db_name}.software_classifications AS CREATE TABLE ${stats_db_name}.software_classifications STORED AS PARQUET AS
SELECT substr(p.id, 4) AS id, instancetype.classname AS type SELECT substr(p.id, 4) AS id, instancetype.classname AS type
FROM ${openaire_db_name}.software p FROM ${openaire_db_name}.software p
LATERAL VIEW explode(p.instance.instancetype) instances AS instancetype LATERAL VIEW explode(p.instance.instancetype) instances AS instancetype
where p.datainfo.deletedbyinference = false; where p.datainfo.deletedbyinference = false;
CREATE TABLE ${stats_db_name}.software_concepts AS CREATE TABLE ${stats_db_name}.software_concepts STORED AS PARQUET AS
SELECT substr(p.id, 4) as id, case SELECT substr(p.id, 4) as id, case
when contexts.context.id RLIKE '^[^::]+::[^::]+::.+$' then contexts.context.id when contexts.context.id RLIKE '^[^::]+::[^::]+::.+$' then contexts.context.id
when contexts.context.id RLIKE '^[^::]+::[^::]+$' then concat(contexts.context.id, '::other') when contexts.context.id RLIKE '^[^::]+::[^::]+$' then concat(contexts.context.id, '::other')
@ -62,7 +62,7 @@ FROM ${openaire_db_name}.software p
LATERAL VIEW explode(p.context) contexts AS context LATERAL VIEW explode(p.context) contexts AS context
where p.datainfo.deletedbyinference = false; where p.datainfo.deletedbyinference = false;
CREATE TABLE ${stats_db_name}.software_datasources AS CREATE TABLE ${stats_db_name}.software_datasources STORED AS PARQUET AS
SELECT p.id, CASE WHEN d.id IS NULL THEN 'other' ELSE p.datasource end as datasource SELECT p.id, CASE WHEN d.id IS NULL THEN 'other' ELSE p.datasource end as datasource
FROM ( FROM (
SELECT substr(p.id, 4) AS id, substr(instances.instance.hostedby.key, 4) AS datasource SELECT substr(p.id, 4) AS id, substr(instances.instance.hostedby.key, 4) AS datasource
@ -74,24 +74,24 @@ FROM (
FROM ${openaire_db_name}.datasource d FROM ${openaire_db_name}.datasource d
WHERE d.datainfo.deletedbyinference = false) d ON p.datasource = d.id; WHERE d.datainfo.deletedbyinference = false) d ON p.datasource = d.id;
-- Each line shows the pre-commit text followed by the post-commit text; the
-- only change is the added STORED AS PARQUET clause on the CREATE TABLE.
-- Builds software_languages(id, language): one row per selected software
-- record, with language read from p.language.classname and id being p.id
-- without its first three characters.
-- Only records whose datainfo.deletedbyinference is false are kept.
CREATE TABLE ${stats_db_name}.software_languages AS CREATE TABLE ${stats_db_name}.software_languages STORED AS PARQUET AS
select substr(p.id, 4) AS id, p.language.classname AS language select substr(p.id, 4) AS id, p.language.classname AS language
FROM ${openaire_db_name}.software p FROM ${openaire_db_name}.software p
where p.datainfo.deletedbyinference = false; where p.datainfo.deletedbyinference = false;
-- Each line shows the pre-commit text followed by the post-commit text; the
-- only change is the added STORED AS PARQUET clause on the CREATE TABLE.
-- Builds software_oids(id, oid): one row per element of p.originalid, paired
-- with the software id minus its first three characters.
-- Only records whose datainfo.deletedbyinference is false are kept.
CREATE TABLE ${stats_db_name}.software_oids AS CREATE TABLE ${stats_db_name}.software_oids STORED AS PARQUET AS
SELECT substr(p.id, 4) AS id, oids.ids AS oid SELECT substr(p.id, 4) AS id, oids.ids AS oid
FROM ${openaire_db_name}.software p FROM ${openaire_db_name}.software p
LATERAL VIEW explode(p.originalid) oids AS ids LATERAL VIEW explode(p.originalid) oids AS ids
where p.datainfo.deletedbyinference = false; where p.datainfo.deletedbyinference = false;
-- Each line shows the pre-commit text followed by the post-commit text; the
-- only change is the added STORED AS PARQUET clause on the CREATE TABLE.
-- Builds software_pids(id, type, pid): one row per element of p.pid, where
-- type is the element's qualifier.classname and pid is the element's value.
-- Only records whose datainfo.deletedbyinference is false are kept.
CREATE TABLE ${stats_db_name}.software_pids AS CREATE TABLE ${stats_db_name}.software_pids STORED AS PARQUET AS
SELECT substr(p.id, 4) AS id, ppid.qualifier.classname AS type, ppid.value AS pid SELECT substr(p.id, 4) AS id, ppid.qualifier.classname AS type, ppid.value AS pid
FROM ${openaire_db_name}.software p FROM ${openaire_db_name}.software p
LATERAL VIEW explode(p.pid) pids AS ppid LATERAL VIEW explode(p.pid) pids AS ppid
where p.datainfo.deletedbyinference = false; where p.datainfo.deletedbyinference = false;
CREATE TABLE ${stats_db_name}.software_topics AS CREATE TABLE ${stats_db_name}.software_topics STORED AS PARQUET AS
SELECT substr(p.id, 4) AS id, subjects.subject.qualifier.classname AS type, subjects.subject.value AS topic SELECT substr(p.id, 4) AS id, subjects.subject.qualifier.classname AS type, subjects.subject.value AS topic
FROM ${openaire_db_name}.software p FROM ${openaire_db_name}.software p
LATERAL VIEW explode(p.subject) subjects AS subject LATERAL VIEW explode(p.subject) subjects AS subject

View File

@ -40,18 +40,18 @@ FROM ${openaire_db_name}.otherresearchproduct o
WHERE o.datainfo.deletedbyinference = FALSE; WHERE o.datainfo.deletedbyinference = FALSE;
-- Otherresearchproduct_citations -- Otherresearchproduct_citations
-- Each line shows the pre-commit text followed by the post-commit text; the
-- only change is the added STORED AS PARQUET clause on the CREATE TABLE.
-- Builds otherresearchproduct_citations(id, cites): for every element of
-- o.extrainfo, cites is the @value attribute of the citation
-- <id type="openaire"> node extracted from the element's XML value.
-- Rows where that extraction yields an empty string are dropped, as are
-- records whose datainfo.deletedbyinference is not false.
CREATE TABLE ${stats_db_name}.otherresearchproduct_citations AS CREATE TABLE ${stats_db_name}.otherresearchproduct_citations STORED AS PARQUET AS
SELECT substr(o.id, 4) AS id, xpath_string(citation.value, "//citation/id[@type='openaire']/@value") AS cites SELECT substr(o.id, 4) AS id, xpath_string(citation.value, "//citation/id[@type='openaire']/@value") AS cites
FROM ${openaire_db_name}.otherresearchproduct o LATERAL VIEW explode(o.extrainfo) citations AS citation FROM ${openaire_db_name}.otherresearchproduct o LATERAL VIEW explode(o.extrainfo) citations AS citation
WHERE xpath_string(citation.value, "//citation/id[@type='openaire']/@value") != "" WHERE xpath_string(citation.value, "//citation/id[@type='openaire']/@value") != ""
and o.datainfo.deletedbyinference = false; and o.datainfo.deletedbyinference = false;
-- Each line shows the pre-commit text followed by the post-commit text; the
-- only change is the added STORED AS PARQUET clause on the CREATE TABLE.
-- Builds otherresearchproduct_classifications(id, type): one row per element
-- of p.instance.instancetype, with type taken from the element's classname.
-- substr(p.id, 4) drops the first three characters of the record id.
-- Only records whose datainfo.deletedbyinference is false are kept.
CREATE TABLE ${stats_db_name}.otherresearchproduct_classifications AS CREATE TABLE ${stats_db_name}.otherresearchproduct_classifications STORED AS PARQUET AS
SELECT substr(p.id, 4) AS id, instancetype.classname AS type SELECT substr(p.id, 4) AS id, instancetype.classname AS type
FROM ${openaire_db_name}.otherresearchproduct p LATERAL VIEW explode(p.instance.instancetype) instances AS instancetype FROM ${openaire_db_name}.otherresearchproduct p LATERAL VIEW explode(p.instance.instancetype) instances AS instancetype
where p.datainfo.deletedbyinference = false; where p.datainfo.deletedbyinference = false;
CREATE TABLE ${stats_db_name}.otherresearchproduct_concepts AS CREATE TABLE ${stats_db_name}.otherresearchproduct_concepts STORED AS PARQUET AS
SELECT substr(p.id, 4) as id, case SELECT substr(p.id, 4) as id, case
when contexts.context.id RLIKE '^[^::]+::[^::]+::.+$' then contexts.context.id when contexts.context.id RLIKE '^[^::]+::[^::]+::.+$' then contexts.context.id
when contexts.context.id RLIKE '^[^::]+::[^::]+$' then concat(contexts.context.id, '::other') when contexts.context.id RLIKE '^[^::]+::[^::]+$' then concat(contexts.context.id, '::other')
@ -59,7 +59,7 @@ SELECT substr(p.id, 4) as id, case
FROM ${openaire_db_name}.otherresearchproduct p LATERAL VIEW explode(p.context) contexts AS context FROM ${openaire_db_name}.otherresearchproduct p LATERAL VIEW explode(p.context) contexts AS context
where p.datainfo.deletedbyinference = false; where p.datainfo.deletedbyinference = false;
CREATE TABLE ${stats_db_name}.otherresearchproduct_datasources AS CREATE TABLE ${stats_db_name}.otherresearchproduct_datasources STORED AS PARQUET AS
SELECT p.id, CASE WHEN d.id IS NULL THEN 'other' ELSE p.datasource END AS datasource SELECT p.id, CASE WHEN d.id IS NULL THEN 'other' ELSE p.datasource END AS datasource
FROM (SELECT substr(p.id, 4) AS id, substr(instances.instance.hostedby.key, 4) AS datasource FROM (SELECT substr(p.id, 4) AS id, substr(instances.instance.hostedby.key, 4) AS datasource
from ${openaire_db_name}.otherresearchproduct p lateral view explode(p.instance) instances as instance from ${openaire_db_name}.otherresearchproduct p lateral view explode(p.instance) instances as instance
@ -68,22 +68,22 @@ FROM (SELECT substr(p.id, 4) AS id, substr(instances.instance.hostedby.key, 4) A
from ${openaire_db_name}.datasource d from ${openaire_db_name}.datasource d
WHERE d.datainfo.deletedbyinference = false) d on p.datasource = d.id; WHERE d.datainfo.deletedbyinference = false) d on p.datasource = d.id;
-- Each line shows the pre-commit text followed by the post-commit text; the
-- only change is the added STORED AS PARQUET clause on the CREATE TABLE.
-- Builds otherresearchproduct_languages(id, language): one row per selected
-- record, with language read from p.language.classname and id being p.id
-- without its first three characters.
-- Only records whose datainfo.deletedbyinference is false are kept.
CREATE TABLE ${stats_db_name}.otherresearchproduct_languages AS CREATE TABLE ${stats_db_name}.otherresearchproduct_languages STORED AS PARQUET AS
SELECT substr(p.id, 4) AS id, p.language.classname AS language SELECT substr(p.id, 4) AS id, p.language.classname AS language
FROM ${openaire_db_name}.otherresearchproduct p FROM ${openaire_db_name}.otherresearchproduct p
where p.datainfo.deletedbyinference = false; where p.datainfo.deletedbyinference = false;
-- Each line shows the pre-commit text followed by the post-commit text; the
-- only change is the added STORED AS PARQUET clause on the CREATE TABLE.
-- Builds otherresearchproduct_oids(id, oid): one row per element of
-- p.originalid, paired with the record id minus its first three characters.
-- Only records whose datainfo.deletedbyinference is false are kept.
CREATE TABLE ${stats_db_name}.otherresearchproduct_oids AS CREATE TABLE ${stats_db_name}.otherresearchproduct_oids STORED AS PARQUET AS
SELECT substr(p.id, 4) AS id, oids.ids AS oid SELECT substr(p.id, 4) AS id, oids.ids AS oid
FROM ${openaire_db_name}.otherresearchproduct p LATERAL VIEW explode(p.originalid) oids AS ids FROM ${openaire_db_name}.otherresearchproduct p LATERAL VIEW explode(p.originalid) oids AS ids
where p.datainfo.deletedbyinference = false; where p.datainfo.deletedbyinference = false;
-- Each line shows the pre-commit text followed by the post-commit text; the
-- only change is the added STORED AS PARQUET clause on the CREATE TABLE.
-- Builds otherresearchproduct_pids(id, type, pid): one row per element of
-- p.pid, where type is the element's qualifier.classname and pid is the
-- element's value.
-- Only records whose datainfo.deletedbyinference is false are kept.
CREATE TABLE ${stats_db_name}.otherresearchproduct_pids AS CREATE TABLE ${stats_db_name}.otherresearchproduct_pids STORED AS PARQUET AS
SELECT substr(p.id, 4) AS id, ppid.qualifier.classname AS type, ppid.value AS pid SELECT substr(p.id, 4) AS id, ppid.qualifier.classname AS type, ppid.value AS pid
FROM ${openaire_db_name}.otherresearchproduct p LATERAL VIEW explode(p.pid) pids AS ppid FROM ${openaire_db_name}.otherresearchproduct p LATERAL VIEW explode(p.pid) pids AS ppid
where p.datainfo.deletedbyinference = false; where p.datainfo.deletedbyinference = false;
-- Each line shows the pre-commit text followed by the post-commit text; the
-- only change is the added STORED AS PARQUET clause on the CREATE TABLE.
-- Builds otherresearchproduct_topics(id, type, topic): one row per element
-- of p.subject, where type is the element's qualifier.classname and topic is
-- the element's value.
-- Only records whose datainfo.deletedbyinference is false are kept.
CREATE TABLE ${stats_db_name}.otherresearchproduct_topics AS CREATE TABLE ${stats_db_name}.otherresearchproduct_topics STORED AS PARQUET AS
SELECT substr(p.id, 4) AS id, subjects.subject.qualifier.classname AS type, subjects.subject.value AS topic SELECT substr(p.id, 4) AS id, subjects.subject.qualifier.classname AS type, subjects.subject.value AS topic
FROM ${openaire_db_name}.otherresearchproduct p LATERAL VIEW explode(p.subject) subjects AS subject FROM ${openaire_db_name}.otherresearchproduct p LATERAL VIEW explode(p.subject) subjects AS subject
where p.datainfo.deletedbyinference = false; where p.datainfo.deletedbyinference = false;

View File

@ -3,7 +3,7 @@
-- Project table/view and Project related tables/views -- Project table/view and Project related tables/views
------------------------------------------------------ ------------------------------------------------------
------------------------------------------------------ ------------------------------------------------------
-- Each line shows the pre-commit text followed by the post-commit text; the
-- only change is the added STORED AS PARQUET clause on the CREATE TABLE.
-- Builds project_oids(id, oid): one row per element of p.originalid, paired
-- with the project id minus its first three characters.
-- NOTE(review): this statement has no WHERE clause, so projects are not
-- filtered on datainfo.deletedbyinference — confirm that is intentional.
CREATE TABLE ${stats_db_name}.project_oids AS CREATE TABLE ${stats_db_name}.project_oids STORED AS PARQUET AS
SELECT substr(p.id, 4) AS id, oids.ids AS oid SELECT substr(p.id, 4) AS id, oids.ids AS oid
FROM ${openaire_db_name}.project p LATERAL VIEW explode(p.originalid) oids AS ids; FROM ${openaire_db_name}.project p LATERAL VIEW explode(p.originalid) oids AS ids;
CREATE TABLE ${stats_db_name}.project_organizations AS CREATE TABLE ${stats_db_name}.project_organizations AS
@ -12,13 +12,13 @@ from ${openaire_db_name}.relation r
WHERE r.reltype = 'projectOrganization' WHERE r.reltype = 'projectOrganization'
and r.datainfo.deletedbyinference = false; and r.datainfo.deletedbyinference = false;
-- Each line shows the pre-commit text followed by the post-commit text; the
-- only change is the added STORED AS PARQUET clause on the CREATE TABLE.
-- Builds project_results(id, result, provenance) from relation rows whose
-- reltype is 'resultProject' and whose datainfo.deletedbyinference is false.
-- id comes from r.target and result from r.source, each minus its first
-- three characters; provenance is datainfo.provenanceaction.classname.
CREATE TABLE ${stats_db_name}.project_results AS CREATE TABLE ${stats_db_name}.project_results STORED AS PARQUET AS
SELECT substr(r.target, 4) AS id, substr(r.source, 4) AS result, r.datainfo.provenanceaction.classname as provenance SELECT substr(r.target, 4) AS id, substr(r.source, 4) AS result, r.datainfo.provenanceaction.classname as provenance
FROM ${openaire_db_name}.relation r FROM ${openaire_db_name}.relation r
WHERE r.reltype = 'resultProject' WHERE r.reltype = 'resultProject'
and r.datainfo.deletedbyinference = false; and r.datainfo.deletedbyinference = false;
create table ${stats_db_name}.project_classification as create table ${stats_db_name}.project_classification STORED AS PARQUET as
select substr(p.id, 4) as id, class.h2020programme.code, class.level1, class.level2, class.level3 select substr(p.id, 4) as id, class.h2020programme.code, class.level1, class.level2, class.level3
from ${openaire_db_name}.project p from ${openaire_db_name}.project p
lateral view explode(p.h2020classification) classifs as class lateral view explode(p.h2020classification) classifs as class
@ -74,7 +74,7 @@ SELECT substr(p.id, 4) AS id,
FROM ${openaire_db_name}.project p FROM ${openaire_db_name}.project p
WHERE p.datainfo.deletedbyinference = false; WHERE p.datainfo.deletedbyinference = false;
create table ${stats_db_name}.funder as create table ${stats_db_name}.funder STORED AS PARQUET as
select distinct xpath_string(fund, '//funder/id') as id, select distinct xpath_string(fund, '//funder/id') as id,
xpath_string(fund, '//funder/name') as name, xpath_string(fund, '//funder/name') as name,
xpath_string(fund, '//funder/shortname') as shortname xpath_string(fund, '//funder/shortname') as shortname

View File

@ -123,13 +123,13 @@ UNION ALL
SELECT * SELECT *
FROM ${stats_db_name}.otherresearchproduct_topics; FROM ${stats_db_name}.otherresearchproduct_topics;
-- Each line shows the pre-commit text followed by the post-commit text; the
-- only change is the added STORED AS PARQUET clause on the CREATE TABLE.
-- Builds result_organization(id, organization) from relation rows whose
-- reltype is 'resultOrganization' and whose datainfo.deletedbyinference is
-- false. id comes from r.target and organization from r.source, each minus
-- its first three characters.
CREATE TABLE ${stats_db_name}.result_organization AS CREATE TABLE ${stats_db_name}.result_organization STORED AS PARQUET AS
SELECT substr(r.target, 4) AS id, substr(r.source, 4) AS organization SELECT substr(r.target, 4) AS id, substr(r.source, 4) AS organization
FROM ${openaire_db_name}.relation r FROM ${openaire_db_name}.relation r
WHERE r.reltype = 'resultOrganization' WHERE r.reltype = 'resultOrganization'
and r.datainfo.deletedbyinference = false; and r.datainfo.deletedbyinference = false;
CREATE TABLE ${stats_db_name}.result_projects AS CREATE TABLE ${stats_db_name}.result_projects STORED AS PARQUET AS
select pr.result AS id, pr.id AS project, datediff(p.enddate, p.startdate) AS daysfromend, pr.provenance as provenance select pr.result AS id, pr.id AS project, datediff(p.enddate, p.startdate) AS daysfromend, pr.provenance as provenance
FROM ${stats_db_name}.result r FROM ${stats_db_name}.result r
JOIN ${stats_db_name}.project_results pr ON r.id = pr.result JOIN ${stats_db_name}.project_results pr ON r.id = pr.result

View File

@ -80,15 +80,15 @@ UPDATE ${stats_db_name}.datasource_tmp
SET yearofvalidation=null SET yearofvalidation=null
WHERE yearofvalidation = '-1'; WHERE yearofvalidation = '-1';
-- Each line shows the pre-commit text followed by the post-commit text; the
-- only change is the added STORED AS PARQUET clause on the CREATE TABLE.
-- Builds datasource_languages(id, language): one row per element of
-- d.odlanguages.value, paired with the datasource id minus its first three
-- characters.
-- NOTE(review): this statement has no WHERE clause, so datasources are not
-- filtered on datainfo.deletedbyinference — confirm that is intentional.
CREATE TABLE ${stats_db_name}.datasource_languages AS CREATE TABLE ${stats_db_name}.datasource_languages STORED AS PARQUET AS
SELECT substr(d.id, 4) AS id, langs.languages AS language SELECT substr(d.id, 4) AS id, langs.languages AS language
FROM ${openaire_db_name}.datasource d LATERAL VIEW explode(d.odlanguages.value) langs AS languages; FROM ${openaire_db_name}.datasource d LATERAL VIEW explode(d.odlanguages.value) langs AS languages;
-- Each line shows the pre-commit text followed by the post-commit text; the
-- only change is the added STORED AS PARQUET clause on the CREATE TABLE.
-- Builds datasource_oids(id, oid): one row per element of d.originalid,
-- paired with the datasource id minus its first three characters.
-- NOTE(review): this statement has no WHERE clause, so datasources are not
-- filtered on datainfo.deletedbyinference — confirm that is intentional.
CREATE TABLE ${stats_db_name}.datasource_oids AS CREATE TABLE ${stats_db_name}.datasource_oids STORED AS PARQUET AS
SELECT substr(d.id, 4) AS id, oids.ids AS oid SELECT substr(d.id, 4) AS id, oids.ids AS oid
FROM ${openaire_db_name}.datasource d LATERAL VIEW explode(d.originalid) oids AS ids; FROM ${openaire_db_name}.datasource d LATERAL VIEW explode(d.originalid) oids AS ids;
CREATE TABLE ${stats_db_name}.datasource_organizations AS CREATE TABLE ${stats_db_name}.datasource_organizations STORED AS PARQUET AS
SELECT substr(r.target, 4) AS id, substr(r.source, 4) AS organization SELECT substr(r.target, 4) AS id, substr(r.source, 4) AS organization
FROM ${openaire_db_name}.relation r FROM ${openaire_db_name}.relation r
WHERE r.reltype = 'datasourceOrganization' WHERE r.reltype = 'datasourceOrganization'
@ -96,11 +96,11 @@ WHERE r.reltype = 'datasourceOrganization'
-- datasource sources: -- datasource sources:
-- where the datasource info have been collected from. -- where the datasource info have been collected from.
-- Each line shows the pre-commit text followed by the post-commit text; the
-- only change is the added STORED AS PARQUET clause on the create table.
-- Builds datasource_sources(id, datasource): one row per element of
-- d.collectedfrom, where datasource is the element's key and id is the
-- datasource id, each minus its first three characters.
-- IF NOT EXISTS makes the statement a no-op when the table is already there.
-- Only datasources whose datainfo.deletedbyinference is false are kept.
create table if not exists ${stats_db_name}.datasource_sources AS create table if not exists ${stats_db_name}.datasource_sources STORED AS PARQUET AS
select substr(d.id, 4) as id, substr(cf.key, 4) as datasource select substr(d.id, 4) as id, substr(cf.key, 4) as datasource
from ${openaire_db_name}.datasource d lateral view explode(d.collectedfrom) cfrom as cf from ${openaire_db_name}.datasource d lateral view explode(d.collectedfrom) cfrom as cf
where d.datainfo.deletedbyinference = false; where d.datainfo.deletedbyinference = false;
-- Defines the datasource_results view over result_datasources with the two
-- columns swapped and renamed: datasource is exposed as id, id as result.
-- Each line shows the pre-commit text followed by the post-commit text.
-- The commit apparently inserted STORED AS PARQUET here along with the
-- CREATE TABLE statements, but Hive's CREATE VIEW grammar has no storage
-- clause (a view stores no data), so the statement would fail to parse.
-- The post-commit side is therefore kept identical to the pre-commit side.
CREATE OR REPLACE VIEW ${stats_db_name}.datasource_results AS CREATE OR REPLACE VIEW ${stats_db_name}.datasource_results AS
SELECT datasource AS id, id AS result SELECT datasource AS id, id AS result
FROM ${stats_db_name}.result_datasources; FROM ${stats_db_name}.result_datasources;