forked from D-Net/dnet-hadoop
first
This commit is contained in:
parent
cb3adb90f4
commit
12749a0a77
|
@ -30,18 +30,14 @@ hdfs dfs -copyFromLocal concepts.csv ${TMP}
|
||||||
hdfs dfs -chmod -R 777 ${TMP}
|
hdfs dfs -chmod -R 777 ${TMP}
|
||||||
|
|
||||||
echo "Creating and populating impala tables"
|
echo "Creating and populating impala tables"
|
||||||
impala-shell -q "invalidate metadata"
|
hive -e "create table ${TARGET_DB}.context (id string, name string) row format delimited fields terminated by ','"
|
||||||
impala-shell -d ${TARGET_DB} -q "invalidate metadata"
|
hive -e "create table ${TARGET_DB}.category (context string, id string, name string) row format delimited fields terminated by ','"
|
||||||
impala-shell -q "create table ${TARGET_DB}.context (id string, name string) row format delimited fields terminated by ','"
|
hive -e "create table ${TARGET_DB}.concept (category string, id string, name string) row format delimited fields terminated by ','"
|
||||||
impala-shell -q "create table ${TARGET_DB}.category (context string, id string, name string) row format delimited fields terminated by ','"
|
hive -e "load data inpath '${TMP}/contexts.csv' into table ${TARGET_DB}.context"
|
||||||
impala-shell -q "create table ${TARGET_DB}.concept (category string, id string, name string) row format delimited fields terminated by ','"
|
hive -e "load data inpath '${TMP}/categories.csv' into table ${TARGET_DB}.category"
|
||||||
impala-shell -d ${TARGET_DB} -q "invalidate metadata"
|
hive -e "load data inpath '${TMP}/concepts.csv' into table ${TARGET_DB}.concept"
|
||||||
impala-shell -q "load data inpath '${TMP}/contexts.csv' into table ${TARGET_DB}.context"
|
|
||||||
impala-shell -q "load data inpath '${TMP}/categories.csv' into table ${TARGET_DB}.category"
|
|
||||||
impala-shell -q "load data inpath '${TMP}/concepts.csv' into table ${TARGET_DB}.concept"
|
|
||||||
|
|
||||||
echo "Cleaning up"
|
echo "Cleaning up"
|
||||||
hdfs dfs -rm -f -r -skipTrash ${TMP}
|
|
||||||
rm concepts.csv
|
rm concepts.csv
|
||||||
rm categories.csv
|
rm categories.csv
|
||||||
rm contexts.csv
|
rm contexts.csv
|
||||||
|
|
|
@ -10,9 +10,7 @@ export SOURCE=$1
|
||||||
export SHADOW=$2
|
export SHADOW=$2
|
||||||
|
|
||||||
echo "Updating shadow database"
|
echo "Updating shadow database"
|
||||||
impala-shell -q "invalidate metadata"
|
hive --database ${SOURCE} -e "show tables" | grep -v WARN | sed "s/^\(.*\)/analyze table ${SOURCE}.\1 compute statistics;/" | impala-shell -c -f -
|
||||||
impala-shell -d ${SOURCE} -q "invalidate metadata"
|
|
||||||
impala-shell -d ${SOURCE} -q "show tables" --delimited | sed "s/^\(.*\)/compute stats ${SOURCE}.\1;/" | impala-shell -c -f -
|
|
||||||
impala-shell -q "create database if not exists ${SHADOW}"
|
impala-shell -q "create database if not exists ${SHADOW}"
|
||||||
impala-shell -d ${SHADOW} -q "show tables" --delimited | sed "s/^/drop view if exists ${SHADOW}./" | sed "s/$/;/" | impala-shell -c -f -
|
impala-shell -d ${SHADOW} -q "show tables" --delimited | sed "s/^/drop view if exists ${SHADOW}./" | sed "s/$/;/" | impala-shell -c -f -
|
||||||
impala-shell -d ${SOURCE} -q "show tables" --delimited | sed "s/\(.*\)/create view ${SHADOW}.\1 as select * from ${SOURCE}.\1;/" | impala-shell -c -f -
|
impala-shell -d ${SOURCE} -q "show tables" --delimited | sed "s/\(.*\)/create view ${SHADOW}.\1 as select * from ${SOURCE}.\1;/" | impala-shell -c -f -
|
||||||
|
|
|
@ -4,29 +4,28 @@
|
||||||
------------------------------------------------------------------------------------------------
|
------------------------------------------------------------------------------------------------
|
||||||
------------------------------------------------------------------------------------------------
|
------------------------------------------------------------------------------------------------
|
||||||
CREATE OR REPLACE VIEW ${stats_db_name}.fundref AS
|
CREATE OR REPLACE VIEW ${stats_db_name}.fundref AS
|
||||||
SELECT *
|
SELECT * FROM ${external_stats_db_name}.fundref;
|
||||||
FROM ${external_stats_db_name}.fundref;
|
|
||||||
|
|
||||||
CREATE OR REPLACE VIEW ${stats_db_name}.country AS
|
CREATE OR REPLACE VIEW ${stats_db_name}.country AS
|
||||||
SELECT *
|
SELECT * FROM ${external_stats_db_name}.country;
|
||||||
FROM ${external_stats_db_name}.country;
|
|
||||||
|
|
||||||
CREATE OR REPLACE VIEW ${stats_db_name}.countrygdp AS
|
CREATE OR REPLACE VIEW ${stats_db_name}.countrygdp AS
|
||||||
SELECT *
|
SELECT * FROM ${external_stats_db_name}.countrygdp;
|
||||||
FROM ${external_stats_db_name}.countrygdp;
|
|
||||||
|
|
||||||
CREATE OR REPLACE VIEW ${stats_db_name}.roarmap AS
|
CREATE OR REPLACE VIEW ${stats_db_name}.roarmap AS
|
||||||
SELECT *
|
SELECT * FROM ${external_stats_db_name}.roarmap;
|
||||||
FROM ${external_stats_db_name}.roarmap;
|
|
||||||
|
|
||||||
CREATE OR REPLACE VIEW ${stats_db_name}.rndexpediture AS
|
CREATE OR REPLACE VIEW ${stats_db_name}.rndexpediture AS
|
||||||
SELECT *
|
SELECT * FROM ${external_stats_db_name}.rndexpediture;
|
||||||
FROM ${external_stats_db_name}.rndexpediture;
|
|
||||||
|
|
||||||
CREATE OR REPLACE VIEW ${stats_db_name}.licenses_normalized AS
|
CREATE OR REPLACE VIEW ${stats_db_name}.licenses_normalized AS
|
||||||
SELECT *
|
SELECT * FROM ${external_stats_db_name}.licenses_normalized;
|
||||||
FROM ${external_stats_db_name}.licenses_normalized;
|
|
||||||
|
|
||||||
|
-- Correctly spelled alias for the external R&D expenditure table. The source
-- database is taken from the external_stats_db_name parameter, like the other
-- external views in this script, instead of a hard-coded database name.
create view ${stats_db_name}.rndexpenditure as
|
||||||
|
select * from ${external_stats_db_name}.rndexpediture;
|
||||||
|
|
||||||
|
-- Exposes the external issn_gold_oa_dataset table in the stats database. The
-- source database is taken from the external_stats_db_name parameter, like the
-- other external views in this script, instead of a hard-coded database name.
create view ${stats_db_name}.issn_gold_oa_dataset as
|
||||||
|
select * from ${external_stats_db_name}.issn_gold_oa_dataset;
|
||||||
|
|
||||||
------------------------------------------------------------------------------------------------
|
------------------------------------------------------------------------------------------------
|
||||||
------------------------------------------------------------------------------------------------
|
------------------------------------------------------------------------------------------------
|
||||||
|
|
|
@ -102,7 +102,7 @@ WHERE otherresearchproduct_tmp.id IN (SELECT distinct r.id
|
||||||
AND pr.id = p.id
|
AND pr.id = p.id
|
||||||
AND to_date(r.date) - to_date(p.enddate) > 0);
|
AND to_date(r.date) - to_date(p.enddate) > 0);
|
||||||
|
|
||||||
CREATE OR REPLACE VIEW ${stats_db_name}.project_results_publication AS
|
-- A plain view stores no data, so it takes no storage format: STORED AS is only
-- accepted for tables and materialized views and is a parse error on CREATE VIEW.
CREATE OR REPLACE VIEW ${stats_db_name}.project_results_publication AS
|
||||||
SELECT result_projects.id AS result,
|
SELECT result_projects.id AS result,
|
||||||
result_projects.project AS project_results,
|
result_projects.project AS project_results,
|
||||||
result.date as resultdate,
|
result.date as resultdate,
|
||||||
|
|
|
@ -5,7 +5,7 @@
|
||||||
-- Sources related tables/views
|
-- Sources related tables/views
|
||||||
------------------------------------------------------
|
------------------------------------------------------
|
||||||
------------------------------------------------------
|
------------------------------------------------------
|
||||||
CREATE TABLE IF NOT EXISTS ${stats_db_name}.publication_sources as
|
CREATE TABLE IF NOT EXISTS ${stats_db_name}.publication_sources STORED AS PARQUET as
|
||||||
SELECT p.id, case when d.id is null then 'other' else p.datasource end as datasource
|
SELECT p.id, case when d.id is null then 'other' else p.datasource end as datasource
|
||||||
FROM (
|
FROM (
|
||||||
SELECT substr(p.id, 4) as id, substr(datasource, 4) as datasource
|
SELECT substr(p.id, 4) as id, substr(datasource, 4) as datasource
|
||||||
|
@ -16,7 +16,7 @@ LEFT OUTER JOIN
|
||||||
from ${openaire_db_name}.datasource d
|
from ${openaire_db_name}.datasource d
|
||||||
WHERE d.datainfo.deletedbyinference=false) d on p.datasource = d.id;
|
WHERE d.datainfo.deletedbyinference=false) d on p.datasource = d.id;
|
||||||
|
|
||||||
CREATE TABLE IF NOT EXISTS ${stats_db_name}.dataset_sources as
|
CREATE TABLE IF NOT EXISTS ${stats_db_name}.dataset_sources STORED AS PARQUET as
|
||||||
SELECT p.id, case when d.id is null then 'other' else p.datasource end as datasource
|
SELECT p.id, case when d.id is null then 'other' else p.datasource end as datasource
|
||||||
FROM (
|
FROM (
|
||||||
SELECT substr(p.id, 4) as id, substr(datasource, 4) as datasource
|
SELECT substr(p.id, 4) as id, substr(datasource, 4) as datasource
|
||||||
|
@ -27,7 +27,7 @@ LEFT OUTER JOIN
|
||||||
from ${openaire_db_name}.datasource d
|
from ${openaire_db_name}.datasource d
|
||||||
WHERE d.datainfo.deletedbyinference=false) d on p.datasource = d.id;
|
WHERE d.datainfo.deletedbyinference=false) d on p.datasource = d.id;
|
||||||
|
|
||||||
CREATE TABLE IF NOT EXISTS ${stats_db_name}.software_sources as
|
CREATE TABLE IF NOT EXISTS ${stats_db_name}.software_sources STORED AS PARQUET as
|
||||||
SELECT p.id, case when d.id is null then 'other' else p.datasource end as datasource
|
SELECT p.id, case when d.id is null then 'other' else p.datasource end as datasource
|
||||||
FROM (
|
FROM (
|
||||||
SELECT substr(p.id, 4) as id, substr(datasource, 4) as datasource
|
SELECT substr(p.id, 4) as id, substr(datasource, 4) as datasource
|
||||||
|
@ -38,7 +38,7 @@ LEFT OUTER JOIN
|
||||||
from ${openaire_db_name}.datasource d
|
from ${openaire_db_name}.datasource d
|
||||||
WHERE d.datainfo.deletedbyinference=false) d on p.datasource = d.id;
|
WHERE d.datainfo.deletedbyinference=false) d on p.datasource = d.id;
|
||||||
|
|
||||||
CREATE TABLE IF NOT EXISTS ${stats_db_name}.otherresearchproduct_sources as
|
CREATE TABLE IF NOT EXISTS ${stats_db_name}.otherresearchproduct_sources STORED AS PARQUET as
|
||||||
SELECT p.id, case when d.id is null then 'other' else p.datasource end as datasource
|
SELECT p.id, case when d.id is null then 'other' else p.datasource end as datasource
|
||||||
FROM (
|
FROM (
|
||||||
SELECT substr(p.id, 4) as id, substr(datasource, 4) as datasource
|
SELECT substr(p.id, 4) as id, substr(datasource, 4) as datasource
|
||||||
|
@ -59,7 +59,7 @@ UNION ALL
|
||||||
SELECT * FROM ${stats_db_name}.otherresearchproduct_sources;
|
SELECT * FROM ${stats_db_name}.otherresearchproduct_sources;
|
||||||
|
|
||||||
|
|
||||||
create table ${stats_db_name}.result_orcid as
|
create table ${stats_db_name}.result_orcid STORED AS PARQUET as
|
||||||
select distinct res.id, regexp_replace(res.orcid, 'http://orcid.org/' ,'') as orcid
|
select distinct res.id, regexp_replace(res.orcid, 'http://orcid.org/' ,'') as orcid
|
||||||
from (
|
from (
|
||||||
SELECT substr(res.id, 4) as id, auth_pid.value as orcid
|
SELECT substr(res.id, 4) as id, auth_pid.value as orcid
|
||||||
|
|
|
@ -5,27 +5,27 @@
|
||||||
-- Licences related tables/views
|
-- Licences related tables/views
|
||||||
------------------------------------------------------
|
------------------------------------------------------
|
||||||
------------------------------------------------------
|
------------------------------------------------------
|
||||||
CREATE TABLE IF NOT EXISTS ${stats_db_name}.publication_licenses AS
|
CREATE TABLE IF NOT EXISTS ${stats_db_name}.publication_licenses STORED AS PARQUET AS
|
||||||
SELECT substr(p.id, 4) as id, licenses.value as type
|
SELECT substr(p.id, 4) as id, licenses.value as type
|
||||||
from ${openaire_db_name}.publication p LATERAL VIEW explode(p.instance.license) instances as licenses
|
from ${openaire_db_name}.publication p LATERAL VIEW explode(p.instance.license) instances as licenses
|
||||||
where licenses.value is not null and licenses.value != '' and p.datainfo.deletedbyinference=false;
|
where licenses.value is not null and licenses.value != '' and p.datainfo.deletedbyinference=false;
|
||||||
|
|
||||||
CREATE TABLE IF NOT EXISTS ${stats_db_name}.dataset_licenses AS
|
CREATE TABLE IF NOT EXISTS ${stats_db_name}.dataset_licenses STORED AS PARQUET AS
|
||||||
SELECT substr(p.id, 4) as id, licenses.value as type
|
SELECT substr(p.id, 4) as id, licenses.value as type
|
||||||
from ${openaire_db_name}.dataset p LATERAL VIEW explode(p.instance.license) instances as licenses
|
from ${openaire_db_name}.dataset p LATERAL VIEW explode(p.instance.license) instances as licenses
|
||||||
where licenses.value is not null and licenses.value != '' and p.datainfo.deletedbyinference=false;
|
where licenses.value is not null and licenses.value != '' and p.datainfo.deletedbyinference=false;
|
||||||
|
|
||||||
CREATE TABLE IF NOT EXISTS ${stats_db_name}.software_licenses AS
|
CREATE TABLE IF NOT EXISTS ${stats_db_name}.software_licenses STORED AS PARQUET AS
|
||||||
SELECT substr(p.id, 4) as id, licenses.value as type
|
SELECT substr(p.id, 4) as id, licenses.value as type
|
||||||
from ${openaire_db_name}.software p LATERAL VIEW explode(p.instance.license) instances as licenses
|
from ${openaire_db_name}.software p LATERAL VIEW explode(p.instance.license) instances as licenses
|
||||||
where licenses.value is not null and licenses.value != '' and p.datainfo.deletedbyinference=false;
|
where licenses.value is not null and licenses.value != '' and p.datainfo.deletedbyinference=false;
|
||||||
|
|
||||||
CREATE TABLE IF NOT EXISTS ${stats_db_name}.otherresearchproduct_licenses AS
|
CREATE TABLE IF NOT EXISTS ${stats_db_name}.otherresearchproduct_licenses STORED AS PARQUET AS
|
||||||
SELECT substr(p.id, 4) as id, licenses.value as type
|
SELECT substr(p.id, 4) as id, licenses.value as type
|
||||||
from ${openaire_db_name}.otherresearchproduct p LATERAL VIEW explode(p.instance.license) instances as licenses
|
from ${openaire_db_name}.otherresearchproduct p LATERAL VIEW explode(p.instance.license) instances as licenses
|
||||||
where licenses.value is not null and licenses.value != '' and p.datainfo.deletedbyinference=false;
|
where licenses.value is not null and licenses.value != '' and p.datainfo.deletedbyinference=false;
|
||||||
|
|
||||||
CREATE VIEW IF NOT EXISTS ${stats_db_name}.result_licenses AS
|
-- A plain view stores no data, so it takes no storage format: STORED AS is only
-- accepted for tables and materialized views and is a parse error on CREATE VIEW.
CREATE VIEW IF NOT EXISTS ${stats_db_name}.result_licenses AS
|
||||||
SELECT * FROM ${stats_db_name}.publication_licenses
|
SELECT * FROM ${stats_db_name}.publication_licenses
|
||||||
UNION ALL
|
UNION ALL
|
||||||
SELECT * FROM ${stats_db_name}.dataset_licenses
|
SELECT * FROM ${stats_db_name}.dataset_licenses
|
||||||
|
@ -34,11 +34,11 @@ SELECT * FROM ${stats_db_name}.software_licenses
|
||||||
UNION ALL
|
UNION ALL
|
||||||
SELECT * FROM ${stats_db_name}.otherresearchproduct_licenses;
|
SELECT * FROM ${stats_db_name}.otherresearchproduct_licenses;
|
||||||
|
|
||||||
CREATE TABLE IF NOT EXISTS ${stats_db_name}.organization_pids AS
|
CREATE TABLE IF NOT EXISTS ${stats_db_name}.organization_pids STORED AS PARQUET AS
|
||||||
select substr(o.id, 4) as id, ppid.qualifier.classname as type, ppid.value as pid
|
select substr(o.id, 4) as id, ppid.qualifier.classname as type, ppid.value as pid
|
||||||
from ${openaire_db_name}.organization o lateral view explode(o.pid) pids as ppid;
|
from ${openaire_db_name}.organization o lateral view explode(o.pid) pids as ppid;
|
||||||
|
|
||||||
CREATE TABLE IF NOT EXISTS ${stats_db_name}.organization_sources as
|
CREATE TABLE IF NOT EXISTS ${stats_db_name}.organization_sources STORED AS PARQUET as
|
||||||
SELECT o.id, case when d.id is null then 'other' else o.datasource end as datasource
|
SELECT o.id, case when d.id is null then 'other' else o.datasource end as datasource
|
||||||
FROM (
|
FROM (
|
||||||
SELECT substr(o.id, 4) as id, substr(instances.instance.key, 4) as datasource
|
SELECT substr(o.id, 4) as id, substr(instances.instance.key, 4) as datasource
|
||||||
|
@ -47,16 +47,3 @@ FROM (
|
||||||
SELECT substr(d.id, 4) id
|
SELECT substr(d.id, 4) id
|
||||||
from ${openaire_db_name}.datasource d
|
from ${openaire_db_name}.datasource d
|
||||||
WHERE d.datainfo.deletedbyinference=false) d on o.datasource = d.id;
|
WHERE d.datainfo.deletedbyinference=false) d on o.datasource = d.id;
|
||||||
|
|
||||||
-- ANALYZE TABLE ${stats_db_name}.publication_licenses COMPUTE STATISTICS;
|
|
||||||
-- ANALYZE TABLE ${stats_db_name}.publication_licenses COMPUTE STATISTICS FOR COLUMNS;
|
|
||||||
-- ANALYZE TABLE ${stats_db_name}.dataset_licenses COMPUTE STATISTICS;
|
|
||||||
-- ANALYZE TABLE ${stats_db_name}.dataset_licenses COMPUTE STATISTICS FOR COLUMNS;
|
|
||||||
-- ANALYZE TABLE ${stats_db_name}.software_licenses COMPUTE STATISTICS;
|
|
||||||
-- ANALYZE TABLE ${stats_db_name}.software_licenses COMPUTE STATISTICS FOR COLUMNS;
|
|
||||||
-- ANALYZE TABLE ${stats_db_name}.otherresearchproduct_licenses COMPUTE STATISTICS;
|
|
||||||
-- ANALYZE TABLE ${stats_db_name}.otherresearchproduct_licenses COMPUTE STATISTICS FOR COLUMNS;
|
|
||||||
-- ANALYZE TABLE ${stats_db_name}.organization_pids COMPUTE STATISTICS;
|
|
||||||
-- ANALYZE TABLE ${stats_db_name}.organization_pids COMPUTE STATISTICS FOR COLUMNS;
|
|
||||||
-- ANALYZE TABLE ${stats_db_name}.organization_sources COMPUTE STATISTICS;
|
|
||||||
-- ANALYZE TABLE ${stats_db_name}.organization_sources COMPUTE STATISTICS FOR COLUMNS;
|
|
|
@ -6,27 +6,27 @@
|
||||||
------------------------------------------------------
|
------------------------------------------------------
|
||||||
------------------------------------------------------
|
------------------------------------------------------
|
||||||
|
|
||||||
CREATE TABLE IF NOT EXISTS ${stats_db_name}.publication_refereed as
|
CREATE TABLE IF NOT EXISTS ${stats_db_name}.publication_refereed STORED AS PARQUET as
|
||||||
select substr(r.id, 4) as id, inst.refereed.classname as refereed
|
select substr(r.id, 4) as id, inst.refereed.classname as refereed
|
||||||
from ${openaire_db_name}.publication r lateral view explode(r.instance) instances as inst
|
from ${openaire_db_name}.publication r lateral view explode(r.instance) instances as inst
|
||||||
where r.datainfo.deletedbyinference=false;
|
where r.datainfo.deletedbyinference=false;
|
||||||
|
|
||||||
CREATE TABLE IF NOT EXISTS ${stats_db_name}.dataset_refereed as
|
CREATE TABLE IF NOT EXISTS ${stats_db_name}.dataset_refereed STORED AS PARQUET as
|
||||||
select substr(r.id, 4) as id, inst.refereed.classname as refereed
|
select substr(r.id, 4) as id, inst.refereed.classname as refereed
|
||||||
from ${openaire_db_name}.dataset r lateral view explode(r.instance) instances as inst
|
from ${openaire_db_name}.dataset r lateral view explode(r.instance) instances as inst
|
||||||
where r.datainfo.deletedbyinference=false;
|
where r.datainfo.deletedbyinference=false;
|
||||||
|
|
||||||
CREATE TABLE IF NOT EXISTS ${stats_db_name}.software_refereed as
|
CREATE TABLE IF NOT EXISTS ${stats_db_name}.software_refereed STORED AS PARQUET as
|
||||||
select substr(r.id, 4) as id, inst.refereed.classname as refereed
|
select substr(r.id, 4) as id, inst.refereed.classname as refereed
|
||||||
from ${openaire_db_name}.software r lateral view explode(r.instance) instances as inst
|
from ${openaire_db_name}.software r lateral view explode(r.instance) instances as inst
|
||||||
where r.datainfo.deletedbyinference=false;
|
where r.datainfo.deletedbyinference=false;
|
||||||
|
|
||||||
CREATE TABLE IF NOT EXISTS ${stats_db_name}.otherresearchproduct_refereed as
|
CREATE TABLE IF NOT EXISTS ${stats_db_name}.otherresearchproduct_refereed STORED AS PARQUET as
|
||||||
select substr(r.id, 4) as id, inst.refereed.classname as refereed
|
select substr(r.id, 4) as id, inst.refereed.classname as refereed
|
||||||
from ${openaire_db_name}.otherresearchproduct r lateral view explode(r.instance) instances as inst
|
from ${openaire_db_name}.otherresearchproduct r lateral view explode(r.instance) instances as inst
|
||||||
where r.datainfo.deletedbyinference=false;
|
where r.datainfo.deletedbyinference=false;
|
||||||
|
|
||||||
CREATE VIEW IF NOT EXISTS ${stats_db_name}.result_refereed as
|
-- A plain view stores no data, so it takes no storage format: STORED AS is only
-- accepted for tables and materialized views and is a parse error on CREATE VIEW.
CREATE VIEW IF NOT EXISTS ${stats_db_name}.result_refereed as
|
||||||
select * from ${stats_db_name}.publication_refereed
|
select * from ${stats_db_name}.publication_refereed
|
||||||
union all
|
union all
|
||||||
select * from ${stats_db_name}.dataset_refereed
|
select * from ${stats_db_name}.dataset_refereed
|
||||||
|
|
|
@ -1,21 +1,21 @@
|
||||||
-------------------------------------------
|
-------------------------------------------
|
||||||
--- Extra tables, mostly used by indicators
|
--- Extra tables, mostly used by indicators
|
||||||
|
|
||||||
create table ${stats_db_name}.result_projectcount as
|
create table ${stats_db_name}.result_projectcount STORED AS PARQUET as
|
||||||
select r.id, count(distinct p.id) as count
|
select r.id, count(distinct p.id) as count
|
||||||
from ${stats_db_name}.result r
|
from ${stats_db_name}.result r
|
||||||
left outer join ${stats_db_name}.result_projects rp on rp.id=r.id
|
left outer join ${stats_db_name}.result_projects rp on rp.id=r.id
|
||||||
left outer join ${stats_db_name}.project p on p.id=rp.project
|
left outer join ${stats_db_name}.project p on p.id=rp.project
|
||||||
group by r.id;
|
group by r.id;
|
||||||
|
|
||||||
create table ${stats_db_name}.result_fundercount as
|
create table ${stats_db_name}.result_fundercount STORED AS PARQUET as
|
||||||
select r.id, count(distinct p.funder) as count
|
select r.id, count(distinct p.funder) as count
|
||||||
from ${stats_db_name}.result r
|
from ${stats_db_name}.result r
|
||||||
left outer join ${stats_db_name}.result_projects rp on rp.id=r.id
|
left outer join ${stats_db_name}.result_projects rp on rp.id=r.id
|
||||||
left outer join ${stats_db_name}.project p on p.id=rp.project
|
left outer join ${stats_db_name}.project p on p.id=rp.project
|
||||||
group by r.id;
|
group by r.id;
|
||||||
|
|
||||||
create table ${stats_db_name}.project_resultcount as
|
create table ${stats_db_name}.project_resultcount STORED AS PARQUET as
|
||||||
with rcount as (
|
with rcount as (
|
||||||
select p.id as pid, count(distinct r.id) as `count`, r.type as type
|
select p.id as pid, count(distinct r.id) as `count`, r.type as type
|
||||||
from ${stats_db_name}.project p
|
from ${stats_db_name}.project p
|
||||||
|
@ -29,8 +29,6 @@ select rcount.pid, sum(case when rcount.type='publication' then rcount.count els
|
||||||
from rcount
|
from rcount
|
||||||
group by rcount.pid;
|
group by rcount.pid;
|
||||||
|
|
||||||
create view ${stats_db_name}.rndexpenditure as select * from stats_ext.rndexpediture;
|
|
||||||
|
|
||||||
create table ${stats_db_name}.result_instance stored as parquet as
|
create table ${stats_db_name}.result_instance stored as parquet as
|
||||||
select distinct r.*
|
select distinct r.*
|
||||||
from (
|
from (
|
||||||
|
@ -39,12 +37,10 @@ from (
|
||||||
from ${openaire_db_name}.result r lateral view explode(r.instance) instances as inst lateral view explode(inst.pid) pids as p) r
|
from ${openaire_db_name}.result r lateral view explode(r.instance) instances as inst lateral view explode(inst.pid) pids as p) r
|
||||||
join ${stats_db_name}.result res on res.id=r.id;
|
join ${stats_db_name}.result res on res.id=r.id;
|
||||||
|
|
||||||
create table ${stats_db_name}.result_apc as
|
create table ${stats_db_name}.result_apc STORED AS PARQUET as
|
||||||
select r.id, r.amount, r.currency
|
select r.id, r.amount, r.currency
|
||||||
from (
|
from (
|
||||||
select substr(r.id, 4) as id, inst.processingchargeamount.value as amount, inst.processingchargecurrency.value as currency
|
select substr(r.id, 4) as id, inst.processingchargeamount.value as amount, inst.processingchargecurrency.value as currency
|
||||||
from ${openaire_db_name}.result r lateral view explode(r.instance) instances as inst) r
|
from ${openaire_db_name}.result r lateral view explode(r.instance) instances as inst) r
|
||||||
join ${stats_db_name}.result res on res.id=r.id
|
join ${stats_db_name}.result res on res.id=r.id
|
||||||
where r.amount is not null;
|
where r.amount is not null;
|
||||||
|
|
||||||
create view ${stats_db_name}.issn_gold_oa_dataset as select * from stats_ext.issn_gold_oa_dataset;
|
|
|
@ -3,20 +3,20 @@
|
||||||
----------------------------------------------------
|
----------------------------------------------------
|
||||||
|
|
||||||
-- Peer reviewed:
|
-- Peer reviewed:
|
||||||
create table ${stats_db_name}.result_peerreviewed as
|
create table ${stats_db_name}.result_peerreviewed stored as parquet as
|
||||||
select r.id as id, case when doi.doi_from_crossref=1 and grey.grey_lit=0 then true else false end as peer_reviewed
|
select r.id as id, case when doi.doi_from_crossref=1 and grey.grey_lit=0 then true else false end as peer_reviewed
|
||||||
from ${stats_db_name}.result r
|
from ${stats_db_name}.result r
|
||||||
left outer join ${stats_db_name}.indi_pub_doi_from_crossref doi on doi.id=r.id
|
left outer join ${stats_db_name}.indi_pub_doi_from_crossref doi on doi.id=r.id
|
||||||
left outer join ${stats_db_name}.indi_pub_grey_lit grey on grey.id=r.id;
|
left outer join ${stats_db_name}.indi_pub_grey_lit grey on grey.id=r.id;
|
||||||
|
|
||||||
-- Green OA:
|
-- Green OA:
|
||||||
create table ${stats_db_name}.result_greenoa as
|
create table ${stats_db_name}.result_greenoa stored as parquet as
|
||||||
select r.id, case when green.green_oa=1 then true else false end as green
|
select r.id, case when green.green_oa=1 then true else false end as green
|
||||||
from ${stats_db_name}.result r
|
from ${stats_db_name}.result r
|
||||||
left outer join ${stats_db_name}.indi_pub_green_oa green on green.id=r.id;
|
left outer join ${stats_db_name}.indi_pub_green_oa green on green.id=r.id;
|
||||||
|
|
||||||
-- GOLD OA:
|
-- GOLD OA:
|
||||||
create table ${stats_db_name}.result_gold as
|
create table ${stats_db_name}.result_gold stored as parquet as
|
||||||
select r.id, case when gold.gold_oa=1 then true else false end as gold
|
select r.id, case when gold.gold_oa=1 then true else false end as gold
|
||||||
from ${stats_db_name}.result r
|
from ${stats_db_name}.result r
|
||||||
left outer join ${stats_db_name}.indi_pub_gold_oa gold on gold.id=r.id;
|
left outer join ${stats_db_name}.indi_pub_gold_oa gold on gold.id=r.id;
|
|
@ -40,13 +40,13 @@ SELECT substr(p.id, 4) as id,
|
||||||
from ${openaire_db_name}.publication p
|
from ${openaire_db_name}.publication p
|
||||||
where p.datainfo.deletedbyinference = false;
|
where p.datainfo.deletedbyinference = false;
|
||||||
|
|
||||||
CREATE TABLE ${stats_db_name}.publication_classifications AS
|
CREATE TABLE ${stats_db_name}.publication_classifications STORED AS PARQUET AS
|
||||||
SELECT substr(p.id, 4) as id, instancetype.classname as type
|
SELECT substr(p.id, 4) as id, instancetype.classname as type
|
||||||
from ${openaire_db_name}.publication p
|
from ${openaire_db_name}.publication p
|
||||||
LATERAL VIEW explode(p.instance.instancetype) instances as instancetype
|
LATERAL VIEW explode(p.instance.instancetype) instances as instancetype
|
||||||
where p.datainfo.deletedbyinference = false;
|
where p.datainfo.deletedbyinference = false;
|
||||||
|
|
||||||
CREATE TABLE ${stats_db_name}.publication_concepts AS
|
CREATE TABLE ${stats_db_name}.publication_concepts STORED AS PARQUET AS
|
||||||
SELECT substr(p.id, 4) as id, case
|
SELECT substr(p.id, 4) as id, case
|
||||||
when contexts.context.id RLIKE '^[^::]+::[^::]+::.+$' then contexts.context.id
|
when contexts.context.id RLIKE '^[^::]+::[^::]+::.+$' then contexts.context.id
|
||||||
when contexts.context.id RLIKE '^[^::]+::[^::]+$' then concat(contexts.context.id, '::other')
|
when contexts.context.id RLIKE '^[^::]+::[^::]+$' then concat(contexts.context.id, '::other')
|
||||||
|
@ -55,7 +55,7 @@ from ${openaire_db_name}.publication p
|
||||||
LATERAL VIEW explode(p.context) contexts as context
|
LATERAL VIEW explode(p.context) contexts as context
|
||||||
where p.datainfo.deletedbyinference = false;
|
where p.datainfo.deletedbyinference = false;
|
||||||
|
|
||||||
CREATE TABLE ${stats_db_name}.publication_datasources as
|
CREATE TABLE ${stats_db_name}.publication_datasources STORED AS PARQUET as
|
||||||
SELECT p.id, case when d.id is null then 'other' else p.datasource end as datasource
|
SELECT p.id, case when d.id is null then 'other' else p.datasource end as datasource
|
||||||
FROM (
|
FROM (
|
||||||
SELECT substr(p.id, 4) as id, substr(instances.instance.hostedby.key, 4) as datasource
|
SELECT substr(p.id, 4) as id, substr(instances.instance.hostedby.key, 4) as datasource
|
||||||
|
@ -66,30 +66,30 @@ FROM (
|
||||||
from ${openaire_db_name}.datasource d
|
from ${openaire_db_name}.datasource d
|
||||||
WHERE d.datainfo.deletedbyinference = false) d on p.datasource = d.id;
|
WHERE d.datainfo.deletedbyinference = false) d on p.datasource = d.id;
|
||||||
|
|
||||||
CREATE TABLE ${stats_db_name}.publication_languages AS
|
CREATE TABLE ${stats_db_name}.publication_languages STORED AS PARQUET AS
|
||||||
select substr(p.id, 4) as id, p.language.classname as language
|
select substr(p.id, 4) as id, p.language.classname as language
|
||||||
FROM ${openaire_db_name}.publication p
|
FROM ${openaire_db_name}.publication p
|
||||||
where p.datainfo.deletedbyinference = false;
|
where p.datainfo.deletedbyinference = false;
|
||||||
|
|
||||||
CREATE TABLE ${stats_db_name}.publication_oids AS
|
CREATE TABLE ${stats_db_name}.publication_oids STORED AS PARQUET AS
|
||||||
SELECT substr(p.id, 4) AS id, oids.ids AS oid
|
SELECT substr(p.id, 4) AS id, oids.ids AS oid
|
||||||
FROM ${openaire_db_name}.publication p
|
FROM ${openaire_db_name}.publication p
|
||||||
LATERAL VIEW explode(p.originalid) oids AS ids
|
LATERAL VIEW explode(p.originalid) oids AS ids
|
||||||
where p.datainfo.deletedbyinference = false;
|
where p.datainfo.deletedbyinference = false;
|
||||||
|
|
||||||
CREATE TABLE ${stats_db_name}.publication_pids AS
|
CREATE TABLE ${stats_db_name}.publication_pids STORED AS PARQUET AS
|
||||||
SELECT substr(p.id, 4) AS id, ppid.qualifier.classname AS type, ppid.value as pid
|
SELECT substr(p.id, 4) AS id, ppid.qualifier.classname AS type, ppid.value as pid
|
||||||
FROM ${openaire_db_name}.publication p
|
FROM ${openaire_db_name}.publication p
|
||||||
LATERAL VIEW explode(p.pid) pids AS ppid
|
LATERAL VIEW explode(p.pid) pids AS ppid
|
||||||
where p.datainfo.deletedbyinference = false;
|
where p.datainfo.deletedbyinference = false;
|
||||||
|
|
||||||
CREATE TABLE ${stats_db_name}.publication_topics as
|
CREATE TABLE ${stats_db_name}.publication_topics STORED AS PARQUET as
|
||||||
select substr(p.id, 4) AS id, subjects.subject.qualifier.classname AS TYPE, subjects.subject.value AS topic
|
select substr(p.id, 4) AS id, subjects.subject.qualifier.classname AS TYPE, subjects.subject.value AS topic
|
||||||
FROM ${openaire_db_name}.publication p
|
FROM ${openaire_db_name}.publication p
|
||||||
LATERAL VIEW explode(p.subject) subjects AS subject
|
LATERAL VIEW explode(p.subject) subjects AS subject
|
||||||
where p.datainfo.deletedbyinference = false;
|
where p.datainfo.deletedbyinference = false;
|
||||||
|
|
||||||
CREATE TABLE ${stats_db_name}.publication_citations AS
|
CREATE TABLE ${stats_db_name}.publication_citations STORED AS PARQUET AS
|
||||||
SELECT substr(p.id, 4) AS id, xpath_string(citation.value, "//citation/id[@type='openaire']/@value") AS cites
|
SELECT substr(p.id, 4) AS id, xpath_string(citation.value, "//citation/id[@type='openaire']/@value") AS cites
|
||||||
FROM ${openaire_db_name}.publication p
|
FROM ${openaire_db_name}.publication p
|
||||||
lateral view explode(p.extrainfo) citations AS citation
|
lateral view explode(p.extrainfo) citations AS citation
|
||||||
|
|
|
@ -40,20 +40,20 @@ SELECT substr(d.id, 4) AS id,
|
||||||
FROM ${openaire_db_name}.dataset d
|
FROM ${openaire_db_name}.dataset d
|
||||||
WHERE d.datainfo.deletedbyinference = FALSE;
|
WHERE d.datainfo.deletedbyinference = FALSE;
|
||||||
|
|
||||||
-- dataset_citations: one row per extrainfo element of a dataset whose XML
-- value yields a non-empty openaire citation id.
-- Datasets flagged deletedbyinference are excluded.
CREATE TABLE ${stats_db_name}.dataset_citations STORED AS PARQUET AS
SELECT
    substr(ds.id, 4) AS id,
    xpath_string(extra.value, "//citation/id[@type='openaire']/@value") AS cites
FROM ${openaire_db_name}.dataset ds
    LATERAL VIEW explode(ds.extrainfo) extra_view AS extra
WHERE xpath_string(extra.value, "//citation/id[@type='openaire']/@value") != ""
  AND ds.datainfo.deletedbyinference = FALSE;
|
||||||
|
|
||||||
-- dataset_classifications: one row per element of instance.instancetype,
-- exposing its class name as the dataset's type.
-- Datasets flagged deletedbyinference are excluded.
CREATE TABLE ${stats_db_name}.dataset_classifications STORED AS PARQUET AS
SELECT
    substr(ds.id, 4)        AS id,
    instance_type.classname AS type
FROM ${openaire_db_name}.dataset ds
    LATERAL VIEW explode(ds.instance.instancetype) instance_type_view AS instance_type
WHERE ds.datainfo.deletedbyinference = FALSE;
|
||||||
|
|
||||||
CREATE TABLE ${stats_db_name}.dataset_concepts AS
|
CREATE TABLE ${stats_db_name}.dataset_concepts STORED AS PARQUET AS
|
||||||
SELECT substr(p.id, 4) as id, case
|
SELECT substr(p.id, 4) as id, case
|
||||||
when contexts.context.id RLIKE '^[^::]+::[^::]+::.+$' then contexts.context.id
|
when contexts.context.id RLIKE '^[^::]+::[^::]+::.+$' then contexts.context.id
|
||||||
when contexts.context.id RLIKE '^[^::]+::[^::]+$' then concat(contexts.context.id, '::other')
|
when contexts.context.id RLIKE '^[^::]+::[^::]+$' then concat(contexts.context.id, '::other')
|
||||||
|
@ -62,7 +62,7 @@ from ${openaire_db_name}.dataset p
|
||||||
LATERAL VIEW explode(p.context) contexts as context
|
LATERAL VIEW explode(p.context) contexts as context
|
||||||
where p.datainfo.deletedbyinference = false;
|
where p.datainfo.deletedbyinference = false;
|
||||||
|
|
||||||
CREATE TABLE ${stats_db_name}.dataset_datasources AS
|
CREATE TABLE ${stats_db_name}.dataset_datasources STORED AS PARQUET AS
|
||||||
SELECT p.id, case when d.id IS NULL THEN 'other' ELSE p.datasource END AS datasource
|
SELECT p.id, case when d.id IS NULL THEN 'other' ELSE p.datasource END AS datasource
|
||||||
FROM (
|
FROM (
|
||||||
SELECT substr(p.id, 4) as id, substr(instances.instance.hostedby.key, 4) AS datasource
|
SELECT substr(p.id, 4) as id, substr(instances.instance.hostedby.key, 4) AS datasource
|
||||||
|
@ -74,24 +74,24 @@ FROM (
|
||||||
FROM ${openaire_db_name}.datasource d
|
FROM ${openaire_db_name}.datasource d
|
||||||
WHERE d.datainfo.deletedbyinference = false) d ON p.datasource = d.id;
|
WHERE d.datainfo.deletedbyinference = false) d ON p.datasource = d.id;
|
||||||
|
|
||||||
-- dataset_languages: one row per dataset with the class name of its language.
-- Datasets flagged deletedbyinference are excluded.
CREATE TABLE ${stats_db_name}.dataset_languages STORED AS PARQUET AS
SELECT
    substr(ds.id, 4)      AS id,
    ds.language.classname AS language
FROM ${openaire_db_name}.dataset ds
WHERE ds.datainfo.deletedbyinference = FALSE;
|
||||||
|
|
||||||
-- dataset_oids: one row per element of a dataset's originalid array.
-- Datasets flagged deletedbyinference are excluded.
CREATE TABLE ${stats_db_name}.dataset_oids STORED AS PARQUET AS
SELECT
    substr(ds.id, 4) AS id,
    original_id      AS oid
FROM ${openaire_db_name}.dataset ds
    LATERAL VIEW explode(ds.originalid) original_id_view AS original_id
WHERE ds.datainfo.deletedbyinference = FALSE;
|
||||||
|
|
||||||
-- dataset_pids: one row per element of a dataset's pid array, with the
-- pid's qualifier class name and value.
-- Datasets flagged deletedbyinference are excluded.
CREATE TABLE ${stats_db_name}.dataset_pids STORED AS PARQUET AS
SELECT
    substr(ds.id, 4)              AS id,
    pid_entry.qualifier.classname AS type,
    pid_entry.value               AS pid
FROM ${openaire_db_name}.dataset ds
    LATERAL VIEW explode(ds.pid) pid_view AS pid_entry
WHERE ds.datainfo.deletedbyinference = FALSE;
|
||||||
|
|
||||||
CREATE TABLE ${stats_db_name}.dataset_topics AS
|
CREATE TABLE ${stats_db_name}.dataset_topics STORED AS PARQUET AS
|
||||||
SELECT substr(p.id, 4) AS id, subjects.subject.qualifier.classname AS type, subjects.subject.value AS topic
|
SELECT substr(p.id, 4) AS id, subjects.subject.qualifier.classname AS type, subjects.subject.value AS topic
|
||||||
FROM ${openaire_db_name}.dataset p
|
FROM ${openaire_db_name}.dataset p
|
||||||
LATERAL VIEW explode(p.subject) subjects AS subject
|
LATERAL VIEW explode(p.subject) subjects AS subject
|
||||||
|
|
|
@ -40,20 +40,20 @@ SELECT substr(s.id, 4) as id,
|
||||||
from ${openaire_db_name}.software s
|
from ${openaire_db_name}.software s
|
||||||
where s.datainfo.deletedbyinference = false;
|
where s.datainfo.deletedbyinference = false;
|
||||||
|
|
||||||
-- software_citations: one row per extrainfo element of a software record
-- whose XML value yields a non-empty openaire citation id.
-- Software flagged deletedbyinference is excluded.
CREATE TABLE ${stats_db_name}.software_citations STORED AS PARQUET AS
SELECT
    substr(sw.id, 4) AS id,
    xpath_string(extra.value, "//citation/id[@type='openaire']/@value") AS cites
FROM ${openaire_db_name}.software sw
    LATERAL VIEW explode(sw.extrainfo) extra_view AS extra
WHERE xpath_string(extra.value, "//citation/id[@type='openaire']/@value") != ""
  AND sw.datainfo.deletedbyinference = FALSE;
|
||||||
|
|
||||||
-- software_classifications: one row per element of instance.instancetype,
-- exposing its class name as the software's type.
-- Software flagged deletedbyinference is excluded.
CREATE TABLE ${stats_db_name}.software_classifications STORED AS PARQUET AS
SELECT
    substr(sw.id, 4)        AS id,
    instance_type.classname AS type
FROM ${openaire_db_name}.software sw
    LATERAL VIEW explode(sw.instance.instancetype) instance_type_view AS instance_type
WHERE sw.datainfo.deletedbyinference = FALSE;
|
||||||
|
|
||||||
CREATE TABLE ${stats_db_name}.software_concepts AS
|
CREATE TABLE ${stats_db_name}.software_concepts STORED AS PARQUET AS
|
||||||
SELECT substr(p.id, 4) as id, case
|
SELECT substr(p.id, 4) as id, case
|
||||||
when contexts.context.id RLIKE '^[^::]+::[^::]+::.+$' then contexts.context.id
|
when contexts.context.id RLIKE '^[^::]+::[^::]+::.+$' then contexts.context.id
|
||||||
when contexts.context.id RLIKE '^[^::]+::[^::]+$' then concat(contexts.context.id, '::other')
|
when contexts.context.id RLIKE '^[^::]+::[^::]+$' then concat(contexts.context.id, '::other')
|
||||||
|
@ -62,7 +62,7 @@ FROM ${openaire_db_name}.software p
|
||||||
LATERAL VIEW explode(p.context) contexts AS context
|
LATERAL VIEW explode(p.context) contexts AS context
|
||||||
where p.datainfo.deletedbyinference = false;
|
where p.datainfo.deletedbyinference = false;
|
||||||
|
|
||||||
CREATE TABLE ${stats_db_name}.software_datasources AS
|
CREATE TABLE ${stats_db_name}.software_datasources STORED AS PARQUET AS
|
||||||
SELECT p.id, CASE WHEN d.id IS NULL THEN 'other' ELSE p.datasource end as datasource
|
SELECT p.id, CASE WHEN d.id IS NULL THEN 'other' ELSE p.datasource end as datasource
|
||||||
FROM (
|
FROM (
|
||||||
SELECT substr(p.id, 4) AS id, substr(instances.instance.hostedby.key, 4) AS datasource
|
SELECT substr(p.id, 4) AS id, substr(instances.instance.hostedby.key, 4) AS datasource
|
||||||
|
@ -74,24 +74,24 @@ FROM (
|
||||||
FROM ${openaire_db_name}.datasource d
|
FROM ${openaire_db_name}.datasource d
|
||||||
WHERE d.datainfo.deletedbyinference = false) d ON p.datasource = d.id;
|
WHERE d.datainfo.deletedbyinference = false) d ON p.datasource = d.id;
|
||||||
|
|
||||||
-- software_languages: one row per software record with the class name of
-- its language. Software flagged deletedbyinference is excluded.
CREATE TABLE ${stats_db_name}.software_languages STORED AS PARQUET AS
SELECT
    substr(sw.id, 4)      AS id,
    sw.language.classname AS language
FROM ${openaire_db_name}.software sw
WHERE sw.datainfo.deletedbyinference = FALSE;
|
||||||
|
|
||||||
-- software_oids: one row per element of a software record's originalid array.
-- Software flagged deletedbyinference is excluded.
CREATE TABLE ${stats_db_name}.software_oids STORED AS PARQUET AS
SELECT
    substr(sw.id, 4) AS id,
    original_id      AS oid
FROM ${openaire_db_name}.software sw
    LATERAL VIEW explode(sw.originalid) original_id_view AS original_id
WHERE sw.datainfo.deletedbyinference = FALSE;
|
||||||
|
|
||||||
-- software_pids: one row per element of a software record's pid array, with
-- the pid's qualifier class name and value.
-- Software flagged deletedbyinference is excluded.
CREATE TABLE ${stats_db_name}.software_pids STORED AS PARQUET AS
SELECT
    substr(sw.id, 4)              AS id,
    pid_entry.qualifier.classname AS type,
    pid_entry.value               AS pid
FROM ${openaire_db_name}.software sw
    LATERAL VIEW explode(sw.pid) pid_view AS pid_entry
WHERE sw.datainfo.deletedbyinference = FALSE;
|
||||||
|
|
||||||
CREATE TABLE ${stats_db_name}.software_topics AS
|
CREATE TABLE ${stats_db_name}.software_topics STORED AS PARQUET AS
|
||||||
SELECT substr(p.id, 4) AS id, subjects.subject.qualifier.classname AS type, subjects.subject.value AS topic
|
SELECT substr(p.id, 4) AS id, subjects.subject.qualifier.classname AS type, subjects.subject.value AS topic
|
||||||
FROM ${openaire_db_name}.software p
|
FROM ${openaire_db_name}.software p
|
||||||
LATERAL VIEW explode(p.subject) subjects AS subject
|
LATERAL VIEW explode(p.subject) subjects AS subject
|
||||||
|
|
|
@ -40,18 +40,18 @@ FROM ${openaire_db_name}.otherresearchproduct o
|
||||||
WHERE o.datainfo.deletedbyinference = FALSE;
|
WHERE o.datainfo.deletedbyinference = FALSE;
|
||||||
|
|
||||||
-- Otherresearchproduct_citations
-- One row per extrainfo element of an otherresearchproduct whose XML value
-- yields a non-empty openaire citation id.
-- Records flagged deletedbyinference are excluded.
CREATE TABLE ${stats_db_name}.otherresearchproduct_citations STORED AS PARQUET AS
SELECT
    substr(orp.id, 4) AS id,
    xpath_string(extra.value, "//citation/id[@type='openaire']/@value") AS cites
FROM ${openaire_db_name}.otherresearchproduct orp
    LATERAL VIEW explode(orp.extrainfo) extra_view AS extra
WHERE xpath_string(extra.value, "//citation/id[@type='openaire']/@value") != ""
  AND orp.datainfo.deletedbyinference = FALSE;
|
||||||
|
|
||||||
-- otherresearchproduct_classifications: one row per element of
-- instance.instancetype, exposing its class name as the record's type.
-- Records flagged deletedbyinference are excluded.
CREATE TABLE ${stats_db_name}.otherresearchproduct_classifications STORED AS PARQUET AS
SELECT
    substr(orp.id, 4)       AS id,
    instance_type.classname AS type
FROM ${openaire_db_name}.otherresearchproduct orp
    LATERAL VIEW explode(orp.instance.instancetype) instance_type_view AS instance_type
WHERE orp.datainfo.deletedbyinference = FALSE;
|
||||||
|
|
||||||
CREATE TABLE ${stats_db_name}.otherresearchproduct_concepts AS
|
CREATE TABLE ${stats_db_name}.otherresearchproduct_concepts STORED AS PARQUET AS
|
||||||
SELECT substr(p.id, 4) as id, case
|
SELECT substr(p.id, 4) as id, case
|
||||||
when contexts.context.id RLIKE '^[^::]+::[^::]+::.+$' then contexts.context.id
|
when contexts.context.id RLIKE '^[^::]+::[^::]+::.+$' then contexts.context.id
|
||||||
when contexts.context.id RLIKE '^[^::]+::[^::]+$' then concat(contexts.context.id, '::other')
|
when contexts.context.id RLIKE '^[^::]+::[^::]+$' then concat(contexts.context.id, '::other')
|
||||||
|
@ -59,7 +59,7 @@ SELECT substr(p.id, 4) as id, case
|
||||||
FROM ${openaire_db_name}.otherresearchproduct p LATERAL VIEW explode(p.context) contexts AS context
|
FROM ${openaire_db_name}.otherresearchproduct p LATERAL VIEW explode(p.context) contexts AS context
|
||||||
where p.datainfo.deletedbyinference = false;
|
where p.datainfo.deletedbyinference = false;
|
||||||
|
|
||||||
CREATE TABLE ${stats_db_name}.otherresearchproduct_datasources AS
|
CREATE TABLE ${stats_db_name}.otherresearchproduct_datasources STORED AS PARQUET AS
|
||||||
SELECT p.id, CASE WHEN d.id IS NULL THEN 'other' ELSE p.datasource END AS datasource
|
SELECT p.id, CASE WHEN d.id IS NULL THEN 'other' ELSE p.datasource END AS datasource
|
||||||
FROM (SELECT substr(p.id, 4) AS id, substr(instances.instance.hostedby.key, 4) AS datasource
|
FROM (SELECT substr(p.id, 4) AS id, substr(instances.instance.hostedby.key, 4) AS datasource
|
||||||
from ${openaire_db_name}.otherresearchproduct p lateral view explode(p.instance) instances as instance
|
from ${openaire_db_name}.otherresearchproduct p lateral view explode(p.instance) instances as instance
|
||||||
|
@ -68,22 +68,22 @@ FROM (SELECT substr(p.id, 4) AS id, substr(instances.instance.hostedby.key, 4) A
|
||||||
from ${openaire_db_name}.datasource d
|
from ${openaire_db_name}.datasource d
|
||||||
WHERE d.datainfo.deletedbyinference = false) d on p.datasource = d.id;
|
WHERE d.datainfo.deletedbyinference = false) d on p.datasource = d.id;
|
||||||
|
|
||||||
-- otherresearchproduct_languages: one row per record with the class name of
-- its language. Records flagged deletedbyinference are excluded.
CREATE TABLE ${stats_db_name}.otherresearchproduct_languages STORED AS PARQUET AS
SELECT
    substr(orp.id, 4)      AS id,
    orp.language.classname AS language
FROM ${openaire_db_name}.otherresearchproduct orp
WHERE orp.datainfo.deletedbyinference = FALSE;
|
||||||
|
|
||||||
-- otherresearchproduct_oids: one row per element of a record's originalid
-- array. Records flagged deletedbyinference are excluded.
CREATE TABLE ${stats_db_name}.otherresearchproduct_oids STORED AS PARQUET AS
SELECT
    substr(orp.id, 4) AS id,
    original_id       AS oid
FROM ${openaire_db_name}.otherresearchproduct orp
    LATERAL VIEW explode(orp.originalid) original_id_view AS original_id
WHERE orp.datainfo.deletedbyinference = FALSE;
|
||||||
|
|
||||||
-- otherresearchproduct_pids: one row per element of a record's pid array,
-- with the pid's qualifier class name and value.
-- Records flagged deletedbyinference are excluded.
CREATE TABLE ${stats_db_name}.otherresearchproduct_pids STORED AS PARQUET AS
SELECT
    substr(orp.id, 4)             AS id,
    pid_entry.qualifier.classname AS type,
    pid_entry.value               AS pid
FROM ${openaire_db_name}.otherresearchproduct orp
    LATERAL VIEW explode(orp.pid) pid_view AS pid_entry
WHERE orp.datainfo.deletedbyinference = FALSE;
|
||||||
|
|
||||||
-- otherresearchproduct_topics: one row per element of a record's subject
-- array, carrying the subject's qualifier class name and its value.
-- Records flagged deletedbyinference are excluded.
CREATE TABLE ${stats_db_name}.otherresearchproduct_topics STORED AS PARQUET AS
SELECT
    substr(orp.id, 4)                 AS id,
    subject_entry.qualifier.classname AS type,
    subject_entry.value               AS topic
FROM ${openaire_db_name}.otherresearchproduct orp
    LATERAL VIEW explode(orp.subject) subject_view AS subject_entry
WHERE orp.datainfo.deletedbyinference = FALSE;
|
|
@ -3,7 +3,7 @@
|
||||||
-- Project table/view and Project related tables/views
|
-- Project table/view and Project related tables/views
|
||||||
------------------------------------------------------
|
------------------------------------------------------
|
||||||
------------------------------------------------------
|
------------------------------------------------------
|
||||||
-- project_oids: one row per element of a project's originalid array.
-- NOTE(review): unlike the result *_oids tables, no deletedbyinference filter
-- is applied here -- confirm this is intentional.
CREATE TABLE ${stats_db_name}.project_oids STORED AS PARQUET AS
SELECT
    substr(proj.id, 4) AS id,
    original_id        AS oid
FROM ${openaire_db_name}.project proj
    LATERAL VIEW explode(proj.originalid) original_id_view AS original_id;
|
||||||
CREATE TABLE ${stats_db_name}.project_organizations AS
|
CREATE TABLE ${stats_db_name}.project_organizations AS
|
||||||
|
@ -12,13 +12,13 @@ from ${openaire_db_name}.relation r
|
||||||
WHERE r.reltype = 'projectOrganization'
|
WHERE r.reltype = 'projectOrganization'
|
||||||
and r.datainfo.deletedbyinference = false;
|
and r.datainfo.deletedbyinference = false;
|
||||||
|
|
||||||
-- project_results: one row per 'resultProject' relation. The relation's
-- target becomes the project id and its source the result id; the class name
-- of the relation's provenance action is kept alongside.
-- Relations flagged deletedbyinference are excluded.
CREATE TABLE ${stats_db_name}.project_results STORED AS PARQUET AS
SELECT
    substr(rel.target, 4)                     AS id,
    substr(rel.source, 4)                     AS result,
    rel.datainfo.provenanceaction.classname   AS provenance
FROM ${openaire_db_name}.relation rel
WHERE rel.reltype = 'resultProject'
  AND rel.datainfo.deletedbyinference = FALSE;
|
||||||
|
|
||||||
create table ${stats_db_name}.project_classification as
|
create table ${stats_db_name}.project_classification STORED AS PARQUET as
|
||||||
select substr(p.id, 4) as id, class.h2020programme.code, class.level1, class.level2, class.level3
|
select substr(p.id, 4) as id, class.h2020programme.code, class.level1, class.level2, class.level3
|
||||||
from ${openaire_db_name}.project p
|
from ${openaire_db_name}.project p
|
||||||
lateral view explode(p.h2020classification) classifs as class
|
lateral view explode(p.h2020classification) classifs as class
|
||||||
|
@ -74,7 +74,7 @@ SELECT substr(p.id, 4) AS id,
|
||||||
FROM ${openaire_db_name}.project p
|
FROM ${openaire_db_name}.project p
|
||||||
WHERE p.datainfo.deletedbyinference = false;
|
WHERE p.datainfo.deletedbyinference = false;
|
||||||
|
|
||||||
create table ${stats_db_name}.funder as
|
create table ${stats_db_name}.funder STORED AS PARQUET as
|
||||||
select distinct xpath_string(fund, '//funder/id') as id,
|
select distinct xpath_string(fund, '//funder/id') as id,
|
||||||
xpath_string(fund, '//funder/name') as name,
|
xpath_string(fund, '//funder/name') as name,
|
||||||
xpath_string(fund, '//funder/shortname') as shortname
|
xpath_string(fund, '//funder/shortname') as shortname
|
||||||
|
|
|
@ -123,13 +123,13 @@ UNION ALL
|
||||||
SELECT *
|
SELECT *
|
||||||
FROM ${stats_db_name}.otherresearchproduct_topics;
|
FROM ${stats_db_name}.otherresearchproduct_topics;
|
||||||
|
|
||||||
-- result_organization: one row per 'resultOrganization' relation. The
-- relation's target is emitted as id and its source as organization.
-- Relations flagged deletedbyinference are excluded.
CREATE TABLE ${stats_db_name}.result_organization STORED AS PARQUET AS
SELECT
    substr(rel.target, 4) AS id,
    substr(rel.source, 4) AS organization
FROM ${openaire_db_name}.relation rel
WHERE rel.reltype = 'resultOrganization'
  AND rel.datainfo.deletedbyinference = FALSE;
|
||||||
|
|
||||||
CREATE TABLE ${stats_db_name}.result_projects AS
|
CREATE TABLE ${stats_db_name}.result_projects STORED AS PARQUET AS
|
||||||
select pr.result AS id, pr.id AS project, datediff(p.enddate, p.startdate) AS daysfromend, pr.provenance as provenance
|
select pr.result AS id, pr.id AS project, datediff(p.enddate, p.startdate) AS daysfromend, pr.provenance as provenance
|
||||||
FROM ${stats_db_name}.result r
|
FROM ${stats_db_name}.result r
|
||||||
JOIN ${stats_db_name}.project_results pr ON r.id = pr.result
|
JOIN ${stats_db_name}.project_results pr ON r.id = pr.result
|
||||||
|
|
|
@ -80,15 +80,15 @@ UPDATE ${stats_db_name}.datasource_tmp
|
||||||
SET yearofvalidation=null
|
SET yearofvalidation=null
|
||||||
WHERE yearofvalidation = '-1';
|
WHERE yearofvalidation = '-1';
|
||||||
|
|
||||||
-- datasource_languages: one row per element of a datasource's
-- odlanguages.value array.
CREATE TABLE ${stats_db_name}.datasource_languages STORED AS PARQUET AS
SELECT
    substr(src.id, 4) AS id,
    od_language       AS language
FROM ${openaire_db_name}.datasource src
    LATERAL VIEW explode(src.odlanguages.value) od_language_view AS od_language;
|
||||||
|
|
||||||
-- datasource_oids: one row per element of a datasource's originalid array.
CREATE TABLE ${stats_db_name}.datasource_oids STORED AS PARQUET AS
SELECT
    substr(src.id, 4) AS id,
    original_id       AS oid
FROM ${openaire_db_name}.datasource src
    LATERAL VIEW explode(src.originalid) original_id_view AS original_id;
|
||||||
|
|
||||||
CREATE TABLE ${stats_db_name}.datasource_organizations AS
|
CREATE TABLE ${stats_db_name}.datasource_organizations STORED AS PARQUET AS
|
||||||
SELECT substr(r.target, 4) AS id, substr(r.source, 4) AS organization
|
SELECT substr(r.target, 4) AS id, substr(r.source, 4) AS organization
|
||||||
FROM ${openaire_db_name}.relation r
|
FROM ${openaire_db_name}.relation r
|
||||||
WHERE r.reltype = 'datasourceOrganization'
|
WHERE r.reltype = 'datasourceOrganization'
|
||||||
|
@ -96,11 +96,11 @@ WHERE r.reltype = 'datasourceOrganization'
|
||||||
|
|
||||||
-- datasource sources:
-- where the datasource info has been collected from.
-- One row per element of a datasource's collectedfrom array; the element's
-- key is emitted as the collecting datasource.
-- Datasources flagged deletedbyinference are excluded.
CREATE TABLE IF NOT EXISTS ${stats_db_name}.datasource_sources STORED AS PARQUET AS
SELECT
    substr(src.id, 4)         AS id,
    substr(collected.key, 4)  AS datasource
FROM ${openaire_db_name}.datasource src
    LATERAL VIEW explode(src.collectedfrom) collected_view AS collected
WHERE src.datainfo.deletedbyinference = FALSE;
|
||||||
|
|
||||||
-- datasource_results: the result_datasources table seen from the datasource
-- side -- its datasource column is exposed as id and its id column as result.
-- A view holds no data of its own, so it takes no storage clause:
-- "STORED AS PARQUET" applies only to tables and is not accepted by
-- CREATE VIEW.
CREATE OR REPLACE VIEW ${stats_db_name}.datasource_results AS
SELECT
    datasource AS id,
    id         AS result
FROM ${stats_db_name}.result_datasources;
|
Loading…
Reference in New Issue