This commit is contained in:
Antonis Lempesis 2021-11-26 15:40:40 +02:00 committed by dimitrispie
parent 2a52a42169
commit e84dd5fe26
6 changed files with 12 additions and 22 deletions

View File

@ -30,18 +30,14 @@ hdfs dfs -copyFromLocal concepts.csv ${TMP}
hdfs dfs -chmod -R 777 ${TMP}
echo "Creating and populating impala tables"
impala-shell -q "invalidate metadata"
impala-shell -d ${TARGET_DB} -q "invalidate metadata"
impala-shell -q "create table ${TARGET_DB}.context (id string, name string) row format delimited fields terminated by ','"
impala-shell -q "create table ${TARGET_DB}.category (context string, id string, name string) row format delimited fields terminated by ','"
impala-shell -q "create table ${TARGET_DB}.concept (category string, id string, name string) row format delimited fields terminated by ','"
impala-shell -d ${TARGET_DB} -q "invalidate metadata"
impala-shell -q "load data inpath '${TMP}/contexts.csv' into table ${TARGET_DB}.context"
impala-shell -q "load data inpath '${TMP}/categories.csv' into table ${TARGET_DB}.category"
impala-shell -q "load data inpath '${TMP}/concepts.csv' into table ${TARGET_DB}.concept"
hive -e "create table ${TARGET_DB}.context (id string, name string) row format delimited fields terminated by ','"
hive -e "create table ${TARGET_DB}.category (context string, id string, name string) row format delimited fields terminated by ','"
hive -e "create table ${TARGET_DB}.concept (category string, id string, name string) row format delimited fields terminated by ','"
hive -e "load data inpath '${TMP}/contexts.csv' into table ${TARGET_DB}.context"
hive -e "load data inpath '${TMP}/categories.csv' into table ${TARGET_DB}.category"
hive -e "load data inpath '${TMP}/concepts.csv' into table ${TARGET_DB}.concept"
echo "Cleaning up"
hdfs dfs -rm -f -r -skipTrash ${TMP}
rm concepts.csv
rm categories.csv
rm contexts.csv

View File

@ -10,9 +10,7 @@ export SOURCE=$1
export SHADOW=$2
echo "Updating shadow database"
impala-shell -q "invalidate metadata"
impala-shell -d ${SOURCE} -q "invalidate metadata"
impala-shell -d ${SOURCE} -q "show tables" --delimited | sed "s/^\(.*\)/compute stats ${SOURCE}.\1;/" | impala-shell -c -f -
hive --database ${SOURCE} -e "show tables" | grep -v WARN | sed "s/^\(.*\)/analyze table ${SOURCE}.\1 compute statistics;/" | impala-shell -c -f -
impala-shell -q "create database if not exists ${SHADOW}"
impala-shell -d ${SHADOW} -q "show tables" --delimited | sed "s/^/drop view if exists ${SHADOW}./" | sed "s/$/;/" | impala-shell -c -f -
impala-shell -d ${SOURCE} -q "show tables" --delimited | sed "s/\(.*\)/create view ${SHADOW}.\1 as select * from ${SOURCE}.\1;/" | impala-shell -c -f -

View File

@ -104,7 +104,7 @@ WHERE otherresearchproduct_tmp.id IN (SELECT distinct r.id
AND pr.id = p.id
AND to_date(r.date) - to_date(p.enddate) > 0);
CREATE OR REPLACE VIEW ${stats_db_name}.project_results_publication AS
CREATE OR REPLACE VIEW ${stats_db_name}.project_results_publication STORED AS PARQUET AS
SELECT result_projects.id AS result,
result_projects.project AS project_results,
result.date as resultdate,

View File

@ -26,7 +26,7 @@ select substr(r.id, 4) as id, inst.refereed.classname as refereed
from ${openaire_db_name}.otherresearchproduct r lateral view explode(r.instance) instances as inst
where r.datainfo.deletedbyinference=false and r.datainfo.invisible = FALSE;
CREATE VIEW IF NOT EXISTS ${stats_db_name}.result_refereed as
CREATE VIEW IF NOT EXISTS ${stats_db_name}.result_refereed STORED AS PARQUET as
select * from ${stats_db_name}.publication_refereed
union all
select * from ${stats_db_name}.dataset_refereed

View File

@ -29,8 +29,6 @@ select rcount.pid, sum(case when rcount.type='publication' then rcount.count els
from rcount
group by rcount.pid;
create view ${stats_db_name}.rndexpenditure as select * from stats_ext.rndexpediture;
create table ${stats_db_name}.result_instance stored as parquet as
select distinct r.*
from (
@ -39,12 +37,10 @@ from (
from ${openaire_db_name}.result r lateral view explode(r.instance) instances as inst lateral view explode(inst.pid) pids as p) r
join ${stats_db_name}.result res on res.id=r.id;
create table ${stats_db_name}.result_apc as
create table ${stats_db_name}.result_apc STORED AS PARQUET as
select r.id, r.amount, r.currency
from (
select substr(r.id, 4) as id, cast(inst.processingchargeamount.value as float) as amount, inst.processingchargecurrency.value as currency
from ${openaire_db_name}.result r lateral view explode(r.instance) instances as inst) r
join ${stats_db_name}.result res on res.id=r.id
where r.amount is not null;
create view ${stats_db_name}.issn_gold_oa_dataset as select * from stats_ext.issn_gold_oa_dataset;

View File

@ -102,6 +102,6 @@ select substr(d.id, 4) as id, substr(cf.key, 4) as datasource
from ${openaire_db_name}.datasource d lateral view explode(d.collectedfrom) cfrom as cf
where d.datainfo.deletedbyinference = false and d.datainfo.invisible=false;
CREATE OR REPLACE VIEW ${stats_db_name}.datasource_results AS
CREATE OR REPLACE VIEW ${stats_db_name}.datasource_results STORED AS PARQUET AS
SELECT datasource AS id, id AS result
FROM ${stats_db_name}.result_datasources;