diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/contexts.sh b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/contexts.sh index b66ab47e0..fafb45cb0 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/contexts.sh +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/contexts.sh @@ -30,18 +30,14 @@ hdfs dfs -copyFromLocal concepts.csv ${TMP} hdfs dfs -chmod -R 777 ${TMP} echo "Creating and populating impala tables" -impala-shell -q "invalidate metadata" -impala-shell -d ${TARGET_DB} -q "invalidate metadata" -impala-shell -q "create table ${TARGET_DB}.context (id string, name string) row format delimited fields terminated by ','" -impala-shell -q "create table ${TARGET_DB}.category (context string, id string, name string) row format delimited fields terminated by ','" -impala-shell -q "create table ${TARGET_DB}.concept (category string, id string, name string) row format delimited fields terminated by ','" -impala-shell -d ${TARGET_DB} -q "invalidate metadata" -impala-shell -q "load data inpath '${TMP}/contexts.csv' into table ${TARGET_DB}.context" -impala-shell -q "load data inpath '${TMP}/categories.csv' into table ${TARGET_DB}.category" -impala-shell -q "load data inpath '${TMP}/concepts.csv' into table ${TARGET_DB}.concept" +hive -e "create table ${TARGET_DB}.context (id string, name string) row format delimited fields terminated by ','" +hive -e "create table ${TARGET_DB}.category (context string, id string, name string) row format delimited fields terminated by ','" +hive -e "create table ${TARGET_DB}.concept (category string, id string, name string) row format delimited fields terminated by ','" +hive -e "load data inpath '${TMP}/contexts.csv' into table ${TARGET_DB}.context" +hive -e "load data inpath '${TMP}/categories.csv' into table ${TARGET_DB}.category" +hive -e "load data inpath '${TMP}/concepts.csv' into table ${TARGET_DB}.concept" echo "Cleaning up" -hdfs dfs -rm -f -r -skipTrash ${TMP} rm concepts.csv rm categories.csv rm contexts.csv diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/finalizedb.sh b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/finalizedb.sh index d04c5ccfd..8eade479a 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/finalizedb.sh +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/finalizedb.sh @@ -10,9 +10,7 @@ export SOURCE=$1 export SHADOW=$2 echo "Updating shadow database" -impala-shell -q "invalidate metadata" -impala-shell -d ${SOURCE} -q "invalidate metadata" -impala-shell -d ${SOURCE} -q "show tables" --delimited | sed "s/^\(.*\)/compute stats ${SOURCE}.\1;/" | impala-shell -c -f - +hive --database ${SOURCE} -e "show tables" | grep -v WARN | sed "s/^\(.*\)/analyze table ${SOURCE}.\1 compute statistics;/" | impala-shell -c -f - impala-shell -q "create database if not exists ${SHADOW}" impala-shell -d ${SHADOW} -q "show tables" --delimited | sed "s/^/drop view if exists ${SHADOW}./" | sed "s/$/;/" | impala-shell -c -f - impala-shell -d ${SOURCE} -q "show tables" --delimited | sed "s/\(.*\)/create view ${SHADOW}.\1 as select * from ${SOURCE}.\1;/" | impala-shell -c -f - diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step11.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step11.sql index 41c3ed751..122947050 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step11.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step11.sql @@ -104,7 +104,7 @@ WHERE otherresearchproduct_tmp.id IN (SELECT distinct r.id AND pr.id = p.id AND to_date(r.date) - to_date(p.enddate) > 0); -CREATE OR REPLACE VIEW ${stats_db_name}.project_results_publication AS +CREATE OR REPLACE VIEW ${stats_db_name}.project_results_publication STORED AS PARQUET AS SELECT result_projects.id AS result, result_projects.project AS project_results, result.date as resultdate, diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step15.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step15.sql index cec22cd3e..9b5c6ec06 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step15.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step15.sql @@ -26,7 +26,7 @@ select substr(r.id, 4) as id, inst.refereed.classname as refereed from ${openaire_db_name}.otherresearchproduct r lateral view explode(r.instance) instances as inst where r.datainfo.deletedbyinference=false and r.datainfo.invisible = FALSE; -CREATE VIEW IF NOT EXISTS ${stats_db_name}.result_refereed as +CREATE VIEW IF NOT EXISTS ${stats_db_name}.result_refereed STORED AS PARQUET as select * from ${stats_db_name}.publication_refereed union all select * from ${stats_db_name}.dataset_refereed diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step15_5.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step15_5.sql index 86ead4a2c..9b2630286 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step15_5.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step15_5.sql @@ -29,8 +29,6 @@ select rcount.pid, sum(case when rcount.type='publication' then rcount.count els from rcount group by rcount.pid; -create view ${stats_db_name}.rndexpenditure as select * from stats_ext.rndexpediture; - create table ${stats_db_name}.result_instance stored as parquet as select distinct r.* from ( @@ -39,12 +37,10 @@ from ( from ${openaire_db_name}.result r lateral view explode(r.instance) instances as inst lateral view explode(inst.pid) pids as p) r join ${stats_db_name}.result res on res.id=r.id; -create table ${stats_db_name}.result_apc as +create table ${stats_db_name}.result_apc STORED AS PARQUET as select r.id, r.amount, r.currency from ( select substr(r.id, 4) as id, cast(inst.processingchargeamount.value as float) as amount, inst.processingchargecurrency.value as currency from ${openaire_db_name}.result r lateral view explode(r.instance) instances as inst) r join ${stats_db_name}.result res on res.id=r.id -where r.amount is not null; - -create view ${stats_db_name}.issn_gold_oa_dataset as select * from stats_ext.issn_gold_oa_dataset; \ No newline at end of file +where r.amount is not null; \ No newline at end of file diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step8.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step8.sql index 01bed17cc..f089693fc 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step8.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step8.sql @@ -102,6 +102,6 @@ select substr(d.id, 4) as id, substr(cf.key, 4) as datasource from ${openaire_db_name}.datasource d lateral view explode(d.collectedfrom) cfrom as cf where d.datainfo.deletedbyinference = false and d.datainfo.invisible=false; -CREATE OR REPLACE VIEW ${stats_db_name}.datasource_results AS +CREATE OR REPLACE VIEW ${stats_db_name}.datasource_results STORED AS PARQUET AS SELECT datasource AS id, id AS result FROM ${stats_db_name}.result_datasources; \ No newline at end of file