Merge pull request 'Added missing /*EOS*/ statement terminators; the contexts.sh script now creates the context tables stored as Parquet instead of CSV-backed text tables' (#408) from antonis.lempesis/dnet-hadoop:beta into master
Reviewed-on: #408
This commit is contained in:
commit
5592ccc37a
|
@ -35,12 +35,20 @@ export HADOOP_USER="oozie"
|
|||
export HADOOP_USER_NAME="oozie"
|
||||
|
||||
echo "Creating and populating impala tables"
|
||||
hive $HIVE_OPTS -e "create table ${TARGET_DB}.context (id string, name string) row format delimited fields terminated by ','"
|
||||
hive $HIVE_OPTS -e "create table ${TARGET_DB}.category (context string, id string, name string) row format delimited fields terminated by ','"
|
||||
hive $HIVE_OPTS -e "create table ${TARGET_DB}.concept (category string, id string, name string) row format delimited fields terminated by ','"
|
||||
hive $HIVE_OPTS -e "load data inpath '${TMP}/contexts.csv' into table ${TARGET_DB}.context"
|
||||
hive $HIVE_OPTS -e "load data inpath '${TMP}/categories.csv' into table ${TARGET_DB}.category"
|
||||
hive $HIVE_OPTS -e "load data inpath '${TMP}/concepts.csv' into table ${TARGET_DB}.concept"
|
||||
hive $HIVE_OPTS -e "create table ${TARGET_DB}.context_csv (id string, name string) row format delimited fields terminated by ','"
|
||||
hive $HIVE_OPTS -e "load data inpath '${TMP}/contexts.csv' into table ${TARGET_DB}.context_csv"
|
||||
hive $HIVE_OPTS -e "create table ${TARGET_DB}.context stored as parquet as select * from ${TARGET_DB}.context_csv"
|
||||
hive $HIVE_OPTS -e "drop table ${TARGET_DB}.context_csv purge"
|
||||
|
||||
hive $HIVE_OPTS -e "create table ${TARGET_DB}.category_csv (context string, id string, name string) row format delimited fields terminated by ','"
|
||||
hive $HIVE_OPTS -e "load data inpath '${TMP}/categories.csv' into table ${TARGET_DB}.category_csv"
|
||||
hive $HIVE_OPTS -e "create table ${TARGET_DB}.category stored as parquet as select * from ${TARGET_DB}.category_csv"
|
||||
hive $HIVE_OPTS -e "drop table ${TARGET_DB}.category_csv purge"
|
||||
|
||||
hive $HIVE_OPTS -e "create table ${TARGET_DB}.concept_csv (category string, id string, name string) row format delimited fields terminated by ','"
|
||||
hive $HIVE_OPTS -e "load data inpath '${TMP}/concepts.csv' into table ${TARGET_DB}.concept_csv"
|
||||
hive $HIVE_OPTS -e "create table ${TARGET_DB}.concept stored as parquet as select * from ${TARGET_DB}.concept_csv"
|
||||
hive $HIVE_OPTS -e "drop table ${TARGET_DB}.concept_csv purge"
|
||||
|
||||
echo "Cleaning up"
|
||||
rm concepts.csv
|
||||
|
|
|
@ -335,8 +335,8 @@ select ar.organization, rf.no_result_fair/ar.no_allresults org_fairness
|
|||
from allresults ar
|
||||
join result_fair rf on rf.organization=ar.organization; /*EOS*/
|
||||
|
||||
DROP VIEW result_fair;
|
||||
DROP VIEW allresults;
|
||||
DROP VIEW result_fair; /*EOS*/
|
||||
DROP VIEW allresults; /*EOS*/
|
||||
|
||||
CREATE TEMPORARY VIEW result_fair as
|
||||
select year, ro.organization organization, count(distinct ro.id) no_result_fair from ${stats_db_name}.result_organization ro
|
||||
|
@ -1006,7 +1006,7 @@ left outer join ${stats_db_name}.organization o on o.id=ro.organization
|
|||
left outer join ${stats_db_name}.result_projects rp on rp.id=ro.id
|
||||
left outer join ${stats_db_name}.project p on p.id=rp.project
|
||||
left outer join ${stats_db_name}.funder f on f.name=p.funder
|
||||
where coalesce(o.country, f.country) IS NOT NULL;
|
||||
where coalesce(o.country, f.country) IS NOT NULL; /*EOS*/
|
||||
|
||||
drop table if exists ${stats_db_name}.indi_result_oa_with_license purge; /*EOS*/
|
||||
create table ${stats_db_name}.indi_result_oa_with_license stored as parquet as
|
||||
|
|
Loading…
Reference in New Issue