# Listing metadata: Bash, 42 lines, 1.6 KiB
# Per-user Python egg cache location, exported for child processes.
# Assignment and export are split so a failing `whoami` is not masked by
# the exit status of `export`.
PYTHON_EGG_CACHE="/home/$(whoami)/.python-eggs"
export PYTHON_EGG_CACHE

# Per-user path under /tmp that is kept as a symlink (also exported).
link_folder="/tmp/impala-shell-python-egg-cache-$(whoami)"
export link_folder

# Only (re)create the link when the path is not already a symlink; whatever
# else occupies the path (regular file, directory) is removed first.
if [[ ! -L "${link_folder}" ]]; then
  # ${var:?} aborts instead of running rm on an empty path.
  rm -Rf -- "${link_folder:?}"
  # The link target is PYTHON_EGG_CACHE with link_folder's absolute path
  # appended, i.e. <egg cache>/tmp/impala-shell-python-egg-cache-<user>.
  ln -sfn -- "${PYTHON_EGG_CACHE}${link_folder}" "${link_folder}"
fi
#######################################
# Rebuild the indi_is_result_accessible table in one Impala database.
#
# Drops the table if it exists, then recreates it as a parquet table with
# one row per distinct id of `result` and a 0/1 is_result_accessible flag.
# An id is flagged 1 when it equals the id of a pdfaggregation_i.payload row
# whose pdfaggregation_i.publication entry matches some `result` row on
# either publication.id or publication.dedupid; all other ids get 0.
#
# Globals:   HADOOP_USER_NAME (read) - passed to impala-shell as --user
# Arguments: $1 - name of the database to operate on
# Outputs:   error message on stderr when the database name is empty
# Returns:   1 if the database name is empty; the status of the failing
#            impala-shell call if the drop fails (the create is then
#            skipped); otherwise the status of the create statement.
#######################################
createPDFsAggregated() {
  local db=$1
  local -r impala_host="impala-cluster-dn1.openaire.eu"
  local -r drop_query="drop table if exists indi_is_result_accessible"
  local -r create_query="create table indi_is_result_accessible stored as parquet as
select distinct p.id, coalesce(is_result_accessible, 0) as is_result_accessible from result p
left outer join
(select id, 1 as is_result_accessible from (select pl.* from result r
join pdfaggregation_i.publication p on r.id=p.id
join pdfaggregation_i.payload pl on pl.id=p.id
union all
select pl.* from result r
join pdfaggregation_i.publication p on r.id=p.dedupid
join pdfaggregation_i.payload pl on pl.id=p.id) foo) tmp on p.id=tmp.id"

  if [[ -z "${db}" ]]; then
    printf 'createPDFsAggregated: missing database name\n' >&2
    return 1
  fi

  # Creating the table cannot succeed while the old one is still there, so
  # stop as soon as the drop fails.
  impala-shell --user "${HADOOP_USER_NAME}" -i "${impala_host}" -d "${db}" \
    -q "${drop_query}" || return

  impala-shell --user "${HADOOP_USER_NAME}" -i "${impala_host}" -d "${db}" \
    -q "${create_query}"
}
# Driver: rebuild indi_is_result_accessible in the stats database, the
# monitor database, and every monitor-derived database listed below.
#
# Usage: <script> STATS_DB MONITOR_DB HADOOP_USER_NAME
if (( $# < 3 )); then
  printf 'Usage: %s STATS_DB MONITOR_DB HADOOP_USER_NAME\n' "${0##*/}" >&2
  exit 1
fi

STATS_DB=$1
MONITOR_DB=$2
# Read by createPDFsAggregated and passed to impala-shell as --user.
HADOOP_USER_NAME=$3

createPDFsAggregated "${STATS_DB}"
createPDFsAggregated "${MONITOR_DB}"

# Fixed-suffix databases derived from the monitor database name.
createPDFsAggregated "${MONITOR_DB}_funded"
createPDFsAggregated "${MONITOR_DB}_institutions"
createPDFsAggregated "${MONITOR_DB}_ris_tail"

# Context identifiers; each one maps to a database named
# <MONITOR_DB>_<context>, with every "-" and "::" replaced by "_".
contexts=(
  "knowmad::other"
  "dh-ch::other"
  "enermaps::other"
  "gotriple::other"
  "neanias-atmospheric::other"
  "rural-digital-europe::other"
  "covid-19::other"
  "aurora::other"
  "neanias-space::other"
  "north-america-studies::other"
  "north-american-studies::other"
  "eutopia::other"
)

for context in "${contexts[@]}"; do
  # Same result as piping through sed 's/-/_/g' and sed 's/::/_/g', but
  # without spawning two processes per iteration.
  suffix=${context//-/_}
  suffix=${suffix//::/_}
  createPDFsAggregated "${MONITOR_DB}_${suffix}"
done