Split Monitor DBs-Interdisciplinarity indicators

- Split DBs Monitor for faster rendering of visualizations
- Add interdisciplinarity indicators from result_fos
This commit is contained in:
dimitrispie 2023-06-02 13:34:16 +03:00
parent ebe586b1d1
commit 2324670714
10 changed files with 302 additions and 79 deletions

View File

@ -68,6 +68,16 @@ copydb $USAGE_STATS_DB
copydb $PROD_USAGE_STATS_DB copydb $PROD_USAGE_STATS_DB
copydb $EXT_DB copydb $EXT_DB
copydb $STATS_DB copydb $STATS_DB
copydb $MONITOR_DB #copydb $MONITOR_DB
copydb $OBSERVATORY_DB copydb $OBSERVATORY_DB
copydb $MONITOR_DB'_funded'
copydb $MONITOR_DB'_institutions'
copydb $MONITOR_DB'_RIs_tail'
contexts="knowmad::other dh-ch::other enermaps::other gotriple::other neanias-atmospheric::other rural-digital-europe::other covid-19::other aurora::other neanias-space::other north-america-studies::other north-american-studies::other eutopia::other"
for i in ${contexts}
do
tmp=`echo "$i" | sed 's/'-'/'_'/g' | sed 's/'::'/'_'/g'`
copydb ${MONITOR_DB}'_'${tmp}
done

View File

@ -29,3 +29,14 @@ createShadowDB $STATS_DB $STATS_DB_SHADOW
createShadowDB $MONITOR_DB $MONITOR_DB_SHADOW createShadowDB $MONITOR_DB $MONITOR_DB_SHADOW
createShadowDB $OBSERVATORY_DB $OBSERVATORY_DB_SHADOW createShadowDB $OBSERVATORY_DB $OBSERVATORY_DB_SHADOW
createShadowDB USAGE_STATS_DB USAGE_STATS_DB_SHADOW createShadowDB USAGE_STATS_DB USAGE_STATS_DB_SHADOW
createShadowDB $MONITOR_DB'_funded' $MONITOR_DB'_funded_shadow'
createShadowDB $MONITOR_DB'_institutions' $MONITOR_DB'_institutions_shadow'
createShadowDB $MONITOR_DB'_RIs_tail' $MONITOR_DB'_RIs_tail_shadow'
contexts="knowmad::other dh-ch::other enermaps::other gotriple::other neanias-atmospheric::other rural-digital-europe::other covid-19::other aurora::other neanias-space::other north-america-studies::other north-american-studies::other eutopia::other"
for i in ${contexts}
do
tmp=`echo "$i" | sed 's/'-'/'_'/g' | sed 's/'::'/'_'/g'`
createShadowDB ${MONITOR_DB}'_'${tmp} ${MONITOR_DB}'_'${tmp}'_shadow'
done

View File

@ -10,16 +10,88 @@ export SOURCE=$1
export TARGET=$2 export TARGET=$2
export SHADOW=$3 export SHADOW=$3
export SCRIPT_PATH=$4 export SCRIPT_PATH=$4
export SCRIPT_PATH2=$5
export SCRIPT_PATH3=$6
export SCRIPT_PATH4=$7
export SCRIPT_PATH5=$8
export HIVE_OPTS="-hiveconf mapred.job.queue.name=analytics -hiveconf hive.spark.client.connect.timeout=120000ms -hiveconf hive.spark.client.server.connect.timeout=300000ms -hiveconf spark.executor.memory=19166291558 -hiveconf spark.yarn.executor.memoryOverhead=3225 -hiveconf spark.driver.memory=11596411699 -hiveconf spark.yarn.driver.memoryOverhead=1228" export HIVE_OPTS="-hiveconf mapred.job.queue.name=analytics -hiveconf hive.spark.client.connect.timeout=120000ms -hiveconf hive.spark.client.server.connect.timeout=300000ms -hiveconf spark.executor.memory=19166291558 -hiveconf spark.yarn.executor.memoryOverhead=3225 -hiveconf spark.driver.memory=11596411699 -hiveconf spark.yarn.driver.memoryOverhead=1228"
export HADOOP_USER_NAME="oozie" export HADOOP_USER_NAME="oozie"
echo "Getting file from " $SCRIPT_PATH echo "Getting file from " $4
hdfs dfs -copyToLocal $SCRIPT_PATH hdfs dfs -copyToLocal $4
echo "Getting file from " $5
hdfs dfs -copyToLocal $5
echo "Getting file from " $6
hdfs dfs -copyToLocal $6
echo "Getting file from " $7
hdfs dfs -copyToLocal $7
echo "Getting file from " $8
hdfs dfs -copyToLocal $8
echo "Creating monitor database" echo "Creating monitor database"
#cat step20-createMonitorDB.sql | sed s/SOURCE/$1/g | sed s/TARGET/$2/g1 > foo cat step20-createMonitorDB_funded.sql | sed "s/SOURCE/$1/g" | sed "s/TARGET/$2_funded/g1" > foo
cat step20-createMonitorDB.sql | sed "s/TARGET/${TARGET}/g" | sed "s/SOURCE/${SOURCE}/g1" > foo
hive $HIVE_OPTS -f foo hive $HIVE_OPTS -f foo
cat step20-createMonitorDB.sql | sed "s/SOURCE/$1/g" | sed "s/TARGET/$2_funded/g1" > foo
hive $HIVE_OPTS -f foo
#
cat step20-createMonitorDB_institutions.sql | sed "s/SOURCE/$1/g" | sed "s/TARGET/$2_institutions/g1" > foo
hive $HIVE_OPTS -f foo
cat step20-createMonitorDB.sql | sed "s/SOURCE/$1/g" | sed "s/TARGET/$2_institutions/g1" > foo
hive $HIVE_OPTS -f foo
contexts="knowmad::other dh-ch::other enermaps::other gotriple::other neanias-atmospheric::other rural-digital-europe::other covid-19::other aurora::other neanias-space::other north-america-studies::other north-american-studies::other eutopia::other"
for i in ${contexts}
do
tmp=`echo "$i" | sed 's/'-'/'_'/g' | sed 's/'::'/'_'/g'`
tmp2=`echo "$i" |sed 's/:.*//' `
cat step20-createMonitorDB_RIs.sql | sed "s/SOURCE/$1/g" | sed "s/TARGET/$2_$tmp/g1" | sed "s/CONTEXT/\'%$tmp2%\'/g" > foo
hive $HIVE_OPTS -f foo
cat step20-createMonitorDB.sql | sed "s/SOURCE/$1/g" | sed "s/TARGET/$2_$tmp/g1" > foo
hive $HIVE_OPTS -f foo
done
cat step20-createMonitorDB_RIs_tail.sql | sed "s/SOURCE/$1/g" | sed "s/TARGET/$2_RIs_tail/g1" | sed "s/CONTEXTS/\"'knowmad::other','dh-ch::other', 'enermaps::other', 'gotriple::other', 'neanias-atmospheric::other', 'rural-digital-europe::other', 'covid-19::other', 'aurora::other', 'neanias-space::other', 'north-america-studies::other', 'north-american-studies::other', 'eutopia::other'\"/g" > foo
hive $HIVE_OPTS -f foo
cat step20-createMonitorDB.sql | sed "s/SOURCE/$1/g" | sed "s/TARGET/$2_RIs_tail/g1" > foo
hive $HIVE_OPTS -f foo
echo "Hive shell finished" echo "Hive shell finished"
echo "Updating shadow monitor funded database"
hive -e "drop database if exists ${SHADOW}_funded cascade"
hive -e "create database if not exists ${SHADOW}_funded"
hive $HIVE_OPTS --database ${2}_funded -e "show tables" | grep -v WARN | sed "s/\(.*\)/create view ${SHADOW}_funded.\1 as select * from ${2}_funded.\1;/" > foo
hive -f foo
echo "Updated shadow monitor funded database"
echo "Updating shadow monitor insitutions database"
hive -e "drop database if exists ${SHADOW}_institutions cascade"
hive -e "create database if not exists ${SHADOW}_institutions"
hive $HIVE_OPTS --database ${2}_institutions -e "show tables" | grep -v WARN | sed "s/\(.*\)/create view ${SHADOW}_institutions.\1 as select * from ${2}_institutions.\1;/" > foo
hive -f foo
echo "Shadow db monitor insitutions ready!"
echo "Updating shadow monitor RIs database"
for i in $contexts
do
tmp=`echo "$i" | sed 's/'-'/'_'/g' | sed 's/'::'/'_'/g'`
hive -e "drop database if exists ${SHADOW}_${tmp} cascade"
hive -e "create database if not exists ${SHADOW}_${tmp}"
hive $HIVE_OPTS --database ${2}_${tmp} -e "show tables" | grep -v WARN | sed "s/\(.*\)/create view ${SHADOW}_${tmp}.\1 as select * from ${2}_${tmp}.\1;/" > foo
hive -f foo
done
echo "Shadow db monitor RIs ready!"
echo "Updating shadow monitor RIs tail database"
hive -e "drop database if exists ${SHADOW}_ris_tail cascade"
hive -e "create database if not exists ${SHADOW}_ris_tail"
hive $HIVE_OPTS --database ${2}_ris_tail -e "show tables" | grep -v WARN | sed "s/\(.*\)/create view ${SHADOW}_ris_tail.\1 as select * from ${2}_ris_tail.\1;/" > foo
hive -f foo
echo "Shadow db monitor RIs tail ready!"

View File

@ -779,4 +779,18 @@ cast(measures_ids.unit.value[0] as decimal(6,3)) score_dec, measures_ids.unit.va
from result lateral view explode(measures) measures as measures_ids from result lateral view explode(measures) measures as measures_ids
where measures_ids.id!='views' and measures_ids.id!='downloads'; where measures_ids.id!='views' and measures_ids.id!='downloads';
ANALYZE TABLE indi_impact_measures COMPUTE STATISTICS; ANALYZE TABLE indi_impact_measures COMPUTE STATISTICS;
CREATE TEMPORARY TABLE pub_fos_totals as
select rf.id, count(distinct lvl3) totals from result_fos rf
group by rf.id;
create table if not exists indi_pub_interdisciplinarity as
select distinct p.id, coalesce(indi_pub_is_interdisciplinary, 0)
as indi_pub_is_interdisciplinary
from pub_fos_totals p
left outer join (
select pub_fos_totals.id, 1 as indi_pub_is_interdisciplinary from pub_fos_totals
where totals>10) tmp on p.id=tmp.id;
ANALYZE TABLE indi_pub_interdisciplinarity COMPUTE STATISTICS;

View File

@ -1,5 +1,78 @@
drop database if exists TARGET cascade; --drop database if exists TARGET cascade;
create database if not exists TARGET; --create database if not exists TARGET;
--
--create view if not exists TARGET.category as select * from SOURCE.category;
--create view if not exists TARGET.concept as select * from SOURCE.concept;
--create view if not exists TARGET.context as select * from SOURCE.context;
--create view if not exists TARGET.country as select * from SOURCE.country;
--create view if not exists TARGET.countrygdp as select * from SOURCE.countrygdp;
--create view if not exists TARGET.creation_date as select * from SOURCE.creation_date;
--create view if not exists TARGET.funder as select * from SOURCE.funder;
--create view if not exists TARGET.fundref as select * from SOURCE.fundref;
--create view if not exists TARGET.rndexpenditure as select * from SOURCE.rndexpediture;
--create view if not exists TARGET.rndgdpexpenditure as select * from SOURCE.rndgdpexpenditure;
--create view if not exists TARGET.doctoratestudents as select * from SOURCE.doctoratestudents;
--create view if not exists TARGET.totalresearchers as select * from SOURCE.totalresearchers;
--create view if not exists TARGET.totalresearchersft as select * from SOURCE.totalresearchersft;
--create view if not exists TARGET.hrrst as select * from SOURCE.hrrst;
--
--create table TARGET.result stored as parquet as
-- select distinct * from (
-- select * from SOURCE.result r where exists (select 1 from SOURCE.result_projects rp join SOURCE.project p on rp.project=p.id where rp.id=r.id)
-- union all
-- select * from SOURCE.result r where exists (select 1 from SOURCE.result_concepts rc where rc.id=r.id)
-- union all
-- select * from SOURCE.result r where exists (select 1 from SOURCE.result_organization ro where ro.id=r.id and ro.organization in (
-- 'openorgs____::b84450f9864182c67b8611b5593f4250', --"Athena Research and Innovation Center In Information Communication & Knowledge Technologies', --ARC"
-- 'openorgs____::d41cf6bd4ab1b1362a44397e0b95c975', --National Research Council
-- 'openorgs____::d2a09b9d5eabb10c95f9470e172d05d2', --??? Not exists ??
-- 'openorgs____::d169c7407dd417152596908d48c11460', --Masaryk University
-- 'openorgs____::1ec924b1759bb16d0a02f2dad8689b21', --University of Belgrade
-- 'openorgs____::0ae431b820e4c33db8967fbb2b919150', --University of Helsinki
-- 'openorgs____::759d59f05d77188faee99b7493b46805', --University of Minho
-- 'openorgs____::cad284878801b9465fa51a95b1d779db', --Universidad Politécnica de Madrid
-- 'openorgs____::eadc8da90a546e98c03f896661a2e4d4', --University of Göttingen
-- 'openorgs____::c0286313e36479eff8676dba9b724b40', --National and Kapodistrian University of Athens
-- -- 'openorgs____::c80a8243a5e5c620d7931c88d93bf17a', --Université Paris Diderot
-- 'openorgs____::c08634f0a6b0081c3dc6e6c93a4314f3', --Bielefeld University
-- 'openorgs____::6fc85e4a8f7ecaf4b0c738d010e967ea', --University of Southern Denmark
-- 'openorgs____::3d6122f87f9a97a99d8f6e3d73313720', --Humboldt-Universität zu Berlin
-- 'openorgs____::16720ada63d0fa8ca41601feae7d1aa5', --TU Darmstadt
-- 'openorgs____::ccc0a066b56d2cfaf90c2ae369df16f5', --KU Leuven
-- 'openorgs____::4c6f119632adf789746f0a057ed73e90', --University of the Western Cape
-- 'openorgs____::ec3665affa01aeafa28b7852c4176dbd', --Rudjer Boskovic Institute
-- 'openorgs____::5f31346d444a7f06a28c880fb170b0f6', --Ghent University
-- 'openorgs____::2dbe47117fd5409f9c61620813456632', --University of Luxembourg
-- 'openorgs____::6445d7758d3a40c4d997953b6632a368', --National Institute of Informatics (NII)
-- 'openorgs____::b77c01aa15de3675da34277d48de2ec1', -- Valencia Catholic University Saint Vincent Martyr
-- 'openorgs____::7fe2f66cdc43983c6b24816bfe9cf6a0', -- Unviersity of Warsaw
-- 'openorgs____::15e7921fc50d9aa1229a82a84429419e', -- University Of Thessaly
-- 'openorgs____::11f7919dadc8f8a7251af54bba60c956', -- Technical University of Crete
-- 'openorgs____::84f0c5f5dbb6daf42748485924efde4b', -- University of Piraeus
-- 'openorgs____::4ac562f0376fce3539504567649cb373', -- University of Patras
-- 'openorgs____::3e8d1f8c3f6cd7f418b09f1f58b4873b', -- Aristotle University of Thessaloniki
-- 'openorgs____::3fcef6e1c469c10f2a84b281372c9814', -- World Bank
-- 'openorgs____::1698a2eb1885ef8adb5a4a969e745ad3', -- École des Ponts ParisTech
-- 'openorgs____::e15adb13c4dadd49de4d35c39b5da93a', -- Nanyang Technological University
-- 'openorgs____::4b34103bde246228fcd837f5f1bf4212', -- Autonomous University of Barcelona
-- 'openorgs____::72ec75fcfc4e0df1a76dc4c49007fceb', -- McMaster University
-- 'openorgs____::51c7fc556e46381734a25a6fbc3fd398', -- University of Modena and Reggio Emilia
-- 'openorgs____::235d7f9ad18ecd7e6dc62ea4990cb9db', -- Bilkent University
-- 'openorgs____::31f2fa9e05b49d4cf40a19c3fed8eb06', -- Saints Cyril and Methodius University of Skopje
-- 'openorgs____::db7686f30f22cbe73a4fde872ce812a6', -- University of Milan
-- 'openorgs____::b8b8ca674452579f3f593d9f5e557483', -- University College Cork
-- 'openorgs____::38d7097854736583dde879d12dacafca' -- Brown University
-- 'openorgs____::57784c9e047e826fefdb1ef816120d92', --Arts et Métiers ParisTech
-- 'openorgs____::2530baca8a15936ba2e3297f2bce2e7e', -- University of Cape Town
-- 'openorgs____::d11f981828c485cd23d93f7f24f24db1', -- Technological University Dublin
-- 'openorgs____::5e6bf8962665cdd040341171e5c631d8', -- Delft University of Technology
-- 'openorgs____::846cb428d3f52a445f7275561a7beb5d', -- University of Manitoba
-- 'openorgs____::eb391317ed0dc684aa81ac16265de041', -- Universitat Rovira i Virgili
-- 'openorgs____::66aa9fc2fceb271423dfabcc38752dc0', -- Lund University
-- 'openorgs____::3cff625a4370d51e08624cc586138b2f' -- IMT Atlantique
-- ) )) foo;
--
--ANALYZE TABLE TARGET.result COMPUTE STATISTICS;
create view if not exists TARGET.category as select * from SOURCE.category; create view if not exists TARGET.category as select * from SOURCE.category;
create view if not exists TARGET.concept as select * from SOURCE.concept; create view if not exists TARGET.concept as select * from SOURCE.concept;
@ -16,64 +89,6 @@ create view if not exists TARGET.totalresearchers as select * from SOURCE.totalr
create view if not exists TARGET.totalresearchersft as select * from SOURCE.totalresearchersft; create view if not exists TARGET.totalresearchersft as select * from SOURCE.totalresearchersft;
create view if not exists TARGET.hrrst as select * from SOURCE.hrrst; create view if not exists TARGET.hrrst as select * from SOURCE.hrrst;
create table TARGET.result stored as parquet as
select distinct * from (
select * from SOURCE.result r where exists (select 1 from SOURCE.result_projects rp join SOURCE.project p on rp.project=p.id where rp.id=r.id)
union all
select * from SOURCE.result r where exists (select 1 from SOURCE.result_concepts rc where rc.id=r.id)
union all
select * from SOURCE.result r where exists (select 1 from SOURCE.result_organization ro where ro.id=r.id and ro.organization in (
'openorgs____::b84450f9864182c67b8611b5593f4250', --"Athena Research and Innovation Center In Information Communication & Knowledge Technologies', --ARC"
'openorgs____::d41cf6bd4ab1b1362a44397e0b95c975', --National Research Council
'openorgs____::d2a09b9d5eabb10c95f9470e172d05d2', --??? Not exists ??
'openorgs____::d169c7407dd417152596908d48c11460', --Masaryk University
'openorgs____::1ec924b1759bb16d0a02f2dad8689b21', --University of Belgrade
'openorgs____::0ae431b820e4c33db8967fbb2b919150', --University of Helsinki
'openorgs____::759d59f05d77188faee99b7493b46805', --University of Minho
'openorgs____::cad284878801b9465fa51a95b1d779db', --Universidad Politécnica de Madrid
'openorgs____::eadc8da90a546e98c03f896661a2e4d4', --University of Göttingen
'openorgs____::c0286313e36479eff8676dba9b724b40', --National and Kapodistrian University of Athens
-- 'openorgs____::c80a8243a5e5c620d7931c88d93bf17a', --Université Paris Diderot
'openorgs____::c08634f0a6b0081c3dc6e6c93a4314f3', --Bielefeld University
'openorgs____::6fc85e4a8f7ecaf4b0c738d010e967ea', --University of Southern Denmark
'openorgs____::3d6122f87f9a97a99d8f6e3d73313720', --Humboldt-Universität zu Berlin
'openorgs____::16720ada63d0fa8ca41601feae7d1aa5', --TU Darmstadt
'openorgs____::ccc0a066b56d2cfaf90c2ae369df16f5', --KU Leuven
'openorgs____::4c6f119632adf789746f0a057ed73e90', --University of the Western Cape
'openorgs____::ec3665affa01aeafa28b7852c4176dbd', --Rudjer Boskovic Institute
'openorgs____::5f31346d444a7f06a28c880fb170b0f6', --Ghent University
'openorgs____::2dbe47117fd5409f9c61620813456632', --University of Luxembourg
'openorgs____::6445d7758d3a40c4d997953b6632a368', --National Institute of Informatics (NII)
'openorgs____::b77c01aa15de3675da34277d48de2ec1', -- Valencia Catholic University Saint Vincent Martyr
'openorgs____::7fe2f66cdc43983c6b24816bfe9cf6a0', -- Unviersity of Warsaw
'openorgs____::15e7921fc50d9aa1229a82a84429419e', -- University Of Thessaly
'openorgs____::11f7919dadc8f8a7251af54bba60c956', -- Technical University of Crete
'openorgs____::84f0c5f5dbb6daf42748485924efde4b', -- University of Piraeus
'openorgs____::4ac562f0376fce3539504567649cb373', -- University of Patras
'openorgs____::3e8d1f8c3f6cd7f418b09f1f58b4873b', -- Aristotle University of Thessaloniki
'openorgs____::3fcef6e1c469c10f2a84b281372c9814', -- World Bank
'openorgs____::1698a2eb1885ef8adb5a4a969e745ad3', -- École des Ponts ParisTech
'openorgs____::e15adb13c4dadd49de4d35c39b5da93a', -- Nanyang Technological University
'openorgs____::4b34103bde246228fcd837f5f1bf4212', -- Autonomous University of Barcelona
'openorgs____::72ec75fcfc4e0df1a76dc4c49007fceb', -- McMaster University
'openorgs____::51c7fc556e46381734a25a6fbc3fd398', -- University of Modena and Reggio Emilia
'openorgs____::235d7f9ad18ecd7e6dc62ea4990cb9db', -- Bilkent University
'openorgs____::31f2fa9e05b49d4cf40a19c3fed8eb06', -- Saints Cyril and Methodius University of Skopje
'openorgs____::db7686f30f22cbe73a4fde872ce812a6', -- University of Milan
'openorgs____::b8b8ca674452579f3f593d9f5e557483', -- University College Cork
'openorgs____::38d7097854736583dde879d12dacafca' -- Brown University
'openorgs____::57784c9e047e826fefdb1ef816120d92', --Arts et Métiers ParisTech
'openorgs____::2530baca8a15936ba2e3297f2bce2e7e', -- University of Cape Town
'openorgs____::d11f981828c485cd23d93f7f24f24db1', -- Technological University Dublin
'openorgs____::5e6bf8962665cdd040341171e5c631d8', -- Delft University of Technology
'openorgs____::846cb428d3f52a445f7275561a7beb5d', -- University of Manitoba
'openorgs____::eb391317ed0dc684aa81ac16265de041', -- Universitat Rovira i Virgili
'openorgs____::66aa9fc2fceb271423dfabcc38752dc0', -- Lund University
'openorgs____::3cff625a4370d51e08624cc586138b2f' -- IMT Atlantique
) )) foo;
ANALYZE TABLE TARGET.result COMPUTE STATISTICS;
create table TARGET.result_citations stored as parquet as select * from SOURCE.result_citations orig where exists (select 1 from TARGET.result r where r.id=orig.id); create table TARGET.result_citations stored as parquet as select * from SOURCE.result_citations orig where exists (select 1 from TARGET.result r where r.id=orig.id);
ANALYZE TABLE TARGET.result_citations COMPUTE STATISTICS; ANALYZE TABLE TARGET.result_citations COMPUTE STATISTICS;
@ -251,3 +266,5 @@ create table TARGET.indi_result_with_pid stored as parquet as select * from SOUR
ANALYZE TABLE TARGET.indi_result_with_pid COMPUTE STATISTICS; ANALYZE TABLE TARGET.indi_result_with_pid COMPUTE STATISTICS;
create table TARGET.indi_impact_measures stored as parquet as select * from SOURCE.indi_impact_measures orig where exists (select 1 from TARGET.result r where r.id=orig.id); create table TARGET.indi_impact_measures stored as parquet as select * from SOURCE.indi_impact_measures orig where exists (select 1 from TARGET.result r where r.id=orig.id);
ANALYZE TABLE TARGET.indi_impact_measures COMPUTE STATISTICS; ANALYZE TABLE TARGET.indi_impact_measures COMPUTE STATISTICS;
create table TARGET.indi_pub_interdisciplinarity stored as parquet as select * from SOURCE.indi_pub_interdisciplinarity orig where exists (select 1 from TARGET.result r where r.id=orig.id);
ANALYZE TABLE TARGET.indi_pub_interdisciplinarity COMPUTE STATISTICS;

View File

@ -0,0 +1,15 @@
drop database if exists TARGET cascade;
create database if not exists TARGET;
create table TARGET.result stored as parquet as
select distinct * from (
select * from SOURCE.result r where exists
(select 1
from SOURCE.result_concepts rc
join SOURCE.concept conc on conc.id=rc.concept
join SOURCE.category cat on cat.id=conc.category
join SOURCE.context cont on cont.id=cat.context
-- join SOURCE.result
where rc.id=r.id and conc.category like CONTEXT)
) foo;
ANALYZE TABLE TARGET.result COMPUTE STATISTICS;

View File

@ -0,0 +1,15 @@
drop database if exists TARGET cascade;
create database if not exists TARGET;
create table TARGET.result stored as parquet as
select distinct * from (
select * from SOURCE.result r where exists
(select 1
from SOURCE.result_concepts rc
join SOURCE.concept conc on conc.id=rc.concept
join SOURCE.category cat on cat.id=conc.category
join SOURCE.context cont on cont.id=cat.context
-- join SOURCE.result
where rc.id=r.id and conc.category not in (CONTEXTS))
) foo;
ANALYZE TABLE TARGET.result COMPUTE STATISTICS;

View File

@ -0,0 +1,9 @@
drop database if exists TARGET cascade;
create database if not exists TARGET;
create table TARGET.result stored as parquet as
select distinct * from (
select * from SOURCE.result r where exists (select 1 from SOURCE.result_projects rp join SOURCE.project p on rp.project=p.id where rp.id=r.id)
) foo;
ANALYZE TABLE TARGET.result COMPUTE STATISTICS;

View File

@ -0,0 +1,56 @@
drop database if exists TARGET cascade;
create database if not exists TARGET;
create table TARGET.result stored as parquet as
select distinct * from (
select * from SOURCE.result r where exists (select 1 from SOURCE.result_organization ro where ro.id=r.id and ro.organization in (
'openorgs____::b84450f9864182c67b8611b5593f4250', --"Athena Research and Innovation Center In Information Communication & Knowledge Technologies', --ARC"
'openorgs____::d41cf6bd4ab1b1362a44397e0b95c975', --National Research Council
'openorgs____::d2a09b9d5eabb10c95f9470e172d05d2', --??? Not exists ??
'openorgs____::d169c7407dd417152596908d48c11460', --Masaryk University
'openorgs____::1ec924b1759bb16d0a02f2dad8689b21', --University of Belgrade
'openorgs____::0ae431b820e4c33db8967fbb2b919150', --University of Helsinki
'openorgs____::759d59f05d77188faee99b7493b46805', --University of Minho
'openorgs____::cad284878801b9465fa51a95b1d779db', --Universidad Politécnica de Madrid
'openorgs____::eadc8da90a546e98c03f896661a2e4d4', --University of Göttingen
'openorgs____::c0286313e36479eff8676dba9b724b40', --National and Kapodistrian University of Athens
-- 'openorgs____::c80a8243a5e5c620d7931c88d93bf17a', --Université Paris Diderot
'openorgs____::c08634f0a6b0081c3dc6e6c93a4314f3', --Bielefeld University
'openorgs____::6fc85e4a8f7ecaf4b0c738d010e967ea', --University of Southern Denmark
'openorgs____::3d6122f87f9a97a99d8f6e3d73313720', --Humboldt-Universität zu Berlin
'openorgs____::16720ada63d0fa8ca41601feae7d1aa5', --TU Darmstadt
'openorgs____::ccc0a066b56d2cfaf90c2ae369df16f5', --KU Leuven
'openorgs____::4c6f119632adf789746f0a057ed73e90', --University of the Western Cape
'openorgs____::ec3665affa01aeafa28b7852c4176dbd', --Rudjer Boskovic Institute
'openorgs____::5f31346d444a7f06a28c880fb170b0f6', --Ghent University
'openorgs____::2dbe47117fd5409f9c61620813456632', --University of Luxembourg
'openorgs____::6445d7758d3a40c4d997953b6632a368', --National Institute of Informatics (NII)
'openorgs____::b77c01aa15de3675da34277d48de2ec1', -- Valencia Catholic University Saint Vincent Martyr
'openorgs____::7fe2f66cdc43983c6b24816bfe9cf6a0', -- Unviersity of Warsaw
'openorgs____::15e7921fc50d9aa1229a82a84429419e', -- University Of Thessaly
'openorgs____::11f7919dadc8f8a7251af54bba60c956', -- Technical University of Crete
'openorgs____::84f0c5f5dbb6daf42748485924efde4b', -- University of Piraeus
'openorgs____::4ac562f0376fce3539504567649cb373', -- University of Patras
'openorgs____::3e8d1f8c3f6cd7f418b09f1f58b4873b', -- Aristotle University of Thessaloniki
'openorgs____::3fcef6e1c469c10f2a84b281372c9814', -- World Bank
'openorgs____::1698a2eb1885ef8adb5a4a969e745ad3', -- École des Ponts ParisTech
'openorgs____::e15adb13c4dadd49de4d35c39b5da93a', -- Nanyang Technological University
'openorgs____::4b34103bde246228fcd837f5f1bf4212', -- Autonomous University of Barcelona
'openorgs____::72ec75fcfc4e0df1a76dc4c49007fceb', -- McMaster University
'openorgs____::51c7fc556e46381734a25a6fbc3fd398', -- University of Modena and Reggio Emilia
'openorgs____::235d7f9ad18ecd7e6dc62ea4990cb9db', -- Bilkent University
'openorgs____::31f2fa9e05b49d4cf40a19c3fed8eb06', -- Saints Cyril and Methodius University of Skopje
'openorgs____::db7686f30f22cbe73a4fde872ce812a6', -- University of Milan
'openorgs____::b8b8ca674452579f3f593d9f5e557483', -- University College Cork
'openorgs____::38d7097854736583dde879d12dacafca' -- Brown University
'openorgs____::57784c9e047e826fefdb1ef816120d92', --Arts et Métiers ParisTech
'openorgs____::2530baca8a15936ba2e3297f2bce2e7e', -- University of Cape Town
'openorgs____::d11f981828c485cd23d93f7f24f24db1', -- Technological University Dublin
'openorgs____::5e6bf8962665cdd040341171e5c631d8', -- Delft University of Technology
'openorgs____::846cb428d3f52a445f7275561a7beb5d', -- University of Manitoba
'openorgs____::eb391317ed0dc684aa81ac16265de041', -- Universitat Rovira i Virgili
'openorgs____::66aa9fc2fceb271423dfabcc38752dc0', -- Lund University
'openorgs____::3cff625a4370d51e08624cc586138b2f' -- IMT Atlantique
))) foo;
ANALYZE TABLE TARGET.result COMPUTE STATISTICS;

View File

@ -374,25 +374,29 @@
<argument>${monitor_db_name}</argument> <argument>${monitor_db_name}</argument>
<argument>${monitor_db_shadow_name}</argument> <argument>${monitor_db_shadow_name}</argument>
<argument>${wf:appPath()}/scripts/step20-createMonitorDB.sql</argument> <argument>${wf:appPath()}/scripts/step20-createMonitorDB.sql</argument>
<argument>${wf:appPath()}/scripts/step20-createMonitorDB_funded.sql</argument>
<argument>${wf:appPath()}/scripts/step20-createMonitorDB_institutions.sql</argument>
<argument>${wf:appPath()}/scripts/step20-createMonitorDB_RIs.sql</argument>
<argument>${wf:appPath()}/scripts/step20-createMonitorDB_RIs_tail.sql</argument>
<file>monitor.sh</file> <file>monitor.sh</file>
</shell> </shell>
<ok to="step20-createMonitorDB-post"/>
<error to="Kill"/>
</action>
<action name="step20-createMonitorDB-post">
<shell xmlns="uri:oozie:shell-action:0.1">
<job-tracker>${jobTracker}</job-tracker>
<name-node>${nameNode}</name-node>
<exec>monitor-post.sh</exec>
<argument>${monitor_db_name}</argument>
<argument>${monitor_db_shadow_name}</argument>
<file>monitor-post.sh</file>
</shell>
<ok to="step21-createObservatoryDB-pre"/> <ok to="step21-createObservatoryDB-pre"/>
<error to="Kill"/> <error to="Kill"/>
</action> </action>
<!-- <action name="step20-createMonitorDB-post">-->
<!-- <shell xmlns="uri:oozie:shell-action:0.1">-->
<!-- <job-tracker>${jobTracker}</job-tracker>-->
<!-- <name-node>${nameNode}</name-node>-->
<!-- <exec>monitor-post.sh</exec>-->
<!-- <argument>${monitor_db_name}</argument>-->
<!-- <argument>${monitor_db_shadow_name}</argument>-->
<!-- <file>monitor-post.sh</file>-->
<!-- </shell>-->
<!-- <ok to="step21-createObservatoryDB-pre"/>-->
<!-- <error to="Kill"/>-->
<!-- </action>-->
<action name="step21-createObservatoryDB-pre"> <action name="step21-createObservatoryDB-pre">
<shell xmlns="uri:oozie:shell-action:0.1"> <shell xmlns="uri:oozie:shell-action:0.1">
<job-tracker>${jobTracker}</job-tracker> <job-tracker>${jobTracker}</job-tracker>