diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/copyDataToImpalaCluster.sh b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/copyDataToImpalaCluster.sh
index 66783c234..b937eea25 100644
--- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/copyDataToImpalaCluster.sh
+++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/copyDataToImpalaCluster.sh
@@ -68,6 +68,16 @@ copydb $USAGE_STATS_DB
copydb $PROD_USAGE_STATS_DB
copydb $EXT_DB
copydb $STATS_DB
-copydb $MONITOR_DB
+#copydb $MONITOR_DB
copydb $OBSERVATORY_DB
+copydb "${MONITOR_DB}_funded"
+copydb "${MONITOR_DB}_institutions"
+copydb "${MONITOR_DB}_ris_tail" # lowercase like the _ris_tail name monitor.sh uses for this database; matters if copydb builds a case-sensitive path - TODO confirm
+
+contexts="knowmad::other dh-ch::other enermaps::other gotriple::other neanias-atmospheric::other rural-digital-europe::other covid-19::other aurora::other neanias-space::other north-america-studies::other north-american-studies::other eutopia::other"
+for i in ${contexts} # unquoted on purpose: one iteration per space-separated context id
+do
+    tmp=$(echo "$i" | sed -e 's/-/_/g' -e 's/::/_/g') # database suffix, e.g. dh-ch::other -> dh_ch_other
+    copydb "${MONITOR_DB}_${tmp}"
+done
\ No newline at end of file
diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/finalizeImpalaCluster.sh b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/finalizeImpalaCluster.sh
index 5914b95f8..a77b5a113 100644
--- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/finalizeImpalaCluster.sh
+++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/finalizeImpalaCluster.sh
@@ -29,3 +29,14 @@ createShadowDB $STATS_DB $STATS_DB_SHADOW
createShadowDB $MONITOR_DB $MONITOR_DB_SHADOW
createShadowDB $OBSERVATORY_DB $OBSERVATORY_DB_SHADOW
createShadowDB USAGE_STATS_DB USAGE_STATS_DB_SHADOW
+
+createShadowDB "${MONITOR_DB}_funded" "${MONITOR_DB}_funded_shadow"
+createShadowDB "${MONITOR_DB}_institutions" "${MONITOR_DB}_institutions_shadow"
+createShadowDB "${MONITOR_DB}_ris_tail" "${MONITOR_DB}_ris_tail_shadow" # lowercase like the _ris_tail name monitor.sh uses for this database
+
+contexts="knowmad::other dh-ch::other enermaps::other gotriple::other neanias-atmospheric::other rural-digital-europe::other covid-19::other aurora::other neanias-space::other north-america-studies::other north-american-studies::other eutopia::other"
+for i in ${contexts} # unquoted on purpose: one iteration per space-separated context id
+do
+    tmp=$(echo "$i" | sed -e 's/-/_/g' -e 's/::/_/g') # database suffix, e.g. dh-ch::other -> dh_ch_other
+    createShadowDB "${MONITOR_DB}_${tmp}" "${MONITOR_DB}_${tmp}_shadow"
+done
\ No newline at end of file
diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/monitor.sh b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/monitor.sh
index 440aac770..08f4c9232 100644
--- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/monitor.sh
+++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/monitor.sh
@@ -10,16 +10,88 @@ export SOURCE=$1
export TARGET=$2
export SHADOW=$3
export SCRIPT_PATH=$4
+export SCRIPT_PATH2="${5}" # arguments 5 to 8: further HDFS paths that are copied to the working directory below
+export SCRIPT_PATH3="${6}"
+export SCRIPT_PATH4="${7}"
+export SCRIPT_PATH5="${8}"
export HIVE_OPTS="-hiveconf mapred.job.queue.name=analytics -hiveconf hive.spark.client.connect.timeout=120000ms -hiveconf hive.spark.client.server.connect.timeout=300000ms -hiveconf spark.executor.memory=19166291558 -hiveconf spark.yarn.executor.memoryOverhead=3225 -hiveconf spark.driver.memory=11596411699 -hiveconf spark.yarn.driver.memoryOverhead=1228"
export HADOOP_USER_NAME="oozie"
-echo "Getting file from " $SCRIPT_PATH
-hdfs dfs -copyToLocal $SCRIPT_PATH
+echo "Getting file from " "$SCRIPT_PATH" # SCRIPT_PATH..SCRIPT_PATH5 hold arguments 4 to 8 (exported above)
+hdfs dfs -copyToLocal "$SCRIPT_PATH"
+
+echo "Getting file from " "$SCRIPT_PATH2"
+hdfs dfs -copyToLocal "$SCRIPT_PATH2"
+
+echo "Getting file from " "$SCRIPT_PATH3"
+hdfs dfs -copyToLocal "$SCRIPT_PATH3"
+
+echo "Getting file from " "$SCRIPT_PATH4"
+hdfs dfs -copyToLocal "$SCRIPT_PATH4"
+
+echo "Getting file from " "$SCRIPT_PATH5"
+hdfs dfs -copyToLocal "$SCRIPT_PATH5"
echo "Creating monitor database"
-#cat step20-createMonitorDB.sql | sed s/SOURCE/$1/g | sed s/TARGET/$2/g1 > foo
-cat step20-createMonitorDB.sql | sed "s/TARGET/${TARGET}/g" | sed "s/SOURCE/${SOURCE}/g1" > foo
+sed -e "s/SOURCE/$1/g" -e "s/TARGET/$2_funded/g1" step20-createMonitorDB_funded.sql > foo # fill in the source and target database names
hive $HIVE_OPTS -f foo
+sed -e "s/SOURCE/$1/g" -e "s/TARGET/$2_funded/g1" step20-createMonitorDB.sql > foo
+hive $HIVE_OPTS -f foo
+# institutions database: first its result table, then the tables that step20-createMonitorDB.sql filters by that result table
+sed -e "s/SOURCE/$1/g" -e "s/TARGET/$2_institutions/g1" step20-createMonitorDB_institutions.sql > foo
+hive $HIVE_OPTS -f foo
+sed -e "s/SOURCE/$1/g" -e "s/TARGET/$2_institutions/g1" step20-createMonitorDB.sql > foo
+hive $HIVE_OPTS -f foo
+
+contexts="knowmad::other dh-ch::other enermaps::other gotriple::other neanias-atmospheric::other rural-digital-europe::other covid-19::other aurora::other neanias-space::other north-america-studies::other north-american-studies::other eutopia::other"
+
+for ctx in ${contexts} # unquoted on purpose: one iteration per space-separated context id
+do
+    db_suffix=$(echo "$ctx" | sed -e 's/-/_/g' -e 's/::/_/g') # e.g. dh-ch::other -> dh_ch_other
+    ctx_name=$(echo "$ctx" | sed 's/:.*//') # part before the first colon, used in the LIKE pattern
+    sed -e "s/SOURCE/$1/g" -e "s/TARGET/${2}_${db_suffix}/g1" -e "s/CONTEXT/\'%${ctx_name}%\'/g" step20-createMonitorDB_RIs.sql > foo
+    hive $HIVE_OPTS -f foo
+    sed -e "s/SOURCE/$1/g" -e "s/TARGET/${2}_${db_suffix}/g1" step20-createMonitorDB.sql > foo
+    hive $HIVE_OPTS -f foo
+done
+
+
+cat step20-createMonitorDB_RIs_tail.sql | sed "s/SOURCE/$1/g" | sed "s/TARGET/$2_ris_tail/g1" | sed "s/CONTEXTS/'knowmad::other','dh-ch::other', 'enermaps::other', 'gotriple::other', 'neanias-atmospheric::other', 'rural-digital-europe::other', 'covid-19::other', 'aurora::other', 'neanias-space::other', 'north-america-studies::other', 'north-american-studies::other', 'eutopia::other'/g" > foo # list inserted without enclosing double quotes so NOT IN gets one literal per category instead of a single string
+hive $HIVE_OPTS -f foo
+cat step20-createMonitorDB.sql | sed "s/SOURCE/$1/g" | sed "s/TARGET/$2_ris_tail/g1" > foo # same lowercase name the shadow step below reads back
+hive $HIVE_OPTS -f foo
+
echo "Hive shell finished"
+echo "Updating shadow monitor funded database"
+hive -e "drop database if exists ${SHADOW}_funded cascade"
+hive -e "create database if not exists ${SHADOW}_funded"
+hive $HIVE_OPTS --database "${TARGET}_funded" -e "show tables" | grep -v WARN | sed "s/\(.*\)/create view ${SHADOW}_funded.\1 as select * from ${TARGET}_funded.\1;/" > foo # one create view statement per listed table
+hive -f foo
+echo "Updated shadow monitor funded database"
+
+echo "Updating shadow monitor institutions database"
+hive -e "drop database if exists ${SHADOW}_institutions cascade"
+hive -e "create database if not exists ${SHADOW}_institutions"
+hive $HIVE_OPTS --database "${TARGET}_institutions" -e "show tables" | grep -v WARN | sed "s/\(.*\)/create view ${SHADOW}_institutions.\1 as select * from ${TARGET}_institutions.\1;/" > foo
+hive -f foo
+echo "Shadow db monitor institutions ready!"
+
+echo "Updating shadow monitor RIs database"
+for i in $contexts # unquoted on purpose: one iteration per space-separated context id
+do
+    tmp=$(echo "$i" | sed -e 's/-/_/g' -e 's/::/_/g') # database suffix, e.g. dh-ch::other -> dh_ch_other
+    hive -e "drop database if exists ${SHADOW}_${tmp} cascade"
+    hive -e "create database if not exists ${SHADOW}_${tmp}"
+    hive $HIVE_OPTS --database "${TARGET}_${tmp}" -e "show tables" | grep -v WARN | sed "s/\(.*\)/create view ${SHADOW}_${tmp}.\1 as select * from ${TARGET}_${tmp}.\1;/" > foo
+    hive -f foo
+done
+echo "Shadow db monitor RIs ready!"
+
+echo "Updating shadow monitor RIs tail database"
+hive -e "drop database if exists ${SHADOW}_ris_tail cascade"
+hive -e "create database if not exists ${SHADOW}_ris_tail"
+hive $HIVE_OPTS --database "${TARGET}_ris_tail" -e "show tables" | grep -v WARN | sed "s/\(.*\)/create view ${SHADOW}_ris_tail.\1 as select * from ${TARGET}_ris_tail.\1;/" > foo
+hive -f foo
+echo "Shadow db monitor RIs tail ready!"
diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step14.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step14.sql
index 47a6f84c2..dc9e6c1f9 100644
--- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step14.sql
+++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step14.sql
@@ -46,4 +46,8 @@ FROM (
LEFT OUTER JOIN (
SELECT substr(d.id, 4) id
from ${openaire_db_name}.datasource d
- WHERE d.datainfo.deletedbyinference=false and d.datainfo.invisible = FALSE) d on o.datasource = d.id;
\ No newline at end of file
+ WHERE d.datainfo.deletedbyinference=false and d.datainfo.invisible = FALSE) d on o.datasource = d.id;
+-- result_accessroute: one row per distinct (result, open access route). The id drops its first three characters, as done for datasource ids above, and is named id because step20-createMonitorDB.sql filters this table on orig.id
+CREATE TABLE IF NOT EXISTS ${stats_db_name}.result_accessroute STORED AS PARQUET as
+select distinct substr(id,4) as id, accessroute from ${openaire_db_name}.result
+lateral view explode (instance.accessright.openaccessroute) openaccessroute as accessroute;
diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql
index 4fd941e5d..f523b63d5 100755
--- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql
+++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql
@@ -92,53 +92,57 @@ ANALYZE TABLE indi_funded_result_with_fundref COMPUTE STATISTICS;
--
-- compute stats indi_result_org_collab;
--
-create TEMPORARY TABLE tmp AS SELECT ro.organization organization, ro.id from result_organization ro
+create TEMPORARY TABLE tmp AS SELECT ro.organization organization, ro.id, o.name from result_organization ro
join organization o on o.id=ro.organization where o.name is not null;
create table if not exists indi_result_org_collab stored as parquet as
-select o1.organization org1, o2.organization org2, count(o1.id) as collaborations
+select o1.organization org1, o1.name org1name1, o2.organization org2, o2.name org2name2, count(o1.id) as collaborations
from tmp as o1
-join tmp as o2 where o1.id=o2.id and o1.organization!=o2.organization
-group by o1.organization, o2.organization;
+join tmp as o2 where o1.id=o2.id and o1.organization!=o2.organization and o1.name!=o2.name
+group by o1.organization, o2.organization, o1.name, o2.name;
drop table tmp purge;
ANALYZE TABLE indi_result_org_collab COMPUTE STATISTICS;
create TEMPORARY TABLE tmp AS
-select distinct ro.organization organization, ro.id, o.country from result_organization ro
+select distinct ro.organization organization, ro.id, o.name, o.country from result_organization ro
join organization o on o.id=ro.organization where country <> 'UNKNOWN' and o.name is not null;
create table if not exists indi_result_org_country_collab stored as parquet as
-select o1.organization org1,o2.country country2, count(o1.id) as collaborations
+select o1.organization org1,o1.name org1name1, o2.country country2, count(o1.id) as collaborations
from tmp as o1 join tmp as o2 on o1.id=o2.id
where o1.id=o2.id and o1.country!=o2.country
-group by o1.organization, o1.id, o2.country;
+group by o1.organization, o1.id, o1.name, o2.country;
drop table tmp purge;
ANALYZE TABLE indi_result_org_country_collab COMPUTE STATISTICS;
+-- named organizations paired with each of their projects. The table must be called tmp because the select that follows reads from tmp
+create TEMPORARY TABLE tmp AS
+select o.id organization, o.name, ro.project as project from organization o
+    join organization_projects ro on o.id=ro.id where o.name is not null;
create table if not exists indi_project_collab_org stored as parquet as
-select o1.id org1,o2.id org2, count(distinct o1.project) as collaborations
-from organization_projects as o1
- join organization_projects as o2 on o1.project=o2.project
-where o1.id!=o2.id
-group by o1.id, o2.id;
+select o1.organization org1,o1.name orgname1, o2.organization org2, o2.name orgname2, count(distinct o1.project) as collaborations
+from tmp as o1 join tmp as o2 on o1.project=o2.project
+where o1.organization<>o2.organization and o1.name<>o2.name
+group by o1.name,o2.name, o1.organization, o2.organization;
+drop table tmp purge;
ANALYZE TABLE indi_project_collab_org COMPUTE STATISTICS;
create TEMPORARY TABLE tmp AS
-select o.id organization, o.country , ro.project as project from organization o
+select o.id organization, o.name, o.country , ro.project as project from organization o
join organization_projects ro on o.id=ro.id
- and o.country <> 'UNKNOWN';
+ and o.country <> 'UNKNOWN' and o.name is not null;
create table if not exists indi_project_collab_org_country stored as parquet as
-select o1.organization org1,o2.country country2, count(distinct o1.project) as collaborations
+select o1.organization org1,o1.name org1name, o2.country country2, count(distinct o1.project) as collaborations
from tmp as o1
join tmp as o2 on o1.project=o2.project
where o1.organization<>o2.organization and o1.country<>o2.country
-group by o1.organization, o2.country;
+group by o1.organization, o2.country, o1.name;
drop table tmp purge;
@@ -245,10 +249,45 @@ FROM publication_datasources pd
JOIN issn on issn.id=pd.datasource
JOIN hybrid_oa ON issn.issn = hybrid_oa.issn
JOIN indi_result_has_cc_licence cc on pd.id=cc.id
- where cc.has_cc_license=1) tmp on pd.id=tmp.id;
+ JOIN indi_pub_gold_oa ga on pd.id=ga.id
+ where cc.has_cc_license=1 and ga.is_gold=0) tmp on pd.id=tmp.id;
ANALYZE TABLE indi_pub_hybrid_oa_with_cc COMPUTE STATISTICS;
+-- indi_pub_bronze_oa: is_bronze_oa is 1 for publications from a datasource whose ISSN appears in STATS_EXT.plan_s_jn and that have no CC licence, are not gold OA and are not hybrid OA with CC. Every other publication in publication_datasources gets 0
+create table if not exists indi_pub_bronze_oa stored as parquet as
+WITH hybrid_oa AS (
+    SELECT issn_l, journal_is_in_doaj, journal_is_oa, issn_print as issn
+    FROM STATS_EXT.plan_s_jn
+    WHERE issn_print != ""
+    UNION ALL
+    SELECT issn_l, journal_is_in_doaj, journal_is_oa, issn_online as issn
+    FROM STATS_EXT.plan_s_jn
+    WHERE issn_online != "" and (journal_is_in_doaj = FALSE OR journal_is_oa = FALSE)),
+issn AS (
+    SELECT * FROM (
+        SELECT id, issn_printed as issn
+        FROM datasource
+        WHERE issn_printed IS NOT NULL
+        UNION ALL
+        SELECT id,issn_online as issn
+        FROM datasource
+        WHERE issn_online IS NOT NULL ) as issn
+    WHERE LENGTH(issn) > 7)
+SELECT DISTINCT pd.id, coalesce(is_bronze_oa, 0) as is_bronze_oa
+FROM publication_datasources pd
+    LEFT OUTER JOIN (
+        SELECT pd.id, 1 as is_bronze_oa from publication_datasources pd
+        JOIN datasource d on d.id=pd.datasource
+        JOIN issn on issn.id=pd.datasource
+        JOIN hybrid_oa ON issn.issn = hybrid_oa.issn
+        JOIN indi_result_has_cc_licence cc on pd.id=cc.id
+        JOIN indi_pub_gold_oa ga on pd.id=ga.id
+        JOIN indi_pub_hybrid_oa_with_cc hy on hy.id=pd.id
+        where cc.has_cc_license=0 and ga.is_gold=0 and hy.is_hybrid_oa=0) tmp on pd.id=tmp.id;
+
+ANALYZE TABLE indi_pub_bronze_oa COMPUTE STATISTICS;
+
create table if not exists indi_pub_downloads stored as parquet as
SELECT result_id, sum(downloads) no_downloads from openaire_prod_usage_stats.usage_stats
join publication on result_id=id
@@ -733,3 +772,27 @@ from result p
on p.id= tmp.id;
ANALYZE TABLE indi_result_with_pid COMPUTE STATISTICS;
+-- indi_impact_measures: one row per result and measure, leaving out views and downloads. The first column is aliased to id because step20-createMonitorDB.sql filters this table on orig.id
+create table if not exists indi_impact_measures as
+select distinct substr(id, 4) as id, measures_ids.id impactmetric, measures_ids.unit.value[0] score,
+cast(measures_ids.unit.value[0] as decimal(6,3)) score_dec, measures_ids.unit.value[1] class
+from result lateral view explode(measures) measures as measures_ids
+where measures_ids.id!='views' and measures_ids.id!='downloads';
+
+ANALYZE TABLE indi_impact_measures COMPUTE STATISTICS;
+
+CREATE TEMPORARY TABLE pub_fos_totals as
+select rf.id, count(distinct lvl3) totals
+from result_fos rf group by rf.id;
+-- a result present in result_fos is flagged 1 when it has more than 10 distinct lvl3 values, otherwise 0
+create table if not exists indi_pub_interdisciplinarity as
+select distinct every_pub.id,
+    coalesce(flagged.indi_pub_is_interdisciplinary, 0) as indi_pub_is_interdisciplinary
+from pub_fos_totals every_pub
+left outer join (
+    select above_limit.id, 1 as indi_pub_is_interdisciplinary from pub_fos_totals above_limit
+    where above_limit.totals>10) flagged on every_pub.id=flagged.id;
+
+drop table pub_fos_totals purge;
+
+ANALYZE TABLE indi_pub_interdisciplinarity COMPUTE STATISTICS;
\ No newline at end of file
diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql
index bc72b6c15..9744d5aae 100644
--- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql
+++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql
@@ -1,5 +1,78 @@
-drop database if exists TARGET cascade;
-create database if not exists TARGET;
+--drop database if exists TARGET cascade;
+--create database if not exists TARGET;
+--
+--create view if not exists TARGET.category as select * from SOURCE.category;
+--create view if not exists TARGET.concept as select * from SOURCE.concept;
+--create view if not exists TARGET.context as select * from SOURCE.context;
+--create view if not exists TARGET.country as select * from SOURCE.country;
+--create view if not exists TARGET.countrygdp as select * from SOURCE.countrygdp;
+--create view if not exists TARGET.creation_date as select * from SOURCE.creation_date;
+--create view if not exists TARGET.funder as select * from SOURCE.funder;
+--create view if not exists TARGET.fundref as select * from SOURCE.fundref;
+--create view if not exists TARGET.rndexpenditure as select * from SOURCE.rndexpediture;
+--create view if not exists TARGET.rndgdpexpenditure as select * from SOURCE.rndgdpexpenditure;
+--create view if not exists TARGET.doctoratestudents as select * from SOURCE.doctoratestudents;
+--create view if not exists TARGET.totalresearchers as select * from SOURCE.totalresearchers;
+--create view if not exists TARGET.totalresearchersft as select * from SOURCE.totalresearchersft;
+--create view if not exists TARGET.hrrst as select * from SOURCE.hrrst;
+--
+--create table TARGET.result stored as parquet as
+-- select distinct * from (
+-- select * from SOURCE.result r where exists (select 1 from SOURCE.result_projects rp join SOURCE.project p on rp.project=p.id where rp.id=r.id)
+-- union all
+-- select * from SOURCE.result r where exists (select 1 from SOURCE.result_concepts rc where rc.id=r.id)
+-- union all
+-- select * from SOURCE.result r where exists (select 1 from SOURCE.result_organization ro where ro.id=r.id and ro.organization in (
+-- 'openorgs____::b84450f9864182c67b8611b5593f4250', --"Athena Research and Innovation Center In Information Communication & Knowledge Technologies', --ARC"
+-- 'openorgs____::d41cf6bd4ab1b1362a44397e0b95c975', --National Research Council
+-- 'openorgs____::d2a09b9d5eabb10c95f9470e172d05d2', --??? Not exists ??
+-- 'openorgs____::d169c7407dd417152596908d48c11460', --Masaryk University
+-- 'openorgs____::1ec924b1759bb16d0a02f2dad8689b21', --University of Belgrade
+-- 'openorgs____::0ae431b820e4c33db8967fbb2b919150', --University of Helsinki
+-- 'openorgs____::759d59f05d77188faee99b7493b46805', --University of Minho
+-- 'openorgs____::cad284878801b9465fa51a95b1d779db', --Universidad Politécnica de Madrid
+-- 'openorgs____::eadc8da90a546e98c03f896661a2e4d4', --University of Göttingen
+-- 'openorgs____::c0286313e36479eff8676dba9b724b40', --National and Kapodistrian University of Athens
+-- -- 'openorgs____::c80a8243a5e5c620d7931c88d93bf17a', --Université Paris Diderot
+-- 'openorgs____::c08634f0a6b0081c3dc6e6c93a4314f3', --Bielefeld University
+-- 'openorgs____::6fc85e4a8f7ecaf4b0c738d010e967ea', --University of Southern Denmark
+-- 'openorgs____::3d6122f87f9a97a99d8f6e3d73313720', --Humboldt-Universität zu Berlin
+-- 'openorgs____::16720ada63d0fa8ca41601feae7d1aa5', --TU Darmstadt
+-- 'openorgs____::ccc0a066b56d2cfaf90c2ae369df16f5', --KU Leuven
+-- 'openorgs____::4c6f119632adf789746f0a057ed73e90', --University of the Western Cape
+-- 'openorgs____::ec3665affa01aeafa28b7852c4176dbd', --Rudjer Boskovic Institute
+-- 'openorgs____::5f31346d444a7f06a28c880fb170b0f6', --Ghent University
+-- 'openorgs____::2dbe47117fd5409f9c61620813456632', --University of Luxembourg
+-- 'openorgs____::6445d7758d3a40c4d997953b6632a368', --National Institute of Informatics (NII)
+-- 'openorgs____::b77c01aa15de3675da34277d48de2ec1', -- Valencia Catholic University Saint Vincent Martyr
+-- 'openorgs____::7fe2f66cdc43983c6b24816bfe9cf6a0', -- Unviersity of Warsaw
+-- 'openorgs____::15e7921fc50d9aa1229a82a84429419e', -- University Of Thessaly
+-- 'openorgs____::11f7919dadc8f8a7251af54bba60c956', -- Technical University of Crete
+-- 'openorgs____::84f0c5f5dbb6daf42748485924efde4b', -- University of Piraeus
+-- 'openorgs____::4ac562f0376fce3539504567649cb373', -- University of Patras
+-- 'openorgs____::3e8d1f8c3f6cd7f418b09f1f58b4873b', -- Aristotle University of Thessaloniki
+-- 'openorgs____::3fcef6e1c469c10f2a84b281372c9814', -- World Bank
+-- 'openorgs____::1698a2eb1885ef8adb5a4a969e745ad3', -- École des Ponts ParisTech
+-- 'openorgs____::e15adb13c4dadd49de4d35c39b5da93a', -- Nanyang Technological University
+-- 'openorgs____::4b34103bde246228fcd837f5f1bf4212', -- Autonomous University of Barcelona
+-- 'openorgs____::72ec75fcfc4e0df1a76dc4c49007fceb', -- McMaster University
+-- 'openorgs____::51c7fc556e46381734a25a6fbc3fd398', -- University of Modena and Reggio Emilia
+-- 'openorgs____::235d7f9ad18ecd7e6dc62ea4990cb9db', -- Bilkent University
+-- 'openorgs____::31f2fa9e05b49d4cf40a19c3fed8eb06', -- Saints Cyril and Methodius University of Skopje
+-- 'openorgs____::db7686f30f22cbe73a4fde872ce812a6', -- University of Milan
+-- 'openorgs____::b8b8ca674452579f3f593d9f5e557483', -- University College Cork
+-- 'openorgs____::38d7097854736583dde879d12dacafca' -- Brown University
+-- 'openorgs____::57784c9e047e826fefdb1ef816120d92', --Arts et Métiers ParisTech
+-- 'openorgs____::2530baca8a15936ba2e3297f2bce2e7e', -- University of Cape Town
+-- 'openorgs____::d11f981828c485cd23d93f7f24f24db1', -- Technological University Dublin
+-- 'openorgs____::5e6bf8962665cdd040341171e5c631d8', -- Delft University of Technology
+-- 'openorgs____::846cb428d3f52a445f7275561a7beb5d', -- University of Manitoba
+-- 'openorgs____::eb391317ed0dc684aa81ac16265de041', -- Universitat Rovira i Virgili
+-- 'openorgs____::66aa9fc2fceb271423dfabcc38752dc0', -- Lund University
+-- 'openorgs____::3cff625a4370d51e08624cc586138b2f' -- IMT Atlantique
+-- ) )) foo;
+--
+--ANALYZE TABLE TARGET.result COMPUTE STATISTICS;
create view if not exists TARGET.category as select * from SOURCE.category;
create view if not exists TARGET.concept as select * from SOURCE.concept;
@@ -16,61 +89,6 @@ create view if not exists TARGET.totalresearchers as select * from SOURCE.totalr
create view if not exists TARGET.totalresearchersft as select * from SOURCE.totalresearchersft;
create view if not exists TARGET.hrrst as select * from SOURCE.hrrst;
-create table TARGET.result stored as parquet as
- select distinct * from (
- select * from SOURCE.result r where exists (select 1 from SOURCE.result_projects rp join SOURCE.project p on rp.project=p.id where rp.id=r.id)
- union all
- select * from SOURCE.result r where exists (select 1 from SOURCE.result_concepts rc where rc.id=r.id)
- union all
- select * from SOURCE.result r where exists (select 1 from SOURCE.result_organization ro where ro.id=r.id and ro.organization in (
- 'openorgs____::b84450f9864182c67b8611b5593f4250', --"Athena Research and Innovation Center In Information Communication & Knowledge Technologies', --ARC"
- 'openorgs____::d41cf6bd4ab1b1362a44397e0b95c975', --National Research Council
- 'openorgs____::d2a09b9d5eabb10c95f9470e172d05d2', --??? Not exists ??
- 'openorgs____::d169c7407dd417152596908d48c11460', --Masaryk University
- 'openorgs____::1ec924b1759bb16d0a02f2dad8689b21', --University of Belgrade
- 'openorgs____::0ae431b820e4c33db8967fbb2b919150', --University of Helsinki
- 'openorgs____::759d59f05d77188faee99b7493b46805', --University of Minho
- 'openorgs____::cad284878801b9465fa51a95b1d779db', --Universidad Politécnica de Madrid
- 'openorgs____::eadc8da90a546e98c03f896661a2e4d4', --University of Göttingen
- 'openorgs____::c0286313e36479eff8676dba9b724b40', --National and Kapodistrian University of Athens
- -- 'openorgs____::c80a8243a5e5c620d7931c88d93bf17a', --Université Paris Diderot
- 'openorgs____::c08634f0a6b0081c3dc6e6c93a4314f3', --Bielefeld University
- 'openorgs____::6fc85e4a8f7ecaf4b0c738d010e967ea', --University of Southern Denmark
- 'openorgs____::3d6122f87f9a97a99d8f6e3d73313720', --Humboldt-Universität zu Berlin
- 'openorgs____::16720ada63d0fa8ca41601feae7d1aa5', --TU Darmstadt
- 'openorgs____::ccc0a066b56d2cfaf90c2ae369df16f5', --KU Leuven
- 'openorgs____::4c6f119632adf789746f0a057ed73e90', --University of the Western Cape
- 'openorgs____::ec3665affa01aeafa28b7852c4176dbd', --Rudjer Boskovic Institute
- 'openorgs____::5f31346d444a7f06a28c880fb170b0f6', --Ghent University
- 'openorgs____::2dbe47117fd5409f9c61620813456632', --University of Luxembourg
- 'openorgs____::6445d7758d3a40c4d997953b6632a368', --National Institute of Informatics (NII)
- 'openorgs____::b77c01aa15de3675da34277d48de2ec1', -- Valencia Catholic University Saint Vincent Martyr
- 'openorgs____::7fe2f66cdc43983c6b24816bfe9cf6a0', -- Unviersity of Warsaw
- 'openorgs____::15e7921fc50d9aa1229a82a84429419e', -- University Of Thessaly
- 'openorgs____::11f7919dadc8f8a7251af54bba60c956', -- Technical University of Crete
- 'openorgs____::84f0c5f5dbb6daf42748485924efde4b', -- University of Piraeus
- 'openorgs____::4ac562f0376fce3539504567649cb373', -- University of Patras
- 'openorgs____::3e8d1f8c3f6cd7f418b09f1f58b4873b', -- Aristotle University of Thessaloniki
- 'openorgs____::3fcef6e1c469c10f2a84b281372c9814', -- World Bank
- 'openorgs____::1698a2eb1885ef8adb5a4a969e745ad3', -- École des Ponts ParisTech
- 'openorgs____::e15adb13c4dadd49de4d35c39b5da93a', -- Nanyang Technological University
- 'openorgs____::4b34103bde246228fcd837f5f1bf4212', -- Autonomous University of Barcelona
- 'openorgs____::72ec75fcfc4e0df1a76dc4c49007fceb', -- McMaster University
- 'openorgs____::51c7fc556e46381734a25a6fbc3fd398', -- University of Modena and Reggio Emilia
- 'openorgs____::235d7f9ad18ecd7e6dc62ea4990cb9db', -- Bilkent University
- 'openorgs____::31f2fa9e05b49d4cf40a19c3fed8eb06', -- Saints Cyril and Methodius University of Skopje
- 'openorgs____::db7686f30f22cbe73a4fde872ce812a6', -- University of Milan
- 'openorgs____::b8b8ca674452579f3f593d9f5e557483', -- University College Cork
- 'openorgs____::38d7097854736583dde879d12dacafca' -- Brown University
- 'openorgs____::57784c9e047e826fefdb1ef816120d92', --Arts et Métiers ParisTech
- 'openorgs____::2530baca8a15936ba2e3297f2bce2e7e', -- University of Cape Town
- 'openorgs____::d11f981828c485cd23d93f7f24f24db1', -- Technological University Dublin
- 'openorgs____::5e6bf8962665cdd040341171e5c631d8', -- Delft University of Technology
- 'openorgs____::846cb428d3f52a445f7275561a7beb5d' -- University of Manitoba
- ) )) foo;
-
-ANALYZE TABLE TARGET.result COMPUTE STATISTICS;
-
create table TARGET.result_citations stored as parquet as select * from SOURCE.result_citations orig where exists (select 1 from TARGET.result r where r.id=orig.id);
ANALYZE TABLE TARGET.result_citations COMPUTE STATISTICS;
@@ -140,6 +158,9 @@ ANALYZE TABLE TARGET.result_topics COMPUTE STATISTICS;
create table TARGET.result_fos stored as parquet as select * from SOURCE.result_fos orig where exists (select 1 from TARGET.result r where r.id=orig.id);
ANALYZE TABLE TARGET.result_fos COMPUTE STATISTICS;
+create table TARGET.result_accessroute stored as parquet as select * from SOURCE.result_accessroute orig where exists (select 1 from TARGET.result r where r.id=orig.id);
+ANALYZE TABLE TARGET.result_accessroute COMPUTE STATISTICS;
+
create view TARGET.foo1 as select * from SOURCE.result_result rr where rr.source in (select id from TARGET.result);
create view TARGET.foo2 as select * from SOURCE.result_result rr where rr.target in (select id from TARGET.result);
create table TARGET.result_result STORED AS PARQUET as select distinct * from (select * from TARGET.foo1 union all select * from TARGET.foo2) foufou;
@@ -213,6 +234,8 @@ ANALYZE TABLE TARGET.indi_result_no_of_copies COMPUTE STATISTICS;
---- Sprint 6 ----
create table TARGET.indi_pub_hybrid_oa_with_cc stored as parquet as select * from SOURCE.indi_pub_hybrid_oa_with_cc orig where exists (select 1 from TARGET.result r where r.id=orig.id);
ANALYZE TABLE TARGET.indi_pub_hybrid_oa_with_cc COMPUTE STATISTICS;
+create table TARGET.indi_pub_bronze_oa stored as parquet as select * from SOURCE.indi_pub_bronze_oa orig where exists (select 1 from TARGET.result r where r.id=orig.id);
+ANALYZE TABLE TARGET.indi_pub_bronze_oa COMPUTE STATISTICS;
create table TARGET.indi_pub_downloads stored as parquet as select * from SOURCE.indi_pub_downloads orig where exists (select 1 from TARGET.result r where r.id=orig.result_id);
ANALYZE TABLE TARGET.indi_pub_downloads COMPUTE STATISTICS;
create table TARGET.indi_pub_downloads_datasource stored as parquet as select * from SOURCE.indi_pub_downloads_datasource orig where exists (select 1 from TARGET.result r where r.id=orig.result_id);
@@ -241,3 +264,7 @@ create table TARGET.indi_pub_in_subscribed stored as parquet as select * from SO
ANALYZE TABLE TARGET.indi_pub_in_subscribed COMPUTE STATISTICS;
create table TARGET.indi_result_with_pid stored as parquet as select * from SOURCE.indi_result_with_pid orig where exists (select 1 from TARGET.result r where r.id=orig.id);
ANALYZE TABLE TARGET.indi_result_with_pid COMPUTE STATISTICS;
+create table TARGET.indi_impact_measures stored as parquet as select * from SOURCE.indi_impact_measures orig where exists (select 1 from TARGET.result r where r.id=orig.id);
+ANALYZE TABLE TARGET.indi_impact_measures COMPUTE STATISTICS;
+create table TARGET.indi_pub_interdisciplinarity stored as parquet as select * from SOURCE.indi_pub_interdisciplinarity orig where exists (select 1 from TARGET.result r where r.id=orig.id);
+ANALYZE TABLE TARGET.indi_pub_interdisciplinarity COMPUTE STATISTICS;
diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB_RIs.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB_RIs.sql
new file mode 100644
index 000000000..92b40405d
--- /dev/null
+++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB_RIs.sql
@@ -0,0 +1,15 @@
+drop database if exists TARGET cascade;
+create database if not exists TARGET;
+-- result table: results linked to at least one concept whose category matches the LIKE pattern substituted by monitor.sh
+create table TARGET.result stored as parquet as
+select distinct * from (
+    select * from SOURCE.result res
+    where exists (
+        select 1 from SOURCE.result_concepts rc
+        join SOURCE.concept cpt on cpt.id=rc.concept
+        join SOURCE.category ctg on ctg.id=cpt.category
+        join SOURCE.context ctx on ctx.id=ctg.context
+-- the two inner joins above only keep concepts whose category row and context row exist
+        where rc.id=res.id and cpt.category like CONTEXT)
+) foo;
+ANALYZE TABLE TARGET.result COMPUTE STATISTICS;
\ No newline at end of file
diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB_RIs_tail.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB_RIs_tail.sql
new file mode 100644
index 000000000..ef6d08d79
--- /dev/null
+++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB_RIs_tail.sql
@@ -0,0 +1,15 @@
+drop database if exists TARGET cascade;
+create database if not exists TARGET;
+-- result table: results linked to at least one concept whose category is not in the list substituted by monitor.sh
+create table TARGET.result stored as parquet as
+select distinct * from (
+    select * from SOURCE.result res
+    where exists (
+        select 1 from SOURCE.result_concepts rc
+        join SOURCE.concept cpt on cpt.id=rc.concept
+        join SOURCE.category ctg on ctg.id=cpt.category
+        join SOURCE.context ctx on ctx.id=ctg.context
+-- the two inner joins above only keep concepts whose category row and context row exist
+        where rc.id=res.id and cpt.category not in (CONTEXTS))
+) foo;
+ANALYZE TABLE TARGET.result COMPUTE STATISTICS;
\ No newline at end of file
diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB_funded.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB_funded.sql
new file mode 100644
index 000000000..8d8739c74
--- /dev/null
+++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB_funded.sql
@@ -0,0 +1,9 @@
+drop database if exists TARGET cascade;
+create database if not exists TARGET;
+-- The database is dropped and recreated above. The statement below keeps only results that have at least one result_projects row whose project id is present in the project table. NOTE(review): the upper-case names look like placeholders filled in by the caller - confirm
+create table TARGET.result stored as parquet as
+ select distinct * from (
+ select * from SOURCE.result r where exists (select 1 from SOURCE.result_projects rp join SOURCE.project p on rp.project=p.id where rp.id=r.id)
+ ) foo;
+-- Compute table statistics for the table created above.
+ANALYZE TABLE TARGET.result COMPUTE STATISTICS;
\ No newline at end of file
diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB_institutions.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB_institutions.sql
new file mode 100644
index 000000000..121ee6e7f
--- /dev/null
+++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB_institutions.sql
@@ -0,0 +1,56 @@
+drop database if exists TARGET cascade;
+create database if not exists TARGET;
+-- The database is dropped and recreated above. The statement below keeps only results linked through result_organization to one of the listed organization ids. Every id except the last must end with a comma, because adjacent string literals may be merged into one value. NOTE(review): the upper-case names look like placeholders filled in by the caller - confirm
+create table TARGET.result stored as parquet as
+ select distinct * from (
+ select * from SOURCE.result r where exists (select 1 from SOURCE.result_organization ro where ro.id=r.id and ro.organization in (
+ 'openorgs____::b84450f9864182c67b8611b5593f4250', -- Athena Research and Innovation Center In Information Communication & Knowledge Technologies (ARC)
+ 'openorgs____::d41cf6bd4ab1b1362a44397e0b95c975', --National Research Council
+ 'openorgs____::d2a09b9d5eabb10c95f9470e172d05d2', -- NOTE(review): the original note says this id does not exist - confirm or remove
+ 'openorgs____::d169c7407dd417152596908d48c11460', --Masaryk University
+ 'openorgs____::1ec924b1759bb16d0a02f2dad8689b21', --University of Belgrade
+ 'openorgs____::0ae431b820e4c33db8967fbb2b919150', --University of Helsinki
+ 'openorgs____::759d59f05d77188faee99b7493b46805', --University of Minho
+ 'openorgs____::cad284878801b9465fa51a95b1d779db', --Universidad Politécnica de Madrid
+ 'openorgs____::eadc8da90a546e98c03f896661a2e4d4', --University of Göttingen
+ 'openorgs____::c0286313e36479eff8676dba9b724b40', --National and Kapodistrian University of Athens
+ -- 'openorgs____::c80a8243a5e5c620d7931c88d93bf17a', --Université Paris Diderot
+ 'openorgs____::c08634f0a6b0081c3dc6e6c93a4314f3', --Bielefeld University
+ 'openorgs____::6fc85e4a8f7ecaf4b0c738d010e967ea', --University of Southern Denmark
+ 'openorgs____::3d6122f87f9a97a99d8f6e3d73313720', --Humboldt-Universität zu Berlin
+ 'openorgs____::16720ada63d0fa8ca41601feae7d1aa5', --TU Darmstadt
+ 'openorgs____::ccc0a066b56d2cfaf90c2ae369df16f5', --KU Leuven
+ 'openorgs____::4c6f119632adf789746f0a057ed73e90', --University of the Western Cape
+ 'openorgs____::ec3665affa01aeafa28b7852c4176dbd', --Rudjer Boskovic Institute
+ 'openorgs____::5f31346d444a7f06a28c880fb170b0f6', --Ghent University
+ 'openorgs____::2dbe47117fd5409f9c61620813456632', --University of Luxembourg
+ 'openorgs____::6445d7758d3a40c4d997953b6632a368', --National Institute of Informatics (NII)
+ 'openorgs____::b77c01aa15de3675da34277d48de2ec1', -- Valencia Catholic University Saint Vincent Martyr
+ 'openorgs____::7fe2f66cdc43983c6b24816bfe9cf6a0', -- University of Warsaw
+ 'openorgs____::15e7921fc50d9aa1229a82a84429419e', -- University Of Thessaly
+ 'openorgs____::11f7919dadc8f8a7251af54bba60c956', -- Technical University of Crete
+ 'openorgs____::84f0c5f5dbb6daf42748485924efde4b', -- University of Piraeus
+ 'openorgs____::4ac562f0376fce3539504567649cb373', -- University of Patras
+ 'openorgs____::3e8d1f8c3f6cd7f418b09f1f58b4873b', -- Aristotle University of Thessaloniki
+ 'openorgs____::3fcef6e1c469c10f2a84b281372c9814', -- World Bank
+ 'openorgs____::1698a2eb1885ef8adb5a4a969e745ad3', -- École des Ponts ParisTech
+ 'openorgs____::e15adb13c4dadd49de4d35c39b5da93a', -- Nanyang Technological University
+ 'openorgs____::4b34103bde246228fcd837f5f1bf4212', -- Autonomous University of Barcelona
+ 'openorgs____::72ec75fcfc4e0df1a76dc4c49007fceb', -- McMaster University
+ 'openorgs____::51c7fc556e46381734a25a6fbc3fd398', -- University of Modena and Reggio Emilia
+ 'openorgs____::235d7f9ad18ecd7e6dc62ea4990cb9db', -- Bilkent University
+ 'openorgs____::31f2fa9e05b49d4cf40a19c3fed8eb06', -- Saints Cyril and Methodius University of Skopje
+ 'openorgs____::db7686f30f22cbe73a4fde872ce812a6', -- University of Milan
+ 'openorgs____::b8b8ca674452579f3f593d9f5e557483', -- University College Cork
+ 'openorgs____::38d7097854736583dde879d12dacafca', -- Brown University
+ 'openorgs____::57784c9e047e826fefdb1ef816120d92', --Arts et Métiers ParisTech
+ 'openorgs____::2530baca8a15936ba2e3297f2bce2e7e', -- University of Cape Town
+ 'openorgs____::d11f981828c485cd23d93f7f24f24db1', -- Technological University Dublin
+ 'openorgs____::5e6bf8962665cdd040341171e5c631d8', -- Delft University of Technology
+ 'openorgs____::846cb428d3f52a445f7275561a7beb5d', -- University of Manitoba
+ 'openorgs____::eb391317ed0dc684aa81ac16265de041', -- Universitat Rovira i Virgili
+ 'openorgs____::66aa9fc2fceb271423dfabcc38752dc0', -- Lund University
+ 'openorgs____::3cff625a4370d51e08624cc586138b2f' -- IMT Atlantique
+ ))) foo;
+-- Compute table statistics for the table created above.
+ANALYZE TABLE TARGET.result COMPUTE STATISTICS;
\ No newline at end of file
diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/workflow.xml b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/workflow.xml
index 68ef4595e..2ab50fb29 100644
--- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/workflow.xml
@@ -374,25 +374,29 @@
${monitor_db_name}
${monitor_db_shadow_name}
${wf:appPath()}/scripts/step20-createMonitorDB.sql
+ ${wf:appPath()}/scripts/step20-createMonitorDB_funded.sql
+ ${wf:appPath()}/scripts/step20-createMonitorDB_institutions.sql
+ ${wf:appPath()}/scripts/step20-createMonitorDB_RIs.sql
+ ${wf:appPath()}/scripts/step20-createMonitorDB_RIs_tail.sql
monitor.sh
-
-
-
-
-
-
- ${jobTracker}
- ${nameNode}
- monitor-post.sh
- ${monitor_db_name}
- ${monitor_db_shadow_name}
- monitor-post.sh
-
+
+
+
+
+
+
+
+
+
+
+
+
+
${jobTracker}