diff --git a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/IndexRecordTransformerTest.java b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/IndexRecordTransformerTest.java
index cd07cfcb1d..8daf318bea 100644
--- a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/IndexRecordTransformerTest.java
+++ b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/IndexRecordTransformerTest.java
@@ -76,6 +76,18 @@ public class IndexRecordTransformerTest {
testRecordTransformation(record);
}
+	/** Runs the index transformation on the EOSC Future "data-transfer-pilot" fixture; the record is decoded as UTF-8 instead of the platform default charset because it contains non-ASCII author names. */
+	@Test
+	public void testForEOSCFutureDataTransferPilot() throws IOException, TransformerException {
+		testRecordTransformation(IOUtils.toString(getClass().getResourceAsStream("eosc-future/data-transfer-pilot.xml"), java.nio.charset.StandardCharsets.UTF_8));
+	}
+
+	/** Runs the index transformation on the EOSC Future "training-notebooks-seadatanet" fixture; the record is decoded as UTF-8 instead of the platform default charset because it contains non-ASCII author names. */
+	@Test
+	public void testForEOSCFutureTraining() throws IOException, TransformerException {
+		testRecordTransformation(IOUtils.toString(getClass().getResourceAsStream("eosc-future/training-notebooks-seadatanet.xml"), java.nio.charset.StandardCharsets.UTF_8));
+	}
+
private void testRecordTransformation(final String record) throws IOException, TransformerException {
final String fields = IOUtils.toString(getClass().getResourceAsStream("fields.xml"));
final String xslt = IOUtils.toString(getClass().getResourceAsStream("layoutToRecordTransformer.xsl"));
diff --git a/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/eosc-future/data-transfer-pilot.xml b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/eosc-future/data-transfer-pilot.xml
new file mode 100644
index 0000000000..23dd6c6ed0
--- /dev/null
+++ b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/eosc-future/data-transfer-pilot.xml
@@ -0,0 +1,72 @@
+
+
+
+ r37b0ad08687::dec0d8520e726f2adda9a51280ac7299
+ 2021-09-22T08:53:16Z
+ under curation
+
+
+
+
+
+ EGI-Foundation/data-transfer-pilot: Include libraries in environment.yml
+ Giuseppe La Rocca
+ Enol Fernández
+ Andrea Manzi
+
+
+
+ This notebook is used to demonstrate how a scientist from one of the PaNOSC RIs can use the resources provided by EGI to perform analysis on the data sets obtained during an expirement.
+
+ EOSC Jupyter Notebook
+
+ Zenodo
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ oai:zenodo.org:4218562
+
+ oai:zenodo.org:4218562
+ 10.5281/zenodo.4218562
+
+
+ false
+ false
+ 0.9
+
+
+
+
+
+
+
+
+
+
+
+
+ https://zenodo.org/record/4218562
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/eosc-future/training-notebooks-seadatanet.xml b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/eosc-future/training-notebooks-seadatanet.xml
new file mode 100644
index 0000000000..9995b902f6
--- /dev/null
+++ b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/eosc-future/training-notebooks-seadatanet.xml
@@ -0,0 +1,71 @@
+
+
+
+ r37b0ad08687::eb430fb7438e1533ba95d6aa50a477eb
+ 2021-09-22T08:53:13Z
+ under curation
+
+
+
+
+
+
+ EGI-Foundation/training-notebooks-seadatanet: Version 0.4
+ Enol Fernández
+
+
+
+ A sample notebook using SeaDataNet data to plot a map that shows surface temperature of Black Sea, Arctic Sea and Baltic Sea. The data is available at EGI DataHub with PID http://hdl.handle.net/21.T15999/qVk6JWQ (run at EGI Notebooks service for easy access to data).This release updates the PID for the data.
+
+ EOSC Jupyter Notebook
+
+ Zenodo
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ oai:zenodo.org:3561323
+
+ oai:zenodo.org:3561323
+ 10.5281/zenodo.3561323
+
+
+ false
+ false
+ 0.9
+
+
+
+
+
+
+
+
+
+
+
+
+ https://zenodo.org/record/3561323
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/fields.xml b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/fields.xml
index c47975c9d9..910a366f6d 100644
--- a/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/fields.xml
+++ b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/fields.xml
@@ -15,7 +15,13 @@
-
+
+
+
+
+
+
+
@@ -28,7 +34,8 @@
-
+
+
@@ -79,6 +86,7 @@
+
@@ -105,7 +113,7 @@
-
+
@@ -130,7 +138,7 @@
-
+
diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/contexts.sh b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/contexts.sh
index 6c5823b0ca..6d42ab13d4 100644
--- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/contexts.sh
+++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/contexts.sh
@@ -16,7 +16,7 @@ curl -L ${CONTEXT_API}/contexts/?type=ri,community -H "accept: application/json"
cat contexts.csv | cut -d , -f1 | xargs -I {} curl -L ${CONTEXT_API}/context/{}/?all=true | /usr/local/sbin/jq -r '.[]|"\(.id|split(":")[0]),\(.id),\(.label)"' > categories.csv
cat categories.csv | cut -d , -f2 | sed 's/:/%3A/g'| xargs -I {} curl -L ${CONTEXT_API}/context/category/{}/?all=true | /usr/local/sbin/jq -r '.[]|"\(.id|split("::")[0])::\(.id|split("::")[1]),\(.id),\(.label)"' > concepts.csv
cat contexts.csv | sed 's/^\(.*\),\(.*\)/\1,\1::other,\2/' >> categories.csv
-cat categories.csv | grep -v ::other | sed 's/^.*,\(.*\),\(.*\)/\1,\1::other,\2/' >> concepts.csv
+sed 's/^.*,\(.*\),\(.*\)/\1,\1::other,\2/' < categories.csv >> concepts.csv # every category row (the "<context>::other" rows appended above included) yields a catch-all "<category>::other" concept
echo "uploading context data to hdfs"
hdfs dfs -mkdir ${TMP}
diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/indicators.sh b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/indicators.sh
index fb944f4ffb..93faa43d63 100644
--- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/indicators.sh
+++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/indicators.sh
@@ -15,5 +15,5 @@ hdfs dfs -copyToLocal $SCRIPT_PATH
echo "Creating indicators"
impala-shell -q "invalidate metadata"
impala-shell -d ${TARGET} -q "show tables" --delimited | sed "s/^\(.*\)/compute stats ${TARGET}.\1;/" | impala-shell -c -f -
-cat step16_7-createIndicatorsTables.sql | impala-shell -d $TARGET -f -
+impala-shell -d ${TARGET} -f - < step16-createIndicatorsTables.sql # run the indicators script on stdin with ${TARGET} as the selected database
echo "Indicators created"
\ No newline at end of file
diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/observatory.sh b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/observatory-post.sh
similarity index 58%
rename from dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/observatory.sh
rename to dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/observatory-post.sh
index ff03bca038..db8d39af2a 100644
--- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/observatory.sh
+++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/observatory-post.sh
@@ -9,16 +9,9 @@ fi
export SOURCE=$1
export TARGET=$2
export SHADOW=$3
-export SCRIPT_PATH=$4
-echo "Getting file from " $4
-hdfs dfs -copyToLocal $4
-
-echo "Creating observatory database"
-impala-shell -q "drop database if exists ${TARGET} cascade"
-impala-shell -q "create database if not exists ${TARGET}"
-impala-shell -d ${SOURCE} -q "show tables" --delimited | sed "s/\(.*\)/create view ${TARGET}.\1 as select * from ${SOURCE}.\1;/" | impala-shell -f -
-cat step21-createObservatoryDB.sql | sed s/SOURCE/$1/g | sed s/TARGET/$2/g1 | impala-shell -f -
+impala-shell -q "invalidate metadata;"
+impala-shell -d ${TARGET} -q "show tables" --delimited | sed "s/\(.*\)/compute stats ${TARGET}.\1;/" | impala-shell -c -f - # -c (ignore query failure), as in indicators.sh: otherwise the batch stops at the first statement that fails, e.g. compute stats on a view
echo "Impala shell finished"
echo "Updating shadow observatory database"
diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/observatory-pre.sh b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/observatory-pre.sh
new file mode 100644
index 0000000000..92543b8b8c
--- /dev/null
+++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/observatory-pre.sh
@@ -0,0 +1,16 @@
+export PYTHON_EGG_CACHE=/home/$(whoami)/.python-eggs
+export link_folder=/tmp/impala-shell-python-egg-cache-$(whoami)
+if ! [ -L $link_folder ]
+then
+ rm -Rf "$link_folder"
+ ln -sfn ${PYTHON_EGG_CACHE}${link_folder} ${link_folder}
+fi
+
+export SOURCE=$1
+export TARGET=$2
+export SHADOW=$3
+
+echo "Creating observatory database"
+impala-shell -q "drop database if exists ${TARGET} cascade"
+impala-shell -q "create database if not exists ${TARGET}"
+impala-shell -d ${SOURCE} -q "show tables" --delimited | sed "s/\(.*\)/create view ${TARGET}.\1 as select * from ${SOURCE}.\1;/" | impala-shell -f -
\ No newline at end of file
diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step10.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step10.sql
index 77fbd3b18d..fc0162a9c5 100644
--- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step10.sql
+++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step10.sql
@@ -23,6 +23,11 @@ CREATE OR REPLACE VIEW ${stats_db_name}.rndexpediture AS
SELECT *
FROM ${external_stats_db_name}.rndexpediture;
+-- Pass-through view exposing ${external_stats_db_name}.licenses_normalized inside the stats db
+CREATE OR REPLACE VIEW ${stats_db_name}.licenses_normalized AS
+SELECT *
+FROM ${external_stats_db_name}.licenses_normalized;
+
------------------------------------------------------------------------------------------------
------------------------------------------------------------------------------------------------
-- Creation date of the database
diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16_6.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step15_5.sql
similarity index 100%
rename from dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16_6.sql
rename to dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step15_5.sql
diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16_7-createIndicatorsTables.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql
similarity index 100%
rename from dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16_7-createIndicatorsTables.sql
rename to dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql
diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16.sql
deleted file mode 100644
index 481fd9e8c2..0000000000
--- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16.sql
+++ /dev/null
@@ -1,62 +0,0 @@
-----------------------------------------------------
--- Shortcuts for various definitions in stats db ---
-----------------------------------------------------
-
--- Peer reviewed:
--- Results that have been collected from Crossref
-create table ${stats_db_name}.result_peerreviewed as
-with peer_reviewed as (
- select distinct r.id as id
- from ${stats_db_name}.result r
- join ${stats_db_name}.result_sources rs on rs.id=r.id
- join ${stats_db_name}.datasource d on d.id=rs.datasource
- where d.name='Crossref')
-select distinct peer_reviewed.id as id, true as peer_reviewed
-from peer_reviewed
-union all
-select distinct r.id as id, false as peer_reviewed
-from ${stats_db_name}.result r
-left outer join peer_reviewed pr on pr.id=r.id
-where pr.id is null;
-
--- Green OA:
--- OA results that are hosted by an Institutional repository and have NOT been harvested from a DOAJ journal.
-create table ${stats_db_name}.result_greenoa as
-with result_green as (
- select distinct r.id as id
- from ${stats_db_name}.result r
- join ${stats_db_name}.result_datasources rd on rd.id=r.id
- join ${stats_db_name}.datasource d on d.id=rd.datasource
- left outer join (
- select rd.id from ${stats_db_name}.result_datasources rd
- join ${stats_db_name}.datasource d on rd.datasource=d.id
- join ${stats_db_name}.datasource_sources sds on sds.id=d.id
- join ${stats_db_name}.datasource sd on sd.id=sds.datasource
- where sd.name='DOAJ-ARTICLES'
- ) as doaj on doaj.id=r.id
- where r.bestlicence in ('Open Access', 'Open Source') and d.type='Institutional Repository' and doaj.id is null)
-select distinct result_green.id, true as green
-from result_green
-union all
-select distinct r.id as id, false as green
-from ${stats_db_name}.result r
-left outer join result_green rg on rg.id=r.id
-where rg.id is null;
-
--- GOLD OA:
--- OA results that have been harvested from a DOAJ journal.
-create table ${stats_db_name}.result_gold as
-with result_gold as (
- select distinct r.id as id
- from ${stats_db_name}.result r
- join ${stats_db_name}.result_datasources rd on rd.id=r.id
- join ${stats_db_name}.datasource d on d.id=rd.datasource
- join ${stats_db_name}.datasource_sources sds on sds.id=d.id
- join ${stats_db_name}.datasource sd on sd.id=sds.datasource
- where r.type='publication' and r.bestlicence='Open Access' and sd.name='DOAJ-Articles')
-select distinct result_gold.id, true as gold
-from result_gold
-union all
-select distinct r.id, false as gold
-from ${stats_db_name}.result r
-where r.id not in (select id from result_gold);
\ No newline at end of file
diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16_1-definitions.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16_1-definitions.sql
new file mode 100644
index 0000000000..6b4d9b1b0a
--- /dev/null
+++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16_1-definitions.sql
@@ -0,0 +1,22 @@
+----------------------------------------------------
+-- Shortcuts for various definitions in stats db ---
+----------------------------------------------------
+
+-- Peer reviewed: one row per result; true only when indi_pub_doi_from_crossref.doi_from_crossref=1 and indi_pub_grey_lit.grey_lit=0. Results missing from either indicator table (left outer join -> NULL) get false.
+create table ${stats_db_name}.result_peerreviewed as
+select r.id as id, case when doi.doi_from_crossref=1 and grey.grey_lit=0 then true else false end as peer_reviewed
+from ${stats_db_name}.result r
+left outer join ${stats_db_name}.indi_pub_doi_from_crossref doi on doi.id=r.id
+left outer join ${stats_db_name}.indi_pub_grey_lit grey on grey.id=r.id;
+
+-- Green OA: true when indi_pub_green_oa.green_oa=1; false otherwise, including results with no row in the indicator table.
+create table ${stats_db_name}.result_greenoa as
+select r.id, case when green.green_oa=1 then true else false end as green
+from ${stats_db_name}.result r
+left outer join ${stats_db_name}.indi_pub_green_oa green on green.id=r.id;
+
+-- GOLD OA: true when indi_pub_gold_oa.gold_oa=1; false otherwise, including results with no row in the indicator table.
+create table ${stats_db_name}.result_gold as
+select r.id, case when gold.gold_oa=1 then true else false end as gold
+from ${stats_db_name}.result r
+ left outer join ${stats_db_name}.indi_pub_gold_oa gold on gold.id=r.id;
\ No newline at end of file
diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql
index 5da0283047..9ea50d4886 100644
--- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql
+++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql
@@ -104,25 +104,42 @@ create table TARGET.project_results as select id as result, project as id from T
compute stats TARGET.project_results;
-- indicators
-create table TARGET.indi_pub_green_oa as select * from SOURCE.indi_pub_green_oa orig where exists (select 1 from TARGET.result r where r.id=orig.id);
-compute stats TARGET.indi_pub_green_oa;
-
-create table TARGET.indi_pub_grey_lit as select * from SOURCE.indi_pub_grey_lit orig where exists (select 1 from TARGET.result r where r.id=orig.id);
-compute stats TARGET.indi_pub_grey_lit;
-
-create table TARGET.indi_pub_doi_from_crossref as select * from SOURCE.indi_pub_doi_from_crossref orig where exists (select 1 from TARGET.result r where r.id=orig.id);
-compute stats TARGET.indi_pub_doi_from_crossref;
-
-create table TARGET.indi_pub_gold_oa as select * from SOURCE.indi_pub_gold_oa orig where exists (select 1 from TARGET.result r where r.id=orig.id);
-compute stats TARGET.indi_pub_gold_oa;
-
+create view TARGET.indi_dataset_avg_year_content_oa as select * from SOURCE.indi_dataset_avg_year_content_oa orig;
+create view TARGET.indi_dataset_avg_year_context_oa as select * from SOURCE.indi_dataset_avg_year_context_oa orig;
create view TARGET.indi_dataset_avg_year_country_oa as select * from SOURCE.indi_dataset_avg_year_country_oa orig;
+-- indi_other_*: pass-through views over the corresponding source tables (no filtering on the monitor result set)
+create view TARGET.indi_other_avg_year_content_oa as select * from SOURCE.indi_other_avg_year_content_oa orig;
+create view TARGET.indi_other_avg_year_context_oa as select * from SOURCE.indi_other_avg_year_context_oa orig;
+create view TARGET.indi_other_avg_year_country_oa as select * from SOURCE.indi_other_avg_year_country_oa orig;
+
create view TARGET.indi_project_datasets_count as select * from SOURCE.indi_project_datasets_count orig;
create view TARGET.indi_project_otherresearch_count as select * from SOURCE.indi_project_otherresearch_count orig;
create view TARGET.indi_project_pubs_count as select * from SOURCE.indi_project_pubs_count orig;
create view TARGET.indi_project_software_count as select * from SOURCE.indi_project_software_count orig;
+-- indi_pub_*: the avg_year views are pass-through; the tables after them keep only rows whose id exists in the monitor result table, and stats are computed right after each one is created
+create view TARGET.indi_pub_avg_year_content_oa as select * from SOURCE.indi_pub_avg_year_content_oa orig;
+create view TARGET.indi_pub_avg_year_context_oa as select * from SOURCE.indi_pub_avg_year_context_oa orig;
create view TARGET.indi_pub_avg_year_country_oa as select * from SOURCE.indi_pub_avg_year_country_oa orig;
+create table TARGET.indi_pub_green_oa as select * from SOURCE.indi_pub_green_oa orig where exists (select 1 from TARGET.result r where r.id=orig.id);
+compute stats TARGET.indi_pub_green_oa;
+create table TARGET.indi_pub_grey_lit as select * from SOURCE.indi_pub_grey_lit orig where exists (select 1 from TARGET.result r where r.id=orig.id);
+compute stats TARGET.indi_pub_grey_lit;
+create table TARGET.indi_pub_doi_from_crossref as select * from SOURCE.indi_pub_doi_from_crossref orig where exists (select 1 from TARGET.result r where r.id=orig.id);
+compute stats TARGET.indi_pub_doi_from_crossref;
+create table TARGET.indi_pub_gold_oa as select * from SOURCE.indi_pub_gold_oa orig where exists (select 1 from TARGET.result r where r.id=orig.id);
+compute stats TARGET.indi_pub_gold_oa;
+create table TARGET.indi_pub_has_abstract as select * from SOURCE.indi_pub_has_abstract orig where exists (select 1 from TARGET.result r where r.id=orig.id);
+compute stats TARGET.indi_pub_has_abstract;
+create table TARGET.indi_pub_has_cc_licence as select * from SOURCE.indi_pub_has_cc_licence orig where exists (select 1 from TARGET.result r where r.id=orig.id);
+compute stats TARGET.indi_pub_has_cc_licence;
+create table TARGET.indi_pub_has_cc_licence_url as select * from SOURCE.indi_pub_has_cc_licence_url orig where exists (select 1 from TARGET.result r where r.id=orig.id);
+compute stats TARGET.indi_pub_has_cc_licence_url;
+-- indi_software_*: pass-through views over the corresponding source tables (no filtering on the monitor result set)
+create view TARGET.indi_software_avg_year_content_oa as select * from SOURCE.indi_software_avg_year_content_oa orig;
+create view TARGET.indi_software_avg_year_context_oa as select * from SOURCE.indi_software_avg_year_context_oa orig;
+create view TARGET.indi_software_avg_year_country_oa as select * from SOURCE.indi_software_avg_year_country_oa orig;
+
--denorm
alter table TARGET.result rename to TARGET.res_tmp;
diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step21-createObservatoryDB.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step21-createObservatoryDB.sql
index 40cdf3f6d9..e24370e7d3 100644
--- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step21-createObservatoryDB.sql
+++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step21-createObservatoryDB.sql
@@ -1,259 +1,561 @@
-create table TARGET.result_affiliated_country stored as parquet as
-select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence,
- case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa,
- r.peer_reviewed, r.type, c.code as ccode, c.name as cname
-from SOURCE.result r
-join SOURCE.result_organization ro on ro.id=r.id
-join SOURCE.organization o on o.id=ro.organization
-join SOURCE.country c on c.code=o.country and c.continent_name='Europe'
-left outer join SOURCE.result_licenses rl on rl.id=r.id
-left outer join SOURCE.result_pids pids on pids.id=r.id
-group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, c.code, c.name;
+-- One row per result: cc_licence is true when at least one of its licences maps to a normalized value starting with 'cc-' (case-insensitive); results without licences get false.
+create table ${observatory_db_name}.result_cc_licence stored as parquet as
+select r.id, coalesce(cc.cc_count, 0) > 0 as cc_licence
+from ${stats_db_name}.result r
+ left outer join (
+ select rl.id, sum(case when lower(norm.normalized) like 'cc-%' then 1 else 0 end) as cc_count
+ from ${stats_db_name}.result_licenses rl left outer join ${stats_db_name}.licenses_normalized norm on rl.type=norm.license
+ group by rl.id
+) cc on cc.id=r.id;
-create table TARGET.result_affiliated_year stored as parquet as
-select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence,
- case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, r.peer_reviewed, r.type, r.year
-from SOURCE.result r
-join SOURCE.result_organization ro on ro.id=r.id
-join SOURCE.organization o on o.id=ro.organization
-join SOURCE.country c on c.code=o.country and c.continent_name='Europe'
-left outer join SOURCE.result_licenses rl on rl.id=r.id
-left outer join SOURCE.result_pids pids on pids.id=r.id
-group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, r.year;
+-- Distinct-result counts for results linked (via result_organization) to organizations in European countries, per country and per combination of the flags below. multiple_projects / multiple_funders are NULL when the result has no row in result_projectcount / result_fundercount.
+create table ${observatory_db_name}.result_affiliated_country stored as parquet as
+select count(distinct r.id) as total,
+ r.green,
+ r.gold,
+ case when rl.type is not null then true else false end as licence,
+ case when pids.pid is not null then true else false end as pid,
+ case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa,
+ r.peer_reviewed,
+ rln.cc_licence,
+ r.abstract as abstract,
+ r.authors > 1 as multiple_authors,
+ rpc.count > 1 as multiple_projects,
+ rfc.count > 1 as multiple_funders,
+ r.type,
+ c.code as ccode, c.name as cname
+from ${stats_db_name}.result r
+ join ${stats_db_name}.result_organization ro on ro.id=r.id
+ join ${stats_db_name}.organization o on o.id=ro.organization
+ join ${stats_db_name}.country c on c.code=o.country and c.continent_name='Europe'
+ left outer join ${stats_db_name}.result_licenses rl on rl.id=r.id
+ left outer join ${stats_db_name}.result_pids pids on pids.id=r.id
+ left outer join ${observatory_db_name}.result_cc_licence rln on rln.id=r.id
+ left outer join ${stats_db_name}.result_projectcount rpc on rpc.id=r.id
+ left outer join ${stats_db_name}.result_fundercount rfc on rfc.id=r.id
+group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end,
+ case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract,
+ cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, c.code, c.name;
-create table TARGET.result_affiliated_year_country stored as parquet as
-select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence,
- case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa,
- r.peer_reviewed, r.type, r.year, c.code as ccode, c.name as cname
-from SOURCE.result r
-join SOURCE.result_organization ro on ro.id=r.id
-join SOURCE.organization o on o.id=ro.organization
-join SOURCE.country c on c.code=o.country and c.continent_name='Europe'
-left outer join SOURCE.result_licenses rl on rl.id=r.id
-left outer join SOURCE.result_pids pids on pids.id=r.id
-group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, r.year, c.code, c.name;
+-- Distinct-result counts for results linked (via result_organization) to organizations in European countries, per year and per combination of the flags below. multiple_projects / multiple_funders are NULL when the result has no row in result_projectcount / result_fundercount.
+create table ${observatory_db_name}.result_affiliated_year stored as parquet as
+select count(distinct r.id) as total,
+ r.green,
+ r.gold,
+ case when rl.type is not null then true else false end as licence,
+ case when pids.pid is not null then true else false end as pid,
+ case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa,
+ r.peer_reviewed,
+ rln.cc_licence,
+ r.abstract as abstract,
+ r.authors > 1 as multiple_authors,
+ rpc.count > 1 as multiple_projects,
+ rfc.count > 1 as multiple_funders,
+ r.type,
+ r.year
+from ${stats_db_name}.result r
+ join ${stats_db_name}.result_organization ro on ro.id=r.id
+ join ${stats_db_name}.organization o on o.id=ro.organization
+ join ${stats_db_name}.country c on c.code=o.country and c.continent_name='Europe'
+ left outer join ${stats_db_name}.result_licenses rl on rl.id=r.id
+ left outer join ${stats_db_name}.result_pids pids on pids.id=r.id
+ left outer join ${observatory_db_name}.result_cc_licence rln on rln.id=r.id
+ left outer join ${stats_db_name}.result_projectcount rpc on rpc.id=r.id
+ left outer join ${stats_db_name}.result_fundercount rfc on rfc.id=r.id
+group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end,
+ case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract,
+ cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, r.year;
-create table TARGET.result_affiliated_datasource stored as parquet as
-select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence,
- case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, r.peer_reviewed, r.type, d.name as dname
-from SOURCE.result r
-join SOURCE.result_organization ro on ro.id=r.id
-join SOURCE.organization o on o.id=ro.organization
-join SOURCE.country c on c.code=o.country and c.continent_name='Europe'
-left outer join SOURCE.result_datasources rd on rd.id=r.id
-left outer join SOURCE.datasource d on d.id=rd.datasource
-left outer join SOURCE.result_licenses rl on rl.id=r.id
-left outer join SOURCE.result_pids pids on pids.id=r.id
-group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, d.name;
+-- Distinct-result counts for results linked (via result_organization) to organizations in European countries, per year and country and per combination of the flags below. multiple_projects / multiple_funders are NULL when the result has no row in result_projectcount / result_fundercount.
+create table ${observatory_db_name}.result_affiliated_year_country stored as parquet as
+select count(distinct r.id) as total,
+ r.green,
+ r.gold,
+ case when rl.type is not null then true else false end as licence,
+ case when pids.pid is not null then true else false end as pid,
+ case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa,
+ r.peer_reviewed,
+ rln.cc_licence,
+ r.abstract as abstract,
+ r.authors > 1 as multiple_authors,
+ rpc.count > 1 as multiple_projects,
+ rfc.count > 1 as multiple_funders,
+ r.type,
+ r.year, c.code as ccode, c.name as cname
+from ${stats_db_name}.result r
+ join ${stats_db_name}.result_organization ro on ro.id=r.id
+ join ${stats_db_name}.organization o on o.id=ro.organization
+ join ${stats_db_name}.country c on c.code=o.country and c.continent_name='Europe'
+ left outer join ${stats_db_name}.result_licenses rl on rl.id=r.id
+ left outer join ${stats_db_name}.result_pids pids on pids.id=r.id
+ left outer join ${observatory_db_name}.result_cc_licence rln on rln.id=r.id
+ left outer join ${stats_db_name}.result_projectcount rpc on rpc.id=r.id
+ left outer join ${stats_db_name}.result_fundercount rfc on rfc.id=r.id
+group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end,
+ case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract,
+ cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, r.year, c.code, c.name;
-create table TARGET.result_affiliated_datasource_country stored as parquet as
-select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence,
- case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa,
- r.peer_reviewed, r.type, d.name as dname, c.code as ccode, c.name as cname
-from SOURCE.result r
-join SOURCE.result_organization ro on ro.id=r.id
-join SOURCE.organization o on o.id=ro.organization
-join SOURCE.country c on c.code=o.country and c.continent_name='Europe'
-left outer join SOURCE.result_datasources rd on rd.id=r.id
-left outer join SOURCE.datasource d on d.id=rd.datasource
-left outer join SOURCE.result_licenses rl on rl.id=r.id
-left outer join SOURCE.result_pids pids on pids.id=r.id
-group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, d.name, c.code, c.name;
+-- Distinct-result counts for results linked (via result_organization) to organizations in European countries, per datasource name (NULL when the result has no datasource row) and per combination of the flags below. multiple_projects / multiple_funders are NULL when the result has no row in result_projectcount / result_fundercount.
+create table ${observatory_db_name}.result_affiliated_datasource stored as parquet as
+select count(distinct r.id) as total,
+ r.green,
+ r.gold,
+ case when rl.type is not null then true else false end as licence,
+ case when pids.pid is not null then true else false end as pid,
+ case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa,
+ r.peer_reviewed,
+ rln.cc_licence,
+ r.abstract as abstract,
+ r.authors > 1 as multiple_authors,
+ rpc.count > 1 as multiple_projects,
+ rfc.count > 1 as multiple_funders,
+ r.type,
+ d.name as dname
+from ${stats_db_name}.result r
+ join ${stats_db_name}.result_organization ro on ro.id=r.id
+ join ${stats_db_name}.organization o on o.id=ro.organization
+ join ${stats_db_name}.country c on c.code=o.country and c.continent_name='Europe'
+ left outer join ${stats_db_name}.result_datasources rd on rd.id=r.id
+ left outer join ${stats_db_name}.datasource d on d.id=rd.datasource
+ left outer join ${stats_db_name}.result_licenses rl on rl.id=r.id
+ left outer join ${stats_db_name}.result_pids pids on pids.id=r.id
+ left outer join ${observatory_db_name}.result_cc_licence rln on rln.id=r.id
+ left outer join ${stats_db_name}.result_projectcount rpc on rpc.id=r.id
+ left outer join ${stats_db_name}.result_fundercount rfc on rfc.id=r.id
+group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end,
+ case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract,
+ cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, d.name;
-create table TARGET.result_affiliated_organization stored as parquet as
-select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence,
- case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa,
- r.peer_reviewed, r.type, o.name as oname
-from SOURCE.result r
-join SOURCE.result_organization ro on ro.id=r.id
-join SOURCE.organization o on o.id=ro.organization
-join SOURCE.country c on c.code=o.country and c.continent_name='Europe'
-left outer join SOURCE.result_licenses rl on rl.id=r.id
-left outer join SOURCE.result_pids pids on pids.id=r.id
-group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, o.name;
+-- Builds result_affiliated_datasource_country: number of distinct results linked through
+-- result_organization to an organization whose country has continent_name Europe,
+-- broken down by datasource name (dname) and by that country (ccode, cname).
+-- Dimensions: green, gold, licence (a result_licenses row matched), pid (a result_pids row
+-- matched), oa (access_mode is Open Access or Open Source), peer_reviewed, cc_licence, abstract,
+-- multiple_authors / multiple_projects / multiple_funders (respective count > 1) and type.
+-- The datasource joins are left outer, so dname is NULL for results without a datasource match.
+-- count(distinct r.id) counts a result once per group even when the joins return several rows for it.
+-- NOTE(review): result_cc_licence, result_projectcount and result_fundercount are left outer joined,
+-- so cc_licence, multiple_projects and multiple_funders are NULL (not false) when no row matches - confirm.
+create table ${observatory_db_name}.result_affiliated_datasource_country stored as parquet as
+select
+ count(distinct r.id) as total,
+ r.green,
+ r.gold,
+ case when rl.type is not null then true else false end as licence,
+ case when pids.pid is not null then true else false end as pid,
+ case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa,
+ r.peer_reviewed,
+ rln.cc_licence,
+ r.abstract as abstract,
+ r.authors > 1 as multiple_authors,
+ rpc.count > 1 as multiple_projects,
+ rfc.count > 1 as multiple_funders,
+ r.type,
+ d.name as dname, c.code as ccode, c.name as cname
+from ${stats_db_name}.result r
+ join ${stats_db_name}.result_organization ro on ro.id=r.id
+ join ${stats_db_name}.organization o on o.id=ro.organization
+ join ${stats_db_name}.country c on c.code=o.country and c.continent_name='Europe'
+ left outer join ${stats_db_name}.result_datasources rd on rd.id=r.id
+ left outer join ${stats_db_name}.datasource d on d.id=rd.datasource
+ left outer join ${stats_db_name}.result_licenses rl on rl.id=r.id
+ left outer join ${stats_db_name}.result_pids pids on pids.id=r.id
+ left outer join ${observatory_db_name}.result_cc_licence rln on rln.id=r.id
+ left outer join ${stats_db_name}.result_projectcount rpc on rpc.id=r.id
+ left outer join ${stats_db_name}.result_fundercount rfc on rfc.id=r.id
+group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end,
+ case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract,
+ cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, d.name, c.code, c.name;
-create table TARGET.result_affiliated_organization_country stored as parquet as
-select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence,
- case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa,
- r.peer_reviewed, r.type, o.name as oname, c.code as ccode, c.name as cname
-from SOURCE.result r
-join SOURCE.result_organization ro on ro.id=r.id
-join SOURCE.organization o on o.id=ro.organization
-join SOURCE.country c on c.code=o.country and c.continent_name='Europe'
-left outer join SOURCE.result_licenses rl on rl.id=r.id
-left outer join SOURCE.result_pids pids on pids.id=r.id
-group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, o.name, c.code, c.name;
+-- Builds result_affiliated_organization: number of distinct results linked through
+-- result_organization to an organization whose country has continent_name Europe,
+-- broken down by organization name (oname).
+-- Dimensions: green, gold, licence (a result_licenses row matched), pid (a result_pids row
+-- matched), oa (access_mode is Open Access or Open Source), peer_reviewed, cc_licence, abstract,
+-- multiple_authors / multiple_projects / multiple_funders (respective count > 1) and type.
+-- The country join acts only as a filter here, no country column is emitted.
+-- count(distinct r.id) counts a result once per group even when the joins return several rows for it.
+-- NOTE(review): result_cc_licence, result_projectcount and result_fundercount are left outer joined,
+-- so cc_licence, multiple_projects and multiple_funders are NULL (not false) when no row matches - confirm.
+create table ${observatory_db_name}.result_affiliated_organization stored as parquet as
+select
+ count(distinct r.id) as total,
+ r.green,
+ r.gold,
+ case when rl.type is not null then true else false end as licence,
+ case when pids.pid is not null then true else false end as pid,
+ case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa,
+ r.peer_reviewed,
+ rln.cc_licence,
+ r.abstract as abstract,
+ r.authors > 1 as multiple_authors,
+ rpc.count > 1 as multiple_projects,
+ rfc.count > 1 as multiple_funders,
+ r.type,
+ o.name as oname
+from ${stats_db_name}.result r
+ join ${stats_db_name}.result_organization ro on ro.id=r.id
+ join ${stats_db_name}.organization o on o.id=ro.organization
+ join ${stats_db_name}.country c on c.code=o.country and c.continent_name='Europe'
+ left outer join ${stats_db_name}.result_licenses rl on rl.id=r.id
+ left outer join ${stats_db_name}.result_pids pids on pids.id=r.id
+ left outer join ${observatory_db_name}.result_cc_licence rln on rln.id=r.id
+ left outer join ${stats_db_name}.result_projectcount rpc on rpc.id=r.id
+ left outer join ${stats_db_name}.result_fundercount rfc on rfc.id=r.id
+group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end,
+ case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract,
+ cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, o.name;
-create table TARGET.result_affiliated_funder stored as parquet as
-select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence,
- case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, r.peer_reviewed, r.type, p.funder as pfunder
-from SOURCE.result r
-join SOURCE.result_organization ro on ro.id=r.id
-join SOURCE.organization o on o.id=ro.organization
-join SOURCE.country c on c.code=o.country and c.continent_name='Europe'
-join SOURCE.result_projects rp on rp.id=r.id
-join SOURCE.project p on p.id=rp.project
-left outer join SOURCE.result_licenses rl on rl.id=r.id
-left outer join SOURCE.result_pids pids on pids.id=r.id
-group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, p.funder;
+-- Builds result_affiliated_organization_country: number of distinct results linked through
+-- result_organization to an organization whose country has continent_name Europe,
+-- broken down by organization name (oname) and by that country (ccode, cname).
+-- Dimensions: green, gold, licence (a result_licenses row matched), pid (a result_pids row
+-- matched), oa (access_mode is Open Access or Open Source), peer_reviewed, cc_licence, abstract,
+-- multiple_authors / multiple_projects / multiple_funders (respective count > 1) and type.
+-- count(distinct r.id) counts a result once per group even when the joins return several rows for it.
+-- NOTE(review): result_cc_licence, result_projectcount and result_fundercount are left outer joined,
+-- so cc_licence, multiple_projects and multiple_funders are NULL (not false) when no row matches - confirm.
+create table ${observatory_db_name}.result_affiliated_organization_country stored as parquet as
+select
+ count(distinct r.id) as total,
+ r.green,
+ r.gold,
+ case when rl.type is not null then true else false end as licence,
+ case when pids.pid is not null then true else false end as pid,
+ case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa,
+ r.peer_reviewed,
+ rln.cc_licence,
+ r.abstract as abstract,
+ r.authors > 1 as multiple_authors,
+ rpc.count > 1 as multiple_projects,
+ rfc.count > 1 as multiple_funders,
+ r.type,
+ o.name as oname, c.code as ccode, c.name as cname
+from ${stats_db_name}.result r
+ join ${stats_db_name}.result_organization ro on ro.id=r.id
+ join ${stats_db_name}.organization o on o.id=ro.organization
+ join ${stats_db_name}.country c on c.code=o.country and c.continent_name='Europe'
+ left outer join ${stats_db_name}.result_licenses rl on rl.id=r.id
+ left outer join ${stats_db_name}.result_pids pids on pids.id=r.id
+ left outer join ${observatory_db_name}.result_cc_licence rln on rln.id=r.id
+ left outer join ${stats_db_name}.result_projectcount rpc on rpc.id=r.id
+ left outer join ${stats_db_name}.result_fundercount rfc on rfc.id=r.id
+group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end,
+ case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract,
+ cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, o.name, c.code, c.name;
-create table TARGET.result_affiliated_funder_country stored as parquet as
-select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence,
- case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa,
- r.peer_reviewed, r.type, p.funder as pfunder, c.code as ccode, c.name as cname
-from SOURCE.result r
-join SOURCE.result_organization ro on ro.id=r.id
-join SOURCE.organization o on o.id=ro.organization
-join SOURCE.country c on c.code=o.country and c.continent_name='Europe'
-join SOURCE.result_projects rp on rp.id=r.id
-join SOURCE.project p on p.id=rp.project
-left outer join SOURCE.result_licenses rl on rl.id=r.id
-left outer join SOURCE.result_pids pids on pids.id=r.id
-group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, p.funder, c.code, c.name;
+-- Builds result_affiliated_funder: number of distinct results linked through
+-- result_organization to an organization whose country has continent_name Europe,
+-- broken down by the funder of the linked project (pfunder).
+-- result_projects and project are inner joined, so results without a project link are excluded.
+-- Dimensions: green, gold, licence (a result_licenses row matched), pid (a result_pids row
+-- matched), oa (access_mode is Open Access or Open Source), peer_reviewed, cc_licence, abstract,
+-- multiple_authors / multiple_projects / multiple_funders (respective count > 1) and type.
+-- The country join acts only as a filter here, no country column is emitted.
+-- count(distinct r.id) counts a result once per group even when the joins return several rows for it.
+-- NOTE(review): result_cc_licence, result_projectcount and result_fundercount are left outer joined,
+-- so cc_licence, multiple_projects and multiple_funders are NULL (not false) when no row matches - confirm.
+create table ${observatory_db_name}.result_affiliated_funder stored as parquet as
+select
+ count(distinct r.id) as total,
+ r.green,
+ r.gold,
+ case when rl.type is not null then true else false end as licence,
+ case when pids.pid is not null then true else false end as pid,
+ case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa,
+ r.peer_reviewed,
+ rln.cc_licence,
+ r.abstract as abstract,
+ r.authors > 1 as multiple_authors,
+ rpc.count > 1 as multiple_projects,
+ rfc.count > 1 as multiple_funders,
+ r.type,
+ p.funder as pfunder
+from ${stats_db_name}.result r
+ join ${stats_db_name}.result_organization ro on ro.id=r.id
+ join ${stats_db_name}.organization o on o.id=ro.organization
+ join ${stats_db_name}.country c on c.code=o.country and c.continent_name='Europe'
+ join ${stats_db_name}.result_projects rp on rp.id=r.id
+ join ${stats_db_name}.project p on p.id=rp.project
+ left outer join ${stats_db_name}.result_licenses rl on rl.id=r.id
+ left outer join ${stats_db_name}.result_pids pids on pids.id=r.id
+ left outer join ${observatory_db_name}.result_cc_licence rln on rln.id=r.id
+ left outer join ${stats_db_name}.result_projectcount rpc on rpc.id=r.id
+ left outer join ${stats_db_name}.result_fundercount rfc on rfc.id=r.id
+group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end,
+ case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract,
+ cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, p.funder;
-create table TARGET.result_deposited_country stored as parquet as
-select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence,
- case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa,
- r.peer_reviewed, r.type, c.code as ccode, c.name as cname
-from SOURCE.result r
-join SOURCE.result_datasources rd on rd.id=r.id
-join SOURCE.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository')
-join SOURCE.datasource_organizations dor on dor.id=d.id
-join SOURCE.organization o on o.id=dor.organization
-join SOURCE.country c on c.code=o.country and c.continent_name='Europe'
-left outer join SOURCE.result_licenses rl on rl.id=r.id
-left outer join SOURCE.result_pids pids on pids.id=r.id
-group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, c.code, c.name;
+-- Builds result_affiliated_funder_country: number of distinct results linked through
+-- result_organization to an organization whose country has continent_name Europe,
+-- broken down by the funder of the linked project (pfunder) and by that country (ccode, cname).
+-- result_projects and project are inner joined, so results without a project link are excluded.
+-- Dimensions: green, gold, licence (a result_licenses row matched), pid (a result_pids row
+-- matched), oa (access_mode is Open Access or Open Source), peer_reviewed, cc_licence, abstract,
+-- multiple_authors / multiple_projects / multiple_funders (respective count > 1) and type.
+-- count(distinct r.id) counts a result once per group even when the joins return several rows for it.
+-- NOTE(review): result_cc_licence, result_projectcount and result_fundercount are left outer joined,
+-- so cc_licence, multiple_projects and multiple_funders are NULL (not false) when no row matches - confirm.
+create table ${observatory_db_name}.result_affiliated_funder_country stored as parquet as
+select
+ count(distinct r.id) as total,
+ r.green,
+ r.gold,
+ case when rl.type is not null then true else false end as licence,
+ case when pids.pid is not null then true else false end as pid,
+ case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa,
+ r.peer_reviewed,
+ rln.cc_licence,
+ r.abstract as abstract,
+ r.authors > 1 as multiple_authors,
+ rpc.count > 1 as multiple_projects,
+ rfc.count > 1 as multiple_funders,
+ r.type,
+ p.funder as pfunder, c.code as ccode, c.name as cname
+from ${stats_db_name}.result r
+ join ${stats_db_name}.result_organization ro on ro.id=r.id
+ join ${stats_db_name}.organization o on o.id=ro.organization
+ join ${stats_db_name}.country c on c.code=o.country and c.continent_name='Europe'
+ join ${stats_db_name}.result_projects rp on rp.id=r.id
+ join ${stats_db_name}.project p on p.id=rp.project
+ left outer join ${stats_db_name}.result_licenses rl on rl.id=r.id
+ left outer join ${stats_db_name}.result_pids pids on pids.id=r.id
+ left outer join ${observatory_db_name}.result_cc_licence rln on rln.id=r.id
+ left outer join ${stats_db_name}.result_projectcount rpc on rpc.id=r.id
+ left outer join ${stats_db_name}.result_fundercount rfc on rfc.id=r.id
+group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end,
+ case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract,
+ cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, p.funder, c.code, c.name;
-create table TARGET.result_deposited_year stored as parquet as
-select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence,
- case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, r.peer_reviewed, r.type, r.year
-from SOURCE.result r
-join SOURCE.result_datasources rd on rd.id=r.id
-join SOURCE.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository')
-join SOURCE.datasource_organizations dor on dor.id=d.id
-join SOURCE.organization o on o.id=dor.organization
-join SOURCE.country c on c.code=o.country and c.continent_name='Europe'
-left outer join SOURCE.result_licenses rl on rl.id=r.id
-left outer join SOURCE.result_pids pids on pids.id=r.id
-group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, r.year;
+-- Builds result_deposited_country: number of distinct results found (via result_datasources) in a
+-- datasource of type Institutional Repository, Data Repository, Repository or Publication Repository
+-- whose organization (via datasource_organizations) is in a country with continent_name Europe,
+-- broken down by that country (ccode, cname).
+-- Dimensions: green, gold, licence (a result_licenses row matched), pid (a result_pids row
+-- matched), oa (access_mode is Open Access or Open Source), peer_reviewed, cc_licence, abstract,
+-- multiple_authors / multiple_projects / multiple_funders (respective count > 1) and type.
+-- count(distinct r.id) counts a result once per group even when the joins return several rows for it.
+-- NOTE(review): result_cc_licence, result_projectcount and result_fundercount are left outer joined,
+-- so cc_licence, multiple_projects and multiple_funders are NULL (not false) when no row matches - confirm.
+create table ${observatory_db_name}.result_deposited_country stored as parquet as
+select
+ count(distinct r.id) as total,
+ r.green,
+ r.gold,
+ case when rl.type is not null then true else false end as licence,
+ case when pids.pid is not null then true else false end as pid,
+ case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa,
+ r.peer_reviewed,
+ rln.cc_licence,
+ r.abstract as abstract,
+ r.authors > 1 as multiple_authors,
+ rpc.count > 1 as multiple_projects,
+ rfc.count > 1 as multiple_funders,
+ r.type,
+ c.code as ccode, c.name as cname
+from ${stats_db_name}.result r
+ join ${stats_db_name}.result_datasources rd on rd.id=r.id
+ join ${stats_db_name}.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository')
+ join ${stats_db_name}.datasource_organizations dor on dor.id=d.id
+ join ${stats_db_name}.organization o on o.id=dor.organization
+ join ${stats_db_name}.country c on c.code=o.country and c.continent_name='Europe'
+ left outer join ${stats_db_name}.result_licenses rl on rl.id=r.id
+ left outer join ${stats_db_name}.result_pids pids on pids.id=r.id
+ left outer join ${observatory_db_name}.result_cc_licence rln on rln.id=r.id
+ left outer join ${stats_db_name}.result_projectcount rpc on rpc.id=r.id
+ left outer join ${stats_db_name}.result_fundercount rfc on rfc.id=r.id
+group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end,
+ case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract,
+ cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, c.code, c.name;
-create table TARGET.result_deposited_year_country stored as parquet as
-select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence,
- case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa,
- r.peer_reviewed, r.type, r.year, c.code as ccode, c.name as cname
-from SOURCE.result r
-join SOURCE.result_datasources rd on rd.id=r.id
-join SOURCE.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository')
-join SOURCE.datasource_organizations dor on dor.id=d.id
-join SOURCE.organization o on o.id=dor.organization
-join SOURCE.country c on c.code=o.country and c.continent_name='Europe'
-left outer join SOURCE.result_licenses rl on rl.id=r.id
-left outer join SOURCE.result_pids pids on pids.id=r.id
-group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, r.year, c.code, c.name;
+-- Builds result_deposited_year: number of distinct results found (via result_datasources) in a
+-- datasource of type Institutional Repository, Data Repository, Repository or Publication Repository
+-- whose organization (via datasource_organizations) is in a country with continent_name Europe,
+-- broken down by result year.
+-- Dimensions: green, gold, licence (a result_licenses row matched), pid (a result_pids row
+-- matched), oa (access_mode is Open Access or Open Source), peer_reviewed, cc_licence, abstract,
+-- multiple_authors / multiple_projects / multiple_funders (respective count > 1) and type.
+-- The datasource, organization and country joins act only as filters here, none of their columns is emitted.
+-- count(distinct r.id) counts a result once per group even when the joins return several rows for it.
+-- NOTE(review): result_cc_licence, result_projectcount and result_fundercount are left outer joined,
+-- so cc_licence, multiple_projects and multiple_funders are NULL (not false) when no row matches - confirm.
+create table ${observatory_db_name}.result_deposited_year stored as parquet as
+select
+ count(distinct r.id) as total,
+ r.green,
+ r.gold,
+ case when rl.type is not null then true else false end as licence,
+ case when pids.pid is not null then true else false end as pid,
+ case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa,
+ r.peer_reviewed,
+ rln.cc_licence,
+ r.abstract as abstract,
+ r.authors > 1 as multiple_authors,
+ rpc.count > 1 as multiple_projects,
+ rfc.count > 1 as multiple_funders,
+ r.type,
+ r.year
+from ${stats_db_name}.result r
+ join ${stats_db_name}.result_datasources rd on rd.id=r.id
+ join ${stats_db_name}.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository')
+ join ${stats_db_name}.datasource_organizations dor on dor.id=d.id
+ join ${stats_db_name}.organization o on o.id=dor.organization
+ join ${stats_db_name}.country c on c.code=o.country and c.continent_name='Europe'
+ left outer join ${stats_db_name}.result_licenses rl on rl.id=r.id
+ left outer join ${stats_db_name}.result_pids pids on pids.id=r.id
+ left outer join ${observatory_db_name}.result_cc_licence rln on rln.id=r.id
+ left outer join ${stats_db_name}.result_projectcount rpc on rpc.id=r.id
+ left outer join ${stats_db_name}.result_fundercount rfc on rfc.id=r.id
+group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end,
+ case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract,
+ cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, r.year;
-create table TARGET.result_deposited_datasource stored as parquet as
-select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence,
- case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa,
- r.peer_reviewed, r.type, d.name as dname
-from SOURCE.result r
-join SOURCE.result_datasources rd on rd.id=r.id
-join SOURCE.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository')
-join SOURCE.datasource_organizations dor on dor.id=d.id
-join SOURCE.organization o on o.id=dor.organization
-join SOURCE.country c on c.code=o.country and c.continent_name='Europe'
-left outer join SOURCE.result_licenses rl on rl.id=r.id
-left outer join SOURCE.result_pids pids on pids.id=r.id
-group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, d.name;
+-- Builds result_deposited_year_country: number of distinct results found (via result_datasources) in a
+-- datasource of type Institutional Repository, Data Repository, Repository or Publication Repository
+-- whose organization (via datasource_organizations) is in a country with continent_name Europe,
+-- broken down by result year and by that country (ccode, cname).
+-- Dimensions: green, gold, licence (a result_licenses row matched), pid (a result_pids row
+-- matched), oa (access_mode is Open Access or Open Source), peer_reviewed, cc_licence, abstract,
+-- multiple_authors / multiple_projects / multiple_funders (respective count > 1) and type.
+-- count(distinct r.id) counts a result once per group even when the joins return several rows for it.
+-- NOTE(review): result_cc_licence, result_projectcount and result_fundercount are left outer joined,
+-- so cc_licence, multiple_projects and multiple_funders are NULL (not false) when no row matches - confirm.
+create table ${observatory_db_name}.result_deposited_year_country stored as parquet as
+select
+ count(distinct r.id) as total,
+ r.green,
+ r.gold,
+ case when rl.type is not null then true else false end as licence,
+ case when pids.pid is not null then true else false end as pid,
+ case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa,
+ r.peer_reviewed,
+ rln.cc_licence,
+ r.abstract as abstract,
+ r.authors > 1 as multiple_authors,
+ rpc.count > 1 as multiple_projects,
+ rfc.count > 1 as multiple_funders,
+ r.type,
+ r.year, c.code as ccode, c.name as cname
+from ${stats_db_name}.result r
+ join ${stats_db_name}.result_datasources rd on rd.id=r.id
+ join ${stats_db_name}.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository')
+ join ${stats_db_name}.datasource_organizations dor on dor.id=d.id
+ join ${stats_db_name}.organization o on o.id=dor.organization
+ join ${stats_db_name}.country c on c.code=o.country and c.continent_name='Europe'
+ left outer join ${stats_db_name}.result_licenses rl on rl.id=r.id
+ left outer join ${stats_db_name}.result_pids pids on pids.id=r.id
+ left outer join ${observatory_db_name}.result_cc_licence rln on rln.id=r.id
+ left outer join ${stats_db_name}.result_projectcount rpc on rpc.id=r.id
+ left outer join ${stats_db_name}.result_fundercount rfc on rfc.id=r.id
+group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end,
+ case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract,
+ cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, r.year, c.code, c.name;
-create table TARGET.result_deposited_datasource_country stored as parquet as
-select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence,
- case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa,
- r.peer_reviewed, r.type, d.name as dname, c.code as ccode, c.name as cname
-from SOURCE.result r
-join SOURCE.result_datasources rd on rd.id=r.id
-join SOURCE.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository')
-join SOURCE.datasource_organizations dor on dor.id=d.id
-join SOURCE.organization o on o.id=dor.organization
-join SOURCE.country c on c.code=o.country and c.continent_name='Europe'
-left outer join SOURCE.result_licenses rl on rl.id=r.id
-left outer join SOURCE.result_pids pids on pids.id=r.id
-group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, d.name, c.code, c.name;
+-- Builds result_deposited_datasource: number of distinct results found (via result_datasources) in a
+-- datasource of type Institutional Repository, Data Repository, Repository or Publication Repository
+-- whose organization (via datasource_organizations) is in a country with continent_name Europe,
+-- broken down by datasource name (dname).
+-- Dimensions: green, gold, licence (a result_licenses row matched), pid (a result_pids row
+-- matched), oa (access_mode is Open Access or Open Source), peer_reviewed, cc_licence, abstract,
+-- multiple_authors / multiple_projects / multiple_funders (respective count > 1) and type.
+-- The organization and country joins act only as filters here, none of their columns is emitted.
+-- count(distinct r.id) counts a result once per group even when the joins return several rows for it.
+-- NOTE(review): result_cc_licence, result_projectcount and result_fundercount are left outer joined,
+-- so cc_licence, multiple_projects and multiple_funders are NULL (not false) when no row matches - confirm.
+create table ${observatory_db_name}.result_deposited_datasource stored as parquet as
+select
+ count(distinct r.id) as total,
+ r.green,
+ r.gold,
+ case when rl.type is not null then true else false end as licence,
+ case when pids.pid is not null then true else false end as pid,
+ case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa,
+ r.peer_reviewed,
+ rln.cc_licence,
+ r.abstract as abstract,
+ r.authors > 1 as multiple_authors,
+ rpc.count > 1 as multiple_projects,
+ rfc.count > 1 as multiple_funders,
+ r.type,
+ d.name as dname
+from ${stats_db_name}.result r
+ join ${stats_db_name}.result_datasources rd on rd.id=r.id
+ join ${stats_db_name}.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository')
+ join ${stats_db_name}.datasource_organizations dor on dor.id=d.id
+ join ${stats_db_name}.organization o on o.id=dor.organization
+ join ${stats_db_name}.country c on c.code=o.country and c.continent_name='Europe'
+ left outer join ${stats_db_name}.result_licenses rl on rl.id=r.id
+ left outer join ${stats_db_name}.result_pids pids on pids.id=r.id
+ left outer join ${observatory_db_name}.result_cc_licence rln on rln.id=r.id
+ left outer join ${stats_db_name}.result_projectcount rpc on rpc.id=r.id
+ left outer join ${stats_db_name}.result_fundercount rfc on rfc.id=r.id
+group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end,
+ case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract,
+ cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, d.name;
-create table TARGET.result_deposited_organization stored as parquet as
-select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence,
- case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, r.peer_reviewed, r.type, o.name as oname
-from SOURCE.result r
-join SOURCE.result_datasources rd on rd.id=r.id
-join SOURCE.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository')
-join SOURCE.datasource_organizations dor on dor.id=d.id
-join SOURCE.organization o on o.id=dor.organization
-join SOURCE.country c on c.code=o.country and c.continent_name='Europe'
-left outer join SOURCE.result_licenses rl on rl.id=r.id
-left outer join SOURCE.result_pids pids on pids.id=r.id
-group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, o.name;
+-- Builds result_deposited_datasource_country: number of distinct results found (via result_datasources)
+-- in a datasource of type Institutional Repository, Data Repository, Repository or Publication Repository
+-- whose organization (via datasource_organizations) is in a country with continent_name Europe,
+-- broken down by datasource name (dname) and by that country (ccode, cname).
+-- Dimensions: green, gold, licence (a result_licenses row matched), pid (a result_pids row
+-- matched), oa (access_mode is Open Access or Open Source), peer_reviewed, cc_licence, abstract,
+-- multiple_authors / multiple_projects / multiple_funders (respective count > 1) and type.
+-- count(distinct r.id) counts a result once per group even when the joins return several rows for it.
+-- NOTE(review): result_cc_licence, result_projectcount and result_fundercount are left outer joined,
+-- so cc_licence, multiple_projects and multiple_funders are NULL (not false) when no row matches - confirm.
+create table ${observatory_db_name}.result_deposited_datasource_country stored as parquet as
+select
+ count(distinct r.id) as total,
+ r.green,
+ r.gold,
+ case when rl.type is not null then true else false end as licence,
+ case when pids.pid is not null then true else false end as pid,
+ case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa,
+ r.peer_reviewed,
+ rln.cc_licence,
+ r.abstract as abstract,
+ r.authors > 1 as multiple_authors,
+ rpc.count > 1 as multiple_projects,
+ rfc.count > 1 as multiple_funders,
+ r.type,
+ d.name as dname, c.code as ccode, c.name as cname
+from ${stats_db_name}.result r
+ join ${stats_db_name}.result_datasources rd on rd.id=r.id
+ join ${stats_db_name}.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository')
+ join ${stats_db_name}.datasource_organizations dor on dor.id=d.id
+ join ${stats_db_name}.organization o on o.id=dor.organization
+ join ${stats_db_name}.country c on c.code=o.country and c.continent_name='Europe'
+ left outer join ${stats_db_name}.result_licenses rl on rl.id=r.id
+ left outer join ${stats_db_name}.result_pids pids on pids.id=r.id
+ left outer join ${observatory_db_name}.result_cc_licence rln on rln.id=r.id
+ left outer join ${stats_db_name}.result_projectcount rpc on rpc.id=r.id
+ left outer join ${stats_db_name}.result_fundercount rfc on rfc.id=r.id
+group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end,
+ case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract,
+ cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, d.name, c.code, c.name;
-create table TARGET.result_deposited_organization_country stored as parquet as
-select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence,
- case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa,
- r.peer_reviewed, r.type, o.name as oname, c.code as ccode, c.name as cname
-from SOURCE.result r
-join SOURCE.result_datasources rd on rd.id=r.id
-join SOURCE.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository')
-join SOURCE.datasource_organizations dor on dor.id=d.id
-join SOURCE.organization o on o.id=dor.organization
-join SOURCE.country c on c.code=o.country and c.continent_name='Europe'
-left outer join SOURCE.result_licenses rl on rl.id=r.id
-left outer join SOURCE.result_pids pids on pids.id=r.id
-group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, o.name, c.code, c.name;
+-- Builds result_deposited_organization: number of distinct results found (via result_datasources) in a
+-- datasource of type Institutional Repository, Data Repository, Repository or Publication Repository
+-- whose organization (via datasource_organizations) is in a country with continent_name Europe,
+-- broken down by the name of that datasource organization (oname).
+-- Dimensions: green, gold, licence (a result_licenses row matched), pid (a result_pids row
+-- matched), oa (access_mode is Open Access or Open Source), peer_reviewed, cc_licence, abstract,
+-- multiple_authors / multiple_projects / multiple_funders (respective count > 1) and type.
+-- The country join acts only as a filter here, no country column is emitted.
+-- count(distinct r.id) counts a result once per group even when the joins return several rows for it.
+-- NOTE(review): result_cc_licence, result_projectcount and result_fundercount are left outer joined,
+-- so cc_licence, multiple_projects and multiple_funders are NULL (not false) when no row matches - confirm.
+create table ${observatory_db_name}.result_deposited_organization stored as parquet as
+select
+ count(distinct r.id) as total,
+ r.green,
+ r.gold,
+ case when rl.type is not null then true else false end as licence,
+ case when pids.pid is not null then true else false end as pid,
+ case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa,
+ r.peer_reviewed,
+ rln.cc_licence,
+ r.abstract as abstract,
+ r.authors > 1 as multiple_authors,
+ rpc.count > 1 as multiple_projects,
+ rfc.count > 1 as multiple_funders,
+ r.type,
+ o.name as oname
+from ${stats_db_name}.result r
+ join ${stats_db_name}.result_datasources rd on rd.id=r.id
+ join ${stats_db_name}.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository')
+ join ${stats_db_name}.datasource_organizations dor on dor.id=d.id
+ join ${stats_db_name}.organization o on o.id=dor.organization
+ join ${stats_db_name}.country c on c.code=o.country and c.continent_name='Europe'
+ left outer join ${stats_db_name}.result_licenses rl on rl.id=r.id
+ left outer join ${stats_db_name}.result_pids pids on pids.id=r.id
+ left outer join ${observatory_db_name}.result_cc_licence rln on rln.id=r.id
+ left outer join ${stats_db_name}.result_projectcount rpc on rpc.id=r.id
+ left outer join ${stats_db_name}.result_fundercount rfc on rfc.id=r.id
+group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end,
+ case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract,
+ cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, o.name;
-create table TARGET.result_deposited_funder stored as parquet as
-select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence,
- case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa,
- r.peer_reviewed, r.type, p.funder as pfunder
-from SOURCE.result r
-join SOURCE.result_datasources rd on rd.id=r.id
-join SOURCE.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository')
-join SOURCE.datasource_organizations dor on dor.id=d.id
-join SOURCE.organization o on o.id=dor.organization
-join SOURCE.country c on c.code=o.country and c.continent_name='Europe'
-join SOURCE.result_projects rp on rp.id=r.id
-join SOURCE.project p on p.id=rp.project
-left outer join SOURCE.result_licenses rl on rl.id=r.id
-left outer join SOURCE.result_pids pids on pids.id=r.id
-group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, p.funder;
+-- Builds result_deposited_organization_country: number of distinct results found (via result_datasources)
+-- in a datasource of type Institutional Repository, Data Repository, Repository or Publication Repository
+-- whose organization (via datasource_organizations) is in a country with continent_name Europe,
+-- broken down by the name of that datasource organization (oname) and by its country (ccode, cname).
+-- Dimensions: green, gold, licence (a result_licenses row matched), pid (a result_pids row
+-- matched), oa (access_mode is Open Access or Open Source), peer_reviewed, cc_licence, abstract,
+-- multiple_authors / multiple_projects / multiple_funders (respective count > 1) and type.
+-- count(distinct r.id) counts a result once per group even when the joins return several rows for it.
+-- NOTE(review): result_cc_licence, result_projectcount and result_fundercount are left outer joined,
+-- so cc_licence, multiple_projects and multiple_funders are NULL (not false) when no row matches - confirm.
+create table ${observatory_db_name}.result_deposited_organization_country stored as parquet as
+select
+ count(distinct r.id) as total,
+ r.green,
+ r.gold,
+ case when rl.type is not null then true else false end as licence,
+ case when pids.pid is not null then true else false end as pid,
+ case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa,
+ r.peer_reviewed,
+ rln.cc_licence,
+ r.abstract as abstract,
+ r.authors > 1 as multiple_authors,
+ rpc.count > 1 as multiple_projects,
+ rfc.count > 1 as multiple_funders,
+ r.type,
+ o.name as oname, c.code as ccode, c.name as cname
+from ${stats_db_name}.result r
+ join ${stats_db_name}.result_datasources rd on rd.id=r.id
+ join ${stats_db_name}.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository')
+ join ${stats_db_name}.datasource_organizations dor on dor.id=d.id
+ join ${stats_db_name}.organization o on o.id=dor.organization
+ join ${stats_db_name}.country c on c.code=o.country and c.continent_name='Europe'
+ left outer join ${stats_db_name}.result_licenses rl on rl.id=r.id
+ left outer join ${stats_db_name}.result_pids pids on pids.id=r.id
+ left outer join ${observatory_db_name}.result_cc_licence rln on rln.id=r.id
+ left outer join ${stats_db_name}.result_projectcount rpc on rpc.id=r.id
+ left outer join ${stats_db_name}.result_fundercount rfc on rfc.id=r.id
+group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end,
+ case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract,
+ cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, o.name, c.code, c.name;
-create table TARGET.result_deposited_funder_country stored as parquet as
-select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence,
- case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa,
- r.peer_reviewed, r.type, p.funder as pfunder, c.code as ccode, c.name as cname
-from SOURCE.result r
-join SOURCE.result_datasources rd on rd.id=r.id
-join SOURCE.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository')
-join SOURCE.datasource_organizations dor on dor.id=d.id
-join SOURCE.organization o on o.id=dor.organization
-join SOURCE.country c on c.code=o.country and c.continent_name='Europe'
-join SOURCE.result_projects rp on rp.id=r.id
-join SOURCE.project p on p.id=rp.project
-left outer join SOURCE.result_licenses rl on rl.id=r.id
-left outer join SOURCE.result_pids pids on pids.id=r.id
-group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, p.funder, c.code, c.name;
+-- Builds ${observatory_db_name}.result_deposited_funder (parquet) from the stats DB.
+-- Each row is one combination of the indicator flags below, result type and funder (p.funder);
+-- `total` is the number of distinct result ids that fall into that combination.
+-- Only results linked to a datasource of one of the four listed repository types are considered, and only
+-- when that datasource's organization sits in a country with continent_name='Europe'. The organization and
+-- country joins act purely as a filter here: no organization or country column is selected or grouped on.
+-- result_projects and project are inner-joined, so results with no linked project do not contribute.
+-- Flags: licence = a result_licenses row with non-null type matched; pid = a result_pids row with non-null pid
+-- matched; oa = access_mode is 'Open Access' or 'Open Source'.
+-- NOTE(review): `rpc.count > 1` / `rfc.count > 1` evaluate to NULL if the left outer join finds no row in
+-- result_projectcount / result_fundercount -- confirm whether that can happen for results that have projects.
+-- NOTE(review): `abstract` and `cc_licence` in the group by are unqualified; presumably they resolve to
+-- r.abstract and rln.cc_licence -- verify no other joined table exposes columns with these names.
+create table ${observatory_db_name}.result_deposited_funder stored as parquet as
+select
+ count(distinct r.id) as total,
+ r.green,
+ r.gold,
+ case when rl.type is not null then true else false end as licence,
+ case when pids.pid is not null then true else false end as pid,
+ case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa,
+ r.peer_reviewed,
+ rln.cc_licence,
+ r.abstract as abstract,
+ r.authors > 1 as multiple_authors,
+ rpc.count > 1 as multiple_projects,
+ rfc.count > 1 as multiple_funders,
+ r.type,
+ p.funder as pfunder
+from ${stats_db_name}.result r
+ join ${stats_db_name}.result_datasources rd on rd.id=r.id
+ join ${stats_db_name}.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository')
+ join ${stats_db_name}.datasource_organizations dor on dor.id=d.id
+ join ${stats_db_name}.organization o on o.id=dor.organization
+ join ${stats_db_name}.country c on c.code=o.country and c.continent_name='Europe'
+ join ${stats_db_name}.result_projects rp on rp.id=r.id
+ join ${stats_db_name}.project p on p.id=rp.project
+ left outer join ${stats_db_name}.result_licenses rl on rl.id=r.id
+ left outer join ${stats_db_name}.result_pids pids on pids.id=r.id
+ left outer join ${observatory_db_name}.result_cc_licence rln on rln.id=r.id
+ left outer join ${stats_db_name}.result_projectcount rpc on rpc.id=r.id
+ left outer join ${stats_db_name}.result_fundercount rfc on rfc.id=r.id
+group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end,
+ case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract,
+ cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, p.funder;
-compute stats TARGET.result_affiliated_country;
-compute stats TARGET.result_affiliated_year;
-compute stats TARGET.result_affiliated_year_country;
-compute stats TARGET.result_affiliated_datasource;
-compute stats TARGET.result_affiliated_datasource_country;
-compute stats TARGET.result_affiliated_organization;
-compute stats TARGET.result_affiliated_organization_country;
-compute stats TARGET.result_affiliated_funder;
-compute stats TARGET.result_affiliated_funder_country;
-compute stats TARGET.result_deposited_country;
-compute stats TARGET.result_deposited_year;
-compute stats TARGET.result_deposited_year_country;
-compute stats TARGET.result_deposited_datasource;
-compute stats TARGET.result_deposited_datasource_country;
-compute stats TARGET.result_deposited_organization;
-compute stats TARGET.result_deposited_organization_country;
-compute stats TARGET.result_deposited_funder;
-compute stats TARGET.result_deposited_funder_country;
+-- Builds ${observatory_db_name}.result_deposited_funder_country (parquet) from the stats DB.
+-- Each row is one combination of the indicator flags below, result type, funder (p.funder) and country
+-- (c.code, c.name); `total` is the number of distinct result ids that fall into that combination.
+-- Only results linked to a datasource of one of the four listed repository types are considered, and only
+-- when that datasource's organization sits in a country with continent_name='Europe'.
+-- result_projects and project are inner-joined, so results with no linked project do not contribute.
+-- Flags: licence = a result_licenses row with non-null type matched; pid = a result_pids row with non-null pid
+-- matched; oa = access_mode is 'Open Access' or 'Open Source'.
+-- NOTE(review): `rpc.count > 1` / `rfc.count > 1` evaluate to NULL if the left outer join finds no row in
+-- result_projectcount / result_fundercount -- confirm whether that can happen for results that have projects.
+-- NOTE(review): `abstract` and `cc_licence` in the group by are unqualified; presumably they resolve to
+-- r.abstract and rln.cc_licence -- verify no other joined table exposes columns with these names.
+create table ${observatory_db_name}.result_deposited_funder_country stored as parquet as
+select
+ count(distinct r.id) as total,
+ r.green,
+ r.gold,
+ case when rl.type is not null then true else false end as licence,
+ case when pids.pid is not null then true else false end as pid,
+ case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa,
+ r.peer_reviewed,
+ rln.cc_licence,
+ r.abstract as abstract,
+ r.authors > 1 as multiple_authors,
+ rpc.count > 1 as multiple_projects,
+ rfc.count > 1 as multiple_funders,
+ r.type,
+ p.funder as pfunder, c.code as ccode, c.name as cname
+from ${stats_db_name}.result r
+ join ${stats_db_name}.result_datasources rd on rd.id=r.id
+ join ${stats_db_name}.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository')
+ join ${stats_db_name}.datasource_organizations dor on dor.id=d.id
+ join ${stats_db_name}.organization o on o.id=dor.organization
+ join ${stats_db_name}.country c on c.code=o.country and c.continent_name='Europe'
+ join ${stats_db_name}.result_projects rp on rp.id=r.id
+ join ${stats_db_name}.project p on p.id=rp.project
+ left outer join ${stats_db_name}.result_licenses rl on rl.id=r.id
+ left outer join ${stats_db_name}.result_pids pids on pids.id=r.id
+ left outer join ${observatory_db_name}.result_cc_licence rln on rln.id=r.id
+ left outer join ${stats_db_name}.result_projectcount rpc on rpc.id=r.id
+ left outer join ${stats_db_name}.result_fundercount rfc on rfc.id=r.id
+group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end,
+ case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract,
+ cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, p.funder, c.code, c.name;
\ No newline at end of file
diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/workflow.xml b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/workflow.xml
index a329ca4bf6..08d33f4e8f 100644
--- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/workflow.xml
@@ -239,14 +239,51 @@
stats_db_name=${stats_db_name}
openaire_db_name=${openaire_db_name}
-
+
-
+ ${hive_jdbc_url}
-
+
+ stats_db_name=${stats_db_name}
+ openaire_db_name=${openaire_db_name}
+
+
+
+
+
+
+
+ ${jobTracker}
+ ${nameNode}
+ contexts.sh
+ ${context_api_url}
+ ${stats_db_name}
+ contexts.sh
+
+
+
+
+
+
+
+ ${jobTracker}
+ ${nameNode}
+ indicators.sh
+ ${stats_db_name}
+ ${wf:appPath()}/scripts/step16-createIndicatorsTables.sql
+ indicators.sh
+
+
+
+
+
+
+
+ ${hive_jdbc_url}
+
stats_db_name=${stats_db_name}
openaire_db_name=${openaire_db_name}
@@ -261,48 +298,11 @@
stats_db_name=${stats_db_name}
openaire_db_name=${openaire_db_name}
-
-
-
-
-
-
- ${hive_jdbc_url}
-
- stats_db_name=${stats_db_name}
- openaire_db_name=${openaire_db_name}
-
-
-
-
-
-
-
- ${jobTracker}
- ${nameNode}
- indicators.sh
- ${stats_db_name}
- ${wf:appPath()}/scripts/step16_7-createIndicatorsTables.sql
- indicators.sh
-
-
-
-
-
-
-
- ${jobTracker}
- ${nameNode}
- contexts.sh
- ${context_api_url}
- ${stats_db_name}
- contexts.sh
-
-
+
-
+ ${jobTracker}${nameNode}
@@ -326,20 +326,44 @@
${wf:appPath()}/scripts/step20-createMonitorDB.sqlmonitor.sh
+
+
+
+
+
+
+ ${jobTracker}
+ ${nameNode}
+ observatory-pre.sh
+ ${stats_db_name}
+ ${observatory_db_name}
+ ${observatory_db_shadow_name}
+ observatory-pre.sh
+
+
+ ${hive_jdbc_url}
+
+ stats_db_name=${stats_db_name}
+ observatory_db_name=${observatory_db_name}
+
+
+
+
+
+ ${jobTracker}${nameNode}
- observatory.sh
+ observatory-post.sh${stats_db_name}${observatory_db_name}${observatory_db_shadow_name}
- ${wf:appPath()}/scripts/step21-createObservatoryDB.sql
- observatory.sh
+ observatory-post.sh