diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/config-default.xml b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/config-default.xml
index 9331d4ac5..63fc84d75 100644
--- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/config-default.xml
+++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/config-default.xml
@@ -21,7 +21,7 @@
hive_jdbc_url
- jdbc:hive2://iis-cdh5-test-m3.ocean.icm.edu.pl:10000
+ jdbc:hive2://iis-cdh5-test-m3.ocean.icm.edu.pl:10000/;UseNativeQuery=1;?spark.executor.memory=19166291558;spark.yarn.executor.memoryOverhead=3225;spark.driver.memory=11596411699;spark.yarn.driver.memoryOverhead=1228
oozie.wf.workflow.notification.url
diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step11.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step11.sql
index d699b68c3..41c3ed751 100644
--- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step11.sql
+++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step11.sql
@@ -42,7 +42,9 @@ SELECT p.id,
CASE WHEN prr2.id IS NULL THEN 0 ELSE prr2.dp END AS delayedpubs,
p.callidentifier,
p.code,
- p.totalcost
+ p.totalcost,
+ p.fundedamount,
+ p.currency
FROM ${stats_db_name}.project_tmp p
LEFT JOIN (SELECT pr.id, count(distinct pr.result) AS np
FROM ${stats_db_name}.project_results pr
diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step13.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step13.sql
index aee66fd5e..24e1a1355 100644
--- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step13.sql
+++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step13.sql
@@ -59,7 +59,7 @@ UNION ALL
SELECT * FROM ${stats_db_name}.otherresearchproduct_sources;
-create table ${stats_db_name}.result_orcid STORED AS PARQUET as
+CREATE TABLE IF NOT EXISTS ${stats_db_name}.result_orcid STORED AS PARQUET as
select distinct res.id, regexp_replace(res.orcid, 'http://orcid.org/' ,'') as orcid
from (
SELECT substr(res.id, 4) as id, auth_pid.value as orcid
@@ -69,7 +69,7 @@ from (
LATERAL VIEW explode(auth.pid.qualifier.classid) apt as author_pid_type
WHERE res.datainfo.deletedbyinference = FALSE and res.datainfo.invisible = FALSE and author_pid_type = 'orcid') as res;
-create table ${stats_db_name}.result_result stored as parquet as
+CREATE TABLE IF NOT EXISTS ${stats_db_name}.result_result stored as parquet as
select substr(rel.source, 4) as source, substr(rel.target, 4) as target, relclass, subreltype
from ${openaire_db_name}.relation rel
join ${openaire_db_name}.result r1 on rel.source=r1.id
@@ -82,7 +82,7 @@ where reltype='resultResult'
and r2.resulttype.classname != 'other'
and rel.datainfo.deletedbyinference=false and rel.datainfo.invisible = FALSE;
-create table ${stats_db_name}.result_citations_oc stored as parquet as
+CREATE TABLE IF NOT EXISTS ${stats_db_name}.result_citations_oc stored as parquet as
select substr(target, 4) as id, count(distinct substr(source, 4)) as citations
from ${openaire_db_name}.relation rel
join ${openaire_db_name}.result r1 on rel.source=r1.id
@@ -97,7 +97,7 @@ where relClass='Cites' and rel.datainfo.provenanceaction.classid = 'sysimport:cr
and rel.datainfo.deletedbyinference=false and rel.datainfo.invisible = FALSE
group by substr(target, 4);
-create table ${stats_db_name}.result_references_oc stored as parquet as
+CREATE TABLE IF NOT EXISTS ${stats_db_name}.result_references_oc stored as parquet as
select substr(source, 4) as id, count(distinct substr(target, 4)) as references
from ${openaire_db_name}.relation rel
join ${openaire_db_name}.result r1 on rel.source=r1.id
diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step15_5.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step15_5.sql
index 04c7f83b9..86ead4a2c 100644
--- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step15_5.sql
+++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step15_5.sql
@@ -42,7 +42,7 @@ join ${stats_db_name}.result res on res.id=r.id;
create table ${stats_db_name}.result_apc as
select r.id, r.amount, r.currency
from (
- select substr(r.id, 4) as id, inst.processingchargeamount.value as amount, inst.processingchargecurrency.value as currency
+ select substr(r.id, 4) as id, cast(inst.processingchargeamount.value as float) as amount, inst.processingchargecurrency.value as currency
from ${openaire_db_name}.result r lateral view explode(r.instance) instances as inst) r
join ${stats_db_name}.result res on res.id=r.id
where r.amount is not null;
diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql
index 417ed6e4e..1bda07629 100755
--- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql
+++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql
@@ -454,16 +454,16 @@ FROM publication_datasources pd
compute stats indi_pub_hybrid_oa_with_cc;
create table indi_pub_downloads stored as parquet as
-SELECT result_id, sum(downloads) no_dowloads from openaire_prod_usage_stats.usage_stats
+SELECT result_id, sum(downloads) no_downloads from openaire_prod_usage_stats.usage_stats
join publication on result_id=id
where downloads>0
GROUP BY result_id
-order by no_dowloads desc;
+order by no_downloads desc;
compute stats indi_pub_downloads;
create table indi_pub_downloads_datasource stored as parquet as
-SELECT result_id, repository_id, sum(downloads) no_dowloads from openaire_prod_usage_stats.usage_stats
+SELECT result_id, repository_id, sum(downloads) no_downloads from openaire_prod_usage_stats.usage_stats
join publication on result_id=id
where downloads>0
GROUP BY result_id, repository_id
@@ -472,7 +472,7 @@ order by result_id;
compute stats indi_pub_downloads_datasource;
create table indi_pub_downloads_year stored as parquet as
-SELECT result_id, substring(us.`date`, 1,4) as `year`, sum(downloads) no_dowloads from openaire_prod_usage_stats.usage_stats us
+SELECT result_id, substring(us.`date`, 1,4) as `year`, sum(downloads) no_downloads from openaire_prod_usage_stats.usage_stats us
join publication on result_id=id where downloads>0
GROUP BY result_id, `year`
order by `year` asc;
@@ -480,7 +480,7 @@ order by `year` asc;
compute stats indi_pub_downloads_year;
create table indi_pub_downloads_datasource_year stored as parquet as
-SELECT result_id, substring(us.`date`, 1,4) as `year`, repository_id, sum(downloads) no_dowloads from openaire_prod_usage_stats.usage_stats us
+SELECT result_id, substring(us.`date`, 1,4) as `year`, repository_id, sum(downloads) no_downloads from openaire_prod_usage_stats.usage_stats us
join publication on result_id=id
where downloads>0
GROUP BY result_id, repository_id, `year`
diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql
index 290acbf9f..98dca7129 100644
--- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql
+++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql
@@ -39,7 +39,6 @@ create table TARGET.result stored as parquet as
'openorgs____::5f31346d444a7f06a28c880fb170b0f6', --Ghent University
'openorgs____::2dbe47117fd5409f9c61620813456632', --University of Luxembourg
'openorgs____::6445d7758d3a40c4d997953b6632a368', --National Institute of Informatics (NII)
-
'openorgs____::b77c01aa15de3675da34277d48de2ec1', -- Valencia Catholic University Saint Vincent Martyr
'openorgs____::7fe2f66cdc43983c6b24816bfe9cf6a0', -- University of Warsaw
'openorgs____::15e7921fc50d9aa1229a82a84429419e', -- University Of Thessaly
@@ -49,7 +48,13 @@ create table TARGET.result stored as parquet as
'openorgs____::3e8d1f8c3f6cd7f418b09f1f58b4873b', -- Aristotle University of Thessaloniki
'openorgs____::3fcef6e1c469c10f2a84b281372c9814', -- World Bank
'openorgs____::1698a2eb1885ef8adb5a4a969e745ad3', -- École des Ponts ParisTech
- 'openorgs____::e15adb13c4dadd49de4d35c39b5da93a' -- Nanyang Technological University
+ 'openorgs____::e15adb13c4dadd49de4d35c39b5da93a', -- Nanyang Technological University
+ 'openorgs____::4b34103bde246228fcd837f5f1bf4212', -- Autonomous University of Barcelona
+ 'openorgs____::72ec75fcfc4e0df1a76dc4c49007fceb', -- McMaster University
+ 'openorgs____::51c7fc556e46381734a25a6fbc3fd398', -- University of Modena and Reggio Emilia
+ 'openorgs____::235d7f9ad18ecd7e6dc62ea4990cb9db', -- Bilkent University
+ 'openorgs____::31f2fa9e05b49d4cf40a19c3fed8eb06', -- Saints Cyril and Methodius University of Skopje
+ 'openorgs____::db7686f30f22cbe73a4fde872ce812a6' -- University of Milan
) )) foo;
compute stats TARGET.result;
@@ -224,18 +229,3 @@ create table TARGET.indi_result_with_pid stored as parquet as select * from SOUR
--create table TARGET.indi_software_gold_oa stored as parquet as select * from SOURCE.indi_software_gold_oa orig where exists (select 1 from TARGET.result r where r.id=orig.id);
--compute stats TARGET.indi_software_gold_oa;
---denorm
-alter table TARGET.result rename to TARGET.res_tmp;
-
-create table TARGET.result_denorm stored as parquet as
- select distinct r.*, rp.project, p.acronym as pacronym, p.title as ptitle, p.funder as pfunder, p.funding_lvl0 as pfunding_lvl0, rd.datasource, d.name as dname, d.type as dtype
- from TARGET.res_tmp r
- left outer join TARGET.result_projects rp on rp.id=r.id
- left outer join TARGET.result_datasources rd on rd.id=r.id
- left outer join TARGET.project p on p.id=rp.project
- left outer join TARGET.datasource d on d.id=rd.datasource;
-compute stats TARGET.result_denorm;
-
-alter table TARGET.result_denorm rename to TARGET.result;
-drop table TARGET.res_tmp;
---- done!
\ No newline at end of file
diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step6.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step6.sql
index 5461afde6..c31180c14 100644
--- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step6.sql
+++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step6.sql
@@ -48,7 +48,9 @@ CREATE TABLE ${stats_db_name}.project_tmp
delayedpubs INT,
callidentifier STRING,
code STRING,
- totalcost FLOAT
+ totalcost FLOAT,
+ fundedamount FLOAT,
+ currency STRING
) CLUSTERED BY (id) INTO 100 buckets stored AS orc tblproperties ('transactional' = 'true');
INSERT INTO ${stats_db_name}.project_tmp
@@ -72,7 +74,9 @@ SELECT substr(p.id, 4) AS id,
0 AS delayedpubs,
p.callidentifier.value AS callidentifier,
p.code.value AS code,
- p.totalcost AS totalcost
+ p.totalcost AS totalcost,
+ p.fundedamount AS fundedamount,
+ p.currency.value AS currency
FROM ${openaire_db_name}.project p
WHERE p.datainfo.deletedbyinference = false and p.datainfo.invisible=false;