From db088cc69c6878e5242e5edb8b8d1ebb894a3136 Mon Sep 17 00:00:00 2001 From: antleb Date: Tue, 7 Jun 2022 04:04:28 +0300 Subject: [PATCH 1/2] fixed *_organization tables --- .../eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step7.sql | 1 + .../eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step8.sql | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step7.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step7.sql index e1c36cbc0..b5eba6111 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step7.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step7.sql @@ -127,6 +127,7 @@ CREATE TABLE ${stats_db_name}.result_organization STORED AS PARQUET AS SELECT substr(r.target, 4) AS id, substr(r.source, 4) AS organization FROM ${openaire_db_name}.relation r WHERE r.reltype = 'resultOrganization' + and r.target like '50|%' and r.datainfo.deletedbyinference = false and r.datainfo.invisible=false; CREATE TABLE ${stats_db_name}.result_projects STORED AS PARQUET AS diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step8.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step8.sql index fa3eca1a9..6fa0e6fdf 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step8.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step8.sql @@ -93,7 +93,7 @@ where d.datainfo.deletedbyinference=false and d.datainfo.invisible=false; CREATE TABLE ${stats_db_name}.datasource_organizations STORED AS PARQUET AS SELECT substr(r.target, 4) AS id, substr(r.source, 4) AS organization FROM ${openaire_db_name}.relation r -WHERE r.reltype = 'datasourceOrganization' and r.datainfo.deletedbyinference = false and r.datainfo.invisible=false; +WHERE r.reltype = 'datasourceOrganization' and r.datainfo.deletedbyinference = false and r.target like '20|%' and r.datainfo.invisible=false; -- datasource sources: -- where the datasource info have been collected from. From 574492c659ea7a6d8e54f4332ba76a6ad45555fd Mon Sep 17 00:00:00 2001 From: Antonis Lempesis Date: Thu, 9 Jun 2022 15:48:13 +0300 Subject: [PATCH 2/2] removed double result_apc table creation from monitor --- .../graph/stats/oozie_app/scripts/step20-createMonitorDB.sql | 5 ----- 1 file changed, 5 deletions(-) diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql index 3cf155869..7412910a9 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql @@ -107,11 +107,6 @@ compute stats TARGET.result_sources; create table TARGET.result_topics stored as parquet as select * from SOURCE.result_topics orig where exists (select 1 from TARGET.result r where r.id=orig.id); compute stats TARGET.result_topics; -create table TARGET.result_apc stored as parquet as select * from SOURCE.result_apc orig where exists (select 1 from TARGET.result r where r.id=orig.id); -compute stats TARGET.result_apc; - - - create view TARGET.foo1 as select * from SOURCE.result_result rr where rr.source in (select id from TARGET.result); create view TARGET.foo2 as select * from SOURCE.result_result rr where rr.target in (select id from TARGET.result); create table TARGET.result_result STORED AS PARQUET as select distinct * from (select * from TARGET.foo1 union all select * from TARGET.foo2) foufou;