From e9686365a21dbcf4fe14e9c8ea211d4db62fe39d Mon Sep 17 00:00:00 2001 From: LSmyrnaios Date: Wed, 3 Jul 2024 20:24:36 +0300 Subject: [PATCH] Improve performance of creating the "result_fos" table, by using a temp-table to cache data, which is requested multiple times. --- .../dhp/oa/graph/stats/oozie_app/scripts/step7.sql | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step7.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step7.sql index 0717d7897..bffd59ef1 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step7.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step7.sql @@ -125,14 +125,20 @@ UNION ALL SELECT * FROM ${stats_db_name}.otherresearchproduct_topics; /*EOS*/ + +DROP TABLE IF EXISTS ${stats_db_name}.result_fos_base_tmp purge; /*EOS*/ + +create table ${stats_db_name}.result_fos_base_tmp stored as parquet as +select /*+ COALESCE(100) */ id, topic from ${stats_db_name}.result_topics where type='Fields of Science and Technology classification'; /*EOS*/ + DROP TABLE IF EXISTS ${stats_db_name}.result_fos purge; /*EOS*/ create table ${stats_db_name}.result_fos stored as parquet as with - lvl1 as (select id, topic from ${stats_db_name}.result_topics where topic like '__ %' and type='Fields of Science and Technology classification'), - lvl2 as (select id, topic from ${stats_db_name}.result_topics where topic like '____ %' and type='Fields of Science and Technology classification'), - lvl3 as (select id, topic from ${stats_db_name}.result_topics where topic like '______ %' and type='Fields of Science and Technology classification'), - lvl4 as (select id, topic from ${stats_db_name}.result_topics where topic like '________ %' and type='Fields of Science and Technology classification') + lvl1 as (select * from ${stats_db_name}.result_fos_base_tmp where topic like '__ %'), + lvl2 as (select * from ${stats_db_name}.result_fos_base_tmp where topic like '____ %'), + lvl3 as (select * from ${stats_db_name}.result_fos_base_tmp where topic like '______ %'), + lvl4 as (select * from ${stats_db_name}.result_fos_base_tmp where topic like '________ %') select /*+ COALESCE(100) */ lvl1.id, lvl1.topic as lvl1, lvl2.topic as lvl2, lvl3.topic as lvl3, lvl4.topic as lvl4 from lvl1 join lvl2 on lvl1.id=lvl2.id and substr(lvl2.topic, 1, 2)=substr(lvl1.topic, 1, 2)