119 lines
5.4 KiB
SQL
119 lines
5.4 KiB
SQL
----------------------------------------------------------------
|
|
----------------------------------------------------------------
|
|
-- Post processing - Updates on main tables
|
|
----------------------------------------------------------------
|
|
----------------------------------------------------------------
|
|
|
|
--Datasource temporary table updates
|
|
UPDATE ${stats_db_name}.datasource_tmp
|
|
SET harvested='true'
|
|
WHERE datasource_tmp.id IN (SELECT DISTINCT d.id
|
|
FROM ${stats_db_name}.datasource_tmp d,
|
|
${stats_db_name}.result_datasources rd
|
|
WHERE d.id = rd.datasource);
|
|
|
|
-- Project temporary table update and final project table creation with final updates that can not be applied to ORC tables
|
|
UPDATE ${stats_db_name}.project_tmp
|
|
SET haspubs='yes'
|
|
WHERE project_tmp.id IN (SELECT pr.id
|
|
FROM ${stats_db_name}.project_results pr,
|
|
${stats_db_name}.result r
|
|
WHERE pr.result = r.id
|
|
AND r.type = 'publication');
|
|
|
|
CREATE TABLE ${stats_db_name}.project stored as parquet as
|
|
SELECT p.id,
|
|
p.acronym,
|
|
p.title,
|
|
p.funder,
|
|
p.funding_lvl0,
|
|
p.funding_lvl1,
|
|
p.funding_lvl2,
|
|
p.ec39,
|
|
p.type,
|
|
p.startdate,
|
|
p.enddate,
|
|
p.start_year,
|
|
p.end_year,
|
|
p.duration,
|
|
CASE WHEN prr1.id IS NULL THEN 'no' ELSE 'yes' END AS haspubs,
|
|
CASE WHEN prr1.id IS NULL THEN 0 ELSE prr1.np END AS numpubs,
|
|
CASE WHEN prr2.id IS NULL THEN 0 ELSE prr2.daysForlastPub END AS daysforlastpub,
|
|
CASE WHEN prr2.id IS NULL THEN 0 ELSE prr2.dp END AS delayedpubs,
|
|
p.callidentifier,
|
|
p.code,
|
|
p.totalcost
|
|
FROM ${stats_db_name}.project_tmp p
|
|
LEFT JOIN (SELECT pr.id, count(distinct pr.result) AS np
|
|
FROM ${stats_db_name}.project_results pr
|
|
INNER JOIN ${stats_db_name}.result r ON pr.result = r.id
|
|
WHERE r.type = 'publication'
|
|
GROUP BY pr.id) AS prr1 on prr1.id = p.id
|
|
LEFT JOIN (SELECT pp.id,
|
|
max(datediff(to_date(r.date), to_date(pp.enddate))) AS daysForlastPub,
|
|
count(distinct r.id) AS dp
|
|
FROM ${stats_db_name}.project_tmp pp,
|
|
${stats_db_name}.project_results pr,
|
|
${stats_db_name}.result r
|
|
WHERE pp.id = pr.id
|
|
AND pr.result = r.id
|
|
AND r.type = 'publication'
|
|
AND datediff(to_date(r.date), to_date(pp.enddate)) > 0
|
|
GROUP BY pp.id) AS prr2
|
|
ON prr2.id = p.id;
|
|
|
|
UPDATE ${stats_db_name}.publication_tmp
|
|
SET delayed = 'yes'
|
|
WHERE publication_tmp.id IN (SELECT distinct r.id
|
|
FROM stats_wf_db_obs.result r,
|
|
${stats_db_name}.project_results pr,
|
|
${stats_db_name}.project_tmp p
|
|
WHERE r.id = pr.result
|
|
AND pr.id = p.id
|
|
AND to_date(r.date) - to_date(p.enddate) > 0);
|
|
|
|
UPDATE ${stats_db_name}.dataset_tmp
|
|
SET delayed = 'yes'
|
|
WHERE dataset_tmp.id IN (SELECT distinct r.id
|
|
FROM stats_wf_db_obs.result r,
|
|
${stats_db_name}.project_results pr,
|
|
${stats_db_name}.project_tmp p
|
|
WHERE r.id = pr.result
|
|
AND pr.id = p.id
|
|
AND to_date(r.date) - to_date(p.enddate) > 0);
|
|
|
|
UPDATE ${stats_db_name}.software_tmp
|
|
SET delayed = 'yes'
|
|
WHERE software_tmp.id IN (SELECT distinct r.id
|
|
FROM ${stats_db_name}.result r,
|
|
${stats_db_name}.project_results pr,
|
|
${stats_db_name}.project_tmp p
|
|
WHERE r.id = pr.result
|
|
AND pr.id = p.id
|
|
AND to_date(r.date) - to_date(p.enddate) > 0);
|
|
|
|
UPDATE ${stats_db_name}.otherresearchproduct_tmp
|
|
SET delayed = 'yes'
|
|
WHERE otherresearchproduct_tmp.id IN (SELECT distinct r.id
|
|
FROM ${stats_db_name}.result r,
|
|
${stats_db_name}.project_results pr,
|
|
${stats_db_name}.project_tmp p
|
|
WHERE r.id = pr.result
|
|
AND pr.id = p.id
|
|
AND to_date(r.date) - to_date(p.enddate) > 0);
|
|
|
|
CREATE OR REPLACE VIEW ${stats_db_name}.project_results_publication AS
|
|
SELECT result_projects.id AS result,
|
|
result_projects.project AS project_results,
|
|
result.date as resultdate,
|
|
project.enddate as projectenddate,
|
|
result_projects.daysfromend AS daysfromend
|
|
FROM ${stats_db_name}.result_projects,
|
|
${stats_db_name}.result,
|
|
${stats_db_name}.project
|
|
WHERE result_projects.id = result.id
|
|
AND result.type = 'publication'
|
|
AND project.id = result_projects.project;
|
|
|
|
-- ANALYZE TABLE ${stats_db_name}.project COMPUTE STATISTICS;
|
|
-- ANALYZE TABLE ${stats_db_name}.project COMPUTE STATISTICS FOR COLUMNS; |