data4impact/apps/data4impact-import-scripts/fixRelations.sql

58 lines
1.6 KiB
SQL

-- Population of the doc_project relation using the data from project -> project_doc_other_id -> doc_other_identifier -> document
insert into
doc_project(projectid, docid, inferred)
select
p.projectid,
d.docid,
true as inferred
from
project_doc_other_id p
left outer join doc_other_identifier d on (p.docid = d.id and p.docidtype = d.idtype)
where
d.docid is not null
on conflict do nothing;
-- Population of the doc_doc relation using the data from document(eg: guidelines) -> doc_doc_other_id -> doc_other_identifier -> document(eg: publication)
insert into
doc_doc(docid1, docid2, reltype, inferred)
select
d.docid1 as docid1,
i.docid as docid2,
d.reltype as reltype,
true as inferred
from
doc_doc_other_id d
left outer join doc_other_identifier i on (d.docid2 = i.id and d.docid2type = i.idtype)
where
i.docid is not null
on conflict do nothing;
-- Remove redundant doc_project relations (references to MOCK PROJECTS would be counted twice, otherwise)
create table temp_delete_doc_project as select t.docid||'@'||t.projectid as item from (
select
dp.docid,
unnest(array_agg(dp.projectid)) as projectid
from
doc_project dp
left outer join project p on (dp.projectid = p.id)
group by
dp.docid, p.funder
having
array_to_string(array_agg(dp.projectid), ',', '') like '%MOCK_PROJECT%'
and array_length(array_agg(DISTINCT dp.projectid), 1) > 1
) as t
where t.projectid like '40|MOCK_PROJECT::%';
create index temp_delete_doc_project_item_idx on temp_delete_doc_project(item);
delete from doc_project where docid||'@'||projectid in (
select item
from temp_delete_doc_project
);
drop table temp_delete_doc_project;