dnet-applications/apps/dnet-orgs-database-application/src/main/resources/sql/importDedupEvents.sql

54 lines
3.9 KiB
PL/PgSQL

BEGIN;
DELETE FROM oa_conflicts WHERE created_by = 'dedupWf' and reltype = 'suggested';
DELETE FROM oa_duplicates WHERE created_by = 'dedupWf' and reltype = 'suggested';
DELETE FROM organizations WHERE created_by = 'dedupWf' and modified_by = 'dedupWf';
-- FIX IMPORT DATA
UPDATE tmp_dedup_events SET oa_country = 'UNKNOWN' WHERE oa_country = '' OR oa_country IS NULL;
-- NEW ORGANIZATIONS
INSERT INTO organizations(id, name, country, status, ec_legalbody, ec_legalperson, ec_nonprofit, ec_researchorganization, ec_highereducation, ec_internationalorganizationeurinterests, ec_internationalorganization, ec_enterprise, ec_smevalidated, ec_nutscode, created_by, modified_by)
SELECT oa_original_id, oa_name, oa_country, 'suggested', ec_legalbody, ec_legalperson, ec_nonprofit, ec_researchorganization, ec_highereducation, ec_internationalorganizationeurinterests, ec_internationalorganization, ec_enterprise, ec_smevalidated, ec_nutscode, 'dedupWf', 'dedupWf'
FROM tmp_dedup_events
WHERE oa_original_id NOT LIKE 'openorgs\_\_\_\_::%' AND (oa_original_id = local_id OR local_id = '' OR local_id IS NULL)
ON CONFLICT DO NOTHING;
INSERT INTO organizations(id, name, country, status, ec_legalbody, ec_legalperson, ec_nonprofit, ec_researchorganization, ec_highereducation, ec_internationalorganizationeurinterests, ec_internationalorganization, ec_enterprise, ec_smevalidated, ec_nutscode, created_by, modified_by)
SELECT oa_original_id, oa_name, oa_country, 'duplicate', ec_legalbody, ec_legalperson, ec_nonprofit, ec_researchorganization, ec_highereducation, ec_internationalorganizationeurinterests, ec_internationalorganization, ec_enterprise, ec_smevalidated, ec_nutscode, 'dedupWf', 'dedupWf'
FROM tmp_dedup_events
WHERE oa_original_id NOT LIKE 'openorgs\_\_\_\_::%' AND local_id is NOT NULL AND local_id != '' AND local_id != oa_original_id
ON CONFLICT DO NOTHING;
INSERT INTO acronyms(id, acronym) SELECT oa_original_id, oa_acronym FROM tmp_dedup_events WHERE oa_original_id NOT LIKE 'openorgs\_\_\_\_::%' ON CONFLICT DO NOTHING;
INSERT INTO urls(id, url) SELECT oa_original_id, oa_url FROM tmp_dedup_events WHERE oa_original_id NOT LIKE 'openorgs\_\_\_\_::%' ON CONFLICT DO NOTHING;
INSERT INTO id_types(val, name) SELECT distinct arr[2], arr[2] FROM (SELECT string_to_array(unnest(string_to_array(pid_list, '@@@')), '###') AS arr FROM tmp_dedup_events WHERE oa_original_id NOT LIKE 'openorgs\_\_\_\_::%') as c ON CONFLICT DO NOTHING;
INSERT INTO other_ids(id, otherid, type) SELECT oa_original_id, arr[1] AS otherid, arr[2] AS type FROM (SELECT oa_original_id, string_to_array(unnest(string_to_array(pid_list, '@@@')), '###') AS arr FROM tmp_dedup_events WHERE oa_original_id NOT LIKE 'openorgs\_\_\_\_::%') as c ON CONFLICT DO NOTHING;
-- DUPLICATES
INSERT INTO oa_duplicates (local_id, oa_original_id, oa_collectedfrom, created_by, modified_by)
SELECT local_id, oa_original_id, oa_collectedfrom, 'dedupWf', 'dedupWf'
FROM tmp_dedup_events
WHERE local_id IS NOT NULL AND local_id != '' AND oa_original_id NOT LIKE 'openorgs\_\_\_\_::%' AND local_id != oa_original_id
ON CONFLICT DO NOTHING;
-- CONFLICTS (I generate all the couples)
CREATE TEMPORARY TABLE tmp_conflict_groups AS
SELECT DISTINCT group_id as gid, local_id oid from tmp_dedup_events
WHERE local_id LIKE 'openorgs\_\_\_\_::%' AND oa_original_id LIKE 'openorgs\_\_\_\_::%' AND local_id != oa_original_id AND group_id IS NOT NULL AND group_id != ''
UNION
SELECT DISTINCT group_id as gid, oa_original_id oid from tmp_dedup_events
WHERE local_id LIKE 'openorgs\_\_\_\_::%' AND oa_original_id LIKE 'openorgs\_\_\_\_::%' AND local_id != oa_original_id AND group_id IS NOT NULL AND group_id != '';
INSERT INTO oa_conflicts (id1, id2, idgroup, created_by, modified_by) SELECT DISTINCT
c1.oid, c2.oid, c1.gid, 'dedupWf', 'dedupWf'
FROM
tmp_conflict_groups c1
JOIN tmp_conflict_groups c2 ON (c1.gid = c2.gid)
WHERE
c1.oid != c2.oid
ON CONFLICT DO NOTHING;
COMMIT;