diff --git a/apps/dnet-orgs-database-application/src/main/resources/sql/importDedupEvents.sql b/apps/dnet-orgs-database-application/src/main/resources/sql/importDedupEvents.sql
index 0283f98c..831fdca9 100644
--- a/apps/dnet-orgs-database-application/src/main/resources/sql/importDedupEvents.sql
+++ b/apps/dnet-orgs-database-application/src/main/resources/sql/importDedupEvents.sql
@@ -3,6 +3,7 @@ BEGIN;
 DELETE FROM oa_conflicts WHERE created_by = 'dedupWf' and modified_by = 'dedupWf' and reltype = 'suggested';
 DELETE FROM oa_duplicates WHERE created_by = 'dedupWf' and modified_by = 'dedupWf' and reltype = 'suggested';
 DELETE FROM organizations WHERE created_by = 'dedupWf' and modified_by = 'dedupWf' and status = 'suggested';
+-- *** IMPORTANT : DO NOT DELETE THE RAW ORGS TO AVOID THE 'ON CASCADE' DELETIONS
 
 -- FIX ORIGINAL DATA
 UPDATE organizations SET id = 'pending_org_::'||MD5(id) WHERE status = 'suggested' AND id NOT LIKE 'pending_org_::%';
@@ -54,7 +55,9 @@ INSERT INTO organizations(id, name, country, status, ec_legalbody, ec_legalperso
 SELECT oa_original_id, oa_name, oa_country, 'raw', ec_legalbody, ec_legalperson, ec_nonprofit, ec_researchorganization, ec_highereducation, ec_internationalorganizationeurinterests, ec_internationalorganization, ec_enterprise, ec_smevalidated, ec_nutscode, 'dedupWf', 'dedupWf'
 FROM tmp_dedup_events
 WHERE oa_original_id NOT LIKE 'openorgs\_\_\_\_::%'
-ON CONFLICT DO NOTHING;
+ON CONFLICT(id) DO UPDATE SET
+    (name, country, ec_legalbody, ec_legalperson, ec_nonprofit, ec_researchorganization, ec_highereducation, ec_internationalorganizationeurinterests, ec_internationalorganization, ec_enterprise, ec_smevalidated, ec_nutscode, modification_date, modified_by) =
+    (EXCLUDED.name, EXCLUDED.country, EXCLUDED.ec_legalbody, EXCLUDED.ec_legalperson, EXCLUDED.ec_nonprofit, EXCLUDED.ec_researchorganization, EXCLUDED.ec_highereducation, EXCLUDED.ec_internationalorganizationeurinterests, EXCLUDED.ec_internationalorganization, EXCLUDED.ec_enterprise, EXCLUDED.ec_smevalidated, EXCLUDED.ec_nutscode, now(), 'dedupWf');
 
 INSERT INTO acronyms(id, acronym)
 SELECT oa_original_id, oa_acronym
@@ -110,11 +113,38 @@ WHERE
 ON CONFLICT DO NOTHING;
 
--- CONSISTENCY
+-- CONSISTENCY (respect the order of the deletions)
+
+-- remove the pending organizations that have been recently approved
+DELETE FROM organizations
+WHERE id in (
+    SELECT o1.id
+    FROM
+        oa_duplicates d1
+        JOIN organizations o1 ON (o1.id = d1.local_id)
+        JOIN oa_duplicates d2 on (d1.oa_original_id = d2.oa_original_id)
+        JOIN organizations o2 on (o2.id = d2.local_id)
+    WHERE d1.local_id != d2.local_id
+        AND o1.status = 'suggested'
+        AND o1.created_by = 'dedupWf'
+        AND o1.modified_by = 'dedupWf'
+        AND d1.reltype = 'suggested'
+        AND d2.reltype != 'is_different');
+
 
 -- Remove invalid suggestions (an existing relation has already been approved)
 DELETE FROM oa_duplicates d
 USING oa_duplicates d1
 WHERE d.oa_original_id = d1.oa_original_id AND d.reltype = 'suggested' AND d1.reltype = 'is_similar';
 
+
+
+
+
+
+
+
+
+
+
 
 COMMIT;