sql import

This commit is contained in:
Michele Artini 2021-04-27 10:12:05 +02:00
parent 491b86ddd0
commit 9bfb0dbda2
1 changed file with 32 additions and 2 deletions

View File

@ -3,6 +3,7 @@ BEGIN;
-- Remove the rows that were both created and last modified by 'dedupWf'
-- and that are still in the 'suggested' state.
DELETE FROM oa_conflicts
WHERE created_by = 'dedupWf'
  AND modified_by = 'dedupWf'
  AND reltype = 'suggested';

DELETE FROM oa_duplicates
WHERE created_by = 'dedupWf'
  AND modified_by = 'dedupWf'
  AND reltype = 'suggested';

DELETE FROM organizations
WHERE created_by = 'dedupWf'
  AND modified_by = 'dedupWf'
  AND status = 'suggested';

-- *** IMPORTANT : DO NOT DELETE THE RAW ORGS TO AVOID THE 'ON CASCADE' DELETIONS

-- FIX ORIGINAL DATA
-- Re-key the remaining 'suggested' organizations to 'pending_org_::' || MD5(old id),
-- skipping the ids that already carry that prefix. The underscores in the pattern
-- are escaped because a bare '_' is a single-character wildcard in LIKE.
UPDATE organizations
SET id = 'pending_org_::' || MD5(id)
WHERE status = 'suggested'
  AND id NOT LIKE 'pending\_org\_::%';
@ -54,7 +55,9 @@ INSERT INTO organizations(id, name, country, status, ec_legalbody, ec_legalperso
SELECT oa_original_id, oa_name, oa_country, 'raw', ec_legalbody, ec_legalperson, ec_nonprofit, ec_researchorganization, ec_highereducation, ec_internationalorganizationeurinterests, ec_internationalorganization, ec_enterprise, ec_smevalidated, ec_nutscode, 'dedupWf', 'dedupWf'
FROM tmp_dedup_events
WHERE oa_original_id NOT LIKE 'openorgs\_\_\_\_::%'
ON CONFLICT DO NOTHING;
ON CONFLICT(id) DO UPDATE SET
(name, country, ec_legalbody, ec_legalperson, ec_nonprofit, ec_researchorganization, ec_highereducation, ec_internationalorganizationeurinterests, ec_internationalorganization, ec_enterprise, ec_smevalidated, ec_nutscode, modification_date, modified_by) =
(EXCLUDED.name, EXCLUDED.country, EXCLUDED.ec_legalbody, EXCLUDED.ec_legalperson, EXCLUDED.ec_nonprofit, EXCLUDED.ec_researchorganization, EXCLUDED.ec_highereducation, EXCLUDED.ec_internationalorganizationeurinterests, EXCLUDED.ec_internationalorganization, EXCLUDED.ec_enterprise, EXCLUDED.ec_smevalidated, EXCLUDED.ec_nutscode, now(), 'dedupWf');
INSERT INTO acronyms(id, acronym)
SELECT oa_original_id, oa_acronym
@ -110,11 +113,38 @@ WHERE
ON CONFLICT DO NOTHING;
-- CONSISTENCY
-- CONSISTENCY (respect the order of the deletions)
-- Remove the pending organizations that have been recently approved: an organization
-- with status 'suggested', created and modified by 'dedupWf', is deleted when its
-- 'suggested' duplicate refers to an original record that is also related, through
-- a relation other than 'is_different', to a different organization.
DELETE FROM organizations
WHERE id IN (
    SELECT pending.id
    FROM oa_duplicates AS suggestion
    INNER JOIN organizations AS pending
        ON pending.id = suggestion.local_id
    INNER JOIN oa_duplicates AS other_rel
        ON other_rel.oa_original_id = suggestion.oa_original_id
    INNER JOIN organizations AS other_org
        ON other_org.id = other_rel.local_id
    WHERE suggestion.reltype = 'suggested'
      AND other_rel.reltype <> 'is_different'
      AND other_rel.local_id <> suggestion.local_id
      AND pending.status = 'suggested'
      AND pending.created_by = 'dedupWf'
      AND pending.modified_by = 'dedupWf'
);
-- Remove invalid suggestions: a 'suggested' row is dropped as soon as any row
-- for the same oa_original_id is already marked 'is_similar' (i.e. a relation
-- for that original record has already been approved).
DELETE FROM oa_duplicates AS stale
WHERE stale.reltype = 'suggested'
  AND EXISTS (
      SELECT 1
      FROM oa_duplicates AS approved
      WHERE approved.oa_original_id = stale.oa_original_id
        AND approved.reltype = 'is_similar'
  );

COMMIT;