added a new working table to import the suggestions
This commit is contained in:
parent
140e14e48d
commit
8e353f7fa3
|
@ -1,6 +1,10 @@
|
||||||
CREATE OR REPLACE PROCEDURE import_dedup_events() LANGUAGE plpgsql AS $$
|
CREATE OR REPLACE PROCEDURE import_dedup_events() LANGUAGE plpgsql AS $$
|
||||||
BEGIN
|
BEGIN
|
||||||
|
|
||||||
|
-- MAKE A WORKING COPY OF THE TABLE PREPARED BY THE DEDUP JOB
|
||||||
|
DROP TABLE IF EXISTS tmp_dedup_events_work;
|
||||||
|
CREATE TABLE tmp_dedup_events_work AS SELECT * FROM tmp_dedup_events;
|
||||||
|
|
||||||
DELETE FROM oa_conflicts WHERE created_by = 'dedupWf' and modified_by = 'dedupWf' and reltype = 'suggested';
|
DELETE FROM oa_conflicts WHERE created_by = 'dedupWf' and modified_by = 'dedupWf' and reltype = 'suggested';
|
||||||
DELETE FROM oa_duplicates WHERE created_by = 'dedupWf' and modified_by = 'dedupWf' and reltype = 'suggested';
|
DELETE FROM oa_duplicates WHERE created_by = 'dedupWf' and modified_by = 'dedupWf' and reltype = 'suggested';
|
||||||
DELETE FROM organizations WHERE created_by = 'dedupWf' and modified_by = 'dedupWf' and status = 'suggested';
|
DELETE FROM organizations WHERE created_by = 'dedupWf' and modified_by = 'dedupWf' and status = 'suggested';
|
||||||
|
@ -10,51 +14,51 @@ DELETE FROM organizations WHERE created_by = 'dedupWf' and modified_by = 'dedupW
|
||||||
UPDATE organizations SET id = 'pending_org_::'||MD5(id) WHERE status = 'suggested' AND id NOT LIKE 'pending_org_::%';
|
UPDATE organizations SET id = 'pending_org_::'||MD5(id) WHERE status = 'suggested' AND id NOT LIKE 'pending_org_::%';
|
||||||
|
|
||||||
-- FIX IMPORT DATA
|
-- FIX IMPORT DATA
|
||||||
DELETE FROM tmp_dedup_events WHERE oa_original_id = '' OR oa_original_id IS NULL;
|
DELETE FROM tmp_dedup_events_work WHERE oa_original_id = '' OR oa_original_id IS NULL;
|
||||||
UPDATE tmp_dedup_events SET local_id = oa_original_id WHERE local_id = '' OR local_id IS NULL;
|
UPDATE tmp_dedup_events_work SET local_id = oa_original_id WHERE local_id = '' OR local_id IS NULL;
|
||||||
UPDATE tmp_dedup_events SET oa_country = 'UNKNOWN' WHERE oa_country = '' OR oa_country IS NULL;
|
UPDATE tmp_dedup_events_work SET oa_country = 'UNKNOWN' WHERE oa_country = '' OR oa_country IS NULL;
|
||||||
UPDATE tmp_dedup_events SET oa_name = oa_acronym WHERE oa_name = '' OR oa_name IS NULL;
|
UPDATE tmp_dedup_events_work SET oa_name = oa_acronym WHERE oa_name = '' OR oa_name IS NULL;
|
||||||
DELETE FROM tmp_dedup_events WHERE oa_name = '' OR oa_name IS NULL;
|
DELETE FROM tmp_dedup_events_work WHERE oa_name = '' OR oa_name IS NULL;
|
||||||
|
|
||||||
-- delete invalid relations (a raw org can not be suggested as duplicate and as new org)
|
-- delete invalid relations (a raw org can not be suggested as duplicate and as new org)
|
||||||
DELETE FROM tmp_dedup_events WHERE oa_original_id IN (
|
DELETE FROM tmp_dedup_events_work WHERE oa_original_id IN (
|
||||||
SELECT oa_original_id
|
SELECT oa_original_id
|
||||||
FROM tmp_dedup_events
|
FROM tmp_dedup_events_work
|
||||||
GROUP BY oa_original_id HAVING count(oa_original_id) > 1)
|
GROUP BY oa_original_id HAVING count(oa_original_id) > 1)
|
||||||
AND (local_id = '' OR local_id is NULL OR local_id = oa_original_id)
|
AND (local_id = '' OR local_id is NULL OR local_id = oa_original_id)
|
||||||
AND (group_id = '' OR group_id is NULL);
|
AND (group_id = '' OR group_id is NULL);
|
||||||
|
|
||||||
-- delete invalid relations (a raw org can not be suggested to multiple orgs)
|
-- delete invalid relations (a raw org can not be suggested to multiple orgs)
|
||||||
DELETE FROM tmp_dedup_events WHERE oa_original_id IN (
|
DELETE FROM tmp_dedup_events_work WHERE oa_original_id IN (
|
||||||
SELECT oa_original_id
|
SELECT oa_original_id
|
||||||
FROM tmp_dedup_events
|
FROM tmp_dedup_events_work
|
||||||
GROUP BY oa_original_id HAVING count(oa_original_id) > 1)
|
GROUP BY oa_original_id HAVING count(oa_original_id) > 1)
|
||||||
AND local_id NOT LIKE 'openorgs____::%';
|
AND local_id NOT LIKE 'openorgs____::%';
|
||||||
|
|
||||||
-- delete invalid groups (only one row)
|
-- delete invalid groups (only one row)
|
||||||
DELETE FROM tmp_dedup_events WHERE group_id IN (
|
DELETE FROM tmp_dedup_events_work WHERE group_id IN (
|
||||||
SELECT group_id
|
SELECT group_id
|
||||||
FROM tmp_dedup_events GROUP BY group_id
|
FROM tmp_dedup_events_work GROUP BY group_id
|
||||||
HAVING count(*) = 1
|
HAVING count(*) = 1
|
||||||
);
|
);
|
||||||
|
|
||||||
-- IMPORT MISSING TERMS
|
-- IMPORT MISSING TERMS
|
||||||
INSERT INTO id_types(val, name) SELECT distinct arr[2], arr[2] FROM (SELECT string_to_array(unnest(string_to_array(pid_list, '@@@')), '###') AS arr FROM tmp_dedup_events WHERE oa_original_id NOT LIKE 'openorgs\_\_\_\_::%') as c ON CONFLICT DO NOTHING;
|
INSERT INTO id_types(val, name) SELECT distinct arr[2], arr[2] FROM (SELECT string_to_array(unnest(string_to_array(pid_list, '@@@')), '###') AS arr FROM tmp_dedup_events_work WHERE oa_original_id NOT LIKE 'openorgs\_\_\_\_::%') as c ON CONFLICT DO NOTHING;
|
||||||
|
|
||||||
-- NEW ORGANIZATIONS (suggested)
|
-- NEW ORGANIZATIONS (suggested)
|
||||||
INSERT INTO organizations(id, name, country, status, ec_legalbody, ec_legalperson, ec_nonprofit, ec_researchorganization, ec_highereducation, ec_internationalorganizationeurinterests, ec_internationalorganization, ec_enterprise, ec_smevalidated, ec_nutscode, created_by, modified_by)
|
INSERT INTO organizations(id, name, country, status, ec_legalbody, ec_legalperson, ec_nonprofit, ec_researchorganization, ec_highereducation, ec_internationalorganizationeurinterests, ec_internationalorganization, ec_enterprise, ec_smevalidated, ec_nutscode, created_by, modified_by)
|
||||||
SELECT 'pending_org_::'||MD5(local_id), oa_name, oa_country, 'suggested', ec_legalbody, ec_legalperson, ec_nonprofit, ec_researchorganization, ec_highereducation, ec_internationalorganizationeurinterests, ec_internationalorganization, ec_enterprise, ec_smevalidated, ec_nutscode, 'dedupWf', 'dedupWf'
|
SELECT 'pending_org_::'||MD5(local_id), oa_name, oa_country, 'suggested', ec_legalbody, ec_legalperson, ec_nonprofit, ec_researchorganization, ec_highereducation, ec_internationalorganizationeurinterests, ec_internationalorganization, ec_enterprise, ec_smevalidated, ec_nutscode, 'dedupWf', 'dedupWf'
|
||||||
FROM tmp_dedup_events
|
FROM tmp_dedup_events_work
|
||||||
WHERE local_id NOT LIKE 'openorgs\_\_\_\_::%' AND local_id = oa_original_id
|
WHERE local_id NOT LIKE 'openorgs\_\_\_\_::%' AND local_id = oa_original_id
|
||||||
ON CONFLICT DO NOTHING;
|
ON CONFLICT DO NOTHING;
|
||||||
|
|
||||||
INSERT INTO acronyms(id, acronym)
|
INSERT INTO acronyms(id, acronym)
|
||||||
SELECT 'pending_org_::'||MD5(local_id), oa_acronym FROM tmp_dedup_events
|
SELECT 'pending_org_::'||MD5(local_id), oa_acronym FROM tmp_dedup_events_work
|
||||||
WHERE local_id NOT LIKE 'openorgs\_\_\_\_::%' AND local_id = oa_original_id AND oa_acronym IS NOT NULL AND oa_acronym != ''
|
WHERE local_id NOT LIKE 'openorgs\_\_\_\_::%' AND local_id = oa_original_id AND oa_acronym IS NOT NULL AND oa_acronym != ''
|
||||||
ON CONFLICT DO NOTHING;
|
ON CONFLICT DO NOTHING;
|
||||||
|
|
||||||
INSERT INTO urls(id, url)
|
INSERT INTO urls(id, url)
|
||||||
SELECT 'pending_org_::'||MD5(local_id), oa_url FROM tmp_dedup_events
|
SELECT 'pending_org_::'||MD5(local_id), oa_url FROM tmp_dedup_events_work
|
||||||
WHERE local_id NOT LIKE 'openorgs\_\_\_\_::%' AND local_id = oa_original_id AND oa_url IS NOT NULL AND oa_url != ''
|
WHERE local_id NOT LIKE 'openorgs\_\_\_\_::%' AND local_id = oa_original_id AND oa_url IS NOT NULL AND oa_url != ''
|
||||||
ON CONFLICT DO NOTHING;
|
ON CONFLICT DO NOTHING;
|
||||||
|
|
||||||
|
@ -62,7 +66,7 @@ INSERT INTO other_ids(id, otherid, type)
|
||||||
SELECT 'pending_org_::'||MD5(local_id), arr[1] AS otherid, arr[2] AS type
|
SELECT 'pending_org_::'||MD5(local_id), arr[1] AS otherid, arr[2] AS type
|
||||||
FROM (
|
FROM (
|
||||||
SELECT local_id, string_to_array(unnest(string_to_array(pid_list, '@@@')), '###') AS arr
|
SELECT local_id, string_to_array(unnest(string_to_array(pid_list, '@@@')), '###') AS arr
|
||||||
FROM tmp_dedup_events
|
FROM tmp_dedup_events_work
|
||||||
WHERE local_id NOT LIKE 'openorgs\_\_\_\_::%' AND local_id = oa_original_id
|
WHERE local_id NOT LIKE 'openorgs\_\_\_\_::%' AND local_id = oa_original_id
|
||||||
) as c
|
) as c
|
||||||
ON CONFLICT DO NOTHING;
|
ON CONFLICT DO NOTHING;
|
||||||
|
@ -70,7 +74,7 @@ ON CONFLICT DO NOTHING;
|
||||||
-- NEW ORGANIZATIONS (raw)
|
-- NEW ORGANIZATIONS (raw)
|
||||||
INSERT INTO organizations(id, name, country, status, ec_legalbody, ec_legalperson, ec_nonprofit, ec_researchorganization, ec_highereducation, ec_internationalorganizationeurinterests, ec_internationalorganization, ec_enterprise, ec_smevalidated, ec_nutscode, created_by, modified_by)
|
INSERT INTO organizations(id, name, country, status, ec_legalbody, ec_legalperson, ec_nonprofit, ec_researchorganization, ec_highereducation, ec_internationalorganizationeurinterests, ec_internationalorganization, ec_enterprise, ec_smevalidated, ec_nutscode, created_by, modified_by)
|
||||||
SELECT oa_original_id, oa_name, oa_country, 'raw', ec_legalbody, ec_legalperson, ec_nonprofit, ec_researchorganization, ec_highereducation, ec_internationalorganizationeurinterests, ec_internationalorganization, ec_enterprise, ec_smevalidated, ec_nutscode, 'dedupWf', 'dedupWf'
|
SELECT oa_original_id, oa_name, oa_country, 'raw', ec_legalbody, ec_legalperson, ec_nonprofit, ec_researchorganization, ec_highereducation, ec_internationalorganizationeurinterests, ec_internationalorganization, ec_enterprise, ec_smevalidated, ec_nutscode, 'dedupWf', 'dedupWf'
|
||||||
FROM tmp_dedup_events
|
FROM tmp_dedup_events_work
|
||||||
WHERE oa_original_id NOT LIKE 'openorgs\_\_\_\_::%'
|
WHERE oa_original_id NOT LIKE 'openorgs\_\_\_\_::%'
|
||||||
ON CONFLICT(id) DO UPDATE SET
|
ON CONFLICT(id) DO UPDATE SET
|
||||||
(name, country, ec_legalbody, ec_legalperson, ec_nonprofit, ec_researchorganization, ec_highereducation, ec_internationalorganizationeurinterests, ec_internationalorganization, ec_enterprise, ec_smevalidated, ec_nutscode, modification_date, modified_by) =
|
(name, country, ec_legalbody, ec_legalperson, ec_nonprofit, ec_researchorganization, ec_highereducation, ec_internationalorganizationeurinterests, ec_internationalorganization, ec_enterprise, ec_smevalidated, ec_nutscode, modification_date, modified_by) =
|
||||||
|
@ -78,13 +82,13 @@ ON CONFLICT(id) DO UPDATE SET
|
||||||
|
|
||||||
INSERT INTO acronyms(id, acronym)
|
INSERT INTO acronyms(id, acronym)
|
||||||
SELECT oa_original_id, oa_acronym
|
SELECT oa_original_id, oa_acronym
|
||||||
FROM tmp_dedup_events
|
FROM tmp_dedup_events_work
|
||||||
WHERE oa_original_id NOT LIKE 'openorgs\_\_\_\_::%' AND oa_acronym IS NOT NULL AND oa_acronym != ''
|
WHERE oa_original_id NOT LIKE 'openorgs\_\_\_\_::%' AND oa_acronym IS NOT NULL AND oa_acronym != ''
|
||||||
ON CONFLICT DO NOTHING;
|
ON CONFLICT DO NOTHING;
|
||||||
|
|
||||||
INSERT INTO urls(id, url)
|
INSERT INTO urls(id, url)
|
||||||
SELECT oa_original_id, oa_url
|
SELECT oa_original_id, oa_url
|
||||||
FROM tmp_dedup_events
|
FROM tmp_dedup_events_work
|
||||||
WHERE oa_original_id NOT LIKE 'openorgs\_\_\_\_::%' AND oa_url IS NOT NULL AND oa_url != ''
|
WHERE oa_original_id NOT LIKE 'openorgs\_\_\_\_::%' AND oa_url IS NOT NULL AND oa_url != ''
|
||||||
ON CONFLICT DO NOTHING;
|
ON CONFLICT DO NOTHING;
|
||||||
|
|
||||||
|
@ -93,7 +97,7 @@ SELECT oa_original_id, arr[1] AS otherid, arr[2] AS type
|
||||||
FROM (
|
FROM (
|
||||||
SELECT oa_original_id,
|
SELECT oa_original_id,
|
||||||
string_to_array(unnest(string_to_array(pid_list, '@@@')), '###') AS arr
|
string_to_array(unnest(string_to_array(pid_list, '@@@')), '###') AS arr
|
||||||
FROM tmp_dedup_events
|
FROM tmp_dedup_events_work
|
||||||
WHERE oa_original_id NOT LIKE 'openorgs\_\_\_\_::%'
|
WHERE oa_original_id NOT LIKE 'openorgs\_\_\_\_::%'
|
||||||
) as c
|
) as c
|
||||||
ON CONFLICT DO NOTHING;
|
ON CONFLICT DO NOTHING;
|
||||||
|
@ -101,23 +105,23 @@ ON CONFLICT DO NOTHING;
|
||||||
-- DUPLICATES (relations to openorgs)
|
-- DUPLICATES (relations to openorgs)
|
||||||
INSERT INTO oa_duplicates (local_id, oa_original_id, oa_collectedfrom, created_by, modified_by)
|
INSERT INTO oa_duplicates (local_id, oa_original_id, oa_collectedfrom, created_by, modified_by)
|
||||||
SELECT local_id, oa_original_id, oa_collectedfrom, 'dedupWf', 'dedupWf'
|
SELECT local_id, oa_original_id, oa_collectedfrom, 'dedupWf', 'dedupWf'
|
||||||
FROM tmp_dedup_events
|
FROM tmp_dedup_events_work
|
||||||
WHERE local_id LIKE 'openorgs\_\_\_\_::%' AND oa_original_id NOT LIKE 'openorgs\_\_\_\_::%'
|
WHERE local_id LIKE 'openorgs\_\_\_\_::%' AND oa_original_id NOT LIKE 'openorgs\_\_\_\_::%'
|
||||||
ON CONFLICT DO NOTHING;
|
ON CONFLICT DO NOTHING;
|
||||||
|
|
||||||
-- DUPLICATES (relations to suggested)
|
-- DUPLICATES (relations to suggested)
|
||||||
INSERT INTO oa_duplicates (local_id, oa_original_id, oa_collectedfrom, created_by, modified_by)
|
INSERT INTO oa_duplicates (local_id, oa_original_id, oa_collectedfrom, created_by, modified_by)
|
||||||
SELECT 'pending_org_::'||MD5(local_id), oa_original_id, oa_collectedfrom, 'dedupWf', 'dedupWf'
|
SELECT 'pending_org_::'||MD5(local_id), oa_original_id, oa_collectedfrom, 'dedupWf', 'dedupWf'
|
||||||
FROM tmp_dedup_events
|
FROM tmp_dedup_events_work
|
||||||
WHERE local_id NOT LIKE 'openorgs\_\_\_\_::%' AND oa_original_id NOT LIKE 'openorgs\_\_\_\_::%'
|
WHERE local_id NOT LIKE 'openorgs\_\_\_\_::%' AND oa_original_id NOT LIKE 'openorgs\_\_\_\_::%'
|
||||||
ON CONFLICT DO NOTHING;
|
ON CONFLICT DO NOTHING;
|
||||||
|
|
||||||
-- CONFLICTS (I generate all the couples)
|
-- CONFLICTS (I generate all the couples)
|
||||||
CREATE TEMPORARY TABLE tmp_conflict_groups AS
|
CREATE TEMPORARY TABLE tmp_conflict_groups AS
|
||||||
SELECT DISTINCT group_id as gid, local_id oid from tmp_dedup_events
|
SELECT DISTINCT group_id as gid, local_id oid from tmp_dedup_events_work
|
||||||
WHERE local_id LIKE 'openorgs\_\_\_\_::%' AND oa_original_id LIKE 'openorgs\_\_\_\_::%' AND local_id != oa_original_id AND group_id IS NOT NULL AND group_id != ''
|
WHERE local_id LIKE 'openorgs\_\_\_\_::%' AND oa_original_id LIKE 'openorgs\_\_\_\_::%' AND local_id != oa_original_id AND group_id IS NOT NULL AND group_id != ''
|
||||||
UNION
|
UNION
|
||||||
SELECT DISTINCT group_id as gid, oa_original_id oid from tmp_dedup_events
|
SELECT DISTINCT group_id as gid, oa_original_id oid from tmp_dedup_events_work
|
||||||
WHERE local_id LIKE 'openorgs\_\_\_\_::%' AND oa_original_id LIKE 'openorgs\_\_\_\_::%' AND local_id != oa_original_id AND group_id IS NOT NULL AND group_id != '';
|
WHERE local_id LIKE 'openorgs\_\_\_\_::%' AND oa_original_id LIKE 'openorgs\_\_\_\_::%' AND local_id != oa_original_id AND group_id IS NOT NULL AND group_id != '';
|
||||||
|
|
||||||
INSERT INTO oa_conflicts (id1, id2, idgroup, created_by, modified_by) SELECT DISTINCT
|
INSERT INTO oa_conflicts (id1, id2, idgroup, created_by, modified_by) SELECT DISTINCT
|
||||||
|
|
Loading…
Reference in New Issue