dnet-applications/apps/dnet-orgs-database-application/src/main/resources/sql/schema.sql

330 lines
13 KiB
PL/PgSQL

DROP VIEW IF EXISTS organizations_view;
DROP VIEW IF EXISTS organizations_info_view;
DROP VIEW IF EXISTS organizations_simple_view;
DROP VIEW IF EXISTS users_view;
DROP VIEW IF EXISTS conflict_groups_view;
DROP VIEW IF EXISTS suggestions_info_by_country_view;
DROP VIEW IF EXISTS duplicate_groups_view;
DROP TABLE IF EXISTS other_ids;
DROP TABLE IF EXISTS other_names;
DROP TABLE IF EXISTS acronyms;
DROP TABLE IF EXISTS relationships;
DROP TABLE IF EXISTS urls;
DROP TABLE IF EXISTS oa_duplicates;
DROP TABLE IF EXISTS oa_conflicts;
DROP TABLE IF EXISTS organizations;
DROP TABLE IF EXISTS org_types;
DROP TABLE IF EXISTS user_countries;
DROP TABLE IF EXISTS users;
DROP TABLE IF EXISTS user_roles;
DROP TABLE IF EXISTS countries;
DROP TABLE IF EXISTS id_types;
DROP TABLE IF EXISTS languages;
DROP SEQUENCE IF EXISTS organizations_id_seq;
CREATE TABLE org_types (val text PRIMARY KEY);
INSERT INTO org_types VALUES ('Archive'), ('Company'), ('Education'), ('Facility'), ('Government'), ('Healthcare'), ('Nonprofit'), ('Other'), ('UNKNOWN');
CREATE TABLE id_types (val text PRIMARY KEY);
INSERT INTO id_types VALUES ('CNRS'), ('FundRef'), ('HESA'), ('ISNI'), ('LinkedIn'), ('OrgRef'), ('UCAS'), ('UKPRN'), ('Wikidata'), ('GRID'), ('ROR');
CREATE TABLE languages (val text PRIMARY KEY);
INSERT INTO languages VALUES ('UNKNOWN'),
('aa'),('af'),('am'),('ar'),('as'),('az'),('ba'),('be'),('bg'),('bn'),('br'),('bs'),('ca'),('ch'),('co'),('cs'),('cy'),('da'),('de'),('dv'),
('dz'),('el'),('en'),('eo'),('es'),('et'),('eu'),('fa'),('fi'),('fo'),('fr'),('fy'),('ga'),('gd'),('gl'),('gu'),('he'),('hi'),('hr'),('ht'),
('hu'),('hy'),('id'),('is'),('it'),('iu'),('ja'),('jv'),('ka'),('kk'),('kl'),('km'),('kn'),('ko'),('ku'),('ky'),('la'),('lb'),('lo'),('lt'),
('lv'),('mg'),('mi'),('mk'),('ml'),('mn'),('mr'),('ms'),('mt'),('my'),('nb'),('ne'),('nl'),('no'),('oc'),('om'),('or'),('pa'),('pl'),('ps'),
('pt'),('rm'),('ro'),('ru'),('rw'),('sa'),('sd'),('si'),('sk'),('sl'),('sm'),('so'),('sq'),('sr'),('sv'),('sw'),('ta'),('te'),('tg'),('th'),
('tk'),('tl'),('tr'),('tt'),('ug'),('uk'),('ur'),('uz'),('vi'),('xh'),('yo'),('zh'),('zu');
CREATE TABLE countries (val text PRIMARY KEY);
INSERT INTO countries VALUES ('UNKNOWN'), ('AD'), ('AE'), ('AF'), ('AG'), ('AL'), ('AM'), ('AO'), ('AR'), ('AT'), ('AU'), ('AW'), ('AX'), ('AZ'), ('BA'), ('BB'), ('BD'), ('BE'), ('BF'), ('BG'), ('BH'), ('BI'),
('BJ'), ('BM'), ('BN'), ('BO'), ('BQ'), ('BR'), ('BS'), ('BT'), ('BW'), ('BY'), ('BZ'), ('CA'), ('CD'), ('CF'), ('CG'), ('CH'), ('CI'), ('CL'), ('CM'), ('CN'), ('CO'), ('CR'),
('CU'), ('CV'), ('CW'), ('CY'), ('CZ'), ('DE'), ('DJ'), ('DK'), ('DM'), ('DO'), ('DZ'), ('EC'), ('EE'), ('EG'), ('EH'), ('ER'), ('ES'), ('ET'), ('FI'), ('FJ'), ('FM'), ('FO'),
('FR'), ('GA'), ('GB'), ('GD'), ('GE'), ('GF'), ('GH'), ('GI'), ('GL'), ('GM'), ('GN'), ('GP'), ('GQ'), ('GR'), ('GT'), ('GW'), ('GY'), ('HN'), ('HR'), ('HT'), ('HU'), ('ID'),
('IE'), ('IL'), ('IM'), ('IN'), ('IQ'), ('IR'), ('IS'), ('IT'), ('JE'), ('JM'), ('JO'), ('JP'), ('KE'), ('KG'), ('KH'), ('KN'), ('KP'), ('KR'), ('KW'), ('KY'), ('KZ'), ('LA'),
('LB'), ('LC'), ('LI'), ('LK'), ('LR'), ('LS'), ('LT'), ('LU'), ('LV'), ('LY'), ('MA'), ('MC'), ('MD'), ('ME'), ('MG'), ('MK'), ('ML'), ('MM'), ('MN'), ('MO'), ('MQ'), ('MR'),
('MS'), ('MT'), ('MU'), ('MV'), ('MW'), ('MX'), ('MY'), ('MZ'), ('NA'), ('NC'), ('NE'), ('NG'), ('NI'), ('NL'), ('NO'), ('NP'), ('NU'), ('NZ'), ('OM'), ('PA'), ('PE'), ('PF'),
('PG'), ('PH'), ('PK'), ('PL'), ('PS'), ('PT'), ('PW'), ('PY'), ('QA'), ('RE'), ('RO'), ('RS'), ('RU'), ('RW'), ('SA'), ('SB'), ('SC'), ('SD'), ('SE'), ('SG'), ('SI'), ('SJ'),
('SK'), ('SL'), ('SM'), ('SN'), ('SO'), ('SR'), ('SS'), ('ST'), ('SV'), ('SX'), ('SY'), ('SZ'), ('TC'), ('TD'), ('TG'), ('TH'), ('TJ'), ('TL'), ('TM'), ('TN'), ('TO'), ('TR'),
('TT'), ('TV'), ('TW'), ('TZ'), ('UA'), ('UG'), ('US'), ('UY'), ('UZ'), ('VA'), ('VC'), ('VE'), ('VG'), ('VN'), ('WS'), ('XK'), ('YE'), ('ZA'), ('ZM'), ('ZW');
CREATE TABLE user_roles(role text PRIMARY KEY);
INSERT INTO user_roles VALUES ('ADMIN'), ('NATIONAL_ADMIN'), ('USER'), ('PENDING'), ('NOT_AUTHORIZED');
CREATE TABLE users (
email text PRIMARY KEY,
valid boolean DEFAULT true,
role text NOT NULL default 'USER' REFERENCES user_roles(role)
);
CREATE TABLE user_countries (
email text REFERENCES users(email),
country text REFERENCES countries(val),
PRIMARY KEY(email, country)
);
CREATE SEQUENCE organizations_id_seq;
CREATE TABLE organizations (
id text PRIMARY KEY DEFAULT 'openorgs____::'||lpad(nextval('organizations_id_seq')::text,10,'0'),
name text,
type text NOT NULL DEFAULT 'UNKNOWN' REFERENCES org_types(val),
lat double precision,
lng double precision,
city text,
country text REFERENCES countries(val),
created_by text,
creation_date timestamp with time zone DEFAULT now(),
modified_by text,
modification_date timestamp with time zone DEFAULT now(),
status text NOT NULL DEFAULT 'pending'
);
CREATE INDEX organizations_type_idx ON organizations(type);
CREATE INDEX organizations_country_idx ON organizations(country);
CREATE TABLE other_ids (
id text REFERENCES organizations(id) ON UPDATE CASCADE,
otherid text,
type text REFERENCES id_types(val),
PRIMARY KEY (id, otherid, type)
);
CREATE INDEX other_ids_id_idx ON other_ids(id);
CREATE TABLE other_names (
id text REFERENCES organizations(id) ON UPDATE CASCADE,
name text,
lang text REFERENCES languages(val),
PRIMARY KEY (id, name, lang)
);
CREATE INDEX other_names_id_idx ON other_names(id);
CREATE TABLE acronyms (
id text REFERENCES organizations(id) ON UPDATE CASCADE,
acronym text,
PRIMARY KEY (id, acronym)
);
CREATE INDEX acronyms_id_idx ON acronyms(id);
CREATE TABLE relationships (
id1 text REFERENCES organizations(id) ON UPDATE CASCADE,
reltype text,
id2 text REFERENCES organizations(id) ON UPDATE CASCADE,
PRIMARY KEY (id1, reltype, id2)
);
CREATE INDEX relationships_id1_idx ON relationships(id1);
CREATE INDEX relationships_id2_idx ON relationships(id2);
CREATE TABLE urls (
id text REFERENCES organizations(id) ON UPDATE CASCADE,
url text,
PRIMARY KEY (id, url)
);
CREATE INDEX urls_id_idx ON urls(id);
CREATE TABLE oa_duplicates (
local_id text REFERENCES organizations(id) ON UPDATE CASCADE,
oa_original_id text NOT NULL,
oa_name text NOT NULL,
oa_acronym text,
oa_country text,
oa_url text,
oa_collectedfrom text,
reltype text NOT NULL DEFAULT 'suggested',
creation_date timestamp DEFAULT NOW(),
modification_date timestamp,
modified_by text,
PRIMARY KEY (local_id, oa_original_id)
);
CREATE INDEX oa_duplicates_local_id_idx ON oa_duplicates(local_id);
CREATE TABLE oa_conflicts (
id1 text REFERENCES organizations(id) ON UPDATE CASCADE,
id2 text REFERENCES organizations(id) ON UPDATE CASCADE,
reltype text NOT NULL DEFAULT 'suggested',
idgroup text,
creation_date timestamp DEFAULT NOW(),
modification_date timestamp,
modified_by text,
PRIMARY KEY (id1, id2)
);
CREATE INDEX oa_conflicts_id1_idx ON oa_conflicts(id1);
CREATE INDEX oa_conflicts_id2_idx ON oa_conflicts(id2);
CREATE INDEX oa_conflicts_idgroup_idx ON oa_conflicts(idgroup);
CREATE VIEW organizations_view AS SELECT
org.id,
org.name,
org.type,
org.lat,
org.lng,
org.city,
org.country,
org.status,
COALESCE(jsonb_agg(DISTINCT jsonb_build_object('id', oid.otherid, 'type', oid.type)) FILTER (WHERE oid.otherid IS NOT NULL), '[]') AS other_ids,
COALESCE(jsonb_agg(DISTINCT jsonb_build_object('name', n.name, 'lang', n.lang)) FILTER (WHERE n.name IS NOT NULL), '[]') AS other_names,
COALESCE(jsonb_agg(DISTINCT a.acronym) FILTER (WHERE a.acronym IS NOT NULL), '[]') AS acronyms,
COALESCE(jsonb_agg(DISTINCT u.url) FILTER (WHERE u.url IS NOT NULL), '[]') AS urls,
COALESCE(jsonb_agg(DISTINCT jsonb_build_object('relatedOrgId', relorg.id, 'relatedOrgName', relorg.name, 'type', r.reltype)) FILTER (WHERE relorg.id IS NOT NULL), '[]') AS relations
FROM
organizations org
LEFT OUTER JOIN other_ids oid ON (org.id = oid.id)
LEFT OUTER JOIN other_names n ON (org.id = n.id)
LEFT OUTER JOIN acronyms a ON (org.id = a.id)
LEFT OUTER JOIN urls u ON (org.id = u.id)
LEFT OUTER JOIN relationships r ON (org.id = r.id1)
LEFT OUTER JOIN organizations relorg ON (relorg.id = r.id2)
GROUP BY
org.id,
org.name,
org.type,
org.lat,
org.lng,
org.city,
org.country,
org.status;
CREATE VIEW organizations_info_view AS SELECT
org.id,
org.name,
org.created_by,
org.creation_date,
org.modified_by,
org.modification_date,
count(DISTINCT d.oa_original_id) as n_duplicates,
count(DISTINCT c.id2) as n_conflicts
FROM organizations org
LEFT OUTER JOIN oa_duplicates d ON (org.id = d.local_id AND d.reltype = 'suggested')
LEFT OUTER JOIN oa_conflicts c ON (org.id = c.id1 AND c.reltype = 'suggested')
GROUP BY org.id;
CREATE VIEW organizations_simple_view AS SELECT
org.id,
org.name,
org.type,
org.city,
org.country,
org.status,
array_remove(array_agg(DISTINCT a.acronym), NULL) AS acronyms
FROM
organizations org
LEFT OUTER JOIN acronyms a ON (org.id = a.id)
GROUP BY
org.id,
org.name,
org.type,
org.city,
org.country,
org.status;
CREATE VIEW users_view AS SELECT
u.email,
u.valid,
u.role,
array_remove(array_agg(uc.country), NULL) AS countries
FROM
users u
LEFT OUTER JOIN user_countries uc ON (u.email = uc.email)
GROUP BY u.email, u.valid, u.role
ORDER BY u.email;
CREATE VIEW suggestions_info_by_country_view AS SELECT c.val AS country,
coalesce(t1.n_duplicates, 0) AS n_duplicates,
coalesce(t2.n_conflicts, 0) AS n_conflicts,
coalesce(t3.n_pending_orgs, 0) AS n_pending_orgs
FROM countries c
LEFT OUTER JOIN (SELECT o.country AS country, count(DISTINCT d.*) AS n_duplicates FROM oa_duplicates d LEFT OUTER JOIN organizations o ON (d.local_id = o.id) WHERE d.reltype = 'suggested' GROUP BY o.country) AS t1 ON (t1.country = c.val)
LEFT OUTER JOIN (SELECT o.country AS country, count(DISTINCT c.idgroup) AS n_conflicts FROM oa_conflicts c LEFT OUTER JOIN organizations o ON (c.id1 = o.id) WHERE c.reltype = 'suggested' GROUP BY o.country) AS t2 ON (t2.country = c.val)
LEFT OUTER JOIN (SELECT o.country AS country, count(DISTINCT o.id) AS n_pending_orgs FROM organizations o WHERE o.status = 'pending' GROUP BY o.country) AS t3 ON (t3.country = c.val);
CREATE VIEW conflict_groups_view AS SELECT
c.idgroup AS idgroup,
o1.id AS id_1,
o1.name AS name_1,
o1.type AS type_1,
o1.city AS city_1,
o1.country AS country_1,
o2.id AS id_2,
o2.name AS name_2,
o2.type AS type_2,
o2.city AS city_2,
o2.country AS country_2
FROM
oa_conflicts c
LEFT OUTER JOIN organizations o1 ON (c.id1 = o1.id)
LEFT OUTER JOIN organizations o2 ON (c.id2 = o2.id)
WHERE
o1.id IS NOT NULL AND O2.id IS NOT NULL AND c.idgroup IS NOT NULL;
CREATE VIEW duplicate_groups_view AS SELECT
o.id,
o.name,
o.city,
o.country,
count(d.*) as n_duplicates
FROM
oa_duplicates d
LEFT OUTER JOIN organizations o ON (o.id = d.local_id)
WHERE
d.reltype = 'suggested'
GROUP BY o.id, o.name, o.city, o.country
ORDER BY o.name;
CREATE TABLE org_index_search(id text PRIMARY KEY, txt tsvector);
CREATE INDEX org_index_search_txt_idx ON org_index_search(txt);
INSERT INTO org_index_search(id, txt) SELECT
o.id,
to_tsvector(o.id||' '||o.name||' '||array_to_string(array_agg(DISTINCT n.name), ' ','')||' ' ||array_to_string(array_agg(DISTINCT a.acronym), ' ',''))
FROM organizations o
LEFT OUTER JOIN other_names n on (o.id = n.id)
LEFT OUTER JOIN acronyms a on (o.id = a.id)
GROUP BY o.id, o.name;
CREATE OR REPLACE FUNCTION delete_index_search() RETURNS trigger LANGUAGE plpgsql AS $$
BEGIN
DELETE FROM org_index_search WHERE id = old.id;
RETURN OLD;
END;
$$;
CREATE OR REPLACE FUNCTION insert_or_update_index_search_trigger() RETURNS trigger LANGUAGE plpgsql AS $$
BEGIN
INSERT INTO org_index_search(id, txt) (SELECT
o.id,
to_tsvector(o.id||' '||o.name||' '||array_to_string(array_agg(DISTINCT n.name), ' ','')||' ' ||array_to_string(array_agg(DISTINCT a.acronym), ' ',''))
FROM organizations o
LEFT OUTER JOIN other_names n on (o.id = n.id)
LEFT OUTER JOIN acronyms a on (o.id = a.id)
WHERE o.id = new.id
GROUP BY o.id, o.name)
ON CONFLICT (id) DO UPDATE SET txt = EXCLUDED.txt;
RETURN NEW;
END;
$$;
CREATE TRIGGER insert_or_update_index_search_trigger AFTER INSERT OR UPDATE ON organizations FOR EACH ROW EXECUTE PROCEDURE insert_or_update_index_search_trigger();
CREATE TRIGGER delete_index_search_trigger BEFORE DELETE ON organizations FOR EACH ROW EXECUTE PROCEDURE delete_index_search();