remove invalid path

This commit is contained in:
Michele Artini 2022-02-23 12:01:44 +01:00
parent 13c4ef23b9
commit 5ea73251e3
704 changed files with 0 additions and 125541 deletions

View File

@ -1,109 +0,0 @@
First Import
============
The first import of the organizations should be performed using the sql script: first_import_grid_ac.sql
1) Download the last dump from https://www.grid.ac
2) Update the paths in the sql script
3) Launch the script
If you want to add missing ROR identifiers:
1) Download ror.json from https://figshare.com/collections/ROR_Data/4596503
2) Update the paths in prepare_grid_ror_update.pl and update_ror_ids.sql
3) Launch prepare_grid_ror_update.pl
4) Launch update_ror_ids.sql
NB: The grid.ac dump is richer than the ROR dump: ROR does not provide some fields (city, lat, lng) or the hierarchical relationships among the organizations.
If grid.ac is DEPRECATED, we'll start using the import from ROR (a script is available: prepare_import_ror.pl)
General Description
===================
# Schema
Main table:
organizations
Tables for Multiple properties:
acronyms,
urls,
other_ids,
other_names
Tables for vocabularies:
countries,
languages,
id_types,
org_types,
relationships (ie: child, parent, merged_in, merges, ...)
Table for conflicts and duplicates:
oa_conflicts,
oa_duplicates
Specific Views for the UI:
organizations_view
organizations_simple_view
organizations_info_view
suggestions_info_by_country_view
oa_duplicates_view
conflict_groups_view
duplicate_groups_view
To manage authorizations:
users,
user_roles,
user_countries,
users_view (VIEW)
Other:
organizations_id_seq (SEQUENCE to generate new OpenOrg IDs),
org_index_search (for fulltext search),
tmp_dedup_events (to import new suggestion from DedupWF)
# User Roles
User:
He can work only on organizations of specific countries
He can edit metadata of approved organizations
He can manage duplicates
National Admin:
All the User rights
He can work only on organizations of specific countries
He can approve/register organizations
He can manage conflicts
He can approve users of his own countries
Super Admin:
All the National Admin rights, but for all countries
# Actions
1) Create a new org from scratch
The ID is a valid OpenOrgId (generated by the system)
The status is 'approved'
2) Approve a suggested org (prefix: pending_org_::)
ID: A new org is created with OpenOrg Id and status='approved'
Copy the duplicates from old to new organizations (status will be 'suggested')
The pending org is deleted
3) Approve a suggested duplicate (the status of the duplicates is always 'raw')
in oa_duplicates: reltype = 'is_similar'
4) Discard a suggested duplicate
in oa_duplicates: reltype = 'is_different'
5) Resolve a conflict using a subset of suggested conflicts (approve)
Generate a new org
New org status: 'approved'
Conflict reltype: 'is_similar'
Old orgs status: 'hidden'
Rels new <-> old : 'merges'
Rels old <-> new : 'merged_in'
6) Resolve a conflict using a subset of suggested conflicts (discard)
Conflict reltype: 'is_different'
# Load of new suggestion using a Dedup Workflow
The dedup wf writes the suggestions to the tmp_dedup_events table; at the end it calls the method /import/dedupEvents
The previous suggestions (orgs, dups and conflicts) are deleted
The suggestions are moved from the temp table according to:
1) not(isOpenOrg(oa_original_id)) AND (oa_original_id = local_id OR isEmpty(local_id)) -> new suggested org with id = 'pending_org_::...'
2) not(isOpenOrg(oa_original_id)) AND (oa_original_id != local_id OR isEmpty(local_id)) -> duplicate of a suggested org
3) isOpenOrg(oa_original_id) AND (oa_original_id != local_id OR isEmpty(local_id)) -> duplicate of an existing OpenOrg
4) Create a group using 'group_id', it should contain only OpenOrg Ids (using oa_original_id and local_id): each couple of the group is a conflict

View File

@ -1,115 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<!-- Maven module for the dnet-orgs-database-application, packaged as a jar. -->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<!-- Parent module, resolved from the directory above this one. -->
<parent>
<groupId>eu.dnetlib.dhp</groupId>
<artifactId>apps</artifactId>
<version>3.2.4-SNAPSHOT</version>
<relativePath>../</relativePath>
</parent>
<modelVersion>4.0.0</modelVersion>
<artifactId>dnet-orgs-database-application</artifactId>
<packaging>jar</packaging>
<!-- Most dependencies declare no version here; presumably versions are managed by the parent POM (confirm there). -->
<dependencies>
<!-- Spring Boot starters: templates, JPA, JSON, security, OAuth2 client -->
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-thymeleaf</artifactId>
</dependency>
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-data-jpa</artifactId>
</dependency>
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-json</artifactId>
</dependency>
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-security</artifactId>
</dependency>
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-oauth2-client</artifactId>
</dependency>
<dependency>
<groupId>org.thymeleaf.extras</groupId>
<artifactId>thymeleaf-extras-springsecurity5</artifactId>
</dependency>
<!-- PostgreSQL JDBC driver and additional Hibernate types -->
<dependency>
<groupId>org.postgresql</groupId>
<artifactId>postgresql</artifactId>
</dependency>
<dependency>
<groupId>com.vladmihalcea</groupId>
<artifactId>hibernate-types-52</artifactId>
</dependency>
<!-- JAXB API, java.xml.bind module -->
<dependency>
<groupId>jakarta.xml.bind</groupId>
<artifactId>jakarta.xml.bind-api</artifactId>
</dependency>
<!-- JAXB Runtime, com.sun.xml.bind module -->
<dependency>
<groupId>org.glassfish.jaxb</groupId>
<artifactId>jaxb-runtime</artifactId>
</dependency>
<!-- CSV -->
<!-- The only dependency in this file with an explicitly pinned version. -->
<dependency>
<groupId>com.opencsv</groupId>
<artifactId>opencsv</artifactId>
<version>5.4</version>
</dependency>
<!-- hot swapping, disable cache for template, enable live reload -->
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-devtools</artifactId>
<optional>true</optional>
</dependency>
<!-- Tests -->
<dependency>
<groupId>org.junit.jupiter</groupId>
<artifactId>junit-jupiter</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.mockito</groupId>
<artifactId>mockito-core</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.mockito</groupId>
<artifactId>mockito-junit-jupiter</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-test</artifactId>
<scope>test</scope>
</dependency>
</dependencies>
<build>
<plugins>
<!-- NOTE(review): application.properties points maven.pom.path at an effective-pom.xml; presumably this plugin generates it via configuration inherited from the parent (confirm). -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-help-plugin</artifactId>
</plugin>
<plugin>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-maven-plugin</artifactId>
</plugin>
</plugins>
</build>
</project>

File diff suppressed because it is too large Load Diff

View File

@ -1,43 +0,0 @@
#!/usr/bin/perl
use File::Path 'make_path';
use JSON::Parse 'json_file_to_perl';
use Data::Dumper;
use strict;
use utf8;
binmode(STDOUT, ":utf8");
# THE LATEST VERSION OF ror.json IS AVAILABLE AT https://figshare.com/collections/ROR_Data/4596503
#
# Reads the ROR dump and writes a two-column TSV (ROR id <TAB> GRID id)
# containing one row for every record that has both identifiers.
my $inputFile = '../../../../data/ror.json';
my $outputFile = '../../../../data/ror_grid.tsv';

# The loop below treats the parsed dump as a reference to an array of records.
my $data = json_file_to_perl($inputFile);

# Three-argument open: the path can never be interpreted as an open mode,
# and the error message reports which file failed and why.
open(my $OUT, '>', $outputFile) or die("Can't open the output file $outputFile: $!");
binmode($OUT, ":utf8");

foreach my $record (@$data) {
	my $rorId = $record->{'id'};

	# Direct lookup of the GRID entry; the ref() guards avoid both
	# autovivification and a crash on records without external ids.
	my $externalIds = $record->{'external_ids'};
	my $grid = (ref $externalIds eq 'HASH') ? $externalIds->{'GRID'} : undef;
	my $all = (ref $grid eq 'HASH') ? $grid->{'all'} : undef;

	# 'all' is normally a single string; if it is a list, emit one row per
	# id instead of printing the stringified array reference.
	my @gridIds = (ref $all eq 'ARRAY') ? @$all : ($all);

	foreach my $gridId (@gridIds) {
		next unless ($rorId && $gridId);
		print $OUT $rorId, "\t", $gridId, "\n";
	}
}

# A failing close is the only notification of a late write error (e.g. disk full).
close($OUT) or die("Can't close the output file $outputFile: $!");
print "\nDone.\n\n";
1;

View File

@ -1,190 +0,0 @@
#!/usr/bin/perl
# NOTE: this script MUST NOT BE USED for now: it is kept only for the case in which grid.ac is deprecated
use File::Path 'make_path';
use Digest::MD5 qw(md5_hex);
use JSON::Parse 'json_file_to_perl';
use Data::Dumper;
use strict;
use utf8;
binmode(STDOUT, ":utf8");
# THE LATEST VERSION OF ror.json IS AVAILABLE AT https://figshare.com/collections/ROR_Data/4596503
#
# Converts the ROR dump into one TSV file per target table, written under $outputDir.
my $inputFile = '../../../../data/ror.json';
my $outputDir = '../../../../data/ror_tables';
make_path $outputDir or die "Failed to create path: $outputDir" unless (-d $outputDir);

# The loop below treats the parsed dump as a reference to an array of records.
my $data = json_file_to_perl($inputFile);

# Three-argument open keeps the path from being parsed as an open mode;
# every failure message names the file and the system error.
open(my $OUT_ORGS, '>', "$outputDir/organizations.tsv") or die("Can't open the output file $outputDir/organizations.tsv: $!");
open(my $OUT_OTHER_IDS, '>', "$outputDir/other_ids.tsv") or die("Can't open the output file $outputDir/other_ids.tsv: $!");
open(my $OUT_OTHER_NAMES, '>', "$outputDir/other_names.tsv") or die("Can't open the output file $outputDir/other_names.tsv: $!");
open(my $OUT_ACRONYMS, '>', "$outputDir/acronyms.tsv") or die("Can't open the output file $outputDir/acronyms.tsv: $!");
open(my $OUT_RELS, '>', "$outputDir/relationships.tsv") or die("Can't open the output file $outputDir/relationships.tsv: $!");
open(my $OUT_URLS, '>', "$outputDir/urls.tsv") or die("Can't open the output file $outputDir/urls.tsv: $!");

binmode($_, ":utf8") foreach ($OUT_ORGS, $OUT_OTHER_IDS, $OUT_OTHER_NAMES, $OUT_ACRONYMS, $OUT_RELS, $OUT_URLS);

foreach my $record (@$data) {
	# Temporary identifier derived from the ROR id of the record.
	my $id = 'tmp::' . md5_hex($record->{'id'});
	write_orgs($id, $record, $OUT_ORGS);
	write_other_ids($id, $record, $OUT_OTHER_IDS);
	write_other_names($id, $record, $OUT_OTHER_NAMES);
	write_acronyms($id, $record, $OUT_ACRONYMS);
	# Relationships are not exported yet: relationships.tsv is created but stays empty.
	# write_rels($id, $record, $OUT_RELS);
	write_urls($id, $record, $OUT_URLS);
}

# A failing close is the only notification of a late write error (e.g. disk full).
foreach my $fh ($OUT_ORGS, $OUT_OTHER_IDS, $OUT_OTHER_NAMES, $OUT_ACRONYMS, $OUT_RELS, $OUT_URLS) {
	close($fh) or die("Can't close an output file: $!");
}
print "\nDone.\n\n";
# Writes the organizations.tsv row of one record: nine tab-separated columns
# (id, name, type, lat, lng, city, country code, created_by, modified_by).
#   $id     - identifier to write in the first column
#   $record - hash reference of the ROR record
#   $OUT    - output file handle
sub write_orgs {
	my ($orgId, $rec, $fh) = @_;
	my @columns = (
		$orgId,
		$rec->{'name'},
		scalar(getFirstArrayElem($rec->{'types'}, 'UNKNOWN')),
		0,            # lat - TODO MISSING
		0,            # lng - TODO MISSING
		"",           # city - TODO MISSING
		$rec->{'country'}->{'country_code'},
		"import:ror", # created_by
		"import:ror", # modified_by
	);
	print $fh join("\t", @columns), "\n";
}
# Writes the other_ids.tsv rows of one record: first the ROR id itself
# (type 'ror'), then every external id, using the external id key as type.
# The 'all' entry of an external id may be a single value or a list of values.
sub write_other_ids {
	my ($orgId, $rec, $fh) = @_;
	_write_other_ids($orgId, $rec->{'id'}, 'ror', $fh);
	foreach my $idType (keys %{$rec->{'external_ids'}}) {
		my $entry = $rec->{'external_ids'}->{$idType};
		my $values = $entry->{'all'};
		if (ref $values ne 'ARRAY') {
			_write_other_ids($orgId, $values, $idType, $fh);
			next;
		}
		_write_other_ids($orgId, $_, $idType, $fh) foreach (@$values);
	}
}
# Writes a single "id <TAB> other id <TAB> type" row.
# Nothing is written when the other id is false (undef, empty string or 0).
sub _write_other_ids {
	my ($orgId, $value, $idType, $fh) = @_;
	return unless $value;
	print $fh join("\t", $orgId, $value, $idType), "\n";
}
# Writes the other_names.tsv rows of one record:
#   - the main name, with language 'en'
#   - every alias, with language 'UNKNOWN'
#   - every label, with the language code found in its 'iso639' field
sub write_other_names {
	my ($orgId, $rec, $fh) = @_;
	_write_other_names($orgId, $rec->{'name'}, 'en', $fh);
	_write_other_names($orgId, $_, 'UNKNOWN', $fh) foreach (@{$rec->{'aliases'}});
	foreach my $labelEntry (@{$rec->{'labels'}}) {
		_write_other_names($orgId, $labelEntry->{label}, $labelEntry->{'iso639'}, $fh);
	}
}
# Writes a single "id <TAB> name <TAB> language" row.
# Nothing is written when the name is false (undef, empty string or 0).
sub _write_other_names {
	my ($orgId, $label, $language, $fh) = @_;
	return unless $label;
	print $fh join("\t", $orgId, $label, $language), "\n";
}
# Writes one "id <TAB> acronym" row for each entry of the record's 'acronyms' list.
sub write_acronyms {
	my ($orgId, $rec, $fh) = @_;
	print $fh "$orgId\t$_\n" foreach (@{$rec->{'acronyms'}});
}
# Placeholder: relationships are not exported yet, so nothing is written.
# Row layout sketched by the original author (still TODO):
#   id <TAB> reltype <TAB> id2
# where id2 would be built like the other temporary ids,
# e.g. 'tmp::'||md5(o.grid_id) in the SQL import.
sub write_rels {
	my ($orgId, $rec, $fh) = @_;
}
# Writes one "id <TAB> url" row for each entry of the record's 'links' list.
sub write_urls {
	my ($orgId, $rec, $fh) = @_;
	print $fh "$orgId\t$_\n" foreach (@{$rec->{'links'}});
}
# Returns the first element of an array reference, or $default when there
# is no first element to return.
#   $arr     - array reference; may be undef or not an array reference at all
#   $default - value returned when $arr is missing, not an array, or empty
# The ref() guard matters: under "use strict", dereferencing an undefined
# value as an array is a fatal error, so a record without the expected
# list would otherwise abort the whole export.
sub getFirstArrayElem {
	my ($arr, $default) = @_;
	return $default unless (ref $arr eq 'ARRAY' && @$arr);
	return $arr->[0];
}
1;

View File

@ -1,5 +0,0 @@
{"properties": [{
"name": "openaire.api.valid.subnet",
"type": "java.lang.String",
"description": "Subnet, in CIDR notation (e.g. 10.19.65.0/24), accepted as valid for the OpenAIRE API"
}]}

View File

@ -1,50 +0,0 @@
# HTTP port and default Spring profile
server.port=8480
spring.profiles.active=dev
# Classpath location of the effective POM of this module
maven.pom.path = /META-INF/maven/eu.dnetlib.dhp/dnet-orgs-database-application/effective-pom.xml
spring.main.banner-mode = off
logging.level.root = INFO
#logging.level.org.springframework = DEBUG
# Actuator: only prometheus and health are exposed, remapped to /metrics and /health
management.endpoints.web.exposure.include = prometheus,health
management.endpoints.web.base-path = /
management.endpoints.web.path-mapping.prometheus = metrics
management.endpoints.web.path-mapping.health = health
# PostgreSQL connection (the password is intentionally left empty here)
spring.datasource.url=jdbc:postgresql://localhost:5432/oa_organizations
spring.datasource.username=oa_organizations
spring.datasource.password=
# Hibernate ddl auto (create, create-drop, validate, update)
spring.jpa.hibernate.ddl-auto = validate
spring.jpa.properties.hibernate.dialect = org.hibernate.dialect.PostgreSQLDialect
# The Hibernate setting is "hbm2ddl" (the key was previously misspelled "hbm2dll",
# so it was silently ignored): it makes schema tooling treat materialized views as tables.
spring.jpa.properties.hibernate.hbm2ddl.extra_physical_table_types = MATERIALIZED VIEW
spring.jpa.properties.hibernate.jdbc.lob.non_contextual_creation=true
spring.jpa.open-in-view=true
spring.jpa.properties.hibernate.show_sql=false
spring.jpa.properties.hibernate.use_sql_comments=false
spring.jpa.properties.hibernate.format_sql=false
# the ICM private network
openaire.api.valid.subnet = 10.19.65.0/24
openaire.api.https.proxy = 10.19.65.35
# OIDC client registration (disabled).
# NOTE(review): the client-secret below is a credential committed to version control, even if
# commented out; it should be considered compromised, rotated, and supplied from the environment.
# spring.security.oauth2.client.registration.oidc.provider = oidc
# spring.security.oauth2.client.registration.oidc.client-id = 964b69cd-4658-4251-a153-edfadfaf15aa
# spring.security.oauth2.client.registration.oidc.client-secret = ALsqw6oBp7J0JOYmWExlT6PMN3R8-j413KOipsDZJVOPv1EfMwHfiDhvsa96gkiU8YmIpGmJgLDkDycvQp30QiE
# spring.security.oauth2.client.registration.oidc.scope = openid,email
# #spring.security.oauth2.client.registration.oidc.redirect-uri = https://beta.orgs.openaire.eu/login/oauth2/code/oidc
# spring.security.oauth2.client.provider.oidc.issuer-uri = http://localhost:8080/openid-connect-server-webapp/
# spring.security.oauth2.client.provider.oidc.authorization-uri = http://localhost:8080/openid-connect-server-webapp/authorize
# spring.security.oauth2.client.provider.oidc.jwk-set-uri = http://localhost:8080/openid-connect-server-webapp/jwk
# spring.security.oauth2.client.provider.oidc.token-uri = http://localhost:8080/openid-connect-server-webapp/token
# spring.security.oauth2.client.provider.oidc.user-info-uri = http://localhost:8080/openid-connect-server-webapp/userinfo
# Support links, as a JSON object mapping a label to its URL
openorgs.support.pages = { "Ask a question": "https://www.openaire.eu/support/helpdesk?view=ticket&layout=open", "FAQ": "https://www.openaire.eu/faqs" }
openaire.override.logout.url =

View File

@ -1,103 +0,0 @@
-- Session-local staging tables, one per CSV file of the grid.ac dump.
-- Column names and order follow the column lists of the COPY statements that load them.

-- institutes.csv
CREATE TEMPORARY TABLE grid_institutes (
    grid_id       text,
    name          text,
    wikipedia_url text,
    email_address text,
    established   integer
);

-- geonames.csv (loaded by this script but not read by any of its INSERT statements)
CREATE TEMPORARY TABLE grid_geonames (
    geonames_city_id           text,
    city                       text,
    nuts_level1_code           text,
    nuts_level1_name           text,
    nuts_level2_code           text,
    nuts_level2_name           text,
    nuts_level3_code           text,
    nuts_level3_name           text,
    geonames_admin1_code       text,
    geonames_admin1_name       text,
    geonames_admin1_ascii_name text,
    geonames_admin2_code       text,
    geonames_admin2_name       text,
    geonames_admin2_ascii_name text
);

-- addresses.csv
-- NOTE(review): geonames_city_id is an integer here but text in grid_geonames.
CREATE TEMPORARY TABLE grid_addresses (
    grid_id          text,
    line_1           text,
    line_2           text,
    line_3           text,
    lat              double precision,
    lng              double precision,
    postcode         text,
    is_primary       boolean,
    city             text,
    state            text,
    state_code       text,
    country          text,
    country_code     text,
    geonames_city_id integer
);

-- external_ids.csv
CREATE TEMPORARY TABLE grid_external_ids (
    grid_id          text,
    external_id_type text,
    external_id      text
);

-- labels.csv
CREATE TEMPORARY TABLE grid_labels (
    grid_id text,
    iso639  text,
    label   text
);

-- relationships.csv
CREATE TEMPORARY TABLE grid_relationships (
    grid_id           text,
    relationship_type text,
    related_grid_id   text
);

-- types.csv
CREATE TEMPORARY TABLE grid_types (
    grid_id text,
    type    text
);

-- links.csv
CREATE TEMPORARY TABLE grid_links (
    grid_id text,
    link    text
);

-- acronyms.csv
CREATE TEMPORARY TABLE grid_acronyms (
    grid_id text,
    acronym text
);

-- aliases.csv
CREATE TEMPORARY TABLE grid_aliases (
    grid_id text,
    alias   text
);
-- Load every CSV file of the grid.ac dump into its staging table.
-- The first line of each file is a header. The absolute paths below are
-- machine-specific and must be updated before launching the script.
COPY grid_institutes (grid_id, name, wikipedia_url, email_address, established)
FROM '/Users/michele/Develop/dnet45/dnet-applications/apps/dnet-orgs-database-application/data/grid-2020-06-29/full_tables/institutes.csv'
CSV HEADER;

COPY grid_geonames (
    geonames_city_id, city,
    nuts_level1_code, nuts_level1_name,
    nuts_level2_code, nuts_level2_name,
    nuts_level3_code, nuts_level3_name,
    geonames_admin1_code, geonames_admin1_name, geonames_admin1_ascii_name,
    geonames_admin2_code, geonames_admin2_name, geonames_admin2_ascii_name
)
FROM '/Users/michele/Develop/dnet45/dnet-applications/apps/dnet-orgs-database-application/data/grid-2020-06-29/full_tables/geonames.csv'
CSV HEADER;

COPY grid_addresses (
    grid_id, line_1, line_2, line_3, lat, lng, postcode, is_primary,
    city, state, state_code, country, country_code, geonames_city_id
)
FROM '/Users/michele/Develop/dnet45/dnet-applications/apps/dnet-orgs-database-application/data/grid-2020-06-29/full_tables/addresses.csv'
CSV HEADER;

COPY grid_external_ids (grid_id, external_id_type, external_id)
FROM '/Users/michele/Develop/dnet45/dnet-applications/apps/dnet-orgs-database-application/data/grid-2020-06-29/full_tables/external_ids.csv'
CSV HEADER;

COPY grid_labels (grid_id, iso639, label)
FROM '/Users/michele/Develop/dnet45/dnet-applications/apps/dnet-orgs-database-application/data/grid-2020-06-29/full_tables/labels.csv'
CSV HEADER;

COPY grid_relationships (grid_id, relationship_type, related_grid_id)
FROM '/Users/michele/Develop/dnet45/dnet-applications/apps/dnet-orgs-database-application/data/grid-2020-06-29/full_tables/relationships.csv'
CSV HEADER;

COPY grid_types (grid_id, type)
FROM '/Users/michele/Develop/dnet45/dnet-applications/apps/dnet-orgs-database-application/data/grid-2020-06-29/full_tables/types.csv'
CSV HEADER;

COPY grid_links (grid_id, link)
FROM '/Users/michele/Develop/dnet45/dnet-applications/apps/dnet-orgs-database-application/data/grid-2020-06-29/full_tables/links.csv'
CSV HEADER;

COPY grid_acronyms (grid_id, acronym)
FROM '/Users/michele/Develop/dnet45/dnet-applications/apps/dnet-orgs-database-application/data/grid-2020-06-29/full_tables/acronyms.csv'
CSV HEADER;

COPY grid_aliases (grid_id, alias)
FROM '/Users/michele/Develop/dnet45/dnet-applications/apps/dnet-orgs-database-application/data/grid-2020-06-29/full_tables/aliases.csv'
CSV HEADER;
-- Move the staged grid.ac data into the application tables.
-- Every row is keyed by a temporary id: 'tmp::' followed by the md5 of the GRID id.
-- ON CONFLICT DO NOTHING silently skips rows that would violate a constraint.

-- Organizations: the type falls back to 'UNKNOWN' when the institute has no type row.
-- NOTE(review): an institute with several address or type rows yields several
-- candidate rows with the same id; which one is kept is not determined here.
INSERT INTO organizations (id, name, type, lat, lng, city, country, created_by, modified_by)
SELECT
    'tmp::' || md5(o.grid_id),
    o.name,
    COALESCE(t.type, 'UNKNOWN'),
    a.lat,
    a.lng,
    a.city,
    a.country_code,
    'import:grid.ac',
    'import:grid.ac'
FROM grid_institutes AS o
LEFT JOIN grid_addresses AS a ON o.grid_id = a.grid_id
LEFT JOIN grid_types AS t ON o.grid_id = t.grid_id
ON CONFLICT DO NOTHING;

-- Other identifiers: the GRID id itself, then the external ids
INSERT INTO other_ids (id, otherid, type)
SELECT 'tmp::' || md5(grid_id), grid_id, 'GRID'
FROM grid_institutes
ON CONFLICT DO NOTHING;

INSERT INTO other_ids (id, otherid, type)
SELECT 'tmp::' || md5(grid_id), external_id, external_id_type
FROM grid_external_ids
ON CONFLICT DO NOTHING;

-- Other names: main name ('en'), labels (with their language), aliases ('UNKNOWN')
INSERT INTO other_names (id, lang, name)
SELECT 'tmp::' || md5(grid_id), 'en', name
FROM grid_institutes
ON CONFLICT DO NOTHING;

INSERT INTO other_names (id, lang, name)
SELECT 'tmp::' || md5(grid_id), iso639, label
FROM grid_labels
ON CONFLICT DO NOTHING;

INSERT INTO other_names (id, lang, name)
SELECT 'tmp::' || md5(grid_id), 'UNKNOWN', alias
FROM grid_aliases
ON CONFLICT DO NOTHING;

-- Acronyms
INSERT INTO acronyms (id, acronym)
SELECT 'tmp::' || md5(grid_id), acronym
FROM grid_acronyms
ON CONFLICT DO NOTHING;

-- Relationships: both ends use the temporary id scheme
INSERT INTO relationships (id1, reltype, id2)
SELECT 'tmp::' || md5(grid_id), relationship_type, 'tmp::' || md5(related_grid_id)
FROM grid_relationships
ON CONFLICT DO NOTHING;

-- URLs
INSERT INTO urls (id, url)
SELECT 'tmp::' || md5(grid_id), link
FROM grid_links
ON CONFLICT DO NOTHING;
-- Replace the temporary 'tmp::<md5>' ids created by this import with definitive ids
-- taken from the DEFAULT of organizations.id.
-- The WHERE clause restricts the change to the rows imported by this script: without it,
-- launching the script on a database that already contains organizations would
-- regenerate the id of every existing organization as well.
UPDATE organizations
SET id = DEFAULT
WHERE id LIKE 'tmp::%';

View File

@ -1,148 +0,0 @@
-- Import of the suggestions stored in tmp_dedup_events.
-- The transaction opened here is closed by the COMMIT at the end of the script.
BEGIN;
-- Delete the previous suggestions (conflicts, duplicates, organizations): only the rows
-- created AND last modified by 'dedupWf' that are still in the 'suggested' state.
DELETE FROM oa_conflicts WHERE created_by = 'dedupWf' and modified_by = 'dedupWf' and reltype = 'suggested';
DELETE FROM oa_duplicates WHERE created_by = 'dedupWf' and modified_by = 'dedupWf' and reltype = 'suggested';
DELETE FROM organizations WHERE created_by = 'dedupWf' and modified_by = 'dedupWf' and status = 'suggested';
-- IMPORTANT : DO NOT DELETE THE RAW ORGS TO AVOID THE 'ON CASCADE' DELETIONS
-- FIX ORIGINAL DATA
-- Remaining suggested organizations whose id lacks the 'pending_org_::' prefix get
-- the id 'pending_org_::' followed by the md5 of their current id.
-- NOTE(review): the underscores of the LIKE pattern are not escaped, so each one matches any single character.
UPDATE organizations SET id = 'pending_org_::'||MD5(id) WHERE status = 'suggested' AND id NOT LIKE 'pending_org_::%';
-- FIX IMPORT DATA
-- Events without an original id are dropped.
DELETE FROM tmp_dedup_events WHERE oa_original_id = '' OR oa_original_id IS NULL;
-- A blank local_id is replaced by the original id: from here on local_id is never blank.
UPDATE tmp_dedup_events SET local_id = oa_original_id WHERE local_id = '' OR local_id IS NULL;
-- A blank country becomes 'UNKNOWN'.
UPDATE tmp_dedup_events SET oa_country = 'UNKNOWN' WHERE oa_country = '' OR oa_country IS NULL;
-- A blank name falls back to the acronym; events that still have no name are dropped.
UPDATE tmp_dedup_events SET oa_name = oa_acronym WHERE oa_name = '' OR oa_name IS NULL;
DELETE FROM tmp_dedup_events WHERE oa_name = '' OR oa_name IS NULL;
-- delete invalid relations (a raw org can not be suggested as duplicate and as new org)
-- A "new org" event is one whose local_id equals its oa_original_id. Blank local_ids have
-- already been rewritten to oa_original_id by the FIX IMPORT DATA statements, so the former
-- test (local_id = '' OR local_id IS NULL) could never match and this DELETE removed nothing.
DELETE FROM tmp_dedup_events
WHERE oa_original_id IN (
        SELECT oa_original_id
        FROM tmp_dedup_events
        GROUP BY oa_original_id
        HAVING count(oa_original_id) > 1
    )
    AND local_id = oa_original_id;

-- delete invalid relations (a raw org can not be suggested to multiple orgs)
-- The underscores are escaped, as in the other patterns of this script, so that they
-- match literally instead of acting as single-character wildcards.
DELETE FROM tmp_dedup_events
WHERE oa_original_id IN (
        SELECT oa_original_id
        FROM tmp_dedup_events
        GROUP BY oa_original_id
        HAVING count(oa_original_id) > 1
    )
    AND local_id NOT LIKE 'openorgs\_\_\_\_::%';
-- IMPORT MISSING TERMS
-- pid_list is a '@@@'-separated list of pids, each one split on '###';
-- the second part is used as the id type, and as its display name.
-- An entry without '###' has no second part (NULL): it is skipped, because
-- id_types.val is the primary key and a NULL would abort the whole transaction
-- (ON CONFLICT DO NOTHING does not cover not-null violations).
INSERT INTO id_types (val, name)
SELECT DISTINCT arr[2], arr[2]
FROM (
    SELECT string_to_array(unnest(string_to_array(pid_list, '@@@')), '###') AS arr
    FROM tmp_dedup_events
    WHERE oa_original_id NOT LIKE 'openorgs\_\_\_\_::%'
) AS c
WHERE arr[2] IS NOT NULL
ON CONFLICT DO NOTHING;
-- NEW ORGANIZATIONS (suggested)
-- Source rows: events whose local_id is not an OpenOrgs id and equals the original id.
-- Each one becomes a 'suggested' organization with id 'pending_org_::' || md5(local_id);
-- its acronym, url and pids are attached to the same pending id.
INSERT INTO organizations (
    id, name, country, status,
    ec_legalbody, ec_legalperson, ec_nonprofit, ec_researchorganization, ec_highereducation,
    ec_internationalorganizationeurinterests, ec_internationalorganization,
    ec_enterprise, ec_smevalidated, ec_nutscode,
    created_by, modified_by
)
SELECT
    'pending_org_::' || MD5(local_id), oa_name, oa_country, 'suggested',
    ec_legalbody, ec_legalperson, ec_nonprofit, ec_researchorganization, ec_highereducation,
    ec_internationalorganizationeurinterests, ec_internationalorganization,
    ec_enterprise, ec_smevalidated, ec_nutscode,
    'dedupWf', 'dedupWf'
FROM tmp_dedup_events
WHERE local_id NOT LIKE 'openorgs\_\_\_\_::%'
    AND local_id = oa_original_id
ON CONFLICT DO NOTHING;

-- Acronyms (blank values are skipped)
INSERT INTO acronyms (id, acronym)
SELECT 'pending_org_::' || MD5(local_id), oa_acronym
FROM tmp_dedup_events
WHERE local_id NOT LIKE 'openorgs\_\_\_\_::%'
    AND local_id = oa_original_id
    AND oa_acronym IS NOT NULL
    AND oa_acronym != ''
ON CONFLICT DO NOTHING;

-- URLs (blank values are skipped)
INSERT INTO urls (id, url)
SELECT 'pending_org_::' || MD5(local_id), oa_url
FROM tmp_dedup_events
WHERE local_id NOT LIKE 'openorgs\_\_\_\_::%'
    AND local_id = oa_original_id
    AND oa_url IS NOT NULL
    AND oa_url != ''
ON CONFLICT DO NOTHING;

-- Other ids: pid_list is split on '@@@' into pids, each pid on '###' into (id, type)
INSERT INTO other_ids (id, otherid, type)
SELECT
    'pending_org_::' || MD5(local_id),
    arr[1] AS otherid,
    arr[2] AS type
FROM (
    SELECT
        local_id,
        string_to_array(unnest(string_to_array(pid_list, '@@@')), '###') AS arr
    FROM tmp_dedup_events
    WHERE local_id NOT LIKE 'openorgs\_\_\_\_::%'
        AND local_id = oa_original_id
) AS c
ON CONFLICT DO NOTHING;
-- NEW ORGANIZATIONS (raw)
-- Source rows: every event whose original id is not an OpenOrgs id.
-- The organization is stored under its original id with status 'raw'. When the id already
-- exists, its name, country and ec_* fields are overwritten with the incoming values and the
-- modification date/author are refreshed (status and created_by are left untouched).
INSERT INTO organizations (
    id, name, country, status,
    ec_legalbody, ec_legalperson, ec_nonprofit, ec_researchorganization, ec_highereducation,
    ec_internationalorganizationeurinterests, ec_internationalorganization,
    ec_enterprise, ec_smevalidated, ec_nutscode,
    created_by, modified_by
)
SELECT
    oa_original_id, oa_name, oa_country, 'raw',
    ec_legalbody, ec_legalperson, ec_nonprofit, ec_researchorganization, ec_highereducation,
    ec_internationalorganizationeurinterests, ec_internationalorganization,
    ec_enterprise, ec_smevalidated, ec_nutscode,
    'dedupWf', 'dedupWf'
FROM tmp_dedup_events
WHERE oa_original_id NOT LIKE 'openorgs\_\_\_\_::%'
ON CONFLICT (id) DO UPDATE SET
    (
        name, country,
        ec_legalbody, ec_legalperson, ec_nonprofit, ec_researchorganization, ec_highereducation,
        ec_internationalorganizationeurinterests, ec_internationalorganization,
        ec_enterprise, ec_smevalidated, ec_nutscode,
        modification_date, modified_by
    ) = (
        EXCLUDED.name, EXCLUDED.country,
        EXCLUDED.ec_legalbody, EXCLUDED.ec_legalperson, EXCLUDED.ec_nonprofit, EXCLUDED.ec_researchorganization, EXCLUDED.ec_highereducation,
        EXCLUDED.ec_internationalorganizationeurinterests, EXCLUDED.ec_internationalorganization,
        EXCLUDED.ec_enterprise, EXCLUDED.ec_smevalidated, EXCLUDED.ec_nutscode,
        now(), 'dedupWf'
    );

-- Acronyms (blank values are skipped)
INSERT INTO acronyms (id, acronym)
SELECT oa_original_id, oa_acronym
FROM tmp_dedup_events
WHERE oa_original_id NOT LIKE 'openorgs\_\_\_\_::%'
    AND oa_acronym IS NOT NULL
    AND oa_acronym != ''
ON CONFLICT DO NOTHING;

-- URLs (blank values are skipped)
INSERT INTO urls (id, url)
SELECT oa_original_id, oa_url
FROM tmp_dedup_events
WHERE oa_original_id NOT LIKE 'openorgs\_\_\_\_::%'
    AND oa_url IS NOT NULL
    AND oa_url != ''
ON CONFLICT DO NOTHING;

-- Other ids: pid_list is split on '@@@' into pids, each pid on '###' into (id, type)
INSERT INTO other_ids (id, otherid, type)
SELECT
    oa_original_id,
    arr[1] AS otherid,
    arr[2] AS type
FROM (
    SELECT
        oa_original_id,
        string_to_array(unnest(string_to_array(pid_list, '@@@')), '###') AS arr
    FROM tmp_dedup_events
    WHERE oa_original_id NOT LIKE 'openorgs\_\_\_\_::%'
) AS c
ON CONFLICT DO NOTHING;
-- DUPLICATES (relations to openorgs)
-- A non-OpenOrgs original id related to an OpenOrgs local id:
-- the relation is stored with the local id as it is.
INSERT INTO oa_duplicates (local_id, oa_original_id, oa_collectedfrom, created_by, modified_by)
SELECT
    local_id,
    oa_original_id,
    oa_collectedfrom,
    'dedupWf',
    'dedupWf'
FROM tmp_dedup_events
WHERE local_id LIKE 'openorgs\_\_\_\_::%'
    AND oa_original_id NOT LIKE 'openorgs\_\_\_\_::%'
ON CONFLICT DO NOTHING;

-- DUPLICATES (relations to suggested)
-- Neither id is an OpenOrgs id: the local side is the pending organization,
-- i.e. 'pending_org_::' || md5(local_id).
INSERT INTO oa_duplicates (local_id, oa_original_id, oa_collectedfrom, created_by, modified_by)
SELECT
    'pending_org_::' || MD5(local_id),
    oa_original_id,
    oa_collectedfrom,
    'dedupWf',
    'dedupWf'
FROM tmp_dedup_events
WHERE local_id NOT LIKE 'openorgs\_\_\_\_::%'
    AND oa_original_id NOT LIKE 'openorgs\_\_\_\_::%'
ON CONFLICT DO NOTHING;
-- CONFLICTS (I generate all the couples)
-- A temporary table lives as long as the database session: when the import runs again on
-- the same (e.g. pooled) connection, the table of the previous run would still exist and
-- CREATE would fail, aborting the transaction. Drop any leftover first.
DROP TABLE IF EXISTS pg_temp.tmp_conflict_groups;

-- Members of each conflict group: both ids of every event that relates two different
-- OpenOrgs ids and carries a non-blank group_id. UNION removes the repeated (gid, oid) pairs.
CREATE TEMPORARY TABLE tmp_conflict_groups AS
    SELECT group_id AS gid, local_id AS oid
    FROM tmp_dedup_events
    WHERE local_id LIKE 'openorgs\_\_\_\_::%'
        AND oa_original_id LIKE 'openorgs\_\_\_\_::%'
        AND local_id != oa_original_id
        AND group_id IS NOT NULL
        AND group_id != ''
    UNION
    SELECT group_id AS gid, oa_original_id AS oid
    FROM tmp_dedup_events
    WHERE local_id LIKE 'openorgs\_\_\_\_::%'
        AND oa_original_id LIKE 'openorgs\_\_\_\_::%'
        AND local_id != oa_original_id
        AND group_id IS NOT NULL
        AND group_id != '';

-- One conflict for every ordered couple of distinct members of the same group
-- (both directions, a-b and b-a, are generated).
INSERT INTO oa_conflicts (id1, id2, idgroup, created_by, modified_by)
SELECT DISTINCT
    c1.oid,
    c2.oid,
    c1.gid,
    'dedupWf',
    'dedupWf'
FROM tmp_conflict_groups AS c1
INNER JOIN tmp_conflict_groups AS c2 ON c1.gid = c2.gid
WHERE c1.oid != c2.oid
ON CONFLICT DO NOTHING;
-- CONSISTENCY (respect the order of the deletions)
-- remove the pending organizations that have been recently approved
-- Deleted organizations: status 'suggested', created and last modified by 'dedupWf', that are
-- the local side (d1) of a 'suggested' duplicate whose raw organization is also related (d2)
-- to a different local organization with a reltype other than 'is_different'.
DELETE FROM organizations
WHERE id in (
SELECT o1.id
FROM
oa_duplicates d1
JOIN organizations o1 ON (o1.id = d1.local_id)
JOIN oa_duplicates d2 on (d1.oa_original_id = d2.oa_original_id)
JOIN organizations o2 on (o2.id = d2.local_id)
WHERE d1.local_id != d2.local_id
AND o1.status = 'suggested'
AND o1.created_by = 'dedupWf'
AND o1.modified_by = 'dedupWf'
AND d1.reltype = 'suggested'
AND d2.reltype != 'is_different');
-- Remove invalid suggestions (an existing relation has already been approved)
-- A 'suggested' duplicate is deleted when the same raw organization (oa_original_id)
-- also has a relation with reltype 'is_similar'.
DELETE FROM oa_duplicates d
USING oa_duplicates d1
WHERE d.oa_original_id = d1.oa_original_id AND d.reltype = 'suggested' AND d1.reltype = 'is_similar';
-- Closes the transaction opened by the BEGIN at the top of the script.
COMMIT;

View File

@ -1 +0,0 @@
-- Registers one valid user with the ADMIN role.
INSERT INTO users (email, valid, role)
VALUES (
    'michele.artini@isti.cnr.it',
    true,
    'ADMIN'
);

View File

@ -1,697 +0,0 @@
-- Tear down every object created by this script so that it can be re-run.
-- Order matters: views first, then the tables that reference organizations,
-- then organizations itself, then the vocabulary tables it references.
DROP VIEW IF EXISTS organizations_view;
DROP VIEW IF EXISTS organizations_info_view;
DROP VIEW IF EXISTS organizations_simple_view;
DROP VIEW IF EXISTS users_view;
DROP VIEW IF EXISTS conflict_groups_view;
DROP VIEW IF EXISTS suggestions_info_by_country_view;
DROP VIEW IF EXISTS duplicate_groups_view;
-- oa_duplicates_view reads oa_duplicates: it must go before that table.
DROP VIEW IF EXISTS oa_duplicates_view;
DROP TABLE IF EXISTS sysconf;
-- org_index_search is created below with a plain CREATE TABLE.
DROP TABLE IF EXISTS org_index_search;
DROP TABLE IF EXISTS other_ids;
DROP TABLE IF EXISTS other_names;
DROP TABLE IF EXISTS acronyms;
DROP TABLE IF EXISTS relationships;
DROP TABLE IF EXISTS urls;
-- notes and journal hold foreign keys to organizations: drop them first.
DROP TABLE IF EXISTS notes;
DROP TABLE IF EXISTS journal;
DROP TABLE IF EXISTS oa_duplicates;
DROP TABLE IF EXISTS oa_conflicts;
DROP TABLE IF EXISTS organizations;
DROP TABLE IF EXISTS org_types;
DROP TABLE IF EXISTS user_countries;
DROP TABLE IF EXISTS users;
DROP TABLE IF EXISTS user_roles;
DROP TABLE IF EXISTS countries;
DROP TABLE IF EXISTS id_types;
DROP TABLE IF EXISTS languages;
DROP SEQUENCE IF EXISTS organizations_id_seq;
-- System configuration: page title, homepage message and read-only flag.
CREATE TABLE sysconf (
    id           text    DEFAULT 'default' PRIMARY KEY,
    title        text    NOT NULL,
    homepage_msg text    DEFAULT '' NOT NULL,
    readonly     boolean DEFAULT false NOT NULL
);
-- Seed the 'default' configuration row.
INSERT INTO sysconf (id, title)
VALUES ('default', 'OpenOrgs Database');
-- Vocabulary of organization types; each entry uses its code as name.
CREATE TABLE org_types (
    val  text PRIMARY KEY,
    name text
);
INSERT INTO org_types (val, name)
SELECT t.v, t.v
FROM (VALUES
    ('Archive'),
    ('Company'),
    ('Education'),
    ('Facility'),
    ('Government'),
    ('Healthcare'),
    ('Nonprofit'),
    ('Other'),
    ('UNKNOWN')
) AS t (v);
-- Vocabulary of external identifier types; each entry uses its code as name.
CREATE TABLE id_types (
    val  text PRIMARY KEY,
    name text
);
INSERT INTO id_types (val, name)
SELECT t.v, t.v
FROM (VALUES
    ('CNRS'),
    ('FundRef'),
    ('HESA'),
    ('ISNI'),
    ('LinkedIn'),
    ('OrgRef'),
    ('UCAS'),
    ('UKPRN'),
    ('Wikidata'),
    ('GRID'),
    ('ROR')
) AS t (v);
-- Vocabulary of language codes; each entry uses its code as name.
CREATE TABLE languages (
    val  text PRIMARY KEY,
    name text
);
INSERT INTO languages (val, name)
SELECT code.v, code.v
FROM (VALUES
    ('UNKNOWN'),
    ('aa'),('af'),('am'),('ar'),('as'),('az'),('ba'),('be'),('bg'),('bn'),('br'),('bs'),('ca'),('ch'),('co'),('cs'),('cy'),('da'),('de'),('dv'),
    ('dz'),('el'),('en'),('eo'),('es'),('et'),('eu'),('fa'),('fi'),('fo'),('fr'),('fy'),('ga'),('gd'),('gl'),('gu'),('he'),('hi'),('hr'),('ht'),
    ('hu'),('hy'),('id'),('is'),('it'),('iu'),('ja'),('jv'),('ka'),('kk'),('kl'),('km'),('kn'),('ko'),('ku'),('ky'),('la'),('lb'),('lo'),('lt'),
    ('lv'),('mg'),('mi'),('mk'),('ml'),('mn'),('mr'),('ms'),('mt'),('my'),('nb'),('ne'),('nl'),('no'),('oc'),('om'),('or'),('pa'),('pl'),('ps'),
    ('pt'),('rm'),('ro'),('ru'),('rw'),('sa'),('sd'),('si'),('sk'),('sl'),('sm'),('so'),('sq'),('sr'),('sv'),('sw'),('ta'),('te'),('tg'),('th'),
    ('tk'),('tl'),('tr'),('tt'),('ug'),('uk'),('ur'),('uz'),('vi'),('xh'),('yo'),('zh'),('zu')
) AS code (v);
-- Vocabulary of countries: val is the code referenced by organizations and
-- user_countries, name is the label associated with that code.
-- The label for 'KP' used to be misspelled ("Democatric").
CREATE TABLE countries (
    val  text PRIMARY KEY,
    name text
);
INSERT INTO countries (val, name) VALUES
    ('AD', 'Andorra'),
    ('AE', 'United Arab Emirates'),
    ('AF', 'Afghanistan'),
    ('AG', 'Antigua and Barbuda'),
    ('AI', 'Anguilla'),
    ('AL', 'Albania'),
    ('AM', 'Armenia'),
    ('AN', 'Netherlands Antilles'),
    ('AO', 'Angola'),
    ('AQ', 'Antarctica'),
    ('AR', 'Argentina'),
    ('AS', 'American Samoa'),
    ('AT', 'Austria'),
    ('AU', 'Australia'),
    ('AW', 'Aruba'),
    ('AX', 'Åland Islands'),
    ('AZ', 'Azerbaijan'),
    ('BA', 'Bosnia and Herzegovina'),
    ('BB', 'Barbados'),
    ('BD', 'Bangladesh'),
    ('BE', 'Belgium'),
    ('BF', 'Burkina Faso'),
    ('BG', 'Bulgaria'),
    ('BH', 'Bahrain'),
    ('BI', 'Burundi'),
    ('BJ', 'Benin'),
    ('BL', 'Saint-Barthélemy'),
    ('BM', 'Bermuda'),
    ('BN', 'Brunei Darussalam'),
    ('BO', 'Bolivia'),
    ('BQ', 'Bonaire, Sint Eustatius and Saba'),
    ('BR', 'Brazil'),
    ('BS', 'Bahamas'),
    ('BT', 'Bhutan'),
    ('BV', 'Bouvet Island'),
    ('BW', 'Botswana'),
    ('BY', 'Belarus'),
    ('BZ', 'Belize'),
    ('CA', 'Canada'),
    ('CC', 'Cocos (Keeling) Islands'),
    ('CD', 'Congo (Democratic Republic of)'),
    ('CF', 'Central African Republic'),
    ('CG', 'Congo'),
    ('CH', 'Switzerland'),
    ('CI', 'Cote d''Ivoire'),
    ('CK', 'Cook Islands'),
    ('CL', 'Chile'),
    ('CM', 'Cameroon'),
    ('CN', 'China (People''s Republic of)'),
    ('CO', 'Colombia'),
    ('CR', 'Costa Rica'),
    ('CS', 'Serbia and Montenegro'),
    ('CU', 'Cuba'),
    ('CV', 'Cape Verde'),
    ('CW', 'Curaçao'),
    ('CX', 'Christmas Island'),
    ('CY', 'Cyprus'),
    ('CZ', 'Czech Republic'),
    ('DE', 'Germany'),
    ('DJ', 'Djibouti'),
    ('DK', 'Denmark'),
    ('DM', 'Dominica'),
    ('DO', 'Dominican Republic'),
    ('DZ', 'Algeria'),
    ('EC', 'Ecuador'),
    ('EE', 'Estonia'),
    ('EG', 'Egypt'),
    ('EH', 'Western Sahara'),
    ('ER', 'Eritrea'),
    ('ES', 'Spain'),
    ('ET', 'Ethiopia'),
    ('EU', 'European Union'),
    ('FI', 'Finland'),
    ('FJ', 'Fiji'),
    ('FK', 'Falkland Islands (Malvinas)'),
    ('FM', 'Micronesia, Federated States of'),
    ('FO', 'Faroe Islands'),
    ('FR', 'France'),
    ('GA', 'Gabon'),
    ('GB', 'United Kingdom'),
    ('GD', 'Grenada'),
    ('GE', 'Georgia'),
    ('GF', 'French Guiana'),
    ('GG', 'Guernsey'),
    ('GH', 'Ghana'),
    ('GI', 'Gibraltar'),
    ('GL', 'Greenland'),
    ('GM', 'Gambia'),
    ('GN', 'Guinea'),
    ('GP', 'Guadeloupe'),
    ('GQ', 'Equatorial Guinea'),
    ('GR', 'Greece'),
    ('GS', 'South Georgia and the South Sandwich Islands'),
    ('GT', 'Guatemala'),
    ('GU', 'Guam'),
    ('GW', 'Guinea-Bissau'),
    ('GY', 'Guyana'),
    ('HK', 'Hong Kong'),
    ('HM', 'Heard Island and McDonald Islands'),
    ('HN', 'Honduras'),
    ('HR', 'Croatia'),
    ('HT', 'Haiti'),
    ('HU', 'Hungary'),
    ('ID', 'Indonesia'),
    ('IE', 'Ireland'),
    ('IL', 'Israel'),
    ('IM', 'Isle of Man'),
    ('IN', 'India'),
    ('IO', 'British Indian Ocean Territory'),
    ('IQ', 'Iraq'),
    ('IR', 'Iran (Islamic Republic of)'),
    ('IS', 'Iceland'),
    ('IT', 'Italy'),
    ('JE', 'Jersey'),
    ('JM', 'Jamaica'),
    ('JO', 'Jordan'),
    ('JP', 'Japan'),
    ('KE', 'Kenya'),
    ('KG', 'Kyrgyzstan'),
    ('KH', 'Cambodia'),
    ('KI', 'Kiribati'),
    ('KM', 'Comoros'),
    ('KN', 'Saint Kitts and Nevis'),
    ('KO', 'Kosovo * UN resolution'),
    ('KP', 'Korea, Democratic People''s Republic of'),
    ('KR', 'Korea (Republic of)'),
    ('KW', 'Kuwait'),
    ('KY', 'Cayman Islands'),
    ('KZ', 'Kazakhstan'),
    ('LA', 'Lao (People''s Democratic Republic)'),
    ('LB', 'Lebanon'),
    ('LC', 'Saint Lucia'),
    ('LI', 'Liechtenstein'),
    ('LK', 'Sri Lanka'),
    ('LR', 'Liberia'),
    ('LS', 'Lesotho'),
    ('LT', 'Lithuania'),
    ('LU', 'Luxembourg'),
    ('LV', 'Latvia'),
    ('LY', 'Libyan Arab Jamahiriya'),
    ('MA', 'Morocco'),
    ('MC', 'Monaco'),
    ('MD', 'Moldova (Republic of)'),
    ('ME', 'Montenegro'),
    ('MF', 'Saint Martin (French Part)'),
    ('MG', 'Madagascar'),
    ('MH', 'Marshall Islands'),
    ('MK', 'Former Yugoslav Republic of Macedonia'),
    ('ML', 'Mali'),
    ('MM', 'Myanmar'),
    ('MN', 'Mongolia'),
    ('MO', 'Macao'),
    ('MP', 'Northern Mariana Islands'),
    ('MQ', 'Martinique'),
    ('MR', 'Mauritania'),
    ('MS', 'Montserrat'),
    ('MT', 'Malta'),
    ('MU', 'Mauritius'),
    ('MV', 'Maldives'),
    ('MW', 'Malawi'),
    ('MX', 'Mexico'),
    ('MY', 'Malaysia'),
    ('MZ', 'Mozambique'),
    ('NA', 'Namibia'),
    ('NC', 'New Caledonia'),
    ('NE', 'Niger'),
    ('NF', 'Norfolk Island'),
    ('NG', 'Nigeria'),
    ('NI', 'Nicaragua'),
    ('NL', 'Netherlands'),
    ('NO', 'Norway'),
    ('NP', 'Nepal'),
    ('NR', 'Nauru'),
    ('NU', 'Niue'),
    ('NZ', 'New Zealand'),
    ('OC', 'Oceania'),
    ('OM', 'Oman'),
    ('PA', 'Panama'),
    ('PE', 'Peru'),
    ('PF', 'French Polynesia'),
    ('PG', 'Papua New Guinea'),
    ('PH', 'Philippines'),
    ('PK', 'Pakistan'),
    ('PL', 'Poland'),
    ('PM', 'Saint Pierre and Miquelon'),
    ('PN', 'Pitcairn'),
    ('PR', 'Puerto Rico'),
    ('PS', 'Palestinian-administered areas'),
    ('PT', 'Portugal'),
    ('PW', 'Palau'),
    ('PY', 'Paraguay'),
    ('QA', 'Qatar'),
    ('RE', 'Réunion'),
    ('RO', 'Romania'),
    ('RS', 'Serbia'),
    ('RU', 'Russian Federation'),
    ('RW', 'Rwanda'),
    ('SA', 'Saudi Arabia'),
    ('SB', 'Solomon Islands'),
    ('SC', 'Seychelles'),
    ('SD', 'Sudan'),
    ('SE', 'Sweden'),
    ('SG', 'Singapore'),
    ('SH', 'Saint Helena, Ascension and Tristan da Cunha'),
    ('SI', 'Slovenia'),
    ('SJ', 'Svalbard and Jan Mayen'),
    ('SK', 'Slovakia'),
    ('SL', 'Sierra Leone'),
    ('SM', 'San Marino'),
    ('SN', 'Senegal'),
    ('SO', 'Somalia'),
    ('SR', 'Suriname'),
    ('SS', 'South Sudan'),
    ('ST', 'São Tomé and Príncipe'),
    ('SV', 'El Salvador'),
    ('SX', 'Sint Maarten (Dutch Part)'),
    ('SY', 'Syrian Arab Republic'),
    ('SZ', 'Swaziland'),
    ('TC', 'Turks and Caicos Islands'),
    ('TD', 'Chad'),
    ('TF', 'French Southern Territories'),
    ('TG', 'Togo'),
    ('TH', 'Thailand'),
    ('TJ', 'Tajikistan'),
    ('TK', 'Tokelau'),
    ('TL', 'Timor-Leste'),
    ('TM', 'Turkmenistan'),
    ('TN', 'Tunisia'),
    ('TO', 'Tonga'),
    ('TR', 'Turkey'),
    ('TT', 'Trinidad and Tobago'),
    ('TV', 'Tuvalu'),
    ('TW', 'Taiwan'),
    ('TZ', 'Tanzania (United Republic of)'),
    ('UA', 'Ukraine'),
    ('UG', 'Uganda'),
    ('UM', 'United States Minor Outlying Islands'),
    ('UNKNOWN', 'UNKNOWN'),
    ('US', 'United States'),
    ('UY', 'Uruguay'),
    ('UZ', 'Uzbekistan'),
    ('VA', 'Holy See (Vatican City State)'),
    ('VC', 'Saint Vincent and the Grenadines'),
    ('VE', 'Venezuela'),
    ('VG', 'Virgin Islands (British)'),
    ('VI', 'Virgin Islands, U.S.'),
    ('VN', 'Viet Nam'),
    ('VU', 'Vanuatu'),
    ('WF', 'Wallis and Futuna'),
    ('WS', 'Samoa'),
    ('XK', 'Kosovo * UN resolution'),
    ('YE', 'Yemen'),
    ('YT', 'Mayotte'),
    ('YU', 'Yugoslavia'),
    ('ZA', 'South Africa'),
    ('ZM', 'Zambia'),
    ('ZW', 'Zimbabwe');
-- Vocabulary of the roles a user can be given (referenced by users.role).
CREATE TABLE user_roles (
    role text PRIMARY KEY
);
INSERT INTO user_roles (role) VALUES
    ('ADMIN'),
    ('NATIONAL_ADMIN'),
    ('USER'),
    ('PENDING'),
    ('NOT_AUTHORIZED');
-- Application users, identified by e-mail; role defaults to 'USER' and both
-- access timestamps default to the insertion time.
CREATE TABLE users (
    email        text PRIMARY KEY,
    valid        boolean DEFAULT true,
    role         text DEFAULT 'USER' NOT NULL REFERENCES user_roles (role),
    first_access timestamp with time zone DEFAULT now(),
    last_access  timestamp with time zone DEFAULT now()
);
-- Countries associated with each user; rows follow the user when its e-mail
-- is updated and disappear when the user is deleted.
CREATE TABLE user_countries (
    email   text,
    country text,
    PRIMARY KEY (email, country),
    FOREIGN KEY (email) REFERENCES users (email) ON UPDATE CASCADE ON DELETE CASCADE,
    FOREIGN KEY (country) REFERENCES countries (val)
);
-- Sequence feeding the numeric part of the generated organization ids.
CREATE SEQUENCE organizations_id_seq;

-- Main table. When no id is supplied it is generated as 'openorgs____::'
-- followed by the next sequence value left-padded with zeros to 10 chars.
CREATE TABLE organizations (
    id                text PRIMARY KEY
                      DEFAULT 'openorgs____::'||lpad(nextval('organizations_id_seq')::text,10,'0'),
    name              text,
    type              text DEFAULT 'UNKNOWN' NOT NULL REFERENCES org_types (val),
    lat               double precision,
    lng               double precision,
    city              text,
    country           text REFERENCES countries (val),
    created_by        text,
    creation_date     timestamp with time zone DEFAULT now(),
    modified_by       text,
    modification_date timestamp with time zone DEFAULT now(),
    status            text DEFAULT 'suggested' NOT NULL,
    -- EC flags
    ec_legalbody                             boolean,
    ec_legalperson                           boolean,
    ec_nonprofit                             boolean,
    ec_researchorganization                  boolean,
    ec_highereducation                       boolean,
    ec_internationalorganizationeurinterests boolean,
    ec_internationalorganization             boolean,
    ec_enterprise                            boolean,
    ec_smevalidated                          boolean,
    ec_nutscode                              boolean
);

-- Lookup indexes on the two vocabulary columns.
CREATE INDEX organizations_type_idx
    ON organizations (type);
CREATE INDEX organizations_country_idx
    ON organizations (country);
-- External identifiers of an organization (one row per id/value/type);
-- rows follow the organization on id update and are removed with it.
CREATE TABLE other_ids (
    id      text,
    otherid text,
    type    text,
    PRIMARY KEY (id, otherid, type),
    FOREIGN KEY (id) REFERENCES organizations (id) ON UPDATE CASCADE ON DELETE CASCADE,
    FOREIGN KEY (type) REFERENCES id_types (val)
);
CREATE INDEX other_ids_id_idx
    ON other_ids (id);
-- Alternative names of an organization, each tagged with a language code;
-- rows follow the organization on id update and are removed with it.
CREATE TABLE other_names (
    id   text,
    name text,
    lang text,
    PRIMARY KEY (id, name, lang),
    FOREIGN KEY (id) REFERENCES organizations (id) ON UPDATE CASCADE ON DELETE CASCADE,
    FOREIGN KEY (lang) REFERENCES languages (val)
);
CREATE INDEX other_names_id_idx
    ON other_names (id);
-- Acronyms of an organization; rows follow the organization on id update
-- and are removed with it.
CREATE TABLE acronyms (
    id      text,
    acronym text,
    PRIMARY KEY (id, acronym),
    FOREIGN KEY (id) REFERENCES organizations (id) ON UPDATE CASCADE ON DELETE CASCADE
);
CREATE INDEX acronyms_id_idx
    ON acronyms (id);
-- Typed, directed relationships between two organizations (id1 -> id2);
-- a row is removed when either organization is deleted.
CREATE TABLE relationships (
    id1     text,
    reltype text,
    id2     text,
    PRIMARY KEY (id1, reltype, id2),
    FOREIGN KEY (id1) REFERENCES organizations (id) ON UPDATE CASCADE ON DELETE CASCADE,
    FOREIGN KEY (id2) REFERENCES organizations (id) ON UPDATE CASCADE ON DELETE CASCADE
);
CREATE INDEX relationships_id1_idx
    ON relationships (id1);
CREATE INDEX relationships_id2_idx
    ON relationships (id2);
-- URLs of an organization; rows follow the organization on id update and
-- are removed with it.
CREATE TABLE urls (
    id  text,
    url text,
    PRIMARY KEY (id, url),
    FOREIGN KEY (id) REFERENCES organizations (id) ON UPDATE CASCADE ON DELETE CASCADE
);
CREATE INDEX urls_id_idx
    ON urls (id);
-- At most one note per organization (the organization id is the key);
-- the note is removed together with its organization.
CREATE TABLE notes (
    id                text,
    note              text,
    modified_by       text,
    modification_date timestamp,
    PRIMARY KEY (id),
    FOREIGN KEY (id) REFERENCES organizations (id) ON UPDATE CASCADE ON DELETE CASCADE
);
-- Log of operations on an organization; op_date defaults to the insertion
-- time and entries are removed together with their organization.
CREATE TABLE journal (
    jid         SERIAL PRIMARY KEY,
    id          text,
    operation   text,
    description text,
    op_date     timestamp DEFAULT NOW(),
    email       text,
    FOREIGN KEY (id) REFERENCES organizations (id) ON UPDATE CASCADE ON DELETE CASCADE
);
CREATE INDEX journal_id_idx
    ON journal (id);
-- Duplicate relations between a local organization and an original one
-- (both stored in organizations). New rows start as 'suggested'.
CREATE TABLE oa_duplicates (
    local_id          text,
    oa_original_id    text,
    oa_collectedfrom  text,
    reltype           text DEFAULT 'suggested' NOT NULL,
    creation_date     timestamp DEFAULT NOW(),
    created_by        text,
    modification_date timestamp,
    modified_by       text,
    PRIMARY KEY (local_id, oa_original_id),
    FOREIGN KEY (local_id) REFERENCES organizations (id) ON UPDATE CASCADE ON DELETE CASCADE,
    FOREIGN KEY (oa_original_id) REFERENCES organizations (id) ON UPDATE CASCADE ON DELETE CASCADE
);
CREATE INDEX oa_duplicates_local_id_idx
    ON oa_duplicates (local_id);
-- One row per duplicate relation, enriched with the data of the original
-- organization (oa_original_id): name, country, EC flags, its acronyms and
-- urls as comma-separated strings and its other ids as a JSON array
-- ('[]' when it has none).
-- The SELECT keyword was missing after AS, which made the statement invalid.
CREATE VIEW oa_duplicates_view AS
SELECT
    d.local_id as local_id,
    d.oa_original_id as oa_original_id,
    o.name as oa_name,
    array_to_string(array_agg(DISTINCT a.acronym), ', ') as oa_acronym,
    o.country as oa_country,
    array_to_string(array_agg(DISTINCT u.url), ', ') as oa_url,
    COALESCE(jsonb_agg(DISTINCT jsonb_build_object('id', oid.otherid, 'type', oid.type)) FILTER (WHERE oid.otherid IS NOT NULL), '[]') AS oa_other_ids,
    d.oa_collectedfrom as oa_collectedfrom,
    d.reltype as reltype,
    d.created_by as created_by,
    o.ec_legalbody,
    o.ec_legalperson,
    o.ec_nonprofit,
    o.ec_researchorganization,
    o.ec_highereducation,
    o.ec_internationalorganizationeurinterests,
    o.ec_internationalorganization,
    o.ec_enterprise,
    o.ec_smevalidated,
    o.ec_nutscode
FROM
    oa_duplicates d
    LEFT OUTER JOIN organizations o ON (o.id = d.oa_original_id)
    LEFT OUTER JOIN acronyms a ON (o.id = a.id)
    LEFT OUTER JOIN urls u ON (o.id = u.id)
    LEFT OUTER JOIN other_ids oid ON (o.id = oid.id)
GROUP BY
    d.local_id,
    d.oa_original_id,
    d.created_by,
    o.name,
    o.country,
    d.oa_collectedfrom,
    d.reltype,
    o.ec_legalbody,
    o.ec_legalperson,
    o.ec_nonprofit,
    o.ec_researchorganization,
    o.ec_highereducation,
    o.ec_internationalorganizationeurinterests,
    o.ec_internationalorganization,
    o.ec_enterprise,
    o.ec_smevalidated,
    o.ec_nutscode;
-- Conflict relations between two organizations, optionally tagged with the
-- group they belong to. New rows start as 'suggested'.
CREATE TABLE oa_conflicts (
    id1               text,
    id2               text,
    reltype           text DEFAULT 'suggested' NOT NULL,
    idgroup           text,
    creation_date     timestamp DEFAULT NOW(),
    created_by        text,
    modification_date timestamp,
    modified_by       text,
    PRIMARY KEY (id1, id2),
    FOREIGN KEY (id1) REFERENCES organizations (id) ON UPDATE CASCADE ON DELETE CASCADE,
    FOREIGN KEY (id2) REFERENCES organizations (id) ON UPDATE CASCADE ON DELETE CASCADE
);
CREATE INDEX oa_conflicts_id1_idx
    ON oa_conflicts (id1);
CREATE INDEX oa_conflicts_id2_idx
    ON oa_conflicts (id2);
CREATE INDEX oa_conflicts_idgroup_idx
    ON oa_conflicts (idgroup);
-- Full organization record: the scalar columns plus other ids, other names,
-- acronyms, urls and outgoing relations, each folded into a JSON array
-- ('[]' when there is nothing to aggregate).
CREATE VIEW organizations_view AS
SELECT
    o.id,
    o.name,
    o.type,
    o.lat,
    o.lng,
    o.city,
    o.country,
    o.status,
    o.ec_legalbody,
    o.ec_legalperson,
    o.ec_nonprofit,
    o.ec_researchorganization,
    o.ec_highereducation,
    o.ec_internationalorganizationeurinterests,
    o.ec_internationalorganization,
    o.ec_enterprise,
    o.ec_smevalidated,
    o.ec_nutscode,
    COALESCE(
        jsonb_agg(DISTINCT jsonb_build_object('id', ids.otherid, 'type', ids.type))
            FILTER (WHERE ids.otherid IS NOT NULL),
        '[]'
    ) AS other_ids,
    COALESCE(
        jsonb_agg(DISTINCT jsonb_build_object('name', alt.name, 'lang', alt.lang))
            FILTER (WHERE alt.name IS NOT NULL),
        '[]'
    ) AS other_names,
    COALESCE(
        jsonb_agg(DISTINCT acr.acronym) FILTER (WHERE acr.acronym IS NOT NULL),
        '[]'
    ) AS acronyms,
    COALESCE(
        jsonb_agg(DISTINCT web.url) FILTER (WHERE web.url IS NOT NULL),
        '[]'
    ) AS urls,
    COALESCE(
        jsonb_agg(DISTINCT jsonb_build_object('relatedOrgId', related.id, 'relatedOrgName', related.name, 'type', rel.reltype))
            FILTER (WHERE related.id IS NOT NULL),
        '[]'
    ) AS relations
FROM organizations AS o
LEFT JOIN other_ids AS ids
    ON ids.id = o.id
LEFT JOIN other_names AS alt
    ON alt.id = o.id
LEFT JOIN acronyms AS acr
    ON acr.id = o.id
LEFT JOIN urls AS web
    ON web.id = o.id
-- only the relationships starting from the organization (id1) are listed
LEFT JOIN relationships AS rel
    ON rel.id1 = o.id
LEFT JOIN organizations AS related
    ON related.id = rel.id2
GROUP BY
    o.id,
    o.name,
    o.type,
    o.lat,
    o.lng,
    o.city,
    o.country,
    o.status,
    o.ec_legalbody,
    o.ec_legalperson,
    o.ec_nonprofit,
    o.ec_researchorganization,
    o.ec_highereducation,
    o.ec_internationalorganizationeurinterests,
    o.ec_internationalorganization,
    o.ec_enterprise,
    o.ec_smevalidated,
    o.ec_nutscode;
-- Audit information of each organization together with the number of its
-- 'suggested' duplicates and conflicts, and a flag telling whether a note
-- exists for it.
CREATE VIEW organizations_info_view AS
SELECT
    o.id,
    o.name,
    o.created_by,
    o.creation_date,
    o.modified_by,
    o.modification_date,
    count(DISTINCT dup.oa_original_id) AS n_duplicates,
    count(DISTINCT cnf.id2) AS n_conflicts,
    count(DISTINCT nt.note) > 0 AS note
FROM organizations AS o
-- the reltype filters stay in the ON clauses so that organizations without
-- suggestions are still returned (with zero counts)
LEFT JOIN oa_duplicates AS dup
    ON dup.local_id = o.id
    AND dup.reltype = 'suggested'
LEFT JOIN oa_conflicts AS cnf
    ON cnf.id1 = o.id
    AND cnf.reltype = 'suggested'
LEFT JOIN notes AS nt
    ON nt.id = o.id
GROUP BY o.id;
-- Compact organization record: main columns, acronyms and urls as arrays
-- (without NULL entries) and the number of duplicates for each relation type.
CREATE VIEW organizations_simple_view AS
SELECT
    o.id,
    o.name,
    o.type,
    o.city,
    o.country,
    o.status,
    array_remove(array_agg(DISTINCT acr.acronym), NULL) AS acronyms,
    array_remove(array_agg(DISTINCT web.url), NULL) AS urls,
    count(DISTINCT dup.oa_original_id) FILTER (WHERE dup.reltype = 'is_similar') AS n_similar_dups,
    count(DISTINCT dup.oa_original_id) FILTER (WHERE dup.reltype = 'suggested') AS n_suggested_dups,
    count(DISTINCT dup.oa_original_id) FILTER (WHERE dup.reltype = 'is_different') AS n_different_dups
FROM organizations AS o
LEFT JOIN acronyms AS acr
    ON acr.id = o.id
LEFT JOIN urls AS web
    ON web.id = o.id
LEFT JOIN oa_duplicates AS dup
    ON dup.local_id = o.id
GROUP BY
    o.id,
    o.name,
    o.type,
    o.city,
    o.country,
    o.status;
-- Users with the array of their countries (empty when they have none),
-- ordered by e-mail.
CREATE VIEW users_view AS
SELECT
    usr.email,
    usr.valid,
    usr.role,
    usr.first_access,
    usr.last_access,
    array_remove(array_agg(ctry.country), NULL) AS countries
FROM users AS usr
LEFT JOIN user_countries AS ctry
    ON ctry.email = usr.email
GROUP BY
    usr.email,
    usr.valid,
    usr.role,
    usr.first_access,
    usr.last_access
ORDER BY usr.email;
-- Per-country counters, zero when nothing matches:
--   n_duplicates   approved organizations having 'suggested' duplicates
--   n_conflicts    'suggested' conflict groups involving approved organizations
--   n_pending_orgs organizations still in status 'suggested'
-- The inner joins are equivalent to outer joins here, because the status
-- filter discards every row without a matching organization.
CREATE VIEW suggestions_info_by_country_view AS
SELECT
    ctry.val AS code,
    ctry.name AS name,
    coalesce(dups.n_duplicates, 0) AS n_duplicates,
    coalesce(cnfs.n_conflicts, 0) AS n_conflicts,
    coalesce(pend.n_pending_orgs, 0) AS n_pending_orgs
FROM countries AS ctry
LEFT JOIN (
    SELECT
        o.country AS country,
        count(DISTINCT d.local_id) AS n_duplicates
    FROM oa_duplicates AS d
    INNER JOIN organizations AS o
        ON o.id = d.local_id
    WHERE d.reltype = 'suggested'
        AND o.status = 'approved'
    GROUP BY o.country
) AS dups
    ON dups.country = ctry.val
LEFT JOIN (
    SELECT
        o.country AS country,
        count(DISTINCT c.idgroup) AS n_conflicts
    FROM oa_conflicts AS c
    INNER JOIN organizations AS o
        ON o.id = c.id1
    WHERE c.reltype = 'suggested'
        AND o.status = 'approved'
    GROUP BY o.country
) AS cnfs
    ON cnfs.country = ctry.val
LEFT JOIN (
    SELECT
        o.country AS country,
        count(DISTINCT o.id) AS n_pending_orgs
    FROM organizations AS o
    WHERE o.status = 'suggested'
    GROUP BY o.country
) AS pend
    ON pend.country = ctry.val
ORDER BY ctry.name;
-- 'suggested' conflicts that belong to a group and whose two organizations
-- are both approved, with the main data of each side.
-- Inner joins replace the outer joins + IS NOT NULL checks: a conflict row
-- is kept only when both organizations exist.
CREATE VIEW conflict_groups_view AS
SELECT
    cnf.idgroup AS idgroup,
    lhs.id AS id_1,
    lhs.name AS name_1,
    lhs.type AS type_1,
    lhs.city AS city_1,
    lhs.country AS country_1,
    rhs.id AS id_2,
    rhs.name AS name_2,
    rhs.type AS type_2,
    rhs.city AS city_2,
    rhs.country AS country_2
FROM oa_conflicts AS cnf
INNER JOIN organizations AS lhs
    ON lhs.id = cnf.id1
INNER JOIN organizations AS rhs
    ON rhs.id = cnf.id2
WHERE cnf.reltype = 'suggested'
    AND cnf.idgroup IS NOT NULL
    AND lhs.status = 'approved'
    AND rhs.status = 'approved';
-- Approved organizations having 'suggested' duplicates, with the number of
-- those duplicates, ordered by organization name.
-- The status filter discards rows without a matching organization, so the
-- join is written as an inner join.
CREATE VIEW duplicate_groups_view AS
SELECT
    org.id,
    org.name,
    org.city,
    org.country,
    count(d.*) AS n_duplicates
FROM oa_duplicates AS d
INNER JOIN organizations AS org
    ON org.id = d.local_id
WHERE d.reltype = 'suggested'
    AND org.status = 'approved'
GROUP BY
    org.id,
    org.name,
    org.city,
    org.country
ORDER BY org.name;
-- Full-text search index: one tsvector document per organization id.
CREATE TABLE org_index_search (
    id  text PRIMARY KEY,
    txt tsvector
);
-- NOTE(review): this is a default (btree) index on a tsvector column, next to
-- the GIN index below - confirm it is actually needed.
CREATE INDEX org_index_search_txt_idx
    ON org_index_search (txt);
CREATE INDEX org_index_search_txt_gin_idx
    ON org_index_search USING gin (txt);
-- Rebuilds org_index_search from scratch and returns the number of indexed
-- organizations. The document of an organization is made of its id, name,
-- other names, acronyms and urls.
CREATE OR REPLACE FUNCTION refresh_index_search() RETURNS bigint AS $$
    -- the whole index is emptied on purpose before being rebuilt
    DELETE FROM org_index_search;
    WITH d AS (
        INSERT INTO org_index_search(id, txt)
        SELECT
            o.id,
            to_tsvector(
                o.id
                -- organizations.name is nullable: without COALESCE a NULL name
                -- would turn the whole concatenation (and the document) into NULL
                || ' ' || COALESCE(o.name, '')
                || ' ' || array_to_string(array_agg(DISTINCT n.name), ' ', '')
                || ' ' || array_to_string(array_agg(DISTINCT a.acronym), ' ', '')
                || ' ' || array_to_string(array_agg(DISTINCT u.url), ' ', '')
            )
        FROM organizations o
            LEFT OUTER JOIN other_names n ON (o.id = n.id)
            LEFT OUTER JOIN acronyms a ON (o.id = a.id)
            LEFT OUTER JOIN urls u ON (o.id = u.id)
        GROUP BY o.id, o.name
        RETURNING *
    )
    SELECT COUNT(*) FROM d;
$$ LANGUAGE SQL;
-- Build the index once at schema creation time.
SELECT refresh_index_search();
-- Trigger function: removes the search document of the row being deleted
-- and lets the deletion proceed.
CREATE OR REPLACE FUNCTION delete_index_search()
RETURNS trigger
LANGUAGE plpgsql
AS $$
BEGIN
    DELETE FROM org_index_search AS idx
    WHERE idx.id = OLD.id;

    RETURN OLD;
END;
$$;
-- Trigger function: (re)builds the search document of the inserted or
-- updated organization from its id, name, other names, acronyms and urls,
-- replacing the document already stored for that id, if any.
CREATE OR REPLACE FUNCTION insert_or_update_index_search_trigger() RETURNS trigger LANGUAGE plpgsql AS $$
BEGIN
    INSERT INTO org_index_search(id, txt) (SELECT
        o.id,
        to_tsvector(
            o.id
            -- organizations.name is nullable: without COALESCE a NULL name
            -- would turn the whole concatenation (and the document) into NULL
            || ' ' || COALESCE(o.name, '')
            || ' ' || array_to_string(array_agg(DISTINCT n.name), ' ', '')
            || ' ' || array_to_string(array_agg(DISTINCT a.acronym), ' ', '')
            || ' ' || array_to_string(array_agg(DISTINCT u.url), ' ', '')
        )
    FROM organizations o
        LEFT OUTER JOIN other_names n ON (o.id = n.id)
        LEFT OUTER JOIN acronyms a ON (o.id = a.id)
        LEFT OUTER JOIN urls u ON (o.id = u.id)
    WHERE o.id = new.id
    GROUP BY o.id, o.name)
    ON CONFLICT (id) DO UPDATE SET txt = EXCLUDED.txt;
    RETURN NEW;
END;
$$;
-- Keep org_index_search aligned with organizations: refresh the document
-- after every insert/update, remove it before every delete.
CREATE TRIGGER insert_or_update_index_search_trigger
    AFTER INSERT OR UPDATE
    ON organizations
    FOR EACH ROW
    EXECUTE PROCEDURE insert_or_update_index_search_trigger();
CREATE TRIGGER delete_index_search_trigger
    BEFORE DELETE
    ON organizations
    FOR EACH ROW
    EXECUTE PROCEDURE delete_index_search();

View File

@ -1,6 +0,0 @@
-- Conflicts to be resolved manually: original ids related as 'is_similar'
-- to more than one local organization, where those organizations all share
-- the same status. Returns the ids and names of the organizations involved.
SELECT
    d.oa_original_id,
    array_agg(o.id),
    array_agg(o.name)
FROM oa_duplicates AS d
LEFT JOIN organizations AS o
    ON o.id = d.local_id
WHERE d.reltype = 'is_similar'
GROUP BY d.oa_original_id
HAVING count(d.reltype) > 1
    AND count(DISTINCT o.status) = 1;
-- TO force the status of the orgs
-- select name from organizations WHERE country='IT' and type='UNKNOWN' and lower(reverse(split_part(reverse(trim(name)), ' ', 1))) in ('spa', 'snc', 'srl', 'scarl') and name not ilike '%universit%' and name not ilike '%ospedale%' and name not ilike '%hospital%';

View File

@ -1,17 +0,0 @@
-- Staging table for the ROR <-> GRID id pairs.
CREATE TEMPORARY TABLE temp_ror_grid (
    ror_id  text,
    grid_id text
);

-- Load the tab-separated pairs. The path is machine specific and must be
-- updated before launching the script.
COPY temp_ror_grid (ror_id, grid_id)
FROM '/Users/michele/Develop/dnet45/dnet-applications/apps/dnet-orgs-database-application/data/ror_grid.tsv'
DELIMITER E'\t';

-- Every organization having a GRID id listed in the staging table also gets
-- the matching ROR id; ids already present are left untouched.
INSERT INTO other_ids (id, otherid, type)
SELECT
    org.id AS id,
    pair.ror_id AS otherid,
    'ROR' AS type
FROM organizations AS org
INNER JOIN other_ids AS grid
    ON grid.id = org.id
    AND grid.type = 'GRID'
INNER JOIN temp_ror_grid AS pair
    ON pair.grid_id = grid.otherid
ON CONFLICT DO NOTHING;

View File

@ -1,7 +0,0 @@
-- Initial administrators. One statement per user, so that a failure on one
-- e-mail does not affect the others.
INSERT INTO users (email, valid, role)
VALUES ('michele.artini@isti.cnr.it', TRUE, 'ADMIN');
INSERT INTO users (email, valid, role)
VALUES ('michele.debonis@isti.cnr.it', TRUE, 'ADMIN');
INSERT INTO users (email, valid, role)
VALUES ('andreas.czerniak@uni-bielefeld.de', TRUE, 'ADMIN');
INSERT INTO users (email, valid, role)
VALUES ('claudio.atzori@isti.cnr.it', TRUE, 'ADMIN');
INSERT INTO users (email, valid, role)
VALUES ('emma.lazzeri@isti.cnr.it', TRUE, 'ADMIN');
INSERT INTO users (email, valid, role)
VALUES ('gina.pavone@isti.cnr.it', TRUE, 'ADMIN');
INSERT INTO users (email, valid, role)
VALUES ('paolo.manghi@isti.cnr.it', TRUE, 'ADMIN');

Some files were not shown because too many files have changed in this diff Show More