From 94d298761acddcf13cbe14c9bbac0b30d4621ca1 Mon Sep 17 00:00:00 2001 From: "michele.artini" Date: Thu, 1 Oct 2020 11:33:14 +0200 Subject: [PATCH] index for fulltext search --- apps/dnet-orgs-database-application/TODO.txt | 17 ++++-- .../controller/OrganizationController.java | 4 +- .../OrganizationSimpleViewRepository.java | 8 +-- .../src/main/resources/sql/schema.sql | 53 +++++++++++++++++++ 4 files changed, 72 insertions(+), 10 deletions(-) diff --git a/apps/dnet-orgs-database-application/TODO.txt b/apps/dnet-orgs-database-application/TODO.txt index 32af9846..0643b424 100644 --- a/apps/dnet-orgs-database-application/TODO.txt +++ b/apps/dnet-orgs-database-application/TODO.txt @@ -1,5 +1,12 @@ -1) Completare operazioni di insert/update sul db -2) Migliorare ricerca (utilizzare fulltext search di postgres) -3) Primo caricamento di organizzazioni da openaire (con prefisso openorgs____) con simrels -4) Caricamenti successivi di nuove oganizazzioni -5) Caricamenti successivi di nuove simrels +TODO +---- + +1) Aggiungere i campi approved (boolean) e is_openaire_master (boolean) alla tabella organizations e relativa gestione (ui per l'apprrovazione, filtro, ecc... ) +2) Ricerca di un'organizzazione tra quelle not approved and is_openaire_master creare una nuova organizazione +3) Ricerca di un'organizzazione tra quelle not_appproved and is_openaire_master per creare una nuova simrel (le s) +4) Implemetare fulltext search su i campi (name, alternative names, acronyms) + +create table tmp_fulltext(id text primary key, fulltext tsvector); +CREATE INDEX tmp_fulltext_idx ON tmp_fulltext(fulltext); +insert into org_index_search(id, txt) select o.id, to_tsvector(o.id||' '||o.name||' '||array_to_string(array_agg(DISTINCT n.name), ' ','')||' ' ||array_to_string(array_agg(DISTINCT a.acronym), ' ','')) from organizations o left outer join other_names n on (o.id = n.id) left outer join acronyms a on (o.id = a.id) group by o.id, o.name; +select o.name from organizations o left outer join tmp_fulltext f on (o.id = f.id) where f.fulltext @@ plainto_tsquery('pisa university'); diff --git a/apps/dnet-orgs-database-application/src/main/java/eu/dnetlib/organizations/controller/OrganizationController.java b/apps/dnet-orgs-database-application/src/main/java/eu/dnetlib/organizations/controller/OrganizationController.java index f578a7b8..af78fadb 100644 --- a/apps/dnet-orgs-database-application/src/main/java/eu/dnetlib/organizations/controller/OrganizationController.java +++ b/apps/dnet-orgs-database-application/src/main/java/eu/dnetlib/organizations/controller/OrganizationController.java @@ -209,8 +209,8 @@ public class OrganizationController { @RequestParam final String q, final Authentication authentication) { return UserInfo.isSuperAdmin(authentication) - ? organizationSimpleViewRepository.findByNameContainingIgnoreCase(q, PageRequest.of(page, size)) - : organizationSimpleViewRepository.findByNameForUser(q, authentication.getName(), PageRequest.of(page, size)); + ? organizationSimpleViewRepository.search(q, PageRequest.of(page, size)) + : organizationSimpleViewRepository.searchForUser(q, authentication.getName(), PageRequest.of(page, size)); } @GetMapping("/byCountry/{code}/{page}/{size}") diff --git a/apps/dnet-orgs-database-application/src/main/java/eu/dnetlib/organizations/repository/readonly/OrganizationSimpleViewRepository.java b/apps/dnet-orgs-database-application/src/main/java/eu/dnetlib/organizations/repository/readonly/OrganizationSimpleViewRepository.java index ced650fa..2712b9c1 100644 --- a/apps/dnet-orgs-database-application/src/main/java/eu/dnetlib/organizations/repository/readonly/OrganizationSimpleViewRepository.java +++ b/apps/dnet-orgs-database-application/src/main/java/eu/dnetlib/organizations/repository/readonly/OrganizationSimpleViewRepository.java @@ -3,6 +3,7 @@ package eu.dnetlib.organizations.repository.readonly; import org.springframework.data.domain.Page; import org.springframework.data.domain.Pageable; import org.springframework.data.jpa.repository.Query; +import org.springframework.data.repository.query.Param; import org.springframework.stereotype.Repository; import eu.dnetlib.organizations.model.view.OrganizationSimpleView; @@ -11,11 +12,12 @@ import eu.dnetlib.organizations.model.view.OrganizationSimpleView; public interface OrganizationSimpleViewRepository extends ReadOnlyRepository { // SEARCH - Page findByNameContainingIgnoreCase(String name, Pageable pageable); + @Query(value = "select o.* from organizations_simple_view o left outer join org_index_search idx on (idx.id = o.id) where idx.txt @@ plainto_tsquery(:text)", nativeQuery = true) + Page search(@Param("text") String text, Pageable pageable); // SEARCH FOR USER - @Query(value = "select o.* from organizations_simple_view o left outer join user_countries uc on (uc.country = o.country) where o.name ilike %:text% and uc.email = :email", nativeQuery = true) - Page findByNameForUser(String text, String email, Pageable pageable); + @Query(value = "select o.* from organizations_simple_view o left outer join org_index_search idx on (idx.id = o.id) left outer join user_countries uc on (uc.country = o.country) where idx.txt @@ plainto_tsquery(:text) and uc.email = :email", nativeQuery = true) + Page searchForUser(@Param("text") String text, @Param("email") String email, Pageable pageable); Page findByCountry(String country, Pageable pageable); diff --git a/apps/dnet-orgs-database-application/src/main/resources/sql/schema.sql b/apps/dnet-orgs-database-application/src/main/resources/sql/schema.sql index bc4ee3eb..25537692 100644 --- a/apps/dnet-orgs-database-application/src/main/resources/sql/schema.sql +++ b/apps/dnet-orgs-database-application/src/main/resources/sql/schema.sql @@ -267,4 +267,57 @@ WHERE d.reltype = 'suggested' GROUP BY o.id, o.name, o.city, o.country ORDER BY o.name; + +CREATE TABLE org_index_search(id text PRIMARY KEY, txt tsvector); +CREATE INDEX org_index_search_txt_idx ON org_index_search(txt); +INSERT INTO org_index_search(id, txt) SELECT + o.id, + to_tsvector(o.id||' '||o.name||' '||array_to_string(array_agg(DISTINCT n.name), ' ','')||' ' ||array_to_string(array_agg(DISTINCT a.acronym), ' ','')) +FROM organizations o + LEFT OUTER JOIN other_names n on (o.id = n.id) + LEFT OUTER JOIN acronyms a on (o.id = a.id) +GROUP BY o.id, o.name; + + +CREATE OR REPLACE FUNCTION delete_index_search() RETURNS trigger LANGUAGE plpgsql AS $$ +BEGIN + DELETE FROM org_index_search WHERE id = old.id; + RETURN NULL; +END; +$$; + +CREATE OR REPLACE FUNCTION insert_or_update_index_search_trigger() RETURNS trigger LANGUAGE plpgsql AS $$ +BEGIN + INSERT INTO org_index_search(id, txt) (SELECT + o.id, + to_tsvector(o.id||' '||o.name||' '||array_to_string(array_agg(DISTINCT n.name), ' ','')||' ' ||array_to_string(array_agg(DISTINCT a.acronym), ' ','')) + FROM organizations o + LEFT OUTER JOIN other_names n on (o.id = n.id) + LEFT OUTER JOIN acronyms a on (o.id = a.id) + WHERE o.id = new.id + GROUP BY o.id, o.name) + ON CONFLICT (id) DO UPDATE SET txt = EXCLUDED.txt; + RETURN NULL; +END; +$$; + +CREATE TRIGGER insert_or_update_index_search_trigger AFTER INSERT OR UPDATE ON organizations FOR EACH ROW EXECUTE PROCEDURE insert_or_update_index_search_trigger(); +CREATE TRIGGER delete_index_search_trigger BEFORE DELETE ON organizations FOR EACH ROW EXECUTE PROCEDURE delete_index_search(); + + + + + + + + + + + + + + + + + \ No newline at end of file