index for fulltext search

This commit is contained in:
Michele Artini 2020-10-01 11:33:14 +02:00
parent 9799ff3bda
commit f1c39ee6df
4 changed files with 72 additions and 10 deletions

View File

@ -1,5 +1,12 @@
1) Completare operazioni di insert/update sul db
2) Migliorare ricerca (utilizzare fulltext search di postgres)
3) Primo caricamento di organizzazioni da openaire (con prefisso openorgs____) con simrels
4) Caricamenti successivi di nuove oganizazzioni
5) Caricamenti successivi di nuove simrels
TODO
----
1) Aggiungere i campi approved (boolean) e is_openaire_master (boolean) alla tabella organizations e relativa gestione (ui per l'apprrovazione, filtro, ecc... )
2) Ricerca di un'organizzazione tra quelle not approved and is_openaire_master creare una nuova organizazione
3) Ricerca di un'organizzazione tra quelle not_appproved and is_openaire_master per creare una nuova simrel (le s)
4) Implemetare fulltext search su i campi (name, alternative names, acronyms)
create table tmp_fulltext(id text primary key, fulltext tsvector);
CREATE INDEX tmp_fulltext_idx ON tmp_fulltext(fulltext);
insert into org_index_search(id, txt) select o.id, to_tsvector(o.id||' '||o.name||' '||array_to_string(array_agg(DISTINCT n.name), ' ','')||' ' ||array_to_string(array_agg(DISTINCT a.acronym), ' ','')) from organizations o left outer join other_names n on (o.id = n.id) left outer join acronyms a on (o.id = a.id) group by o.id, o.name;
select o.name from organizations o left outer join tmp_fulltext f on (o.id = f.id) where f.fulltext @@ plainto_tsquery('pisa university');

View File

@ -209,8 +209,8 @@ public class OrganizationController {
@RequestParam final String q,
final Authentication authentication) {
return UserInfo.isSuperAdmin(authentication)
? organizationSimpleViewRepository.findByNameContainingIgnoreCase(q, PageRequest.of(page, size))
: organizationSimpleViewRepository.findByNameForUser(q, authentication.getName(), PageRequest.of(page, size));
? organizationSimpleViewRepository.search(q, PageRequest.of(page, size))
: organizationSimpleViewRepository.searchForUser(q, authentication.getName(), PageRequest.of(page, size));
}
@GetMapping("/byCountry/{code}/{page}/{size}")

View File

@ -3,6 +3,7 @@ package eu.dnetlib.organizations.repository.readonly;
import org.springframework.data.domain.Page;
import org.springframework.data.domain.Pageable;
import org.springframework.data.jpa.repository.Query;
import org.springframework.data.repository.query.Param;
import org.springframework.stereotype.Repository;
import eu.dnetlib.organizations.model.view.OrganizationSimpleView;
@ -11,11 +12,12 @@ import eu.dnetlib.organizations.model.view.OrganizationSimpleView;
public interface OrganizationSimpleViewRepository extends ReadOnlyRepository<OrganizationSimpleView, String> {
// SEARCH
Page<OrganizationSimpleView> findByNameContainingIgnoreCase(String name, Pageable pageable);
@Query(value = "select o.* from organizations_simple_view o left outer join org_index_search idx on (idx.id = o.id) where idx.txt @@ plainto_tsquery(:text)", nativeQuery = true)
Page<OrganizationSimpleView> search(@Param("text") String text, Pageable pageable);
// SEARCH FOR USER
@Query(value = "select o.* from organizations_simple_view o left outer join user_countries uc on (uc.country = o.country) where o.name ilike %:text% and uc.email = :email", nativeQuery = true)
Page<OrganizationSimpleView> findByNameForUser(String text, String email, Pageable pageable);
@Query(value = "select o.* from organizations_simple_view o left outer join org_index_search idx on (idx.id = o.id) left outer join user_countries uc on (uc.country = o.country) where idx.txt @@ plainto_tsquery(:text) and uc.email = :email", nativeQuery = true)
Page<OrganizationSimpleView> searchForUser(@Param("text") String text, @Param("email") String email, Pageable pageable);
Page<OrganizationSimpleView> findByCountry(String country, Pageable pageable);

View File

@ -268,3 +268,56 @@ WHERE
GROUP BY o.id, o.name, o.city, o.country
ORDER BY o.name;
CREATE TABLE org_index_search(id text PRIMARY KEY, txt tsvector);
CREATE INDEX org_index_search_txt_idx ON org_index_search(txt);
INSERT INTO org_index_search(id, txt) SELECT
o.id,
to_tsvector(o.id||' '||o.name||' '||array_to_string(array_agg(DISTINCT n.name), ' ','')||' ' ||array_to_string(array_agg(DISTINCT a.acronym), ' ',''))
FROM organizations o
LEFT OUTER JOIN other_names n on (o.id = n.id)
LEFT OUTER JOIN acronyms a on (o.id = a.id)
GROUP BY o.id, o.name;
CREATE OR REPLACE FUNCTION delete_index_search() RETURNS trigger LANGUAGE plpgsql AS $$
BEGIN
DELETE FROM org_index_search WHERE id = old.id;
RETURN NULL;
END;
$$;
CREATE OR REPLACE FUNCTION insert_or_update_index_search_trigger() RETURNS trigger LANGUAGE plpgsql AS $$
BEGIN
INSERT INTO org_index_search(id, txt) (SELECT
o.id,
to_tsvector(o.id||' '||o.name||' '||array_to_string(array_agg(DISTINCT n.name), ' ','')||' ' ||array_to_string(array_agg(DISTINCT a.acronym), ' ',''))
FROM organizations o
LEFT OUTER JOIN other_names n on (o.id = n.id)
LEFT OUTER JOIN acronyms a on (o.id = a.id)
WHERE o.id = new.id
GROUP BY o.id, o.name)
ON CONFLICT (id) DO UPDATE SET txt = EXCLUDED.txt;
RETURN NULL;
END;
$$;
CREATE TRIGGER insert_or_update_index_search_trigger AFTER INSERT OR UPDATE ON organizations FOR EACH ROW EXECUTE PROCEDURE insert_or_update_index_search_trigger();
CREATE TRIGGER delete_index_search_trigger BEFORE DELETE ON organizations FOR EACH ROW EXECUTE PROCEDURE delete_index_search();