From 94277982e07c6147d12c3c27e31c65d9b99e0707 Mon Sep 17 00:00:00 2001 From: "michele.artini" Date: Fri, 15 Jan 2021 12:21:10 +0100 Subject: [PATCH] fulltext indexing --- apps/dnet-orgs-database-application/TODO.txt | 2 +- .../OpenaireInternalApiController.java | 11 ++++++ .../organizations/utils/DatabaseUtils.java | 11 ++++++ .../src/main/resources/sql/schema.sql | 37 ++++++++++--------- 4 files changed, 42 insertions(+), 19 deletions(-) diff --git a/apps/dnet-orgs-database-application/TODO.txt b/apps/dnet-orgs-database-application/TODO.txt index 168b99e2..c8fef124 100644 --- a/apps/dnet-orgs-database-application/TODO.txt +++ b/apps/dnet-orgs-database-application/TODO.txt @@ -18,4 +18,4 @@ select o.name from organizations o left outer join tmp_fulltext f on (o.id = f.i # Nella risoluzione dei conflicts aggiungere i bottoni merge_all, all_different, merge_advanced # Nella risoluzione dei conflicts a partire dai singoli record redirigere la pagina sul nuovo record invece che sul record di partenza # Includere la url nell'idice fulltetxt -# verificare acronimo UNIBO +# Prepareare un metodo update fulltext index to be scheduled diff --git a/apps/dnet-orgs-database-application/src/main/java/eu/dnetlib/organizations/controller/OpenaireInternalApiController.java b/apps/dnet-orgs-database-application/src/main/java/eu/dnetlib/organizations/controller/OpenaireInternalApiController.java index e16667fa..5127ec3e 100644 --- a/apps/dnet-orgs-database-application/src/main/java/eu/dnetlib/organizations/controller/OpenaireInternalApiController.java +++ b/apps/dnet-orgs-database-application/src/main/java/eu/dnetlib/organizations/controller/OpenaireInternalApiController.java @@ -37,4 +37,15 @@ public class OpenaireInternalApiController { return Arrays.asList("Importing simrels (request from " + req.getRemoteAddr() + ") ..."); } + + @GetMapping("/refresh/fulltextIndex") + public List updateFulltextIndex(final HttpServletRequest req) { + if (req.getRemoteAddr().equals(httpsProxy)) { + log.warn("Call received by blaklisted ip (https proxy): " + req.getRemoteAddr()); + throw new RuntimeException("Call received by blaklisted ip (https proxy): " + req.getRemoteAddr()); + } + new Thread(databaseUtils::updateFulltextIndex).start(); + + return Arrays.asList("Updating ..."); + } } diff --git a/apps/dnet-orgs-database-application/src/main/java/eu/dnetlib/organizations/utils/DatabaseUtils.java b/apps/dnet-orgs-database-application/src/main/java/eu/dnetlib/organizations/utils/DatabaseUtils.java index 652b087b..35221403 100644 --- a/apps/dnet-orgs-database-application/src/main/java/eu/dnetlib/organizations/utils/DatabaseUtils.java +++ b/apps/dnet-orgs-database-application/src/main/java/eu/dnetlib/organizations/utils/DatabaseUtils.java @@ -320,6 +320,17 @@ public class DatabaseUtils { } } + @Transactional + public void updateFulltextIndex() { + try { + log.info("Updating Fulltext Index..."); + jdbcTemplate.queryForList("SELECT refresh_index_search()"); + log.info("...done"); + } catch (final Exception e) { + log.error("Error updating Fulltext Index", e); + } + } + private void updateStatus(final String id, final OrganizationStatus status, final String user, final OffsetDateTime now) { organizationRepository.updateStatus(id, status.toString()); organizationRepository.updateModificationDate(id, user, now); diff --git a/apps/dnet-orgs-database-application/src/main/resources/sql/schema.sql b/apps/dnet-orgs-database-application/src/main/resources/sql/schema.sql index 2828a769..cb25ee12 100644 --- a/apps/dnet-orgs-database-application/src/main/resources/sql/schema.sql +++ b/apps/dnet-orgs-database-application/src/main/resources/sql/schema.sql @@ -567,13 +567,23 @@ ORDER BY o.name; CREATE TABLE org_index_search(id text PRIMARY KEY, txt tsvector); CREATE INDEX org_index_search_txt_idx ON org_index_search(txt); -INSERT INTO org_index_search(id, txt) SELECT - o.id, - to_tsvector(o.id||' '||o.name||' '||array_to_string(array_agg(DISTINCT n.name), ' ','')||' ' ||array_to_string(array_agg(DISTINCT a.acronym), ' ','')) -FROM organizations o - LEFT OUTER JOIN other_names n on (o.id = n.id) - LEFT OUTER JOIN acronyms a on (o.id = a.id) -GROUP BY o.id, o.name; + +CREATE OR REPLACE FUNCTION refresh_index_search() RETURNS bigint AS $$ + DELETE FROM org_index_search; + WITH d as ( + INSERT INTO org_index_search(id, txt) SELECT + o.id, + to_tsvector(o.id||' '||o.name||' '||array_to_string(array_agg(DISTINCT n.name), ' ','')||' '||array_to_string(array_agg(DISTINCT a.acronym), ' ','')||' '||array_to_string(array_agg(DISTINCT u.url), ' ','')) + FROM organizations o + LEFT OUTER JOIN other_names n on (o.id = n.id) + LEFT OUTER JOIN acronyms a on (o.id = a.id) + LEFT OUTER JOIN urls u on (o.id = u.id) + GROUP BY o.id, o.name RETURNING * + ) SELECT COUNT(*) FROM d; +$$ LANGUAGE SQL; + +SELECT refresh_index_search(); + CREATE OR REPLACE FUNCTION delete_index_search() RETURNS trigger LANGUAGE plpgsql AS $$ @@ -587,10 +597,11 @@ CREATE OR REPLACE FUNCTION insert_or_update_index_search_trigger() RETURNS trigg BEGIN INSERT INTO org_index_search(id, txt) (SELECT o.id, - to_tsvector(o.id||' '||o.name||' '||array_to_string(array_agg(DISTINCT n.name), ' ','')||' ' ||array_to_string(array_agg(DISTINCT a.acronym), ' ','')) + to_tsvector(o.id||' '||o.name||' '||array_to_string(array_agg(DISTINCT n.name), ' ','')||' '||array_to_string(array_agg(DISTINCT a.acronym), ' ','')||' '||array_to_string(array_agg(DISTINCT u.url), ' ','')) FROM organizations o LEFT OUTER JOIN other_names n on (o.id = n.id) LEFT OUTER JOIN acronyms a on (o.id = a.id) + LEFT OUTER JOIN urls u on (o.id = u.id) WHERE o.id = new.id GROUP BY o.id, o.name) ON CONFLICT (id) DO UPDATE SET txt = EXCLUDED.txt; @@ -608,13 +619,3 @@ CREATE TRIGGER delete_index_search_trigger BEFORE DELETE ON o - - - - - - - - - - \ No newline at end of file