fulltext indexing

This commit is contained in:
Michele Artini 2021-01-15 12:21:10 +01:00
parent ab1fc66bbd
commit 94277982e0
4 changed files with 42 additions and 19 deletions

View File

@ -18,4 +18,4 @@ select o.name from organizations o left outer join tmp_fulltext f on (o.id = f.i
# Nella risoluzione dei conflicts aggiungere i bottoni merge_all, all_different, merge_advanced
# Nella risoluzione dei conflicts a partire dai singoli record redirigere la pagina sul nuovo record invece che sul record di partenza
# Includere la url nell'idice fulltetxt
# verificare acronimo UNIBO
# Prepareare un metodo update fulltext index to be scheduled

View File

@ -37,4 +37,15 @@ public class OpenaireInternalApiController {
return Arrays.asList("Importing simrels (request from " + req.getRemoteAddr() + ") ...");
}
@GetMapping("/refresh/fulltextIndex")
public List<String> updateFulltextIndex(final HttpServletRequest req) {
if (req.getRemoteAddr().equals(httpsProxy)) {
log.warn("Call received by blaklisted ip (https proxy): " + req.getRemoteAddr());
throw new RuntimeException("Call received by blaklisted ip (https proxy): " + req.getRemoteAddr());
}
new Thread(databaseUtils::updateFulltextIndex).start();
return Arrays.asList("Updating ...");
}
}

View File

@ -320,6 +320,17 @@ public class DatabaseUtils {
}
}
@Transactional
public void updateFulltextIndex() {
try {
log.info("Updating Fulltext Index...");
jdbcTemplate.queryForList("SELECT refresh_index_search()");
log.info("...done");
} catch (final Exception e) {
log.error("Error updating Fulltext Index", e);
}
}
private void updateStatus(final String id, final OrganizationStatus status, final String user, final OffsetDateTime now) {
organizationRepository.updateStatus(id, status.toString());
organizationRepository.updateModificationDate(id, user, now);

View File

@ -567,13 +567,23 @@ ORDER BY o.name;
CREATE TABLE org_index_search(id text PRIMARY KEY, txt tsvector);
CREATE INDEX org_index_search_txt_idx ON org_index_search(txt);
INSERT INTO org_index_search(id, txt) SELECT
o.id,
to_tsvector(o.id||' '||o.name||' '||array_to_string(array_agg(DISTINCT n.name), ' ','')||' ' ||array_to_string(array_agg(DISTINCT a.acronym), ' ',''))
FROM organizations o
LEFT OUTER JOIN other_names n on (o.id = n.id)
LEFT OUTER JOIN acronyms a on (o.id = a.id)
GROUP BY o.id, o.name;
CREATE OR REPLACE FUNCTION refresh_index_search() RETURNS bigint AS $$
DELETE FROM org_index_search;
WITH d as (
INSERT INTO org_index_search(id, txt) SELECT
o.id,
to_tsvector(o.id||' '||o.name||' '||array_to_string(array_agg(DISTINCT n.name), ' ','')||' '||array_to_string(array_agg(DISTINCT a.acronym), ' ','')||' '||array_to_string(array_agg(DISTINCT u.url), ' ',''))
FROM organizations o
LEFT OUTER JOIN other_names n on (o.id = n.id)
LEFT OUTER JOIN acronyms a on (o.id = a.id)
LEFT OUTER JOIN urls u on (o.id = u.id)
GROUP BY o.id, o.name RETURNING *
) SELECT COUNT(*) FROM d;
$$ LANGUAGE SQL;
SELECT refresh_index_search();
CREATE OR REPLACE FUNCTION delete_index_search() RETURNS trigger LANGUAGE plpgsql AS $$
@ -587,10 +597,11 @@ CREATE OR REPLACE FUNCTION insert_or_update_index_search_trigger() RETURNS trigg
BEGIN
INSERT INTO org_index_search(id, txt) (SELECT
o.id,
to_tsvector(o.id||' '||o.name||' '||array_to_string(array_agg(DISTINCT n.name), ' ','')||' ' ||array_to_string(array_agg(DISTINCT a.acronym), ' ',''))
to_tsvector(o.id||' '||o.name||' '||array_to_string(array_agg(DISTINCT n.name), ' ','')||' '||array_to_string(array_agg(DISTINCT a.acronym), ' ','')||' '||array_to_string(array_agg(DISTINCT u.url), ' ',''))
FROM organizations o
LEFT OUTER JOIN other_names n on (o.id = n.id)
LEFT OUTER JOIN acronyms a on (o.id = a.id)
LEFT OUTER JOIN urls u on (o.id = u.id)
WHERE o.id = new.id
GROUP BY o.id, o.name)
ON CONFLICT (id) DO UPDATE SET txt = EXCLUDED.txt;
@ -608,13 +619,3 @@ CREATE TRIGGER delete_index_search_trigger BEFORE DELETE ON o