query optimization

This commit is contained in:
Michele Artini 2021-06-22 12:57:52 +02:00
parent 5dcce6d148
commit cf638dbd16
3 changed files with 46 additions and 22 deletions

View File

@ -62,9 +62,6 @@ public class OrganizationSimpleView implements Serializable, Comparable<Organiza
@Column(name = "n_different_dups")
private Long nDifferentDups;
@Column(name = "candidate_dup")
private Boolean candidateDup;
public OrganizationSimpleView() {}
public OrganizationSimpleView(final String id) {
@ -159,14 +156,6 @@ public class OrganizationSimpleView implements Serializable, Comparable<Organiza
this.nDifferentDups = nDifferentDups;
}
/**
 * Returns the value of the {@code candidateDup} field, which is mapped to the
 * {@code candidate_dup} column.
 *
 * @return the stored flag; the field is a boxed {@code Boolean}, so it may be {@code null}
 */
public Boolean getCandidateDup() {
    return this.candidateDup;
}
/**
 * Stores the given value in the {@code candidateDup} field.
 *
 * @param candidateDup the new flag value; it is assigned as-is, without any check
 */
public void setCandidateDup(final Boolean candidateDup) {
this.candidateDup = candidateDup;
}
@Override
public int hashCode() {
return Objects.hash(id);

View File

@ -25,14 +25,12 @@ public interface OrganizationSimpleViewRepository extends ReadOnlyRepository<Org
+ " array_remove(array_agg(DISTINCT u.url), NULL) AS urls,\n"
+ " count(DISTINCT d1.oa_original_id) FILTER (WHERE d1.reltype = 'is_similar') AS n_similar_dups,\n"
+ " count(DISTINCT d1.oa_original_id) FILTER (WHERE d1.reltype = 'suggested') AS n_suggested_dups,\n"
+ " count(DISTINCT d1.oa_original_id) FILTER (WHERE d1.reltype = 'is_different') AS n_different_dups,\n"
+ " org.status = 'raw' AND NOT ('is_similar' = ANY (array_agg(d2.reltype))) AS candidate_dup\n"
+ " count(DISTINCT d1.oa_original_id) FILTER (WHERE d1.reltype = 'is_different') AS n_different_dups\n"
+ "FROM org_index_search idx "
+ " JOIN organizations org ON (idx.id = org.id) \n"
+ " LEFT OUTER JOIN acronyms a ON org.id = a.id\n"
+ " LEFT OUTER JOIN urls u ON org.id = u.id\n"
+ " LEFT OUTER JOIN oa_duplicates d1 ON org.id = d1.local_id\n"
+ " LEFT OUTER JOIN oa_duplicates d2 ON org.id = d2.oa_original_id\n"
+ "WHERE idx.txt @@ plainto_tsquery(:text) AND org.status in :statuses\n"
+ "GROUP BY org.id, org.name, org.type, org.city, org.country, org.status\n"
+ "ORDER BY org.name", nativeQuery = true)
@ -50,14 +48,12 @@ public interface OrganizationSimpleViewRepository extends ReadOnlyRepository<Org
+ " array_remove(array_agg(DISTINCT u.url), NULL) AS urls,\n"
+ " count(DISTINCT d1.oa_original_id) FILTER (WHERE d1.reltype = 'is_similar' ) AS n_similar_dups,\n"
+ " count(DISTINCT d1.oa_original_id) FILTER (WHERE d1.reltype = 'suggested' ) AS n_suggested_dups,\n"
+ " count(DISTINCT d1.oa_original_id) FILTER (WHERE d1.reltype = 'is_different') AS n_different_dups,\n"
+ " (org.status = 'raw' AND not('is_similar' = ANY(array_agg(d2.reltype)))) AS candidate_dup\n"
+ " count(DISTINCT d1.oa_original_id) FILTER (WHERE d1.reltype = 'is_different') AS n_different_dups\n"
+ "FROM org_index_search idx\n"
+ " JOIN organizations org ON (idx.id = org.id)\n"
+ " LEFT OUTER JOIN acronyms a ON (org.id = a.id)\n"
+ " LEFT OUTER JOIN urls u ON (org.id = u.id)\n"
+ " LEFT OUTER JOIN oa_duplicates d1 ON (org.id = d1.local_id)\n"
+ " LEFT OUTER JOIN oa_duplicates d2 ON (org.id = d2.oa_original_id)\n"
+ " LEFT OUTER JOIN user_countries uc ON (uc.country = org.country) \n"
+ "WHERE idx.txt @@ plainto_tsquery(:text) AND uc.email = :email AND org.status IN :statuses \n"
+ "GROUP BY org.id, org.name, org.type, org.city, org.country, org.status\n"
@ -86,11 +82,52 @@ public interface OrganizationSimpleViewRepository extends ReadOnlyRepository<Org
Page<OrganizationSimpleView> findByTypeAndStatusForUser(String type, String status, String name, Pageable pageable);
// SEARCH FOR VALID DUPLICATE CANDIDATES
//
// Returns a page of organizations that can be proposed as duplicate candidates:
//  - the organization status is 'raw';
//  - its entry in org_index_search matches :text through plainto_tsquery;
//  - none of the oa_duplicates rows that reference it as oa_original_id has reltype 'is_similar'.
// Acronyms and urls are aggregated into arrays (NULL entries removed); the three
// n_*_dups columns are returned as NULL by the multi-line query. Rows are ordered by org.name.
//
// NOTE(review): with the LEFT OUTER JOIN, an organization without any oa_duplicates row
// produces array_agg = {NULL}; 'is_similar' = ANY({NULL}) is NULL, so the HAVING clause
// discards that organization. Confirm that excluding such organizations is intended.
//
// NOTE(review): two @Query annotations precede the method here; presumably the one-line
// query (based on organizations_simple_view and o.candidate_dup) is the previous version
// that the multi-line query replaces - confirm that only one of them exists in the real file.
@Query(value = "select o.* from organizations_simple_view o left outer join org_index_search idx on (idx.id = o.id) where idx.txt @@ plainto_tsquery(:text) and o.candidate_dup order by o.name", nativeQuery = true)
@Query(value = "SELECT\n"
+ " org.id,\n"
+ " org.name,\n"
+ " org.type,\n"
+ " org.city,\n"
+ " org.country,\n"
+ " org.status,\n"
+ " array_remove(array_agg(DISTINCT a.acronym), NULL) AS acronyms,\n"
+ " array_remove(array_agg(DISTINCT u.url), NULL) AS urls,\n"
+ " NULL AS n_similar_dups,\n"
+ " NULL AS n_suggested_dups,\n"
+ " NULL AS n_different_dups\n"
+ "FROM org_index_search idx\n"
+ " JOIN organizations org ON (idx.id = org.id) \n"
+ " LEFT OUTER JOIN acronyms a ON (org.id = a.id)\n"
+ " LEFT OUTER JOIN urls u ON (org.id = u.id)\n"
+ " LEFT OUTER JOIN oa_duplicates d2 ON (org.id = d2.oa_original_id)\n"
+ "WHERE org.status = 'raw' AND idx.txt @@ plainto_tsquery(:text)\n"
+ "GROUP BY org.id, org.name, org.type, org.city, org.country, org.status\n"
+ "HAVING not('is_similar' = ANY(array_agg(d2.reltype)))\n"
+ "ORDER BY org.name", nativeQuery = true)
Page<OrganizationSimpleView> searchCandidateDuplicates(@Param("text") String text, Pageable pageable);
// SEARCH FOR VALID DUPLICATE CANDIDATES FOR USER
//
// Same selection as the candidate search without user, restricted by user_countries:
//  - the organization status is 'raw';
//  - its entry in org_index_search matches :text through plainto_tsquery;
//  - a user_countries row exists with country = org.country and email = :email
//    (the join is LEFT OUTER, but the WHERE condition on uc.email removes unmatched rows);
//  - none of the oa_duplicates rows that reference it as oa_original_id has reltype 'is_similar'.
// Acronyms and urls are aggregated into arrays (NULL entries removed); the three
// n_*_dups columns are returned as NULL by the multi-line query. Rows are ordered by org.name.
//
// NOTE(review): with the LEFT OUTER JOIN, an organization without any oa_duplicates row
// produces array_agg = {NULL}; 'is_similar' = ANY({NULL}) is NULL, so the HAVING clause
// discards that organization. Confirm that excluding such organizations is intended.
//
// NOTE(review): two @Query annotations precede the method here; presumably the one-line
// query (based on organizations_simple_view and o.candidate_dup) is the previous version
// that the multi-line query replaces - confirm that only one of them exists in the real file.
@Query(value = "select o.* from organizations_simple_view o left outer join org_index_search idx on (idx.id = o.id) left outer join user_countries uc on (uc.country = o.country) where idx.txt @@ plainto_tsquery(:text) and uc.email = :email and o.candidate_dup order by o.name", nativeQuery = true)
@Query(value = "SELECT\n"
+ " org.id,\n"
+ " org.name,\n"
+ " org.type,\n"
+ " org.city,\n"
+ " org.country,\n"
+ " org.status,\n"
+ " array_remove(array_agg(DISTINCT a.acronym), NULL) AS acronyms,\n"
+ " array_remove(array_agg(DISTINCT u.url), NULL) AS urls,\n"
+ " NULL AS n_similar_dups,\n"
+ " NULL AS n_suggested_dups,\n"
+ " NULL AS n_different_dups\n"
+ "FROM org_index_search idx\n"
+ " JOIN organizations org ON (idx.id = org.id) \n"
+ " LEFT OUTER JOIN acronyms a ON (org.id = a.id)\n"
+ " LEFT OUTER JOIN urls u ON (org.id = u.id)\n"
+ " LEFT OUTER JOIN oa_duplicates d2 ON (org.id = d2.oa_original_id)\n"
+ " LEFT OUTER JOIN user_countries uc ON (uc.country = org.country)\n"
+ "WHERE org.status = 'raw' AND uc.email = :email AND idx.txt @@ plainto_tsquery(:text)\n"
+ "GROUP BY org.id, org.name, org.type, org.city, org.country, org.status\n"
+ "HAVING not('is_similar' = ANY(array_agg(d2.reltype)))\n"
+ "ORDER BY org.name", nativeQuery = true)
Page<OrganizationSimpleView> searchCandidateDuplicatesForUser(@Param("text") String text, @Param("email") String email, Pageable pageable);
}

View File

@ -562,14 +562,12 @@ CREATE VIEW organizations_simple_view AS SELECT
array_remove(array_agg(DISTINCT u.url), NULL) AS urls,
count(DISTINCT d1.oa_original_id) FILTER (WHERE d1.reltype = 'is_similar' ) AS n_similar_dups,
count(DISTINCT d1.oa_original_id) FILTER (WHERE d1.reltype = 'suggested' ) AS n_suggested_dups,
count(DISTINCT d1.oa_original_id) FILTER (WHERE d1.reltype = 'is_different') AS n_different_dups,
(org.status = 'raw' AND not('is_similar' = ANY(array_agg(d2.reltype)))) AS candidate_dup
count(DISTINCT d1.oa_original_id) FILTER (WHERE d1.reltype = 'is_different') AS n_different_dups
FROM
organizations org
LEFT OUTER JOIN acronyms a ON (org.id = a.id)
LEFT OUTER JOIN urls u ON (org.id = u.id)
LEFT OUTER JOIN oa_duplicates d1 ON (org.id = d1.local_id)
LEFT OUTER JOIN oa_duplicates d2 ON (org.id = d2.oa_original_id)
GROUP BY
org.id,
org.name,