query optimization
This commit is contained in:
parent
5dcce6d148
commit
cf638dbd16
|
@ -62,9 +62,6 @@ public class OrganizationSimpleView implements Serializable, Comparable<Organiza
|
||||||
@Column(name = "n_different_dups")
|
@Column(name = "n_different_dups")
|
||||||
private Long nDifferentDups;
|
private Long nDifferentDups;
|
||||||
|
|
||||||
@Column(name = "candidate_dup")
|
|
||||||
private Boolean candidateDup;
|
|
||||||
|
|
||||||
public OrganizationSimpleView() {}
|
public OrganizationSimpleView() {}
|
||||||
|
|
||||||
public OrganizationSimpleView(final String id) {
|
public OrganizationSimpleView(final String id) {
|
||||||
|
@ -159,14 +156,6 @@ public class OrganizationSimpleView implements Serializable, Comparable<Organiza
|
||||||
this.nDifferentDups = nDifferentDups;
|
this.nDifferentDups = nDifferentDups;
|
||||||
}
|
}
|
||||||
|
|
||||||
public Boolean getCandidateDup() {
|
|
||||||
return candidateDup;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void setCandidateDup(final Boolean candidateDup) {
|
|
||||||
this.candidateDup = candidateDup;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public int hashCode() {
|
public int hashCode() {
|
||||||
return Objects.hash(id);
|
return Objects.hash(id);
|
||||||
|
|
|
@ -25,14 +25,12 @@ public interface OrganizationSimpleViewRepository extends ReadOnlyRepository<Org
|
||||||
+ " array_remove(array_agg(DISTINCT u.url), NULL) AS urls,\n"
|
+ " array_remove(array_agg(DISTINCT u.url), NULL) AS urls,\n"
|
||||||
+ " count(DISTINCT d1.oa_original_id) FILTER (WHERE d1.reltype = 'is_similar') AS n_similar_dups,\n"
|
+ " count(DISTINCT d1.oa_original_id) FILTER (WHERE d1.reltype = 'is_similar') AS n_similar_dups,\n"
|
||||||
+ " count(DISTINCT d1.oa_original_id) FILTER (WHERE d1.reltype = 'suggested') AS n_suggested_dups,\n"
|
+ " count(DISTINCT d1.oa_original_id) FILTER (WHERE d1.reltype = 'suggested') AS n_suggested_dups,\n"
|
||||||
+ " count(DISTINCT d1.oa_original_id) FILTER (WHERE d1.reltype = 'is_different') AS n_different_dups,\n"
|
+ " count(DISTINCT d1.oa_original_id) FILTER (WHERE d1.reltype = 'is_different') AS n_different_dups\n"
|
||||||
+ " org.status = 'raw' AND NOT ('is_similar' = ANY (array_agg(d2.reltype))) AS candidate_dup\n"
|
|
||||||
+ "FROM org_index_search idx "
|
+ "FROM org_index_search idx "
|
||||||
+ " JOIN organizations org ON (idx.id = org.id) \n"
|
+ " JOIN organizations org ON (idx.id = org.id) \n"
|
||||||
+ " LEFT OUTER JOIN acronyms a ON org.id = a.id\n"
|
+ " LEFT OUTER JOIN acronyms a ON org.id = a.id\n"
|
||||||
+ " LEFT OUTER JOIN urls u ON org.id = u.id\n"
|
+ " LEFT OUTER JOIN urls u ON org.id = u.id\n"
|
||||||
+ " LEFT OUTER JOIN oa_duplicates d1 ON org.id = d1.local_id\n"
|
+ " LEFT OUTER JOIN oa_duplicates d1 ON org.id = d1.local_id\n"
|
||||||
+ " LEFT OUTER JOIN oa_duplicates d2 ON org.id = d2.oa_original_id\n"
|
|
||||||
+ "WHERE idx.txt @@ plainto_tsquery(:text) AND org.status in :statuses\n"
|
+ "WHERE idx.txt @@ plainto_tsquery(:text) AND org.status in :statuses\n"
|
||||||
+ "GROUP BY org.id, org.name, org.type, org.city, org.country, org.status\n"
|
+ "GROUP BY org.id, org.name, org.type, org.city, org.country, org.status\n"
|
||||||
+ "ORDER BY org.name", nativeQuery = true)
|
+ "ORDER BY org.name", nativeQuery = true)
|
||||||
|
@ -50,14 +48,12 @@ public interface OrganizationSimpleViewRepository extends ReadOnlyRepository<Org
|
||||||
+ " array_remove(array_agg(DISTINCT u.url), NULL) AS urls,\n"
|
+ " array_remove(array_agg(DISTINCT u.url), NULL) AS urls,\n"
|
||||||
+ " count(DISTINCT d1.oa_original_id) FILTER (WHERE d1.reltype = 'is_similar' ) AS n_similar_dups,\n"
|
+ " count(DISTINCT d1.oa_original_id) FILTER (WHERE d1.reltype = 'is_similar' ) AS n_similar_dups,\n"
|
||||||
+ " count(DISTINCT d1.oa_original_id) FILTER (WHERE d1.reltype = 'suggested' ) AS n_suggested_dups,\n"
|
+ " count(DISTINCT d1.oa_original_id) FILTER (WHERE d1.reltype = 'suggested' ) AS n_suggested_dups,\n"
|
||||||
+ " count(DISTINCT d1.oa_original_id) FILTER (WHERE d1.reltype = 'is_different') AS n_different_dups,\n"
|
+ " count(DISTINCT d1.oa_original_id) FILTER (WHERE d1.reltype = 'is_different') AS n_different_dups\n"
|
||||||
+ " (org.status = 'raw' AND not('is_similar' = ANY(array_agg(d2.reltype)))) AS candidate_dup\n"
|
|
||||||
+ "FROM org_index_search idx\n"
|
+ "FROM org_index_search idx\n"
|
||||||
+ " JOIN organizations org ON (idx.id = org.id)\n"
|
+ " JOIN organizations org ON (idx.id = org.id)\n"
|
||||||
+ " LEFT OUTER JOIN acronyms a ON (org.id = a.id)\n"
|
+ " LEFT OUTER JOIN acronyms a ON (org.id = a.id)\n"
|
||||||
+ " LEFT OUTER JOIN urls u ON (org.id = u.id)\n"
|
+ " LEFT OUTER JOIN urls u ON (org.id = u.id)\n"
|
||||||
+ " LEFT OUTER JOIN oa_duplicates d1 ON (org.id = d1.local_id)\n"
|
+ " LEFT OUTER JOIN oa_duplicates d1 ON (org.id = d1.local_id)\n"
|
||||||
+ " LEFT OUTER JOIN oa_duplicates d2 ON (org.id = d2.oa_original_id)\n"
|
|
||||||
+ " LEFT OUTER JOIN user_countries uc ON (uc.country = org.country) \n"
|
+ " LEFT OUTER JOIN user_countries uc ON (uc.country = org.country) \n"
|
||||||
+ "WHERE idx.txt @@ plainto_tsquery(:text) AND uc.email = :email AND org.status IN :statuses \n"
|
+ "WHERE idx.txt @@ plainto_tsquery(:text) AND uc.email = :email AND org.status IN :statuses \n"
|
||||||
+ "GROUP BY org.id, org.name, org.type, org.city, org.country, org.status\n"
|
+ "GROUP BY org.id, org.name, org.type, org.city, org.country, org.status\n"
|
||||||
|
@ -86,11 +82,52 @@ public interface OrganizationSimpleViewRepository extends ReadOnlyRepository<Org
|
||||||
Page<OrganizationSimpleView> findByTypeAndStatusForUser(String type, String status, String name, Pageable pageable);
|
Page<OrganizationSimpleView> findByTypeAndStatusForUser(String type, String status, String name, Pageable pageable);
|
||||||
|
|
||||||
// SEARCH FOR VALID DUPLICATE CANDIDATES
|
// SEARCH FOR VALID DUPLICATE CANDIDATES
|
||||||
@Query(value = "select o.* from organizations_simple_view o left outer join org_index_search idx on (idx.id = o.id) where idx.txt @@ plainto_tsquery(:text) and o.candidate_dup order by o.name", nativeQuery = true)
|
@Query(value = "SELECT\n"
|
||||||
|
+ " org.id,\n"
|
||||||
|
+ " org.name,\n"
|
||||||
|
+ " org.type,\n"
|
||||||
|
+ " org.city,\n"
|
||||||
|
+ " org.country,\n"
|
||||||
|
+ " org.status,\n"
|
||||||
|
+ " array_remove(array_agg(DISTINCT a.acronym), NULL) AS acronyms,\n"
|
||||||
|
+ " array_remove(array_agg(DISTINCT u.url), NULL) AS urls,\n"
|
||||||
|
+ " NULL AS n_similar_dups,\n"
|
||||||
|
+ " NULL AS n_suggested_dups,\n"
|
||||||
|
+ " NULL AS n_different_dups\n"
|
||||||
|
+ "FROM org_index_search idx\n"
|
||||||
|
+ " JOIN organizations org ON (idx.id = org.id) \n"
|
||||||
|
+ " LEFT OUTER JOIN acronyms a ON (org.id = a.id)\n"
|
||||||
|
+ " LEFT OUTER JOIN urls u ON (org.id = u.id)\n"
|
||||||
|
+ " LEFT OUTER JOIN oa_duplicates d2 ON (org.id = d2.oa_original_id)\n"
|
||||||
|
+ "WHERE org.status = 'raw' AND idx.txt @@ plainto_tsquery(:text)\n"
|
||||||
|
+ "GROUP BY org.id, org.name, org.type, org.city, org.country, org.status\n"
|
||||||
|
+ "HAVING not('is_similar' = ANY(array_agg(d2.reltype)))\n"
|
||||||
|
+ "ORDER BY org.name", nativeQuery = true)
|
||||||
Page<OrganizationSimpleView> searchCandidateDuplicates(@Param("text") String text, Pageable pageable);
|
Page<OrganizationSimpleView> searchCandidateDuplicates(@Param("text") String text, Pageable pageable);
|
||||||
|
|
||||||
// SEARCH FOR VALID DUPLICATE CANDIDATES FOR USER
|
// SEARCH FOR VALID DUPLICATE CANDIDATES FOR USER
|
||||||
@Query(value = "select o.* from organizations_simple_view o left outer join org_index_search idx on (idx.id = o.id) left outer join user_countries uc on (uc.country = o.country) where idx.txt @@ plainto_tsquery(:text) and uc.email = :email and o.candidate_dup order by o.name", nativeQuery = true)
|
@Query(value = "SELECT\n"
|
||||||
|
+ " org.id,\n"
|
||||||
|
+ " org.name,\n"
|
||||||
|
+ " org.type,\n"
|
||||||
|
+ " org.city,\n"
|
||||||
|
+ " org.country,\n"
|
||||||
|
+ " org.status,\n"
|
||||||
|
+ " array_remove(array_agg(DISTINCT a.acronym), NULL) AS acronyms,\n"
|
||||||
|
+ " array_remove(array_agg(DISTINCT u.url), NULL) AS urls,\n"
|
||||||
|
+ " NULL AS n_similar_dups,\n"
|
||||||
|
+ " NULL AS n_suggested_dups,\n"
|
||||||
|
+ " NULL AS n_different_dups\n"
|
||||||
|
+ "FROM org_index_search idx\n"
|
||||||
|
+ " JOIN organizations org ON (idx.id = org.id) \n"
|
||||||
|
+ " LEFT OUTER JOIN acronyms a ON (org.id = a.id)\n"
|
||||||
|
+ " LEFT OUTER JOIN urls u ON (org.id = u.id)\n"
|
||||||
|
+ " LEFT OUTER JOIN oa_duplicates d2 ON (org.id = d2.oa_original_id)\n"
|
||||||
|
+ " LEFT OUTER JOIN user_countries uc ON (uc.country = org.country)\n"
|
||||||
|
+ "WHERE org.status = 'raw' AND uc.email = :email AND idx.txt @@ plainto_tsquery(:text)\n"
|
||||||
|
+ "GROUP BY org.id, org.name, org.type, org.city, org.country, org.status\n"
|
||||||
|
+ "HAVING not('is_similar' = ANY(array_agg(d2.reltype)))\n"
|
||||||
|
+ "ORDER BY org.name", nativeQuery = true)
|
||||||
Page<OrganizationSimpleView> searchCandidateDuplicatesForUser(@Param("text") String text, @Param("email") String email, Pageable pageable);
|
Page<OrganizationSimpleView> searchCandidateDuplicatesForUser(@Param("text") String text, @Param("email") String email, Pageable pageable);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -562,14 +562,12 @@ CREATE VIEW organizations_simple_view AS SELECT
|
||||||
array_remove(array_agg(DISTINCT u.url), NULL) AS urls,
|
array_remove(array_agg(DISTINCT u.url), NULL) AS urls,
|
||||||
count(DISTINCT d1.oa_original_id) FILTER (WHERE d1.reltype = 'is_similar' ) AS n_similar_dups,
|
count(DISTINCT d1.oa_original_id) FILTER (WHERE d1.reltype = 'is_similar' ) AS n_similar_dups,
|
||||||
count(DISTINCT d1.oa_original_id) FILTER (WHERE d1.reltype = 'suggested' ) AS n_suggested_dups,
|
count(DISTINCT d1.oa_original_id) FILTER (WHERE d1.reltype = 'suggested' ) AS n_suggested_dups,
|
||||||
count(DISTINCT d1.oa_original_id) FILTER (WHERE d1.reltype = 'is_different') AS n_different_dups,
|
count(DISTINCT d1.oa_original_id) FILTER (WHERE d1.reltype = 'is_different') AS n_different_dups
|
||||||
(org.status = 'raw' AND not('is_similar' = ANY(array_agg(d2.reltype)))) AS candidate_dup
|
|
||||||
FROM
|
FROM
|
||||||
organizations org
|
organizations org
|
||||||
LEFT OUTER JOIN acronyms a ON (org.id = a.id)
|
LEFT OUTER JOIN acronyms a ON (org.id = a.id)
|
||||||
LEFT OUTER JOIN urls u ON (org.id = u.id)
|
LEFT OUTER JOIN urls u ON (org.id = u.id)
|
||||||
LEFT OUTER JOIN oa_duplicates d1 ON (org.id = d1.local_id)
|
LEFT OUTER JOIN oa_duplicates d1 ON (org.id = d1.local_id)
|
||||||
LEFT OUTER JOIN oa_duplicates d2 ON (org.id = d2.oa_original_id)
|
|
||||||
GROUP BY
|
GROUP BY
|
||||||
org.id,
|
org.id,
|
||||||
org.name,
|
org.name,
|
||||||
|
|
Loading…
Reference in New Issue