From 306defc3455e4914b3750aa0f5fd065f3b8f3773 Mon Sep 17 00:00:00 2001 From: Costantino Perciante Date: Sun, 15 Jan 2017 18:50:20 +0000 Subject: [PATCH] added method to query a single field. Minor methods/classes'names fix git-svn-id: http://svn.research-infrastructures.eu/public/d4science/gcube/trunk/social-networking/social-data-search-client@141582 82a268e6-3cf1-43bd-a215-b396298e98cf --- distro/changelog.xml | 4 + pom.xml | 2 +- .../ElasticSearchClientInterface.java | 34 ------- .../GCubeSearchClient.java | 45 +++++++++ ...ntImpl.java => GCubeSearchClientImpl.java} | 97 +++++++++++++++---- 5 files changed, 127 insertions(+), 55 deletions(-) delete mode 100644 src/main/java/org/gcube/socialnetworking/social_data_search_client/ElasticSearchClientInterface.java create mode 100644 src/main/java/org/gcube/socialnetworking/social_data_search_client/GCubeSearchClient.java rename src/main/java/org/gcube/socialnetworking/social_data_search_client/{ElasticSearchClientImpl.java => GCubeSearchClientImpl.java} (65%) diff --git a/distro/changelog.xml b/distro/changelog.xml index 9209707..0d935ef 100644 --- a/distro/changelog.xml +++ b/distro/changelog.xml @@ -1,4 +1,8 @@ + + Added method to query a single field + Minor bug fixes diff --git a/pom.xml b/pom.xml index 1152a60..1275f01 100644 --- a/pom.xml +++ b/pom.xml @@ -11,7 +11,7 @@ org.gcube.socialnetworking social-data-search-client - 1.1.0-SNAPSHOT + 1.2.0-SNAPSHOT jar Social Data Search Client diff --git a/src/main/java/org/gcube/socialnetworking/social_data_search_client/ElasticSearchClientInterface.java b/src/main/java/org/gcube/socialnetworking/social_data_search_client/ElasticSearchClientInterface.java deleted file mode 100644 index dd082be..0000000 --- a/src/main/java/org/gcube/socialnetworking/social_data_search_client/ElasticSearchClientInterface.java +++ /dev/null @@ -1,34 +0,0 @@ -package org.gcube.socialnetworking.social_data_search_client; - -import java.util.List; -import java.util.Set; - -import 
org.gcube.portal.databook.shared.EnhancedFeed; - -/** - * The ElasticSearch client interface. - * @author Costantino Perciante at ISTI-CNR - * (costantino.perciante@isti.cnr.it) - * - */ -public interface ElasticSearchClientInterface { - - /** - * Given a query, the method find matching enhanced feeds into the elasticsearch index and return - * at most quantity hits starting from from. - * @param query the query to match - * @param vreIDS specifies the vre(s) to which the returning feeds must belong - * @param from start hits index - * @param quantity max number of hits to return starting from from - * @return A list of matching enhanced feeds or nothing - */ - List searchInEnhancedFeeds(String query, Set vreIDS, int from, int quantity); - - /** - * Delete from the index a document with id docID. - * @param docID the id of the doc to delete - * @return true on success, false otherwise - */ - boolean deleteDocument(String docID); - -} diff --git a/src/main/java/org/gcube/socialnetworking/social_data_search_client/GCubeSearchClient.java b/src/main/java/org/gcube/socialnetworking/social_data_search_client/GCubeSearchClient.java new file mode 100644 index 0000000..4ea8293 --- /dev/null +++ b/src/main/java/org/gcube/socialnetworking/social_data_search_client/GCubeSearchClient.java @@ -0,0 +1,45 @@ +package org.gcube.socialnetworking.social_data_search_client; + +import java.util.List; +import java.util.Set; +import org.gcube.socialnetworking.social_data_indexing_common.utils.SearchableFields; +import org.gcube.portal.databook.shared.EnhancedFeed; + +/** + * The GCubeSearchClient interface, a client to search in social data. + * @author Costantino Perciante at ISTI-CNR (costantino.perciante@isti.cnr.it) + */ +public interface GCubeSearchClient { + + /** + * Finds the enhanced feeds matching the given query in the elasticsearch index and returns + * at most quantity hits starting from index from. A multimatch query is performed against all + * searchable fields. 
+ * @param query the query to match + * @param vreIDS the vre(s) to which the returned feeds must belong + * @param from index of the first hit to return + * @param quantity max number of hits to return starting from from + * @return the list of matching enhanced feeds (empty when there is none) + */ + List search(String query, Set vreIDS, int from, int quantity); + + /** + * Finds the enhanced feeds matching the given query in the elasticsearch index and returns + * at most quantity hits starting from index from. The query is performed against one of the searchable fields. + * @param query the query to match + * @param vreIDS the vre(s) to which the returned feeds must belong + * @param from index of the first hit to return + * @param quantity max number of hits to return starting from from + * @param field the field against which the query is performed + * @return the list of matching enhanced feeds (empty when there is none) + */ + List searchInField(String query, Set vreIDS, int from, int quantity, SearchableFields field); + + /** + * Deletes from the index the document with id docID. 
+ * @param docID the id of the doc to delete + * @return true on success, false otherwise + */ + boolean deleteDocument(String docID); + +} diff --git a/src/main/java/org/gcube/socialnetworking/social_data_search_client/ElasticSearchClientImpl.java b/src/main/java/org/gcube/socialnetworking/social_data_search_client/GCubeSearchClientImpl.java similarity index 65% rename from src/main/java/org/gcube/socialnetworking/social_data_search_client/ElasticSearchClientImpl.java rename to src/main/java/org/gcube/socialnetworking/social_data_search_client/GCubeSearchClientImpl.java index 07bff14..84f9318 100644 --- a/src/main/java/org/gcube/socialnetworking/social_data_search_client/ElasticSearchClientImpl.java +++ b/src/main/java/org/gcube/socialnetworking/social_data_search_client/GCubeSearchClientImpl.java @@ -6,7 +6,7 @@ import java.net.UnknownHostException; import java.util.ArrayList; import java.util.List; import java.util.Set; - +import org.gcube.socialnetworking.social_data_indexing_common.utils.SearchableFields; import org.codehaus.jackson.map.ObjectMapper; import org.elasticsearch.action.delete.DeleteResponse; import org.elasticsearch.action.search.SearchResponse; @@ -20,22 +20,18 @@ import org.elasticsearch.index.query.QueryBuilders; import org.elasticsearch.index.query.TermsQueryBuilder; import org.elasticsearch.search.SearchHit; import org.gcube.portal.databook.shared.EnhancedFeed; -import org.gcube.socialnetworking.social_data_indexing_common.utils.ElasticSearchRunningCluster; -import org.gcube.socialnetworking.social_data_indexing_common.utils.IndexFields; import org.slf4j.Logger; import org.slf4j.LoggerFactory; /** * The elasticsearch client for gcube portlets. 
- * @author Costantino Perciante at ISTI-CNR - * (costantino.perciante@isti.cnr.it) - * + * @author Costantino Perciante at ISTI-CNR (costantino.perciante@isti.cnr.it) */ -public class ElasticSearchClientImpl implements ElasticSearchClientInterface{ +public class GCubeSearchClientImpl implements GCubeSearchClient{ //logger - public static final Logger logger = LoggerFactory.getLogger(ElasticSearchClientImpl.class); + public static final Logger logger = LoggerFactory.getLogger(GCubeSearchClientImpl.class); // the elasticsearch client private TransportClient client; @@ -54,7 +50,7 @@ public class ElasticSearchClientImpl implements ElasticSearchClientInterface{ * @param scope the scope in the infrastructure * @throws Exception */ - public ElasticSearchClientImpl(String scope) throws Exception { + public GCubeSearchClientImpl(String scope) throws Exception { // retrieve ElasticSearch Endpoint and set hosts/port number ElasticSearchRunningCluster elasticCluster = new ElasticSearchRunningCluster(scope); @@ -96,34 +92,33 @@ public class ElasticSearchClientImpl implements ElasticSearchClientInterface{ } } - - logger.debug("Connection to ElasticSearch cluster done."); + logger.info("Connection to ElasticSearch cluster done."); } @Override - public List searchInEnhancedFeeds(String query, Set vreIDS, int from, int quantity){ + public List search(String query, Set vreIDS, int from, int quantity){ List toReturn = new ArrayList<>(); - if(from < 0 || quantity <= 0) + if(from < 0 || quantity <= 0 || vreIDS == null || vreIDS.isEmpty()) return toReturn; // build the query MultiMatchQueryBuilder mq = QueryBuilders.multiMatchQuery( query, - IndexFields.EF_FEED_AUTHOR + "^3", // weight of 3 for feed's author - IndexFields.EF_FEED_TEXT + "^2", // weight of 2 for feed's description - IndexFields.EF_ATTACHMENT_NAME, - IndexFields.EF_PREVIEW_DESCRIPTION, - IndexFields.EF_COMMENT_TEXT + "^2", - IndexFields.EF_COMMENT_FULL_NAME) + SearchableFields.POST_AUTHOR + "^3", // weight of 3 for feed's author + 
SearchableFields.POST_TEXT + "^2", // weight of 2 for feed's description + SearchableFields.ATTACHMENT_NAME, + SearchableFields.PREVIEW_DESCRIPTION, + SearchableFields.COMMENT_TEXT + "^2", + SearchableFields.COMMENT_AUTHOR) .type(Type.MOST_FIELDS); // logger.debug(mq.toString()); // filter on vre BoolQueryBuilder filter = QueryBuilders.boolQuery(); - TermsQueryBuilder queryFilter = QueryBuilders.termsQuery(IndexFields.EF_FEED_VRE_ID, vreIDS); + TermsQueryBuilder queryFilter = QueryBuilders.termsQuery(SearchableFields.POST_VRE_ID, vreIDS); filter.should(queryFilter); //logger.debug(filter.toString()); @@ -167,6 +162,68 @@ public class ElasticSearchClientImpl implements ElasticSearchClientInterface{ logger.debug("Returning " + toReturn.size() + " results"); return toReturn; } + + @Override + public List searchInField(String query, Set vreIDS, + int from, int quantity, SearchableFields field) { + + List toReturn = new ArrayList<>(); + + // nothing to search with invalid paging, no field or no vre to look into + if(from < 0 || quantity <= 0 || field == null || vreIDS == null || vreIDS.isEmpty()) + return toReturn; + + // matchQuery() takes the field name as a String and returns a MatchQueryBuilder + // NOTE(review): assumes SearchableFields#toString() yields the index field name - confirm + String fieldName = String.valueOf(field); + + // filter on vre + BoolQueryBuilder filter = QueryBuilders.boolQuery(); + TermsQueryBuilder queryFilter = QueryBuilders.termsQuery(SearchableFields.POST_VRE_ID, vreIDS); + filter.should(queryFilter); + + //logger.debug(filter.toString()); + + // final filtered query: the single-field match restricted to the given vre(s) + BoolQueryBuilder filteredQuery = QueryBuilders.boolQuery(); + filteredQuery.must(QueryBuilders.matchQuery(fieldName, query)); + filteredQuery.filter(filter); + + //logger.debug(filteredQuery.toString()); + + SearchResponse response = client.prepareSearch(IndexFields.INDEX_NAME) + .setQuery(filteredQuery) + .setFrom(from) + .setSize(quantity) + .setExplain(true) + .execute() + .actionGet(); + + logger.debug("The search took " + response.getTookInMillis() + " ms"); + + SearchHit[] results = response.getHits().getHits(); + + logger.debug("Number of hits is " + results.length); + + ObjectMapper 
mapper = new ObjectMapper(); + + // rebuild objects + for (SearchHit hit : results) { + EnhancedFeed enhFeed; + try { + // a hit that cannot be deserialized is logged and skipped + enhFeed = mapper.readValue(hit.getSourceAsString(), EnhancedFeed.class); + toReturn.add(enhFeed); + + } catch (IOException e) { + logger.error("Unable to rebuild EnhancedFeed from hit " + hit.getId(), e); + } + } + + logger.debug("Returning " + toReturn.size() + " results"); + return toReturn; + + } @Override public boolean deleteDocument(String docID) {