From 8874f2154cb3c3b9c24dd61b36c8afcc709461ae Mon Sep 17 00:00:00 2001 From: Costantino Perciante Date: Fri, 12 Jan 2018 16:05:56 +0000 Subject: [PATCH] added code for getting statistics (i.e. num types, num orgs, num groups and num items) as well as the landing pages of such information git-svn-id: https://svn.d4science.research-infrastructures.eu/gcube/trunk/data-catalogue/ckan-util-library@162131 82a268e6-3cf1-43bd-a215-b396298e98cf --- pom.xml | 5 ++ .../ckanutillibrary/server/DataCatalogue.java | 8 ++ .../server/DataCatalogueImpl.java | 90 ++++++++++++++++--- .../server/DataCatalogueRunningCluster.java | 33 +++++-- .../ckanutillibrary/shared/Statistics.java | 84 +++++++++++++++++ .../server/TestDataCatalogueLib.java | 19 ++-- 6 files changed, 215 insertions(+), 24 deletions(-) create mode 100644 src/main/java/org/gcube/datacatalogue/ckanutillibrary/shared/Statistics.java diff --git a/pom.xml b/pom.xml index 4ec2051..6f47b2c 100644 --- a/pom.xml +++ b/pom.xml @@ -130,6 +130,11 @@ 1.1.1 compile + + org.apache.solr + solr-solrj + [4.10.2,5.0.0) + diff --git a/src/main/java/org/gcube/datacatalogue/ckanutillibrary/server/DataCatalogue.java b/src/main/java/org/gcube/datacatalogue/ckanutillibrary/server/DataCatalogue.java index ad71c92..22e5ffc 100644 --- a/src/main/java/org/gcube/datacatalogue/ckanutillibrary/server/DataCatalogue.java +++ b/src/main/java/org/gcube/datacatalogue/ckanutillibrary/server/DataCatalogue.java @@ -10,6 +10,7 @@ import org.gcube.datacatalogue.ckanutillibrary.shared.CkanDatasetRelationship; import org.gcube.datacatalogue.ckanutillibrary.shared.DatasetRelationships; import org.gcube.datacatalogue.ckanutillibrary.shared.ResourceBean; import org.gcube.datacatalogue.ckanutillibrary.shared.RolesCkanGroupOrOrg; +import org.gcube.datacatalogue.ckanutillibrary.shared.Statistics; import org.json.simple.JSONObject; import eu.trentorise.opendata.jackan.model.CkanDataset; @@ -23,6 +24,13 @@ import eu.trentorise.opendata.jackan.model.CkanResource; * 
@author Costantino Perciante at ISTI-CNR (costantino.perciante@isti.cnr.it) */ public interface DataCatalogue { + + /** + * Returns the statistics for this catalogue + * @return + * @throws Exception + */ + Statistics getStatistics() throws Exception; /** * Retrieve the API_KEY given the username (only if it is active). diff --git a/src/main/java/org/gcube/datacatalogue/ckanutillibrary/server/DataCatalogueImpl.java b/src/main/java/org/gcube/datacatalogue/ckanutillibrary/server/DataCatalogueImpl.java index c476795..75cb59f 100644 --- a/src/main/java/org/gcube/datacatalogue/ckanutillibrary/server/DataCatalogueImpl.java +++ b/src/main/java/org/gcube/datacatalogue/ckanutillibrary/server/DataCatalogueImpl.java @@ -28,6 +28,11 @@ import net.htmlparser.jericho.Renderer; import net.htmlparser.jericho.Segment; import net.htmlparser.jericho.Source; +import org.apache.solr.client.solrj.SolrQuery; +import org.apache.solr.client.solrj.impl.HttpSolrServer; +import org.apache.solr.client.solrj.response.FacetField; +import org.apache.solr.client.solrj.response.FacetField.Count; +import org.apache.solr.client.solrj.response.QueryResponse; import org.gcube.datacatalogue.ckanutillibrary.server.utils.UtilMethods; import org.gcube.datacatalogue.ckanutillibrary.server.utils.url.EntityContext; import org.gcube.datacatalogue.ckanutillibrary.shared.CKanUserWrapper; @@ -36,6 +41,7 @@ import org.gcube.datacatalogue.ckanutillibrary.shared.DatasetRelationships; import org.gcube.datacatalogue.ckanutillibrary.shared.ResourceBean; import org.gcube.datacatalogue.ckanutillibrary.shared.RolesCkanGroupOrOrg; import org.gcube.datacatalogue.ckanutillibrary.shared.State; +import org.gcube.datacatalogue.ckanutillibrary.shared.Statistics; import org.json.simple.JSONArray; import org.json.simple.JSONObject; import org.json.simple.JSONValue; @@ -83,6 +89,7 @@ public class DataCatalogueImpl implements DataCatalogue{ private String CKAN_DB_URL; private Integer CKAN_DB_PORT; private String 
PORTLET_URL_FOR_SCOPE; + private String SOLR_URL; private String CKAN_TOKEN_SYS; private String URI_RESOLVER_URL; private boolean MANAGE_PRODUCT_BUTTON; @@ -133,6 +140,7 @@ public class DataCatalogueImpl implements DataCatalogue{ MANAGE_PRODUCT_BUTTON = runningInstance.isManageProductEnabled(); URI_RESOLVER_URL = runningInstance.getUrlResolver(); ALERT_USERS_ON_POST_CREATION = runningInstance.isAlertEnabled(); + SOLR_URL = runningInstance.getUrlSolr(); logger.debug("Plain sys admin token first 3 chars are " + CKAN_TOKEN_SYS.substring(0, 3)); logger.debug("Plain db password first 3 chars are " + CKAN_DB_PASSWORD.substring(0, 3)); @@ -2858,22 +2866,22 @@ public class DataCatalogueImpl implements DataCatalogue{ } } - + @Override public List searchForPackage(String apiKey, String query, int start, int offset) throws Exception{ - + checkNotNull(apiKey); checkNotNull(query); checkArgument(start >= 0); checkArgument(offset >= 0); - + CheckedCkanClient client = new CheckedCkanClient(CKAN_CATALOGUE_URL, apiKey); - + try{ - + CkanQuery queryCkan = CkanQuery.filter().byText(query); return client.searchDatasets(queryCkan, offset, start).getResults(); - + }catch(Exception e){ logger.error("Error while executing query", e); throw e; @@ -2882,23 +2890,83 @@ public class DataCatalogueImpl implements DataCatalogue{ @Override public List searchForPackageInOrganization(String apiKey, String query, int start, int offset, String organization) throws Exception{ - + checkNotNull(apiKey); checkNotNull(query); checkArgument(start >= 0); checkArgument(offset >= 0); - + CheckedCkanClient client = new CheckedCkanClient(CKAN_CATALOGUE_URL, apiKey); - + try{ - + CkanQuery queryCkan = CkanQuery.filter().byOrganizationName(organization.toLowerCase()).byText(query); return client.searchDatasets(queryCkan, offset, start).getResults(); - + }catch(Exception e){ logger.error("Error while executing query", e); throw e; } } + @Override + public Statistics getStatistics() throws Exception { + Statistics 
stats = new Statistics(); + + int numGroups = getGroups().size(); + int numOrganizations = getOrganizationsNames().size(); + checkNotNull(SOLR_URL); // the index address comes from the scope configuration (SOLR_INDEX_ADDRESS), never from a hard-coded endpoint + logger.debug("SOLR address is " + SOLR_URL); + HttpSolrServer solr = new HttpSolrServer(SOLR_URL); + long numItems; + long numTypes = 0; + try{ + // ask solr for the current counter of the dataset -> dataset_type:"dataset" AND state:"active" + SolrQuery queryNumItems = new SolrQuery(); + queryNumItems.setRows(0); // do not require data + queryNumItems.setQuery("dataset_type:\"dataset\" AND state:\"active\""); + try{ + QueryResponse response = solr.query(queryNumItems); + numItems = response.getResults().getNumFound(); + }catch(Exception e){ + logger.error("Failed to retrieve the number of items", e); + throw e; + } + // ask solr for types: facet on systemtype and count the values having at least one dataset + SolrQuery queryNumTypes = new SolrQuery("dataset_type:\"dataset\" AND state:\"active\""); + queryNumTypes.addFacetField("systemtype"); + queryNumTypes.setRows(0); // do not require data + queryNumTypes.setFacet(true); + try{ + QueryResponse response = solr.query(queryNumTypes); + Set<String> notEmptyTypes = new HashSet<String>(); + List<FacetField> facet = response.getFacetFields(); + for (FacetField facetField : facet) { + List<Count> values = facetField.getValues(); // the different types, even the ones with zero datasets associated + for (Count count : values) { + if(count.getCount() > 0) + notEmptyTypes.add(count.getName()); + } + } + numTypes = notEmptyTypes.size(); + }catch(Exception e){ + logger.error("Failed to retrieve the number of types", e); + throw e; + } + }finally{ + solr.shutdown(); // always release the resources held by the server instance created for this call + } + // build the urls + stats.setNumGroups(numGroups); + stats.setNumItems(numItems); + stats.setNumOrganizations(numOrganizations); + stats.setNumTypes(numTypes); + stats.setUrlGroups(PORTLET_URL_FOR_SCOPE + "?path=/group/"); + stats.setUrlItems(PORTLET_URL_FOR_SCOPE + "?path=/item/"); + stats.setUrlOrganizations(PORTLET_URL_FOR_SCOPE + "?path=/organization/"); + stats.setUrlTypes(PORTLET_URL_FOR_SCOPE +
"?path=/type/"); + + return stats; + } + } \ No newline at end of file diff --git a/src/main/java/org/gcube/datacatalogue/ckanutillibrary/server/DataCatalogueRunningCluster.java b/src/main/java/org/gcube/datacatalogue/ckanutillibrary/server/DataCatalogueRunningCluster.java index c99cf43..ae98d59 100644 --- a/src/main/java/org/gcube/datacatalogue/ckanutillibrary/server/DataCatalogueRunningCluster.java +++ b/src/main/java/org/gcube/datacatalogue/ckanutillibrary/server/DataCatalogueRunningCluster.java @@ -63,6 +63,7 @@ public class DataCatalogueRunningCluster { private final static String IS_MASTER_ROOT_KEY_PROPERTY = "IS_ROOT_MASTER"; // true, false.. missing means false as well private final static String IS_MANAGE_PRODUCT_ENABLED = "IS_MANAGE_PRODUCT_ENABLED"; // true, false.. missing means false as well (for GRSF records) private final static String ALERT_USERS_ON_POST_CREATION = "ALERT_USERS_ON_POST_CREATION"; + private final static String SOLR_INDEX_ADDRESS = "SOLR_INDEX_ADDRESS"; // url of the http uri for this scope private final static String URL_RESOLVER = "URL_RESOLVER"; @@ -78,11 +79,12 @@ public class DataCatalogueRunningCluster { private String userDB; private String passwordDB; private String portletUrl; + private String urlSolr; private boolean manageProductEnabled; private String urlResolver; private boolean alertUsers; private static Map extendRoleInOrganization = new HashMap(0); - + // generic role key public static final String CKAN_GENERIC_ROLE = "*"; public static final String ROLE_ORGANIZATION_SEPARATOR = "|"; @@ -91,6 +93,8 @@ public class DataCatalogueRunningCluster { // this token is needed in order to assign roles to user private String sysAdminToken; + + public DataCatalogueRunningCluster(String scope) throws Exception{ if(scope == null || scope.isEmpty()) @@ -216,7 +220,7 @@ public class DataCatalogueRunningCluster { manageProductEnabled = true; } - // retrieve URL_RESOLVER + // retrieve option for user alert 
if(accessPoint.propertyMap().containsKey(ALERT_USERS_ON_POST_CREATION)) if(accessPoint.propertyMap().get(ALERT_USERS_ON_POST_CREATION).value().trim().equalsIgnoreCase("true")) alertUsers = true; @@ -224,6 +228,11 @@ public class DataCatalogueRunningCluster { // retrieve URL_RESOLVER if(accessPoint.propertyMap().containsKey(URL_RESOLVER)) urlResolver = accessPoint.propertyMap().get(URL_RESOLVER).value(); + + // retrieve url of the solr index for further queries + if(accessPoint.propertyMap().containsKey(SOLR_INDEX_ADDRESS)) + urlSolr = accessPoint.propertyMap().get(SOLR_INDEX_ADDRESS).value(); + } } @@ -415,7 +424,7 @@ public class DataCatalogueRunningCluster { DiscoveryClient client = client(); List appProfile = client.submit(q); - + logger.debug("Resource for extending role has size " + appProfile.size()); if (appProfile == null || appProfile.size() == 0) @@ -446,12 +455,12 @@ public class DataCatalogueRunningCluster { continue; }else{ String currentValueForKey = extendRoleInOrganization.get(sourceOrg); - + if(currentValueForKey == null) currentValueForKey = ""; else currentValueForKey += TUPLES_SEPARATOR; - + currentValueForKey += destOrg + ROLE_ORGANIZATION_SEPARATOR + role; extendRoleInOrganization.put(sourceOrg, currentValueForKey); } @@ -460,10 +469,10 @@ public class DataCatalogueRunningCluster { } } - + logger.debug("Extended role map in this scope is " + extendRoleInOrganization); } - + /** Retrieve the ckan portlet url * @return the portletUrl */ @@ -558,4 +567,14 @@ public class DataCatalogueRunningCluster { return extendRoleInOrganization; } + /** + * Get the solr index base url + * @return + */ + public String getUrlSolr() { + return urlSolr; + } + + + } diff --git a/src/main/java/org/gcube/datacatalogue/ckanutillibrary/shared/Statistics.java b/src/main/java/org/gcube/datacatalogue/ckanutillibrary/shared/Statistics.java new file mode 100644 index 0000000..261a518 --- /dev/null +++ 
b/src/main/java/org/gcube/datacatalogue/ckanutillibrary/shared/Statistics.java @@ -0,0 +1,84 @@ +package org.gcube.datacatalogue.ckanutillibrary.shared; + +import java.io.Serializable; + +/** + * This bean offers the following statistics and information: + *
<ul> + * <li>number of items of the catalogue plus landing page url to the items page (within the portlet)</li> + * <li>number of organizations of the catalogue plus landing page url to the orgs page (within the portlet)</li> + * <li>number of groups of the catalogue plus landing page url to the groups page (within the portlet)</li> + * <li>number of types of the catalogue plus landing page url to the types page (within the portlet)</li> + * </ul>
+ * @author Costantino Perciante at ISTI-CNR (costantino.perciante@isti.cnr.it) + */ +public class Statistics implements Serializable{ + + private static final long serialVersionUID = 2871906712366452266L; + private long numTypes; // counters (see the class description) + private long numOrganizations; + private long numGroups; + private long numItems; + private String urlTypes; // landing page urls (see the class description) + private String urlOrganizations; + private String urlGroups; + private String urlItems; + + public String getUrlTypes() { + return urlTypes; + } + public void setUrlTypes(String urlTypes) { + this.urlTypes = urlTypes; + } + public String getUrlOrganizations() { + return urlOrganizations; + } + public void setUrlOrganizations(String urlOrganizations) { + this.urlOrganizations = urlOrganizations; + } + public String getUrlGroups() { + return urlGroups; + } + public void setUrlGroups(String urlGroups) { + this.urlGroups = urlGroups; + } + public String getUrlItems() { + return urlItems; + } + public void setUrlItems(String urlItems) { + this.urlItems = urlItems; + } + public long getNumTypes() { + return numTypes; + } + public void setNumTypes(long numTypes) { + this.numTypes = numTypes; + } + public long getNumOrganizations() { + return numOrganizations; + } + public void setNumOrganizations(long numOrganizations) { + this.numOrganizations = numOrganizations; + } + public long getNumGroups() { + return numGroups; + } + public void setNumGroups(long numGroups) { + this.numGroups = numGroups; + } + public long getNumItems() { + return numItems; + } + public void setNumItems(long numItems) { + this.numItems = numItems; + } + @Override + public String toString() { // the label matches the class name + return "Statistics [numTypes=" + numTypes + + ", numOrganizations=" + numOrganizations + ", numGroups=" + + numGroups + ", numItems=" + numItems + ", urlTypes=" + + urlTypes + ", urlOrganizations=" + urlOrganizations + + ", urlGroups=" + urlGroups + ", urlItems=" + urlItems + "]"; + } + +} diff --git
a/src/test/java/org/gcube/datacatalogue/ckanutillibrary/server/TestDataCatalogueLib.java b/src/test/java/org/gcube/datacatalogue/ckanutillibrary/server/TestDataCatalogueLib.java index 56fb7e9..d52ad87 100644 --- a/src/test/java/org/gcube/datacatalogue/ckanutillibrary/server/TestDataCatalogueLib.java +++ b/src/test/java/org/gcube/datacatalogue/ckanutillibrary/server/TestDataCatalogueLib.java @@ -15,7 +15,6 @@ import org.gcube.datacatalogue.ckanutillibrary.shared.CKanUserWrapper; import org.gcube.datacatalogue.ckanutillibrary.shared.CkanDatasetRelationship; import org.gcube.datacatalogue.ckanutillibrary.shared.DatasetRelationships; import org.gcube.datacatalogue.ckanutillibrary.shared.RolesCkanGroupOrOrg; -import org.junit.Test; import org.slf4j.LoggerFactory; import eu.trentorise.opendata.jackan.CheckedCkanClient; @@ -44,13 +43,21 @@ public class TestDataCatalogueLib { } + //@Test + public void getStatistics() throws Exception{ + + DataCatalogueImpl utils = factory.getUtilsPerScope(scope); + logger.debug("Statistics " + utils.getStatistics()); + + } + //@Test public void getDatasetIdsFromDB() throws Exception{ DataCatalogueImpl utils = factory.getUtilsPerScope(scope); List ids = utils.getProductsIdsInGroupOrOrg("aquamaps", true, 0, Integer.MAX_VALUE); logger.debug("Size is " + ids.size()); } - + //@Test public void searchInOrganization() throws Exception{ DataCatalogueImpl utils = factory.getUtilsPerScope(scope); @@ -58,8 +65,8 @@ public class TestDataCatalogueLib { List matches = utils.searchForPackageInOrganization(apiKey, "\"asfis:HMC+eez:AGO;FAO+grsf-org:INT+eez:AGO;RFB+iso3:AGO+isscfg:01.1.1\"", 0, 10, "grsf_admin"); logger.debug("Size is " + matches.size()); } - - @Test + + //@Test public void search() throws Exception{ DataCatalogueImpl utils = factory.getUtilsPerScope(scope); String apiKey = utils.getApiKeyFromUsername(testUser); @@ -493,12 +500,12 @@ public class TestDataCatalogueLib { long end = System.currentTimeMillis(); logger.debug("Time taken " + (end 
- init)); } - + //@Test public void getHigher(){ logger.debug("Max is " + RolesCkanGroupOrOrg.getHigher(RolesCkanGroupOrOrg.ADMIN, RolesCkanGroupOrOrg.ADMIN)); } - + //@Test public void getUrlProduct() throws Exception{ DataCatalogueImpl instance = factory.getUtilsPerScope(scope);