added code for getting statistics (i.e. num types, num orgs, num groups and num items) as well as the landing pages of such information

git-svn-id: https://svn.d4science.research-infrastructures.eu/gcube/trunk/data-catalogue/ckan-util-library@162131 82a268e6-3cf1-43bd-a215-b396298e98cf
This commit is contained in:
Costantino Perciante 2018-01-12 16:05:56 +00:00
parent 35968f0fc6
commit 8874f2154c
6 changed files with 215 additions and 24 deletions

View File

@ -130,6 +130,11 @@
<version>1.1.1</version>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>org.apache.solr</groupId>
<artifactId>solr-solrj</artifactId>
<version>[4.10.2,5.0.0)</version>
</dependency>
</dependencies>
<build>

View File

@ -10,6 +10,7 @@ import org.gcube.datacatalogue.ckanutillibrary.shared.CkanDatasetRelationship;
import org.gcube.datacatalogue.ckanutillibrary.shared.DatasetRelationships;
import org.gcube.datacatalogue.ckanutillibrary.shared.ResourceBean;
import org.gcube.datacatalogue.ckanutillibrary.shared.RolesCkanGroupOrOrg;
import org.gcube.datacatalogue.ckanutillibrary.shared.Statistics;
import org.json.simple.JSONObject;
import eu.trentorise.opendata.jackan.model.CkanDataset;
@ -23,6 +24,13 @@ import eu.trentorise.opendata.jackan.model.CkanResource;
* @author Costantino Perciante at ISTI-CNR (costantino.perciante@isti.cnr.it)
*/
public interface DataCatalogue {
/**
* Returns the statistics for this catalogue, i.e. the number of types, organizations,
* groups and items, together with the landing page url of each of them.
* @return a {@link Statistics} bean carrying the counters and the landing page urls
* @throws Exception if the statistics cannot be retrieved
*/
Statistics getStatistics() throws Exception;
/**
* Retrieve the API_KEY given the username (only if it is active).

View File

@ -28,6 +28,11 @@ import net.htmlparser.jericho.Renderer;
import net.htmlparser.jericho.Segment;
import net.htmlparser.jericho.Source;
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.impl.HttpSolrServer;
import org.apache.solr.client.solrj.response.FacetField;
import org.apache.solr.client.solrj.response.FacetField.Count;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.gcube.datacatalogue.ckanutillibrary.server.utils.UtilMethods;
import org.gcube.datacatalogue.ckanutillibrary.server.utils.url.EntityContext;
import org.gcube.datacatalogue.ckanutillibrary.shared.CKanUserWrapper;
@ -36,6 +41,7 @@ import org.gcube.datacatalogue.ckanutillibrary.shared.DatasetRelationships;
import org.gcube.datacatalogue.ckanutillibrary.shared.ResourceBean;
import org.gcube.datacatalogue.ckanutillibrary.shared.RolesCkanGroupOrOrg;
import org.gcube.datacatalogue.ckanutillibrary.shared.State;
import org.gcube.datacatalogue.ckanutillibrary.shared.Statistics;
import org.json.simple.JSONArray;
import org.json.simple.JSONObject;
import org.json.simple.JSONValue;
@ -83,6 +89,7 @@ public class DataCatalogueImpl implements DataCatalogue{
private String CKAN_DB_URL;
private Integer CKAN_DB_PORT;
private String PORTLET_URL_FOR_SCOPE;
private String SOLR_URL;
private String CKAN_TOKEN_SYS;
private String URI_RESOLVER_URL;
private boolean MANAGE_PRODUCT_BUTTON;
@ -133,6 +140,7 @@ public class DataCatalogueImpl implements DataCatalogue{
MANAGE_PRODUCT_BUTTON = runningInstance.isManageProductEnabled();
URI_RESOLVER_URL = runningInstance.getUrlResolver();
ALERT_USERS_ON_POST_CREATION = runningInstance.isAlertEnabled();
SOLR_URL = runningInstance.getUrlSolr();
logger.debug("Plain sys admin token first 3 chars are " + CKAN_TOKEN_SYS.substring(0, 3));
logger.debug("Plain db password first 3 chars are " + CKAN_DB_PASSWORD.substring(0, 3));
@ -2858,22 +2866,22 @@ public class DataCatalogueImpl implements DataCatalogue{
}
}
@Override
public List<CkanDataset> searchForPackage(String apiKey, String query, int start, int offset) throws Exception{
checkNotNull(apiKey);
checkNotNull(query);
checkArgument(start >= 0);
checkArgument(offset >= 0);
CheckedCkanClient client = new CheckedCkanClient(CKAN_CATALOGUE_URL, apiKey);
try{
CkanQuery queryCkan = CkanQuery.filter().byText(query);
return client.searchDatasets(queryCkan, offset, start).getResults();
}catch(Exception e){
logger.error("Error while executing query", e);
throw e;
@ -2882,23 +2890,83 @@ public class DataCatalogueImpl implements DataCatalogue{
/**
 * Searches, on behalf of the owner of the api key, the datasets of an organization
 * that match a text query.
 * @param apiKey the api key used to build the ckan client, must not be null
 * @param query the text to search for, must not be null
 * @param start forwarded as last argument of the client's searchDatasets call, must be >= 0
 * @param offset forwarded as second argument of the client's searchDatasets call, must be >= 0
 * @param organization the name of the organization to search in (lower-cased before being used), must not be null
 * @return the datasets returned by the search
 * @throws Exception if the query fails; the failure is logged and rethrown unchanged
 */
@Override
public List<CkanDataset> searchForPackageInOrganization(String apiKey, String query, int start, int offset, String organization) throws Exception{

	checkNotNull(apiKey);
	checkNotNull(query);
	// organization is dereferenced below: validate it like the other arguments
	checkNotNull(organization);
	checkArgument(start >= 0);
	checkArgument(offset >= 0);

	CheckedCkanClient client = new CheckedCkanClient(CKAN_CATALOGUE_URL, apiKey);

	try{
		CkanQuery queryCkan = CkanQuery.filter().byOrganizationName(organization.toLowerCase()).byText(query);
		// NOTE(review): jackan's searchDatasets is expected to take (query, limit, offset),
		// so "offset" acts as page size and "start" as index of the first result - confirm
		return client.searchDatasets(queryCkan, offset, start).getResults();
	}catch(Exception e){
		logger.error("Error while executing query", e);
		throw e;
	}
}
/**
 * Returns the statistics for this catalogue: number of groups, organizations, items and
 * types, plus the landing page url (within the portlet) of each of them.
 * Groups and organizations are counted through {@code getGroups()} and
 * {@code getOrganizationsNames()}; items and types are counted by querying the SOLR index
 * whose address has been discovered for this scope.
 * @return the statistics bean
 * @throws IllegalStateException if no SOLR index address is available
 * @throws Exception if one of the SOLR queries fails (the failure is logged and rethrown)
 */
@Override
public Statistics getStatistics() throws Exception {

	// the address comes from the running cluster information: it must never be overridden here
	if(SOLR_URL == null || SOLR_URL.isEmpty())
		throw new IllegalStateException("The address of the SOLR index is not available");

	int numGroups = getGroups().size();
	int numOrganizations = getOrganizationsNames().size();

	logger.debug("SOLR address is " + SOLR_URL);

	// only active datasets are taken into account by both queries
	final String activeDatasetsQuery = "dataset_type:\"dataset\" AND state:\"active\"";

	long numItems;
	long numTypes = 0;

	HttpSolrServer solr = new HttpSolrServer(SOLR_URL);
	try{

		// ask solr for the current counter of the dataset
		SolrQuery queryNumItems = new SolrQuery();
		queryNumItems.setRows(0); // do not require data
		queryNumItems.setQuery(activeDatasetsQuery);
		try{
			QueryResponse response = solr.query(queryNumItems);
			numItems = response.getResults().getNumFound();
		}catch(Exception e){
			logger.error("Failed to retrieve the number of items", e);
			throw e;
		}

		// ask solr for types
		SolrQuery queryNumTypes = new SolrQuery(activeDatasetsQuery);
		queryNumTypes.addFacetField("systemtype");
		queryNumTypes.setRows(0); // do not require data, only the facet counters
		queryNumTypes.setFacet(true);
		try{
			QueryResponse response = solr.query(queryNumTypes);
			Set<String> notEmptyTypes = new HashSet<String>();
			List<FacetField> facets = response.getFacetFields();
			if(facets != null){
				for (FacetField facetField : facets) {
					// the different types, even the ones with zero datasets associated
					List<Count> values = facetField.getValues();
					for (Count count : values) {
						if(count.getCount() > 0)
							notEmptyTypes.add(count.getName());
					}
				}
			}
			numTypes = notEmptyTypes.size();
		}catch(Exception e){
			logger.error("Failed to retrieve the number of types", e);
			throw e;
		}

	}finally{
		// release the http resources held by the solr client
		solr.shutdown();
	}

	// fill the bean and build the urls
	Statistics stats = new Statistics();
	stats.setNumGroups(numGroups);
	stats.setNumItems(numItems);
	stats.setNumOrganizations(numOrganizations);
	stats.setNumTypes(numTypes);
	stats.setUrlGroups(PORTLET_URL_FOR_SCOPE + "?path=/group/");
	stats.setUrlItems(PORTLET_URL_FOR_SCOPE + "?path=/item/");
	stats.setUrlOrganizations(PORTLET_URL_FOR_SCOPE + "?path=/organization/");
	stats.setUrlTypes(PORTLET_URL_FOR_SCOPE + "?path=/type/");
	return stats;
}
}

View File

@ -63,6 +63,7 @@ public class DataCatalogueRunningCluster {
private final static String IS_MASTER_ROOT_KEY_PROPERTY = "IS_ROOT_MASTER"; // true, false.. missing means false as well
private final static String IS_MANAGE_PRODUCT_ENABLED = "IS_MANAGE_PRODUCT_ENABLED"; // true, false.. missing means false as well (for GRSF records)
private final static String ALERT_USERS_ON_POST_CREATION = "ALERT_USERS_ON_POST_CREATION";
private final static String SOLR_INDEX_ADDRESS = "SOLR_INDEX_ADDRESS";
// url of the http uri for this scope
private final static String URL_RESOLVER = "URL_RESOLVER";
@ -78,11 +79,12 @@ public class DataCatalogueRunningCluster {
private String userDB;
private String passwordDB;
private String portletUrl;
private String urlSolr;
private boolean manageProductEnabled;
private String urlResolver;
private boolean alertUsers;
private static Map<String, String> extendRoleInOrganization = new HashMap<String, String>(0);
// generic role key
public static final String CKAN_GENERIC_ROLE = "*";
public static final String ROLE_ORGANIZATION_SEPARATOR = "|";
@ -91,6 +93,8 @@ public class DataCatalogueRunningCluster {
// this token is needed in order to assign roles to user
private String sysAdminToken;
public DataCatalogueRunningCluster(String scope) throws Exception{
if(scope == null || scope.isEmpty())
@ -216,7 +220,7 @@ public class DataCatalogueRunningCluster {
manageProductEnabled = true;
}
// retrieve URL_RESOLVER
// retrieve option for user alert
if(accessPoint.propertyMap().containsKey(ALERT_USERS_ON_POST_CREATION))
if(accessPoint.propertyMap().get(ALERT_USERS_ON_POST_CREATION).value().trim().equalsIgnoreCase("true"))
alertUsers = true;
@ -224,6 +228,11 @@ public class DataCatalogueRunningCluster {
// retrieve URL_RESOLVER
if(accessPoint.propertyMap().containsKey(URL_RESOLVER))
urlResolver = accessPoint.propertyMap().get(URL_RESOLVER).value();
// retrieve url of the solr index for further queries
if(accessPoint.propertyMap().containsKey(SOLR_INDEX_ADDRESS))
urlSolr = accessPoint.propertyMap().get(SOLR_INDEX_ADDRESS).value();
}
}
@ -415,7 +424,7 @@ public class DataCatalogueRunningCluster {
DiscoveryClient<String> client = client();
List<String> appProfile = client.submit(q);
logger.debug("Resource for extending role has size " + appProfile.size());
if (appProfile == null || appProfile.size() == 0)
@ -446,12 +455,12 @@ public class DataCatalogueRunningCluster {
continue;
}else{
String currentValueForKey = extendRoleInOrganization.get(sourceOrg);
if(currentValueForKey == null)
currentValueForKey = "";
else
currentValueForKey += TUPLES_SEPARATOR;
currentValueForKey += destOrg + ROLE_ORGANIZATION_SEPARATOR + role;
extendRoleInOrganization.put(sourceOrg, currentValueForKey);
}
@ -460,10 +469,10 @@ public class DataCatalogueRunningCluster {
}
}
logger.debug("Extended role map in this scope is " + extendRoleInOrganization);
}
/** Retrieve the ckan portlet url
* @return the portletUrl
*/
@ -558,4 +567,14 @@ public class DataCatalogueRunningCluster {
return extendRoleInOrganization;
}
/**
* Get the solr index base url.
* @return the value read from the SOLR_INDEX_ADDRESS property of the access point;
* presumably null when that property is missing (the field has no initializer, but
* not every assignment is visible here - to be confirmed)
*/
public String getUrlSolr() {
return urlSolr;
}
}

View File

@ -0,0 +1,84 @@
package org.gcube.datacatalogue.ckanutillibrary.shared;
import java.io.Serializable;
/**
 * This bean offers the following statistics and information:
 * <ul>
 * <li> number of items of the catalogue plus landing page url to the items page (within the portlet)</li>
 * <li> number of organizations of the catalogue plus landing page url to the orgs page (within the portlet)</li>
 * <li> number of groups of the catalogue plus landing page url to the groups page (within the portlet)</li>
 * <li> number of types of the catalogue plus landing page url to the types page (within the portlet)</li>
 * </ul>
 * It is a plain mutable bean: counters default to zero and urls to null until set.
 * @author Costantino Perciante at ISTI-CNR (costantino.perciante@isti.cnr.it)
 */
public class Statistics implements Serializable{

	private static final long serialVersionUID = 2871906712366452266L;

	// counters
	private long numTypes;
	private long numOrganizations;
	private long numGroups;
	private long numItems;

	// landing pages
	private String urlTypes;
	private String urlOrganizations;
	private String urlGroups;
	private String urlItems;

	/**
	 * @return the landing page url of the types
	 */
	public String getUrlTypes() {
		return urlTypes;
	}

	/**
	 * @param urlTypes the landing page url of the types
	 */
	public void setUrlTypes(String urlTypes) {
		this.urlTypes = urlTypes;
	}

	/**
	 * @return the landing page url of the organizations
	 */
	public String getUrlOrganizations() {
		return urlOrganizations;
	}

	/**
	 * @param urlOrganizations the landing page url of the organizations
	 */
	public void setUrlOrganizations(String urlOrganizations) {
		this.urlOrganizations = urlOrganizations;
	}

	/**
	 * @return the landing page url of the groups
	 */
	public String getUrlGroups() {
		return urlGroups;
	}

	/**
	 * @param urlGroups the landing page url of the groups
	 */
	public void setUrlGroups(String urlGroups) {
		this.urlGroups = urlGroups;
	}

	/**
	 * @return the landing page url of the items
	 */
	public String getUrlItems() {
		return urlItems;
	}

	/**
	 * @param urlItems the landing page url of the items
	 */
	public void setUrlItems(String urlItems) {
		this.urlItems = urlItems;
	}

	/**
	 * @return the number of types
	 */
	public long getNumTypes() {
		return numTypes;
	}

	/**
	 * @param numTypes the number of types
	 */
	public void setNumTypes(long numTypes) {
		this.numTypes = numTypes;
	}

	/**
	 * @return the number of organizations
	 */
	public long getNumOrganizations() {
		return numOrganizations;
	}

	/**
	 * @param numOrganizations the number of organizations
	 */
	public void setNumOrganizations(long numOrganizations) {
		this.numOrganizations = numOrganizations;
	}

	/**
	 * @return the number of groups
	 */
	public long getNumGroups() {
		return numGroups;
	}

	/**
	 * @param numGroups the number of groups
	 */
	public void setNumGroups(long numGroups) {
		this.numGroups = numGroups;
	}

	/**
	 * @return the number of items
	 */
	public long getNumItems() {
		return numItems;
	}

	/**
	 * @param numItems the number of items
	 */
	public void setNumItems(long numItems) {
		this.numItems = numItems;
	}

	@Override
	public String toString() {
		// the label matches the class name, so that log lines can be traced back to this bean
		return "Statistics [numTypes=" + numTypes
				+ ", numOrganizations=" + numOrganizations + ", numGroups="
				+ numGroups + ", numItems=" + numItems + ", urlTypes="
				+ urlTypes + ", urlOrganizations=" + urlOrganizations
				+ ", urlGroups=" + urlGroups + ", urlItems=" + urlItems + "]";
	}
}

View File

@ -15,7 +15,6 @@ import org.gcube.datacatalogue.ckanutillibrary.shared.CKanUserWrapper;
import org.gcube.datacatalogue.ckanutillibrary.shared.CkanDatasetRelationship;
import org.gcube.datacatalogue.ckanutillibrary.shared.DatasetRelationships;
import org.gcube.datacatalogue.ckanutillibrary.shared.RolesCkanGroupOrOrg;
import org.junit.Test;
import org.slf4j.LoggerFactory;
import eu.trentorise.opendata.jackan.CheckedCkanClient;
@ -44,13 +43,21 @@ public class TestDataCatalogueLib {
}
/**
 * Manual check (the test annotation is disabled): logs the statistics
 * returned by the catalogue utility of the test scope.
 */
//@Test
public void getStatistics() throws Exception{
	final DataCatalogueImpl catalogue = factory.getUtilsPerScope(scope);
	final String message = "Statistics " + catalogue.getStatistics();
	logger.debug(message);
}
/**
 * Manual check (the test annotation is disabled): asks for the product ids of
 * "aquamaps" and logs how many of them have been returned.
 */
//@Test
public void getDatasetIdsFromDB() throws Exception{
	final DataCatalogueImpl catalogue = factory.getUtilsPerScope(scope);
	final List<String> productIds = catalogue.getProductsIdsInGroupOrOrg("aquamaps", true, 0, Integer.MAX_VALUE);
	final int howMany = productIds.size();
	logger.debug("Size is " + howMany);
}
//@Test
public void searchInOrganization() throws Exception{
DataCatalogueImpl utils = factory.getUtilsPerScope(scope);
@ -58,8 +65,8 @@ public class TestDataCatalogueLib {
List<CkanDataset> matches = utils.searchForPackageInOrganization(apiKey, "\"asfis:HMC+eez:AGO;FAO+grsf-org:INT+eez:AGO;RFB+iso3:AGO+isscfg:01.1.1\"", 0, 10, "grsf_admin");
logger.debug("Size is " + matches.size());
}
@Test
//@Test
public void search() throws Exception{
DataCatalogueImpl utils = factory.getUtilsPerScope(scope);
String apiKey = utils.getApiKeyFromUsername(testUser);
@ -493,12 +500,12 @@ public class TestDataCatalogueLib {
long end = System.currentTimeMillis();
logger.debug("Time taken " + (end - init));
}
/**
 * Manual check (the test annotation is disabled): logs the result of
 * comparing the ADMIN role with itself.
 */
//@Test
public void getHigher(){
	final String message = "Max is " + RolesCkanGroupOrOrg.getHigher(RolesCkanGroupOrOrg.ADMIN, RolesCkanGroupOrOrg.ADMIN);
	logger.debug(message);
}
//@Test
public void getUrlProduct() throws Exception{
DataCatalogueImpl instance = factory.getUtilsPerScope(scope);