diff --git a/src/main/java/org/gcube/dataharvest/AccountingDataHarvesterPlugin.java b/src/main/java/org/gcube/dataharvest/AccountingDataHarvesterPlugin.java index bd6f774..f176a25 100644 --- a/src/main/java/org/gcube/dataharvest/AccountingDataHarvesterPlugin.java +++ b/src/main/java/org/gcube/dataharvest/AccountingDataHarvesterPlugin.java @@ -177,7 +177,7 @@ public class AccountingDataHarvesterPlugin extends Plugin harvested = dataMethodDownloadHarvester.getData(); data.addAll(harvested); } catch(Exception e) { diff --git a/src/main/java/org/gcube/dataharvest/harvester/sobigdata/DataMethodDownloadHarvester.java b/src/main/java/org/gcube/dataharvest/harvester/sobigdata/DataMethodDownloadHarvester.java index fb5993d..fa65688 100644 --- a/src/main/java/org/gcube/dataharvest/harvester/sobigdata/DataMethodDownloadHarvester.java +++ b/src/main/java/org/gcube/dataharvest/harvester/sobigdata/DataMethodDownloadHarvester.java @@ -34,39 +34,72 @@ public class DataMethodDownloadHarvester extends SoBigDataHarvester { private int count = 0; - public DataMethodDownloadHarvester(Date start, Date end, SortedSet<String> contexts) throws ParseException { - super(start, end, contexts); + public DataMethodDownloadHarvester(Date start, Date end, String catalogueContext, SortedSet<String> contexts) throws ParseException { + super(start, end, catalogueContext, contexts); }
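The attribution rule implemented by getData() below reads as a small pure function: a folder's statistics are accounted to the VRE whose lower-cased, alphanumeric-only name matches the folder name, and accrue to the default context otherwise. A minimal sketch of that rule follows; the helper name resolveScope is hypothetical and not part of the patch:

// Hypothetical helper, for illustration only: it restates the per-folder
// attribution rule of getData(). The keys of folderNameToVRE are the
// lower-cased VRE names built by SoBigDataHarvester.initMappingMaps().
private static String resolveScope(String folderName, java.util.Map<String, String> folderNameToVRE, String defaultScope) {
	String normalizedName = folderName.replaceAll("[^A-Za-z0-9]", "").toLowerCase();
	String scope = folderNameToVRE.get(normalizedName);
	return (scope != null && !scope.isEmpty()) ? scope : defaultScope;
}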
@Override public List<HarvestedData> getData() throws Exception { - String context = Utils.getCurrentContext(); + String defaultContext = Utils.getCurrentContext(); + logger.debug("The context is: "+defaultContext); try { - ArrayList<HarvestedData> data = new ArrayList<HarvestedData>(); - count = 0; + String vreName = getVRENameToHL(defaultContext); + logger.debug("Getting VRE Name to HL from context/scope returns: "+vreName); + String user = vreName + "-Manager"; + logger.debug("Using user '"+user+"' to getHome from HL"); + + //INSTANTIATING HL AND GETTING THE HOME OF THE VRE MANAGER HomeManager manager = HomeLibrary.getHomeManagerFactory().getHomeManager(); - - String user = getVREName(context) + "-Manager"; - @SuppressWarnings("deprecation") Home home = manager.getHome(user); - JCRWorkspace ws = (JCRWorkspace) home.getWorkspace(); +// + String path = "/Workspace/MySpecialFolders/" + vreName; + logger.debug("Getting item by Path: "+path); + JCRWorkspaceItem item = (JCRWorkspaceItem) ws.getItemByPath(path); +// + logger.info("Analyzing " + defaultContext + " in the period [" + startDate.toString() + " to " + endDate.toString() +"] starting from root: "+item.getName()); - JCRWorkspaceItem item = (JCRWorkspaceItem) ws .getItemByPath("/Workspace/MySpecialFolders/" + getVREName(context)); + HarvestedData defaultHarvestedData = new HarvestedData(HarvestedDataKey.DATA_METHOD_DOWNLOAD, defaultContext, count); - logger.debug("Analyzing " + context + " from " + startDate.toString() + " to " + endDate.toString()); + List<HarvestedData> data = new ArrayList<HarvestedData>(); + for (WorkspaceItem child : item.getChildren()) { + count = 0; //resetting the counter + HarvestedData harvestedData; + //Getting statistics for folder + if(child.isFolder()){ + logger.info("Getting statistics for folder: "+child.getName()); + getStats(child, startDate, endDate); + + String normalizedName = child.getName().replaceAll("[^A-Za-z0-9]","").toLowerCase(); //lower-cased to match the keys of mapWsFolderNameToVRE + String scope = mapWsFolderNameToVRE.get(normalizedName); + //Checking if it is a VRE name for correct accounting... + if(scope!=null && !scope.isEmpty()){ + logger.info("Found scope '" + scope + "' matching with normalized VRE name: "+normalizedName); + harvestedData = new HarvestedData(HarvestedDataKey.DATA_METHOD_DOWNLOAD, scope, count); + data.add(harvestedData); + logger.info("Added data: "+harvestedData); + }else{ + logger.info("No scope found matching the folder name: "+normalizedName +", accounting its stats in the default context: "+defaultContext); + //INCREASING THE DEFAULT CONTEXT COUNTER... + defaultHarvestedData.setMeasure(defaultHarvestedData.getMeasure()+count); + logger.info("Increased default context stats: "+defaultHarvestedData); + } + + } + } + + //ADDING DEFAULT ACCOUNTING + data.add(defaultHarvestedData); + + logger.info("In the period [from "+startDate+" to "+endDate+ "] returning workspace accounting data:"); + for (HarvestedData harvestedData : data) { + logger.info(harvestedData.toString()); + } - logger.error("Before getStats()"); - getStats(item, startDate, endDate); - logger.error("After getStats()"); - HarvestedData harvest = new HarvestedData(HarvestedDataKey.DATA_METHOD_DOWNLOAD, context, count); - data.add(harvest); - logger.debug(harvest.toString()); return data; } catch(Exception e) { @@ -75,16 +108,26 @@ public class DataMethodDownloadHarvester extends SoBigDataHarvester { } - private void getStats(WorkspaceItem root, Date start, Date end) throws InternalErrorException { + + /** + * Recursively counts the accounting events of the given item in the [start, end] period, accumulating the total into the count field. + * + * @param baseItem the base item + * @param start the start of the period + * @param end the end of the period + * @throws InternalErrorException the internal error exception + */ + private void getStats(WorkspaceItem baseItem, Date start, Date end) throws InternalErrorException { List<WorkspaceItem> children; - if(baseItem.isFolder()) { - children = root.getChildren(); + if(baseItem.isFolder()) { + children = baseItem.getChildren(); for(WorkspaceItem child : children) getStats(child, start, end); } else { try { - List<AccountingEntry> accounting = root.getAccounting(); + List<AccountingEntry> accounting = baseItem.getAccounting(); for(AccountingEntry entry : accounting) { switch(entry.getEntryType()) { @@ -92,8 +135,7 @@ public class DataMethodDownloadHarvester extends SoBigDataHarvester { case UPDATE: case READ: Calendar calendar = entry.getDate(); - if(calendar.after(DateUtils.dateToCalendar(start)) - && calendar.before(DateUtils.dateToCalendar(end))) { + if(calendar.after(DateUtils.dateToCalendar(start)) && calendar.before(DateUtils.dateToCalendar(end))) { count++; } @@ -111,7 +153,14 @@ public class DataMethodDownloadHarvester extends SoBigDataHarvester { } }
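Since getStats() accumulates into the shared count field, getData() above has to reset that field on every pass of its loop. A reentrant variant that returns the count directly may be easier to reason about; a sketch, assuming the same HomeLibrary types (WorkspaceItem, AccountingEntry, InternalErrorException) and the DateUtils helper this class already imports, and covering only the entry types visible in this hunk:

// Sketch of a side-effect-free alternative to getStats(): it returns the number
// of accounting events falling in the (start, end) window instead of
// incrementing the shared 'count' field.
private int countAccountingEvents(WorkspaceItem item, Date start, Date end) throws InternalErrorException {
	int total = 0;
	if(item.isFolder()) {
		for(WorkspaceItem child : item.getChildren()) {
			total += countAccountingEvents(child, start, end);
		}
		return total;
	}
	try {
		for(AccountingEntry entry : item.getAccounting()) {
			switch(entry.getEntryType()) {
				case UPDATE:
				case READ:
					Calendar calendar = entry.getDate();
					if(calendar.after(DateUtils.dateToCalendar(start)) && calendar.before(DateUtils.dateToCalendar(end))) {
						total++;
					}
					break;
				default:
					break;
			}
		}
	} catch(Exception e) {
		// an unreadable accounting log should not abort the whole scan
		logger.error("Unable to read the accounting of " + item.getName(), e);
	}
	return total;
}

With such a variant, getData() could drop the count field and the per-iteration reset entirely.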
- private static String getVREName(String vre) { + + /** + * Gets the VRE name used by the HomeLibrary (HL). + * + * @param vre the VRE scope + * @return the VRE name used by HL + */ + private static String getVRENameToHL(String vre) { Validate.notNull(vre, "scope must be not null"); String newName; diff --git a/src/main/java/org/gcube/dataharvest/harvester/sobigdata/ResourceCatalogueHarvester.java b/src/main/java/org/gcube/dataharvest/harvester/sobigdata/ResourceCatalogueHarvester.java index 479b3cb..3fafdc9 100644 --- a/src/main/java/org/gcube/dataharvest/harvester/sobigdata/ResourceCatalogueHarvester.java +++ b/src/main/java/org/gcube/dataharvest/harvester/sobigdata/ResourceCatalogueHarvester.java @@ -6,14 +6,8 @@ import java.util.HashMap; import java.util.Iterator; import java.util.List; import java.util.Map; -import java.util.Properties; -import java.util.Set; import java.util.SortedSet; -import org.apache.commons.lang.Validate; -import org.gcube.datacatalogue.ckanutillibrary.server.DataCatalogueFactory; -import org.gcube.datacatalogue.ckanutillibrary.server.DataCatalogueImpl; -import org.gcube.dataharvest.AccountingDataHarvesterPlugin; import org.gcube.dataharvest.datamodel.HarvestedData; import org.gcube.dataharvest.datamodel.HarvestedDataKey; import org.gcube.dataharvest.utils.DateUtils; @@ -25,8 +19,6 @@ import org.json.JSONObject; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import eu.trentorise.opendata.jackan.model.CkanGroup; - /** * The Class ResourceCatalogueHarvester. * * @author Luca Frosini (ISTI-CNR) * @author Francesco Mangiacrapa at ISTI-CNR (francesco.mangiacrapa@isti.cnr.it) * May 24, 2018 */ public class ResourceCatalogueHarvester extends SoBigDataHarvester { - /** - * - */ - private static final String GROUP_LABEL = "group"; - private static final String AND = " AND "; public static int ROWS = 500; @@ -49,76 +36,22 @@ public class ResourceCatalogueHarvester extends SoBigDataHarvester { protected String solrBaseUrl; - private DataCatalogueFactory factory; - - private HashMap<String,String> mapTypeToDBEntry; - - private HashMap<String,String> mapCatalogueGroupToVRE; - - private String catalogueContext; - /** * Instantiates a new resource catalogue harvester. * * @param start the start * @param end the end * @param catalogueContext the catalogue context - * @param contexts the contexts + * @param contexts the contexts, i.e. the VRE scopes * @throws Exception the exception */ public ResourceCatalogueHarvester(Date start, Date end, String catalogueContext, SortedSet<String> contexts) throws Exception { - super(start, end, contexts); - factory = DataCatalogueFactory.getFactory(); - this.catalogueContext = catalogueContext; + super(start, end, catalogueContext, contexts); if(catalogueContext==null || catalogueContext.isEmpty()) throw new Exception("The catalogue context is null or empty. 
Pass a valid scope"); logger.debug("Catalogue context is: "+catalogueContext); - - Properties properties = AccountingDataHarvesterPlugin.getProperties().get(); - Set keys = properties.stringPropertyNames(); - - mapTypeToDBEntry = new HashMap(); - for (String key : keys) { - //System.out.println(key + " : " + properties.getProperty(key)); - try{ - HarvestedDataKey valueEnum = HarvestedDataKey.valueOf(key); - mapTypeToDBEntry.put(properties.getProperty(key), valueEnum.name()); - }catch(Exception e){ - //silent - } - } - - logger.info("Built from properties the mapping 'SystemType' to 'DB entry' : "+mapTypeToDBEntry); - - //GET CATALOGUE'S GROUPS - List groups = getGroups(catalogueContext); - //NORMALIZING THE GROUP NAME TO MATCH WITH VRE NAME - Map mapNormalizedGroups = normalizeGroups(groups); - logger.debug("Map of Normalized Groups is: "+mapNormalizedGroups); - - //CREATING MAPPING BETWEEN (CATALOGUE GROUP NAME TO VRE NAME) - mapCatalogueGroupToVRE = new HashMap(); - Set normalizedGroups = mapNormalizedGroups.keySet(); - for (String context : contexts) { - //logger.trace("Context is: " + context); - String loweredVREName =context.substring(context.lastIndexOf("/") + 1, context.length()).toLowerCase(); - //logger.trace("vreName lowered is: " + loweredVREName); - try { - if (normalizedGroups.contains(loweredVREName)) { - logger.debug("Normalized Groups matching the lowered VRE name: "+loweredVREName); - // Creating the map with couple (catalogue group name, - // scope) - mapCatalogueGroupToVRE.put(mapNormalizedGroups.get(loweredVREName), context); - } - } - catch (Exception e) { - // silent - } - } - - logger.info("Map of Catalogue Groups To VRE is: "+mapCatalogueGroupToVRE); } /** @@ -126,6 +59,8 @@ public class ResourceCatalogueHarvester extends SoBigDataHarvester { * * @return the solr base url */ + + //TODO @LUCA FROSINI protected String getSolrBaseUrl() { return "https://ckan-solr-d4s.d4science.org/solr/sobigdata"; } @@ -136,17 +71,16 @@ public class ResourceCatalogueHarvester extends SoBigDataHarvester { @Override public List getData() throws Exception { - List data = new ArrayList(); //FOR EACH SYSTEM_TYPE - for (String systemType : mapTypeToDBEntry.keySet()) { + for (String systemType : mapSystemTypeToDBEntry.keySet()) { List solrParameters = new ArrayList(1); solrParameters.add("extras_systemtype:\""+systemType+"\""); //EXECUTING THE QUERY IN THE PERIOD String queryResult = executeQueryFor(solrParameters, startDate, endDate, "groups"); - HarvestedDataKey insertDBKey = HarvestedDataKey.valueOf(mapTypeToDBEntry.get(systemType)); + HarvestedDataKey insertDBKey = HarvestedDataKey.valueOf(mapSystemTypeToDBEntry.get(systemType)); logger.info("Creating statistics for type: "+systemType+ " using db key "+insertDBKey); data.addAll(buildListOfHarvestedData(queryResult, insertDBKey)); } @@ -175,14 +109,13 @@ public class ResourceCatalogueHarvester extends SoBigDataHarvester { } JSONObject response = jsonObject.getJSONObject("response"); - int numFound = response.getInt("numFound"); - Map counter = new HashMap(mapCatalogueGroupToVRE.size()+1); for (String groupName : mapCatalogueGroupToVRE.keySet()) { counter.put(groupName, 0); } + //Counter for default context of accounting int catalogueContextCount = 0; logger.debug("For "+harvestKey+" has found "+numFound+" doc/s"); @@ -236,60 +169,6 @@ public class ResourceCatalogueHarvester extends SoBigDataHarvester { } - - /** - * Gets the groups. 
- * - * @param scope the scope - * @return the groups - */ - private List getGroups(String scope){ - List groups = new ArrayList(); - String ckanURL = ""; - try { - DataCatalogueImpl utils = factory.getUtilsPerScope(scope); - ckanURL = utils.getCatalogueUrl(); - List theGroups = utils.getGroups(); - Validate.notNull(theGroups, "The list of Groups is null"); - for (CkanGroup ckanGroup : theGroups) { - groups.add(ckanGroup.getName()); - } - } - catch (Exception e) { - logger.error("Error occurred on getting CKAN groups for scope: "+scope+" and CKAN URL: "+ckanURL,e); - } - - return groups; - } - - -// /** -// * Execute query. -// * -// * @param fqSubString the fq sub string -// * @return the string -// * @throws Exception the exception -// */ -// //TODO THIS METHOD MUST BE OPTIMIZED USING HttpSolrClient -// //We are not considering the rows (the number of documents returned from Solr by default) -// private String executeQuery(String fqSubString) throws Exception { -// -// String query = getSolrBaseUrl().endsWith("/")? getSolrBaseUrl():getSolrBaseUrl()+"/"; -// query+="select?"; -// -// String q = UrlEncoderUtil.encodeQuery("metadata_created:[" + DateUtils.dateToStringWithTZ(startDate) + " TO " -// + DateUtils.dateToStringWithTZ(endDate) + "]"); -// query += "q=" + q; -// String fq = UrlEncoderUtil.encodeQuery("extras_systemtype:\"SoBigData.eu: " + fqSubString + "\""); -// query += "&fq=" + fq + "&wt=json&indent=true&rows="+ROWS; -// logger.debug("Performing query: "+query); -// -// String json = Utils.getJson(query); -// logger.trace("Response is: "+json); -// -// return json; -// } - /** * Execute query. * @@ -333,7 +212,6 @@ public class ResourceCatalogueHarvester extends SoBigDataHarvester { return jsonResult; } - /** * Gets the catalogue context. * @@ -344,7 +222,6 @@ public class ResourceCatalogueHarvester extends SoBigDataHarvester { return catalogueContext; } - /** * Sets the catalogue context. * @@ -355,51 +232,4 @@ public class ResourceCatalogueHarvester extends SoBigDataHarvester { this.catalogueContext = catalogueContext; } - - /** - * Normalize groups. - * - * @param groups the groups - * @return the map with couple (normalized group name, group name) - */ - private Map normalizeGroups(List groups) { - Map listNGroups = new HashMap(groups.size()); - for (String group : groups) { - String normalizedGroup = group; - if(normalizedGroup.endsWith(GROUP_LABEL)){ - normalizedGroup = normalizedGroup.substring(0, normalizedGroup.length()-GROUP_LABEL.length()); - } - normalizedGroup = normalizedGroup.replaceAll("-",""); - listNGroups.put(normalizedGroup.toLowerCase(), group); - } - return listNGroups; - } - - - -// /** -// * Gets the data francesco. 
-// * -// * @return the data francesco -// * @throws Exception the exception -// */ -// public List getDataFrancesco() throws Exception { -// -// List data = new ArrayList(); -// -// //FOR EACH SYSTEM_TYPE -// for (String systemType : mapTypeToDBEntry.keySet()) { -// -// List solrParameters = new ArrayList(1); -// solrParameters.add("extras_systemtype:\""+systemType+"\""); -// //EXECUTING THE QUERY IN THE PERIOD -// String queryResult = executeQueryFor(solrParameters, startDate, endDate, "groups"); -// HarvestedDataKey insertDBKey = HarvestedDataKey.valueOf(mapTypeToDBEntry.get(systemType)); -// logger.info("Creating statistics for type: "+systemType+ " using db key "+insertDBKey); -// data.addAll(buildListOfHarvestedData(queryResult, insertDBKey)); -// } -// -// return data; -// } - } diff --git a/src/main/java/org/gcube/dataharvest/harvester/sobigdata/SoBigDataHarvester.java b/src/main/java/org/gcube/dataharvest/harvester/sobigdata/SoBigDataHarvester.java index d7d1190..3be43de 100644 --- a/src/main/java/org/gcube/dataharvest/harvester/sobigdata/SoBigDataHarvester.java +++ b/src/main/java/org/gcube/dataharvest/harvester/sobigdata/SoBigDataHarvester.java @@ -1,148 +1,361 @@ package org.gcube.dataharvest.harvester.sobigdata; import java.text.ParseException; +import java.util.ArrayList; import java.util.Date; +import java.util.HashMap; import java.util.List; +import java.util.Map; +import java.util.Properties; import java.util.Set; import java.util.SortedSet; -import java.util.TreeSet; -import org.gcube.common.resources.gcore.GenericResource; +import org.apache.commons.lang.Validate; +import org.gcube.datacatalogue.ckanutillibrary.server.DataCatalogueFactory; +import org.gcube.datacatalogue.ckanutillibrary.server.DataCatalogueImpl; +import org.gcube.dataharvest.AccountingDataHarvesterPlugin; +import org.gcube.dataharvest.datamodel.HarvestedDataKey; import org.gcube.dataharvest.harvester.BasicHarvester; -import org.gcube.resources.discovery.client.queries.api.SimpleQuery; -import org.gcube.resources.discovery.icclient.ICFactory; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import org.w3c.dom.Element; -import org.w3c.dom.Node; -import org.w3c.dom.NodeList; +import eu.trentorise.opendata.jackan.model.CkanGroup; + + +/** + * The Class SoBigDataHarvester. 
+ * + * @author Luca Frosini (ISTI-CNR) + * @author Francesco Mangiacrapa at ISTI-CNR (francesco.mangiacrapa@isti.cnr.it) + * May 24, 2018 + */ public abstract class SoBigDataHarvester extends BasicHarvester { - + private static Logger logger = LoggerFactory.getLogger(SoBigDataHarvester.class); - + + + //Added by Francesco + private static final String GROUP_LABEL = "group"; + + //Added by Francesco + protected HashMap mapSystemTypeToDBEntry; + + //Added by Francesco + protected HashMap mapCatalogueGroupToVRE; + + //Added by Francesco + protected HashMap mapWsFolderNameToVRE; + + //Added by Francesco + protected String catalogueContext; + + //Added by Francesco + private DataCatalogueFactory catalogueFactory; + + +// public static String SECONDARY_TYPE_FORMAT = "$resource/Profile/SecondaryType/text() eq '%1s'"; +// public static String NAME_FORMAT = "$resource/Profile/Name/text() eq '%1s'"; +// +// public static String SECONDARY_TYPE = "ExcludingVREs"; +// public static String NAME = "AccountingHarvesters"; + + + public static final String SO_BIG_DATA_CONTEXT = "/d4science.research-infrastructures.eu/SoBigData"; - - protected SortedSet excludedContexts; + protected SortedSet contexts; - - public SoBigDataHarvester(Date start, Date end, SortedSet contexts) throws ParseException { + + /** + * Instantiates a new so big data harvester. + * + * @param start the start + * @param end the end + * @param catalogueContext the catalogue context + * @param vreScopes the contexts + * @throws ParseException the parse exception + */ + public SoBigDataHarvester(Date start, Date end, String catalogueContext, SortedSet vreScopes) throws ParseException { super(start, end); - this.excludedContexts = getExcludedContexts(); - // Adding trailing slash to SO_BIG_DATA_CONTEXT to avoid to get VO - this.contexts = getSoBigDataContexts(contexts, SO_BIG_DATA_CONTEXT + "/"); - logger.trace("Valid contexts are {}", contexts); + this.catalogueContext = catalogueContext; + this.catalogueFactory = DataCatalogueFactory.getFactory(); + this.contexts = vreScopes; + initMappingMaps(); + + +// this.excludedContexts = getExcludedContexts(); +// // Adding trailing slash to SO_BIG_DATA_CONTEXT to avoid to get VO +// this.contexts = getSoBigDataContexts(contexts, SO_BIG_DATA_CONTEXT + "/"); +// logger.trace("Valid contexts are {}", contexts); } - - public SortedSet getFilteredContexts() { - return contexts; - } - - public static String SECONDARY_TYPE_FORMAT = "$resource/Profile/SecondaryType/text() eq '%1s'"; - public static String NAME_FORMAT = "$resource/Profile/Name/text() eq '%1s'"; - - public static String SECONDARY_TYPE = "ExcludingVREs"; - public static String NAME = "AccountingHarvesters"; - - protected SimpleQuery getFilteringGenericResource() { - return ICFactory.queryFor(GenericResource.class) - .addCondition(String.format(SECONDARY_TYPE_FORMAT, SECONDARY_TYPE)) - .addCondition(String.format(NAME_FORMAT, NAME)); - } - - protected GenericResource getGenericResource() { - SimpleQuery simpleQuery = getFilteringGenericResource(); - List res = ICFactory.clientFor(GenericResource.class).submit(simpleQuery); - if(res.size()==0) { - // At time of writing it should be an error but it can change in the future - logger.info("No {} for filtering contexts.", GenericResource.class.getSimpleName()); - return null; - } - return res.get(0); - } - - public SortedSet getExcludedContexts() { - SortedSet excludedContexts = new TreeSet<>(); - - GenericResource genericResource = getGenericResource(); - if(genericResource==null) { - return 
excludedContexts; - } - - Element body = genericResource.profile().body(); - - /* - * The following code parse an XML formatted as this - * - * - * - * /d4science.research-infrastructures.eu/SoBigData/ResourceCatalogue - * /d4science.research-infrastructures.eu/SoBigData/TagMe - * - * - * - */ - - NodeList nodeList = body.getElementsByTagName(this.getClass().getSimpleName()); - if(nodeList.getLength()==0) { - // At time of writing it should be an error but it can change in the future - logger.info("The body of the {} does not contains any information to filter contexts.", GenericResource.class.getSimpleName()); - } - - Element classNameElement = null; - for(int c=0; c keys = properties.stringPropertyNames(); + + mapSystemTypeToDBEntry = new HashMap(); + for (String key : keys) { + //System.out.println(key + " : " + properties.getProperty(key)); + try{ + HarvestedDataKey valueEnum = HarvestedDataKey.valueOf(key); + mapSystemTypeToDBEntry.put(properties.getProperty(key), valueEnum.name()); + }catch(Exception e){ + //silent + } + } + + logger.info("Built from properties the mapping 'SystemType' to 'DB entry' : "+mapSystemTypeToDBEntry); + + //GET CATALOGUE'S GROUPS + List groups = loadGroupsFromCKAN(catalogueContext); + //NORMALIZING THE GROUP NAME TO MATCH WITH VRE NAME + Map mapNormalizedGroups = normalizeGroups(groups); + logger.debug("Map of Normalized Groups is: "+mapNormalizedGroups); + + //CREATING MAPPING BETWEEN (CATALOGUE GROUP NAME TO VRE NAME) + mapCatalogueGroupToVRE = new HashMap(); + //CREATING MAPPING BETWEEN (WS FOLDER NAME TO VRE NAME) + mapWsFolderNameToVRE = new HashMap(); + Set normalizedGroups = mapNormalizedGroups.keySet(); + for (String context : contexts) { + //logger.trace("Context is: " + context); + String loweredVREName = context.substring(context.lastIndexOf("/") + 1, context.length()).toLowerCase(); + try { + //logger.trace("vreName lowered is: " + loweredVREName); + if (normalizedGroups.contains(loweredVREName)) { + logger.debug("Normalized Groups matching the lowered VRE name: "+loweredVREName); + // Creating the map with couple (catalogue group name, scope) + mapCatalogueGroupToVRE.put(mapNormalizedGroups.get(loweredVREName), context); } - + + mapWsFolderNameToVRE.put(loweredVREName, context); + } + catch (Exception e) { + // silent } } - - return excludedContexts; - + + logger.info("Map of Catalogue Groups To VRE is: "+mapCatalogueGroupToVRE); + logger.info("Map of (lowered) Ws Folder Name To VRE is: "+mapWsFolderNameToVRE); + } - - protected boolean filterContext(String context) { - if(excludedContexts.contains(context)) { - return true; + + /** + * Normalize groups. 
+ * + * @author Francesco Mangiacrapa + * @param groups the groups + * @return the map with couples (normalized group name, group name) + */ + private Map normalizeGroups(List groups) { + Map listNGroups = new HashMap(groups.size()); + for (String group : groups) { + String normalizedGroup = group; + if(normalizedGroup.endsWith(GROUP_LABEL)){ + normalizedGroup = normalizedGroup.substring(0, normalizedGroup.length()-GROUP_LABEL.length()); + } + normalizedGroup = normalizedGroup.replaceAll("-",""); + listNGroups.put(normalizedGroup.toLowerCase(), group); } - return false; + return listNGroups; } - - protected SortedSet getSoBigDataContexts(Set contexts, String base) { - SortedSet filteredContext = new TreeSet<>(); - for(String context : contexts) { - if(context.startsWith(SO_BIG_DATA_CONTEXT)) { - if(!filterContext(context)) { - filteredContext.add(context); - } + + + /** + * Load groups from ckan. + * + * @param scope the scope + * @return the list + */ + private List loadGroupsFromCKAN(String scope){ + List groups = new ArrayList(); + String ckanURL = ""; + try { + DataCatalogueImpl utils = catalogueFactory.getUtilsPerScope(scope); + ckanURL = utils.getCatalogueUrl(); + List theGroups = utils.getGroups(); + Validate.notNull(theGroups, "The list of Groups is null"); + for (CkanGroup ckanGroup : theGroups) { + groups.add(ckanGroup.getName()); } } - return filteredContext; + catch (Exception e) { + logger.error("Error occurred on getting CKAN groups for scope: "+scope+" and CKAN URL: "+ckanURL,e); + } + + return groups; } - + + /** + * Gets the map catalogue group to vre. + * + * @return the map catalogue group to vre + */ + public HashMap getMapCatalogueGroupToVRE() { + + return mapCatalogueGroupToVRE; + } + + + /** + * @return the mapSystemTypeToDBEntry + */ + public HashMap getMapSystemTypeToDBEntry() { + + return mapSystemTypeToDBEntry; + } + + + /** + * @return the mapWsFolderNameToVRE + */ + public HashMap getMapWsFolderNameToVRE() { + + return mapWsFolderNameToVRE; + } + + + +// /** +// * Gets the filtered contexts. +// * +// * @return the filtered contexts +// */ +// public SortedSet getFilteredContexts() { +// return contexts; +// } + +// /** +// * Gets the filtering generic resource. +// * +// * @return the filtering generic resource +// */ +// protected SimpleQuery getFilteringGenericResource() { +// return ICFactory.queryFor(GenericResource.class) +// .addCondition(String.format(SECONDARY_TYPE_FORMAT, SECONDARY_TYPE)) +// .addCondition(String.format(NAME_FORMAT, NAME)); +// } + +// /** +// * Gets the generic resource. +// * +// * @return the generic resource +// */ +// protected GenericResource getGenericResource() { +// SimpleQuery simpleQuery = getFilteringGenericResource(); +// List res = ICFactory.clientFor(GenericResource.class).submit(simpleQuery); +// if(res.size()==0) { +// // At time of writing it should be an error but it can change in the future +// logger.info("No {} for filtering contexts.", GenericResource.class.getSimpleName()); +// return null; +// } +// return res.get(0); +// } + +// /** +// * Gets the excluded contexts. 
+// * +// * @return the excluded contexts +// */ +// public SortedSet getExcludedContexts() { +// SortedSet excludedContexts = new TreeSet<>(); +// +// GenericResource genericResource = getGenericResource(); +// if(genericResource==null) { +// return excludedContexts; +// } +// +// Element body = genericResource.profile().body(); +// +// /* +// * The following code parse an XML formatted as this +// * +// * +// * +// * /d4science.research-infrastructures.eu/SoBigData/ResourceCatalogue +// * /d4science.research-infrastructures.eu/SoBigData/TagMe +// * +// * +// * +// */ +// +// NodeList nodeList = body.getElementsByTagName(this.getClass().getSimpleName()); +// if(nodeList.getLength()==0) { +// // At time of writing it should be an error but it can change in the future +// logger.info("The body of the {} does not contains any information to filter contexts.", GenericResource.class.getSimpleName()); +// } +// +// Element classNameElement = null; +// for(int c=0; c getSoBigDataContexts(Set contexts, String base) { +// SortedSet filteredContext = new TreeSet<>(); +// for(String context : contexts) { +// if(context.startsWith(SO_BIG_DATA_CONTEXT)) { +// if(!filterContext(context)) { +// filteredContext.add(context); +// } +// } +// } +// return filteredContext; +// } + + + } diff --git a/src/test/java/org/gcube/dataharvest/AccountingDataHarvesterPluginTest.java b/src/test/java/org/gcube/dataharvest/AccountingDataHarvesterPluginTest.java index 02310cc..a30a87b 100644 --- a/src/test/java/org/gcube/dataharvest/AccountingDataHarvesterPluginTest.java +++ b/src/test/java/org/gcube/dataharvest/AccountingDataHarvesterPluginTest.java @@ -12,6 +12,7 @@ import java.util.TreeSet; import org.gcube.common.scope.impl.ScopeBean; import org.gcube.dataharvest.datamodel.HarvestedData; import org.gcube.dataharvest.harvester.MethodInvocationHarvester; +import org.gcube.dataharvest.harvester.sobigdata.DataMethodDownloadHarvester; import org.gcube.dataharvest.harvester.sobigdata.ResourceCatalogueHarvester; import org.gcube.dataharvest.harvester.sobigdata.TagMeMethodInvocationHarvester; import org.gcube.dataharvest.utils.ContextTest; @@ -127,11 +128,11 @@ public class AccountingDataHarvesterPluginTest extends ContextTest { //end ResourceCatalogueHarvester resourceCatalogueHarvester = new ResourceCatalogueHarvester(start, end, catalogueContext, contexts); - SortedSet excludedContexts = resourceCatalogueHarvester.getExcludedContexts(); - logger.info("Excluded contexts {}", excludedContexts); - SortedSet validContexts = resourceCatalogueHarvester.getFilteredContexts(); - - logger.info("Valid Contexts {}", validContexts); +// SortedSet excludedContexts = resourceCatalogueHarvester.getExcludedContexts(); +// logger.info("Excluded contexts {}", excludedContexts); +// SortedSet validContexts = resourceCatalogueHarvester.getFilteredContexts(); +// +// logger.info("Valid Contexts {}", validContexts); } catch(Exception e) { logger.error("", e); @@ -139,47 +140,18 @@ public class AccountingDataHarvesterPluginTest extends ContextTest { } - @Test + //@Test public void testResourceCatalogueHarvester() { try { -// Properties properties = AccountingDataHarvesterPlugin.getProperties().get(); -// //Enumeration enums = (Enumeration) properties.propertyNames(); -// //System.out.println("enums: " +enums.hasMoreElements()); -// Set keys = properties.stringPropertyNames(); -// -// Map typeToDB = new HashMap(); -// for (String key : keys) { -// System.out.println(key + " : " + properties.getProperty(key)); -// -// try{ -// HarvestedDataKey 
valueEnum = HarvestedDataKey.valueOf(key); -// typeToDB.put(properties.getProperty(key), valueEnum.name()); -// }catch(Exception e){ -// //silent -// } -// } -// -// System.out.println(typeToDB); - org.gcube.dataharvest.utils.Utils.setContext(RESOURCE_CATALOGUE); - MeasureType measureType = MeasureType.MONTHLY; // Date start = DateUtils.getPreviousPeriod(measureType).getTime(); // Date end = DateUtils.getEndDateFromStartDate(measureType, start, 1); - - //Date start = DateUtils.getPreviousPeriod(measureType).getTime(); - Date start = DateUtils.getStartCalendar(2016, 12, 01).getTime(); - Date end = DateUtils.getEndDateFromStartDate(measureType, start, 18); - - LinkedHashMap map = ContextManager.readContexts(); - SortedSet contexts = new TreeSet<>(map.keySet()); - - for (String context : contexts) { - System.out.println("context: "+context); - } - +// Date start = DateUtils.getStartCalendar(2016, 12, 01).getTime(); + Date start = DateUtils.getPreviousPeriod(measureType).getTime(); + Date end = DateUtils.getEndDateFromStartDate(measureType, start, 1); AccountingDataHarvesterPlugin accountingDataHarvesterPlugin = new AccountingDataHarvesterPlugin(null); accountingDataHarvesterPlugin.getConfigParameters(); @@ -190,7 +162,8 @@ public class AccountingDataHarvesterPluginTest extends ContextTest { logger.debug("Read from properties "+AccountingDataHarvesterPlugin.RESOURCE_CATALOGUE_CONTEXT+" value: "+catalogueContext); //end - contexts = new TreeSet(); + //TODO @LUCA FROSINI MUST PASS SoBigData VREs + TreeSet contexts = new TreeSet(); contexts.add("/d4science.research-infrastructures.eu/SoBigData/ResourceCatalogue"); contexts.add("/d4science.research-infrastructures.eu/SoBigData/TagMe"); contexts.add("/d4science.research-infrastructures.eu/SoBigData/WellBeingAndEconomy"); @@ -198,6 +171,7 @@ public class AccountingDataHarvesterPluginTest extends ContextTest { contexts.add("/d4science.research-infrastructures.eu/SoBigData/SocietalDebates"); contexts.add("/d4science.research-infrastructures.eu/SoBigData/SportsDataScience"); contexts.add("/d4science.research-infrastructures.eu/SoBigData/SMAPH"); + ResourceCatalogueHarvester resourceCatalogueHarvester = new ResourceCatalogueHarvester(start, end, catalogueContext, contexts); List data = resourceCatalogueHarvester.getData(); @@ -209,4 +183,49 @@ public class AccountingDataHarvesterPluginTest extends ContextTest { logger.error("", e); } } + + + @Test + public void testDataMethodDownloadHarvester() { + try { + + org.gcube.dataharvest.utils.Utils.setContext(RESOURCE_CATALOGUE); + + MeasureType measureType = MeasureType.MONTHLY; + + Date start = DateUtils.getStartCalendar(2016, 12, 01).getTime(); + // Date start = DateUtils.getPreviousPeriod(measureType).getTime(); +// Date end = DateUtils.getEndDateFromStartDate(measureType, start, 1); + Date end = DateUtils.getEndDateFromStartDate(measureType, start, 18); + + AccountingDataHarvesterPlugin accountingDataHarvesterPlugin = new AccountingDataHarvesterPlugin(null); + accountingDataHarvesterPlugin.getConfigParameters(); + + //Added by Francesco + Properties properties = AccountingDataHarvesterPlugin.getProperties().get(); + String catalogueContext = (String) properties.get(AccountingDataHarvesterPlugin.RESOURCE_CATALOGUE_CONTEXT); + logger.debug("Read from properties "+AccountingDataHarvesterPlugin.RESOURCE_CATALOGUE_CONTEXT+" value: "+catalogueContext); + //end + + //TODO @LUCA FROSINI MUST PASS SoBigData VREs + TreeSet contexts = new TreeSet(); + 
contexts.add("/d4science.research-infrastructures.eu/SoBigData/ResourceCatalogue"); + contexts.add("/d4science.research-infrastructures.eu/SoBigData/TagMe"); + contexts.add("/d4science.research-infrastructures.eu/SoBigData/WellBeingAndEconomy"); + contexts.add("/d4science.research-infrastructures.eu/SoBigData/CityOfCitizens"); + contexts.add("/d4science.research-infrastructures.eu/SoBigData/SocietalDebates"); + contexts.add("/d4science.research-infrastructures.eu/SoBigData/SportsDataScience"); + contexts.add("/d4science.research-infrastructures.eu/SoBigData/SMAPH"); + + DataMethodDownloadHarvester resourceCatalogueHarvester = new DataMethodDownloadHarvester(start, end, catalogueContext, contexts); + List data = resourceCatalogueHarvester.getData(); + +// for (HarvestedData harvestedData : data) { +// System.out.println(harvestedData.toString()); +// } + + } catch(Exception e) { + logger.error("", e); + } + } }