Fixed DataMethodDownloadHarvester

git-svn-id: https://svn.d4science.research-infrastructures.eu/gcube/trunk/accounting/accounting-dashboard-harvester-se-plugin@167721 82a268e6-3cf1-43bd-a215-b396298e98cf
This commit is contained in:
Francesco Mangiacrapa 2018-05-24 15:10:45 +00:00
parent 09a8f2d2dc
commit cb8e1a62a5
5 changed files with 473 additions and 362 deletions

View File

@ -177,7 +177,7 @@ public class AccountingDataHarvesterPlugin extends Plugin<DataHarvestPluginDecla
try { try {
// Collecting info on Data/Method download // Collecting info on Data/Method download
DataMethodDownloadHarvester dataMethodDownloadHarvester = new DataMethodDownloadHarvester(start, end,contexts); DataMethodDownloadHarvester dataMethodDownloadHarvester = new DataMethodDownloadHarvester(start, end, catalogueContext, contexts);
List<HarvestedData> harvested = dataMethodDownloadHarvester.getData(); List<HarvestedData> harvested = dataMethodDownloadHarvester.getData();
data.addAll(harvested); data.addAll(harvested);
} catch(Exception e) { } catch(Exception e) {

View File

@ -34,39 +34,72 @@ public class DataMethodDownloadHarvester extends SoBigDataHarvester {
private int count = 0; private int count = 0;
public DataMethodDownloadHarvester(Date start, Date end, SortedSet<String> contexts) throws ParseException { public DataMethodDownloadHarvester(Date start, Date end, String catalogueContext, SortedSet<String> contexts) throws ParseException {
super(start, end, contexts); super(start, end, catalogueContext, contexts);
} }
@Override @Override
public List<HarvestedData> getData() throws Exception { public List<HarvestedData> getData() throws Exception {
String context = Utils.getCurrentContext(); String defaultContext = Utils.getCurrentContext();
logger.debug("The context is: "+defaultContext);
try { try {
ArrayList<HarvestedData> data = new ArrayList<HarvestedData>();
count = 0; String vreName = getVRENameToHL(defaultContext);
logger.debug("Getting VRE Name to HL from context/scope returns: "+vreName);
String user = vreName + "-Manager";
logger.debug("Using user '"+user+"' to getHome from HL");
//INSTANTIATING HL AND GETTING HOME FOR VRE MANAGER
HomeManager manager = HomeLibrary.getHomeManagerFactory().getHomeManager(); HomeManager manager = HomeLibrary.getHomeManagerFactory().getHomeManager();
String user = getVREName(context) + "-Manager";
@SuppressWarnings("deprecation") @SuppressWarnings("deprecation")
Home home = manager.getHome(user); Home home = manager.getHome(user);
JCRWorkspace ws = (JCRWorkspace) home.getWorkspace(); JCRWorkspace ws = (JCRWorkspace) home.getWorkspace();
//
String path = "/Workspace/MySpecialFolders/" + vreName;
logger.debug("Getting item by Path: "+path);
JCRWorkspaceItem item = (JCRWorkspaceItem) ws.getItemByPath(path);
//
logger.info("Analyzing " + defaultContext + " in the period [" + startDate.toString() + " to " + endDate.toString() +"] starting from root: "+item.getName());
JCRWorkspaceItem item = (JCRWorkspaceItem) ws HarvestedData defaultHarvesteData = new HarvestedData(HarvestedDataKey.DATA_METHOD_DOWNLOAD, defaultContext, count);
.getItemByPath("/Workspace/MySpecialFolders/" + getVREName(context));
logger.debug("Analyzing " + context + " from " + startDate.toString() + " to " + endDate.toString()); List<HarvestedData> data = new ArrayList<HarvestedData>();
for (WorkspaceItem children: item.getChildren()) {
count = 0; //resettings the counter
HarvestedData harvestedData;
//Getting statistics for folder
if(children.isFolder()){
logger.info("Getting statistics for folder: "+children.getName());
getStats(children, startDate, endDate);
String normalizedName = children.getName().replaceAll("[^A-Za-z0-9]","");
String scope = mapWsFolderNameToVRE.get(normalizedName);
//Checking whether the folder name maps to a VRE, so the count is accounted to the right scope...
if(scope!=null && !scope.isEmpty()){
logger.info("Found scope '" + scope + "' matching with normalized VRE name: "+normalizedName);
harvestedData = new HarvestedData(HarvestedDataKey.DATA_METHOD_DOWNLOAD, scope, count);
data.add(harvestedData);
logger.info("Added data: "+harvestedData);
}else{
logger.info("No scope found matching the folder name: "+normalizedName +", accounting its stats in the default context: "+defaultContext);
//INCREASING THE DEFAULT CONTEXT COUNTER...
defaultHarvesteData.setMeasure(defaultHarvesteData.getMeasure()+count);
logger.info("Increased default context stats: "+defaultHarvesteData);
}
}
}
//ADDING DEFAULT ACCOUNTING
data.add(defaultHarvesteData);
logger.info("In the period [from "+startDate+" to "+endDate+ "] returning workspace accouting data:");
for (HarvestedData harvestedData : data) {
logger.info(harvestedData.toString());
}
logger.error("Before getStats()");
getStats(item, startDate, endDate);
logger.error("After getStats()");
HarvestedData harvest = new HarvestedData(HarvestedDataKey.DATA_METHOD_DOWNLOAD, context, count);
data.add(harvest);
logger.debug(harvest.toString());
return data; return data;
} catch(Exception e) { } catch(Exception e) {
@ -75,16 +108,26 @@ public class DataMethodDownloadHarvester extends SoBigDataHarvester {
} }
private void getStats(WorkspaceItem root, Date start, Date end) throws InternalErrorException {
/**
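* Recursively walks the workspace tree rooted at the given item and
* increments the {@code count} field for every accounting entry of a
* counted type (e.g. UPDATE, READ) dated after {@code start} and before
* {@code end}. Nothing is returned; the result is accumulated in {@code count}.
*
* @param baseItem the folder or item to start from
* @param start the start of the period (exclusive)
* @param end the end of the period (exclusive)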
* @throws InternalErrorException the internal error exception
*/
private void getStats(WorkspaceItem baseItem, Date start, Date end) throws InternalErrorException {
List<? extends WorkspaceItem> children; List<? extends WorkspaceItem> children;
if(root.isFolder()) { if(baseItem.isFolder()) {
children = root.getChildren(); children = baseItem.getChildren();
for(WorkspaceItem child : children) for(WorkspaceItem child : children)
getStats(child, start, end); getStats(child, start, end);
} else { } else {
try { try {
List<AccountingEntry> accounting = root.getAccounting(); List<AccountingEntry> accounting = baseItem.getAccounting();
for(AccountingEntry entry : accounting) { for(AccountingEntry entry : accounting) {
switch(entry.getEntryType()) { switch(entry.getEntryType()) {
@ -92,8 +135,7 @@ public class DataMethodDownloadHarvester extends SoBigDataHarvester {
case UPDATE: case UPDATE:
case READ: case READ:
Calendar calendar = entry.getDate(); Calendar calendar = entry.getDate();
if(calendar.after(DateUtils.dateToCalendar(start)) if(calendar.after(DateUtils.dateToCalendar(start)) && calendar.before(DateUtils.dateToCalendar(end))) {
&& calendar.before(DateUtils.dateToCalendar(end))) {
count++; count++;
} }
@ -111,7 +153,14 @@ public class DataMethodDownloadHarvester extends SoBigDataHarvester {
} }
} }
private static String getVREName(String vre) {
/**
* Gets the VRE name to HL.
*
* @param vre the vre
* @return the VRE name to HL
*/
private static String getVRENameToHL(String vre) {
Validate.notNull(vre, "scope must be not null"); Validate.notNull(vre, "scope must be not null");
String newName; String newName;

View File

@ -6,14 +6,8 @@ import java.util.HashMap;
import java.util.Iterator; import java.util.Iterator;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
import java.util.Properties;
import java.util.Set;
import java.util.SortedSet; import java.util.SortedSet;
import org.apache.commons.lang.Validate;
import org.gcube.datacatalogue.ckanutillibrary.server.DataCatalogueFactory;
import org.gcube.datacatalogue.ckanutillibrary.server.DataCatalogueImpl;
import org.gcube.dataharvest.AccountingDataHarvesterPlugin;
import org.gcube.dataharvest.datamodel.HarvestedData; import org.gcube.dataharvest.datamodel.HarvestedData;
import org.gcube.dataharvest.datamodel.HarvestedDataKey; import org.gcube.dataharvest.datamodel.HarvestedDataKey;
import org.gcube.dataharvest.utils.DateUtils; import org.gcube.dataharvest.utils.DateUtils;
@ -25,8 +19,6 @@ import org.json.JSONObject;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import eu.trentorise.opendata.jackan.model.CkanGroup;
/** /**
* The Class ResourceCatalogueHarvester. * The Class ResourceCatalogueHarvester.
* *
@ -36,11 +28,6 @@ import eu.trentorise.opendata.jackan.model.CkanGroup;
*/ */
public class ResourceCatalogueHarvester extends SoBigDataHarvester { public class ResourceCatalogueHarvester extends SoBigDataHarvester {
/**
*
*/
private static final String GROUP_LABEL = "group";
private static final String AND = " AND "; private static final String AND = " AND ";
public static int ROWS = 500; public static int ROWS = 500;
@ -49,76 +36,22 @@ public class ResourceCatalogueHarvester extends SoBigDataHarvester {
protected String solrBaseUrl; protected String solrBaseUrl;
private DataCatalogueFactory factory;
private HashMap<String, String> mapTypeToDBEntry;
private HashMap<String, String> mapCatalogueGroupToVRE;
private String catalogueContext;
/** /**
* Instantiates a new resource catalogue harvester. * Instantiates a new resource catalogue harvester.
* *
* @param start the start * @param start the start
* @param end the end * @param end the end
* @param catalogueContext the catalogue context * @param catalogueContext the catalogue context
* @param contexts the contexts * @param contexts the contexts. They are the VREs
* @throws Exception the exception * @throws Exception the exception
*/ */
public ResourceCatalogueHarvester(Date start, Date end, String catalogueContext, SortedSet<String> contexts) throws Exception { public ResourceCatalogueHarvester(Date start, Date end, String catalogueContext, SortedSet<String> contexts) throws Exception {
super(start, end, contexts); super(start, end, catalogueContext, contexts);
factory = DataCatalogueFactory.getFactory();
this.catalogueContext = catalogueContext;
if(catalogueContext==null || catalogueContext.isEmpty()) if(catalogueContext==null || catalogueContext.isEmpty())
throw new Exception("The catalogue context is null or empty. Pass a valid scope"); throw new Exception("The catalogue context is null or empty. Pass a valid scope");
logger.debug("Catalogue context is: "+catalogueContext); logger.debug("Catalogue context is: "+catalogueContext);
Properties properties = AccountingDataHarvesterPlugin.getProperties().get();
Set<String> keys = properties.stringPropertyNames();
mapTypeToDBEntry = new HashMap<String, String>();
for (String key : keys) {
//System.out.println(key + " : " + properties.getProperty(key));
try{
HarvestedDataKey valueEnum = HarvestedDataKey.valueOf(key);
mapTypeToDBEntry.put(properties.getProperty(key), valueEnum.name());
}catch(Exception e){
//silent
}
}
logger.info("Built from properties the mapping 'SystemType' to 'DB entry' : "+mapTypeToDBEntry);
//GET CATALOGUE'S GROUPS
List<String> groups = getGroups(catalogueContext);
//NORMALIZING THE GROUP NAME TO MATCH WITH VRE NAME
Map<String,String> mapNormalizedGroups = normalizeGroups(groups);
logger.debug("Map of Normalized Groups is: "+mapNormalizedGroups);
//CREATING MAPPING BETWEEN (CATALOGUE GROUP NAME TO VRE NAME)
mapCatalogueGroupToVRE = new HashMap<String, String>();
Set<String> normalizedGroups = mapNormalizedGroups.keySet();
for (String context : contexts) {
//logger.trace("Context is: " + context);
String loweredVREName =context.substring(context.lastIndexOf("/") + 1, context.length()).toLowerCase();
//logger.trace("vreName lowered is: " + loweredVREName);
try {
if (normalizedGroups.contains(loweredVREName)) {
logger.debug("Normalized Groups matching the lowered VRE name: "+loweredVREName);
// Creating the map with couple (catalogue group name,
// scope)
mapCatalogueGroupToVRE.put(mapNormalizedGroups.get(loweredVREName), context);
}
}
catch (Exception e) {
// silent
}
}
logger.info("Map of Catalogue Groups To VRE is: "+mapCatalogueGroupToVRE);
} }
/** /**
@ -126,6 +59,8 @@ public class ResourceCatalogueHarvester extends SoBigDataHarvester {
* *
* @return the solr base url * @return the solr base url
*/ */
//TODO @LUCA FROSINI
protected String getSolrBaseUrl() { protected String getSolrBaseUrl() {
return "https://ckan-solr-d4s.d4science.org/solr/sobigdata"; return "https://ckan-solr-d4s.d4science.org/solr/sobigdata";
} }
@ -136,17 +71,16 @@ public class ResourceCatalogueHarvester extends SoBigDataHarvester {
@Override @Override
public List<HarvestedData> getData() throws Exception { public List<HarvestedData> getData() throws Exception {
List<HarvestedData> data = new ArrayList<HarvestedData>(); List<HarvestedData> data = new ArrayList<HarvestedData>();
//FOR EACH SYSTEM_TYPE //FOR EACH SYSTEM_TYPE
for (String systemType : mapTypeToDBEntry.keySet()) { for (String systemType : mapSystemTypeToDBEntry.keySet()) {
List<String> solrParameters = new ArrayList<String>(1); List<String> solrParameters = new ArrayList<String>(1);
solrParameters.add("extras_systemtype:\""+systemType+"\""); solrParameters.add("extras_systemtype:\""+systemType+"\"");
//EXECUTING THE QUERY IN THE PERIOD //EXECUTING THE QUERY IN THE PERIOD
String queryResult = executeQueryFor(solrParameters, startDate, endDate, "groups"); String queryResult = executeQueryFor(solrParameters, startDate, endDate, "groups");
HarvestedDataKey insertDBKey = HarvestedDataKey.valueOf(mapTypeToDBEntry.get(systemType)); HarvestedDataKey insertDBKey = HarvestedDataKey.valueOf(mapSystemTypeToDBEntry.get(systemType));
logger.info("Creating statistics for type: "+systemType+ " using db key "+insertDBKey); logger.info("Creating statistics for type: "+systemType+ " using db key "+insertDBKey);
data.addAll(buildListOfHarvestedData(queryResult, insertDBKey)); data.addAll(buildListOfHarvestedData(queryResult, insertDBKey));
} }
@ -175,14 +109,13 @@ public class ResourceCatalogueHarvester extends SoBigDataHarvester {
} }
JSONObject response = jsonObject.getJSONObject("response"); JSONObject response = jsonObject.getJSONObject("response");
int numFound = response.getInt("numFound"); int numFound = response.getInt("numFound");
Map<String, Integer> counter = new HashMap<String, Integer>(mapCatalogueGroupToVRE.size()+1); Map<String, Integer> counter = new HashMap<String, Integer>(mapCatalogueGroupToVRE.size()+1);
for (String groupName : mapCatalogueGroupToVRE.keySet()) { for (String groupName : mapCatalogueGroupToVRE.keySet()) {
counter.put(groupName, 0); counter.put(groupName, 0);
} }
//Counter for default context of accounting //Counter for default context of accounting
int catalogueContextCount = 0; int catalogueContextCount = 0;
logger.debug("For "+harvestKey+" has found "+numFound+" doc/s"); logger.debug("For "+harvestKey+" has found "+numFound+" doc/s");
@ -236,60 +169,6 @@ public class ResourceCatalogueHarvester extends SoBigDataHarvester {
} }
/**
* Gets the groups.
*
* @param scope the scope
* @return the groups
*/
private List<String> getGroups(String scope){
List<String> groups = new ArrayList<String>();
String ckanURL = "";
try {
DataCatalogueImpl utils = factory.getUtilsPerScope(scope);
ckanURL = utils.getCatalogueUrl();
List<CkanGroup> theGroups = utils.getGroups();
Validate.notNull(theGroups, "The list of Groups is null");
for (CkanGroup ckanGroup : theGroups) {
groups.add(ckanGroup.getName());
}
}
catch (Exception e) {
logger.error("Error occurred on getting CKAN groups for scope: "+scope+" and CKAN URL: "+ckanURL,e);
}
return groups;
}
// /**
// * Execute query.
// *
// * @param fqSubString the fq sub string
// * @return the string
// * @throws Exception the exception
// */
// //TODO THIS METHOD MUST BE OPTIMIZED USING HttpSolrClient
// //We are not considering the rows (the number of documents returned from Solr by default)
// private String executeQuery(String fqSubString) throws Exception {
//
// String query = getSolrBaseUrl().endsWith("/")? getSolrBaseUrl():getSolrBaseUrl()+"/";
// query+="select?";
//
// String q = UrlEncoderUtil.encodeQuery("metadata_created:[" + DateUtils.dateToStringWithTZ(startDate) + " TO "
// + DateUtils.dateToStringWithTZ(endDate) + "]");
// query += "q=" + q;
// String fq = UrlEncoderUtil.encodeQuery("extras_systemtype:\"SoBigData.eu: " + fqSubString + "\"");
// query += "&fq=" + fq + "&wt=json&indent=true&rows="+ROWS;
// logger.debug("Performing query: "+query);
//
// String json = Utils.getJson(query);
// logger.trace("Response is: "+json);
//
// return json;
// }
/** /**
* Execute query. * Execute query.
* *
@ -333,7 +212,6 @@ public class ResourceCatalogueHarvester extends SoBigDataHarvester {
return jsonResult; return jsonResult;
} }
/** /**
* Gets the catalogue context. * Gets the catalogue context.
* *
@ -344,7 +222,6 @@ public class ResourceCatalogueHarvester extends SoBigDataHarvester {
return catalogueContext; return catalogueContext;
} }
/** /**
* Sets the catalogue context. * Sets the catalogue context.
* *
@ -355,51 +232,4 @@ public class ResourceCatalogueHarvester extends SoBigDataHarvester {
this.catalogueContext = catalogueContext; this.catalogueContext = catalogueContext;
} }
/**
* Normalize groups.
*
* @param groups the groups
* @return the map with couple (normalized group name, group name)
*/
private Map<String,String> normalizeGroups(List<String> groups) {
Map<String,String> listNGroups = new HashMap<String,String>(groups.size());
for (String group : groups) {
String normalizedGroup = group;
if(normalizedGroup.endsWith(GROUP_LABEL)){
normalizedGroup = normalizedGroup.substring(0, normalizedGroup.length()-GROUP_LABEL.length());
}
normalizedGroup = normalizedGroup.replaceAll("-","");
listNGroups.put(normalizedGroup.toLowerCase(), group);
}
return listNGroups;
}
// /**
// * Gets the data francesco.
// *
// * @return the data francesco
// * @throws Exception the exception
// */
// public List<HarvestedData> getDataFrancesco() throws Exception {
//
// List<HarvestedData> data = new ArrayList<HarvestedData>();
//
// //FOR EACH SYSTEM_TYPE
// for (String systemType : mapTypeToDBEntry.keySet()) {
//
// List<String> solrParameters = new ArrayList<String>(1);
// solrParameters.add("extras_systemtype:\""+systemType+"\"");
// //EXECUTING THE QUERY IN THE PERIOD
// String queryResult = executeQueryFor(solrParameters, startDate, endDate, "groups");
// HarvestedDataKey insertDBKey = HarvestedDataKey.valueOf(mapTypeToDBEntry.get(systemType));
// logger.info("Creating statistics for type: "+systemType+ " using db key "+insertDBKey);
// data.addAll(buildListOfHarvestedData(queryResult, insertDBKey));
// }
//
// return data;
// }
} }

View File

@ -1,148 +1,361 @@
package org.gcube.dataharvest.harvester.sobigdata; package org.gcube.dataharvest.harvester.sobigdata;
import java.text.ParseException; import java.text.ParseException;
import java.util.ArrayList;
import java.util.Date; import java.util.Date;
import java.util.HashMap;
import java.util.List; import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.Set; import java.util.Set;
import java.util.SortedSet; import java.util.SortedSet;
import java.util.TreeSet;
import org.gcube.common.resources.gcore.GenericResource; import org.apache.commons.lang.Validate;
import org.gcube.datacatalogue.ckanutillibrary.server.DataCatalogueFactory;
import org.gcube.datacatalogue.ckanutillibrary.server.DataCatalogueImpl;
import org.gcube.dataharvest.AccountingDataHarvesterPlugin;
import org.gcube.dataharvest.datamodel.HarvestedDataKey;
import org.gcube.dataharvest.harvester.BasicHarvester; import org.gcube.dataharvest.harvester.BasicHarvester;
import org.gcube.resources.discovery.client.queries.api.SimpleQuery;
import org.gcube.resources.discovery.icclient.ICFactory;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import eu.trentorise.opendata.jackan.model.CkanGroup;
/**
* The Class SoBigDataHarvester.
*
* @author Luca Frosini (ISTI-CNR)
* @author Francesco Mangiacrapa at ISTI-CNR (francesco.mangiacrapa@isti.cnr.it)
* May 24, 2018
*/
public abstract class SoBigDataHarvester extends BasicHarvester { public abstract class SoBigDataHarvester extends BasicHarvester {
private static Logger logger = LoggerFactory.getLogger(SoBigDataHarvester.class); private static Logger logger = LoggerFactory.getLogger(SoBigDataHarvester.class);
//Added by Francesco
private static final String GROUP_LABEL = "group";
//Added by Francesco
protected HashMap<String, String> mapSystemTypeToDBEntry;
//Added by Francesco
protected HashMap<String, String> mapCatalogueGroupToVRE;
//Added by Francesco
protected HashMap<String, String> mapWsFolderNameToVRE;
//Added by Francesco
protected String catalogueContext;
//Added by Francesco
private DataCatalogueFactory catalogueFactory;
// public static String SECONDARY_TYPE_FORMAT = "$resource/Profile/SecondaryType/text() eq '%1s'";
// public static String NAME_FORMAT = "$resource/Profile/Name/text() eq '%1s'";
//
// public static String SECONDARY_TYPE = "ExcludingVREs";
// public static String NAME = "AccountingHarvesters";
public static final String SO_BIG_DATA_CONTEXT = "/d4science.research-infrastructures.eu/SoBigData"; public static final String SO_BIG_DATA_CONTEXT = "/d4science.research-infrastructures.eu/SoBigData";
protected SortedSet<String> excludedContexts;
protected SortedSet<String> contexts; protected SortedSet<String> contexts;
public SoBigDataHarvester(Date start, Date end, SortedSet<String> contexts) throws ParseException { /**
* Instantiates a new so big data harvester.
*
* @param start the start
* @param end the end
* @param catalogueContext the catalogue context
* @param vreScopes the contexts
* @throws ParseException the parse exception
*/
public SoBigDataHarvester(Date start, Date end, String catalogueContext, SortedSet<String> vreScopes) throws ParseException {
super(start, end); super(start, end);
this.excludedContexts = getExcludedContexts(); this.catalogueContext = catalogueContext;
// Adding trailing slash to SO_BIG_DATA_CONTEXT to avoid to get VO this.catalogueFactory = DataCatalogueFactory.getFactory();
this.contexts = getSoBigDataContexts(contexts, SO_BIG_DATA_CONTEXT + "/"); this.contexts = vreScopes;
logger.trace("Valid contexts are {}", contexts); initMappingMaps();
// this.excludedContexts = getExcludedContexts();
// // Adding trailing slash to SO_BIG_DATA_CONTEXT to avoid to get VO
// this.contexts = getSoBigDataContexts(contexts, SO_BIG_DATA_CONTEXT + "/");
// logger.trace("Valid contexts are {}", contexts);
} }
public SortedSet<String> getFilteredContexts() { /**
return contexts; * Inits the mapping maps.
} */
protected void initMappingMaps(){
Properties properties = AccountingDataHarvesterPlugin.getProperties().get();
Set<String> keys = properties.stringPropertyNames();
public static String SECONDARY_TYPE_FORMAT = "$resource/Profile/SecondaryType/text() eq '%1s'"; mapSystemTypeToDBEntry = new HashMap<String, String>();
public static String NAME_FORMAT = "$resource/Profile/Name/text() eq '%1s'"; for (String key : keys) {
//System.out.println(key + " : " + properties.getProperty(key));
try{
HarvestedDataKey valueEnum = HarvestedDataKey.valueOf(key);
mapSystemTypeToDBEntry.put(properties.getProperty(key), valueEnum.name());
}catch(Exception e){
//silent
}
}
public static String SECONDARY_TYPE = "ExcludingVREs"; logger.info("Built from properties the mapping 'SystemType' to 'DB entry' : "+mapSystemTypeToDBEntry);
public static String NAME = "AccountingHarvesters";
protected SimpleQuery getFilteringGenericResource() { //GET CATALOGUE'S GROUPS
return ICFactory.queryFor(GenericResource.class) List<String> groups = loadGroupsFromCKAN(catalogueContext);
.addCondition(String.format(SECONDARY_TYPE_FORMAT, SECONDARY_TYPE)) //NORMALIZING THE GROUP NAME TO MATCH WITH VRE NAME
.addCondition(String.format(NAME_FORMAT, NAME)); Map<String,String> mapNormalizedGroups = normalizeGroups(groups);
} logger.debug("Map of Normalized Groups is: "+mapNormalizedGroups);
protected GenericResource getGenericResource() { //CREATING MAPPING BETWEEN (CATALOGUE GROUP NAME TO VRE NAME)
SimpleQuery simpleQuery = getFilteringGenericResource(); mapCatalogueGroupToVRE = new HashMap<String, String>();
List<GenericResource> res = ICFactory.clientFor(GenericResource.class).submit(simpleQuery); //CREATING MAPPING BETWEEN (WS FOLDER NAME TO VRE NAME)
if(res.size()==0) { mapWsFolderNameToVRE = new HashMap<String, String>();
// At time of writing it should be an error but it can change in the future Set<String> normalizedGroups = mapNormalizedGroups.keySet();
logger.info("No {} for filtering contexts.", GenericResource.class.getSimpleName()); for (String context : contexts) {
return null; //logger.trace("Context is: " + context);
} String loweredVREName = context.substring(context.lastIndexOf("/") + 1, context.length()).toLowerCase();
return res.get(0); try {
} //logger.trace("vreName lowered is: " + loweredVREName);
if (normalizedGroups.contains(loweredVREName)) {
public SortedSet<String> getExcludedContexts() { logger.debug("Normalized Groups matching the lowered VRE name: "+loweredVREName);
SortedSet<String> excludedContexts = new TreeSet<>(); // Creating the map with couple (catalogue group name, scope)
mapCatalogueGroupToVRE.put(mapNormalizedGroups.get(loweredVREName), context);
GenericResource genericResource = getGenericResource();
if(genericResource==null) {
return excludedContexts;
}
Element body = genericResource.profile().body();
/*
* The following code parse an XML formatted as this
*
* <ResourceCatalogueHarvester>
* <vres>
* <vre>/d4science.research-infrastructures.eu/SoBigData/ResourceCatalogue</vre>
* <vre>/d4science.research-infrastructures.eu/SoBigData/TagMe</vre>
* </vres>
* </ResourceCatalogueHarvester>
*
*/
NodeList nodeList = body.getElementsByTagName(this.getClass().getSimpleName());
if(nodeList.getLength()==0) {
// At time of writing it should be an error but it can change in the future
logger.info("The body of the {} does not contains any information to filter contexts.", GenericResource.class.getSimpleName());
}
Element classNameElement = null;
for(int c=0; c<nodeList.getLength(); c++) {
Node node = nodeList.item(c);
if(node.getNodeType() == Node.ELEMENT_NODE) {
classNameElement = (Element) node;
}
}
Element vresElement = null;
NodeList vresNodeList = classNameElement.getElementsByTagName("vres");
for(int c=0; c<vresNodeList.getLength(); c++) {
Node node = vresNodeList.item(c);
if(node.getNodeType() == Node.ELEMENT_NODE) {
vresElement = (Element) node;
}
}
NodeList vreNodeList = vresElement.getElementsByTagName("vre");
for(int c=0; c<vreNodeList.getLength(); c++) {
Node node = vreNodeList.item(c);
if(node.getNodeType() == Node.ELEMENT_NODE) {
Element vreElement = (Element) node;
NodeList nl = vreElement.getChildNodes();
for(int i=0; i<nl.getLength(); i++) {
Node n = nl.item(i);
if(n.getNodeType() == Node.TEXT_NODE) {
excludedContexts.add(n.getNodeValue());
}
} }
mapWsFolderNameToVRE.put(loweredVREName, context);
}
catch (Exception e) {
// silent
} }
} }
return excludedContexts; logger.info("Map of Catalogue Groups To VRE is: "+mapCatalogueGroupToVRE);
logger.info("Map of (lowered) Ws Folder Name To VRE is: "+mapWsFolderNameToVRE);
} }
protected boolean filterContext(String context) { /**
if(excludedContexts.contains(context)) { * Normalize groups.
return true; *
* @author Francesco Mangiacrapa
* @param groups the groups
* @return the map with couples (normalized group name, group name)
*/
private Map<String,String> normalizeGroups(List<String> groups) {
Map<String,String> listNGroups = new HashMap<String,String>(groups.size());
for (String group : groups) {
String normalizedGroup = group;
if(normalizedGroup.endsWith(GROUP_LABEL)){
normalizedGroup = normalizedGroup.substring(0, normalizedGroup.length()-GROUP_LABEL.length());
}
normalizedGroup = normalizedGroup.replaceAll("-","");
listNGroups.put(normalizedGroup.toLowerCase(), group);
} }
return false; return listNGroups;
} }
protected SortedSet<String> getSoBigDataContexts(Set<String> contexts, String base) {
SortedSet<String> filteredContext = new TreeSet<>(); /**
for(String context : contexts) { * Load groups from ckan.
if(context.startsWith(SO_BIG_DATA_CONTEXT)) { *
if(!filterContext(context)) { * @param scope the scope
filteredContext.add(context); * @return the list
} */
private List<String> loadGroupsFromCKAN(String scope){
List<String> groups = new ArrayList<String>();
String ckanURL = "";
try {
DataCatalogueImpl utils = catalogueFactory.getUtilsPerScope(scope);
ckanURL = utils.getCatalogueUrl();
List<CkanGroup> theGroups = utils.getGroups();
Validate.notNull(theGroups, "The list of Groups is null");
for (CkanGroup ckanGroup : theGroups) {
groups.add(ckanGroup.getName());
} }
} }
return filteredContext; catch (Exception e) {
logger.error("Error occurred on getting CKAN groups for scope: "+scope+" and CKAN URL: "+ckanURL,e);
}
return groups;
} }
/**
* Gets the map catalogue group to vre.
*
* @return the map catalogue group to vre
*/
public HashMap<String, String> getMapCatalogueGroupToVRE() {
return mapCatalogueGroupToVRE;
}
/**
* @return the mapSystemTypeToDBEntry
*/
public HashMap<String, String> getMapSystemTypeToDBEntry() {
return mapSystemTypeToDBEntry;
}
/**
* @return the mapWsFolderNameToVRE
*/
public HashMap<String, String> getMapWsFolderNameToVRE() {
return mapWsFolderNameToVRE;
}
// /**
// * Gets the filtered contexts.
// *
// * @return the filtered contexts
// */
// public SortedSet<String> getFilteredContexts() {
// return contexts;
// }
// /**
// * Gets the filtering generic resource.
// *
// * @return the filtering generic resource
// */
// protected SimpleQuery getFilteringGenericResource() {
// return ICFactory.queryFor(GenericResource.class)
// .addCondition(String.format(SECONDARY_TYPE_FORMAT, SECONDARY_TYPE))
// .addCondition(String.format(NAME_FORMAT, NAME));
// }
// /**
// * Gets the generic resource.
// *
// * @return the generic resource
// */
// protected GenericResource getGenericResource() {
// SimpleQuery simpleQuery = getFilteringGenericResource();
// List<GenericResource> res = ICFactory.clientFor(GenericResource.class).submit(simpleQuery);
// if(res.size()==0) {
// // At time of writing it should be an error but it can change in the future
// logger.info("No {} for filtering contexts.", GenericResource.class.getSimpleName());
// return null;
// }
// return res.get(0);
// }
// /**
// * Gets the excluded contexts.
// *
// * @return the excluded contexts
// */
// public SortedSet<String> getExcludedContexts() {
// SortedSet<String> excludedContexts = new TreeSet<>();
//
// GenericResource genericResource = getGenericResource();
// if(genericResource==null) {
// return excludedContexts;
// }
//
// Element body = genericResource.profile().body();
//
// /*
// * The following code parse an XML formatted as this
// *
// * <ResourceCatalogueHarvester>
// * <vres>
// * <vre>/d4science.research-infrastructures.eu/SoBigData/ResourceCatalogue</vre>
// * <vre>/d4science.research-infrastructures.eu/SoBigData/TagMe</vre>
// * </vres>
// * </ResourceCatalogueHarvester>
// *
// */
//
// NodeList nodeList = body.getElementsByTagName(this.getClass().getSimpleName());
// if(nodeList.getLength()==0) {
// // At time of writing it should be an error but it can change in the future
// logger.info("The body of the {} does not contains any information to filter contexts.", GenericResource.class.getSimpleName());
// }
//
// Element classNameElement = null;
// for(int c=0; c<nodeList.getLength(); c++) {
// Node node = nodeList.item(c);
// if(node.getNodeType() == Node.ELEMENT_NODE) {
// classNameElement = (Element) node;
// }
// }
//
// Element vresElement = null;
// NodeList vresNodeList = classNameElement.getElementsByTagName("vres");
// for(int c=0; c<vresNodeList.getLength(); c++) {
// Node node = vresNodeList.item(c);
// if(node.getNodeType() == Node.ELEMENT_NODE) {
// vresElement = (Element) node;
// }
// }
//
// NodeList vreNodeList = vresElement.getElementsByTagName("vre");
// for(int c=0; c<vreNodeList.getLength(); c++) {
// Node node = vreNodeList.item(c);
// if(node.getNodeType() == Node.ELEMENT_NODE) {
// Element vreElement = (Element) node;
// NodeList nl = vreElement.getChildNodes();
// for(int i=0; i<nl.getLength(); i++) {
// Node n = nl.item(i);
// if(n.getNodeType() == Node.TEXT_NODE) {
// excludedContexts.add(n.getNodeValue());
// }
// }
//
// }
// }
//
// return excludedContexts;
//
// }
//
// /**
// * Filter context.
// *
// * @param context the context
// * @return true if the given context is one of the excluded contexts, false otherwise
// */
// protected boolean filterContext(String context) {
// if(excludedContexts.contains(context)) {
// return true;
// }
// return false;
// }
// /**
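// * Gets the SoBigData contexts: those starting with SO_BIG_DATA_CONTEXT that are not excluded.
// *
// * @param contexts the candidate contexts
// * @param base the base (not used by the implementation)
// * @return the sorted set of SoBigData contexts that passed the filter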
// */
// protected SortedSet<String> getSoBigDataContexts(Set<String> contexts, String base) {
// SortedSet<String> filteredContext = new TreeSet<>();
// for(String context : contexts) {
// if(context.startsWith(SO_BIG_DATA_CONTEXT)) {
// if(!filterContext(context)) {
// filteredContext.add(context);
// }
// }
// }
// return filteredContext;
// }
} }

View File

@ -12,6 +12,7 @@ import java.util.TreeSet;
import org.gcube.common.scope.impl.ScopeBean; import org.gcube.common.scope.impl.ScopeBean;
import org.gcube.dataharvest.datamodel.HarvestedData; import org.gcube.dataharvest.datamodel.HarvestedData;
import org.gcube.dataharvest.harvester.MethodInvocationHarvester; import org.gcube.dataharvest.harvester.MethodInvocationHarvester;
import org.gcube.dataharvest.harvester.sobigdata.DataMethodDownloadHarvester;
import org.gcube.dataharvest.harvester.sobigdata.ResourceCatalogueHarvester; import org.gcube.dataharvest.harvester.sobigdata.ResourceCatalogueHarvester;
import org.gcube.dataharvest.harvester.sobigdata.TagMeMethodInvocationHarvester; import org.gcube.dataharvest.harvester.sobigdata.TagMeMethodInvocationHarvester;
import org.gcube.dataharvest.utils.ContextTest; import org.gcube.dataharvest.utils.ContextTest;
@ -127,11 +128,11 @@ public class AccountingDataHarvesterPluginTest extends ContextTest {
//end //end
ResourceCatalogueHarvester resourceCatalogueHarvester = new ResourceCatalogueHarvester(start, end, catalogueContext, contexts); ResourceCatalogueHarvester resourceCatalogueHarvester = new ResourceCatalogueHarvester(start, end, catalogueContext, contexts);
SortedSet<String> excludedContexts = resourceCatalogueHarvester.getExcludedContexts(); // SortedSet<String> excludedContexts = resourceCatalogueHarvester.getExcludedContexts();
logger.info("Excluded contexts {}", excludedContexts); // logger.info("Excluded contexts {}", excludedContexts);
SortedSet<String> validContexts = resourceCatalogueHarvester.getFilteredContexts(); // SortedSet<String> validContexts = resourceCatalogueHarvester.getFilteredContexts();
//
logger.info("Valid Contexts {}", validContexts); // logger.info("Valid Contexts {}", validContexts);
} catch(Exception e) { } catch(Exception e) {
logger.error("", e); logger.error("", e);
@ -139,47 +140,18 @@ public class AccountingDataHarvesterPluginTest extends ContextTest {
} }
@Test //@Test
public void testResourceCatalogueHarvester() { public void testResourceCatalogueHarvester() {
try { try {
// Properties properties = AccountingDataHarvesterPlugin.getProperties().get();
// //Enumeration<String> enums = (Enumeration<String>) properties.propertyNames();
// //System.out.println("enums: " +enums.hasMoreElements());
// Set<String> keys = properties.stringPropertyNames();
//
// Map<String, String> typeToDB = new HashMap<String, String>();
// for (String key : keys) {
// System.out.println(key + " : " + properties.getProperty(key));
//
// try{
// HarvestedDataKey valueEnum = HarvestedDataKey.valueOf(key);
// typeToDB.put(properties.getProperty(key), valueEnum.name());
// }catch(Exception e){
// //silent
// }
// }
//
// System.out.println(typeToDB);
org.gcube.dataharvest.utils.Utils.setContext(RESOURCE_CATALOGUE); org.gcube.dataharvest.utils.Utils.setContext(RESOURCE_CATALOGUE);
MeasureType measureType = MeasureType.MONTHLY; MeasureType measureType = MeasureType.MONTHLY;
// Date start = DateUtils.getPreviousPeriod(measureType).getTime(); // Date start = DateUtils.getPreviousPeriod(measureType).getTime();
// Date end = DateUtils.getEndDateFromStartDate(measureType, start, 1); // Date end = DateUtils.getEndDateFromStartDate(measureType, start, 1);
// Date start = DateUtils.getStartCalendar(2016, 12, 01).getTime();
//Date start = DateUtils.getPreviousPeriod(measureType).getTime(); Date start = DateUtils.getPreviousPeriod(measureType).getTime();
Date start = DateUtils.getStartCalendar(2016, 12, 01).getTime(); Date end = DateUtils.getEndDateFromStartDate(measureType, start, 1);
Date end = DateUtils.getEndDateFromStartDate(measureType, start, 18);
LinkedHashMap<String,ScopeBean> map = ContextManager.readContexts();
SortedSet<String> contexts = new TreeSet<>(map.keySet());
for (String context : contexts) {
System.out.println("context: "+context);
}
AccountingDataHarvesterPlugin accountingDataHarvesterPlugin = new AccountingDataHarvesterPlugin(null); AccountingDataHarvesterPlugin accountingDataHarvesterPlugin = new AccountingDataHarvesterPlugin(null);
accountingDataHarvesterPlugin.getConfigParameters(); accountingDataHarvesterPlugin.getConfigParameters();
@ -190,7 +162,8 @@ public class AccountingDataHarvesterPluginTest extends ContextTest {
logger.debug("Read from properties "+AccountingDataHarvesterPlugin.RESOURCE_CATALOGUE_CONTEXT+" value: "+catalogueContext); logger.debug("Read from properties "+AccountingDataHarvesterPlugin.RESOURCE_CATALOGUE_CONTEXT+" value: "+catalogueContext);
//end //end
contexts = new TreeSet<String>(); //TODO @LUCA FROSINI MUST PASS SoBigData VREs
TreeSet<String> contexts = new TreeSet<String>();
contexts.add("/d4science.research-infrastructures.eu/SoBigData/ResourceCatalogue"); contexts.add("/d4science.research-infrastructures.eu/SoBigData/ResourceCatalogue");
contexts.add("/d4science.research-infrastructures.eu/SoBigData/TagMe"); contexts.add("/d4science.research-infrastructures.eu/SoBigData/TagMe");
contexts.add("/d4science.research-infrastructures.eu/SoBigData/WellBeingAndEconomy"); contexts.add("/d4science.research-infrastructures.eu/SoBigData/WellBeingAndEconomy");
@ -198,6 +171,7 @@ public class AccountingDataHarvesterPluginTest extends ContextTest {
contexts.add("/d4science.research-infrastructures.eu/SoBigData/SocietalDebates"); contexts.add("/d4science.research-infrastructures.eu/SoBigData/SocietalDebates");
contexts.add("/d4science.research-infrastructures.eu/SoBigData/SportsDataScience"); contexts.add("/d4science.research-infrastructures.eu/SoBigData/SportsDataScience");
contexts.add("/d4science.research-infrastructures.eu/SoBigData/SMAPH"); contexts.add("/d4science.research-infrastructures.eu/SoBigData/SMAPH");
ResourceCatalogueHarvester resourceCatalogueHarvester = new ResourceCatalogueHarvester(start, end, catalogueContext, contexts); ResourceCatalogueHarvester resourceCatalogueHarvester = new ResourceCatalogueHarvester(start, end, catalogueContext, contexts);
List<HarvestedData> data = resourceCatalogueHarvester.getData(); List<HarvestedData> data = resourceCatalogueHarvester.getData();
@ -209,4 +183,49 @@ public class AccountingDataHarvesterPluginTest extends ContextTest {
logger.error("", e); logger.error("", e);
} }
} }
/**
 * Exercises {@link DataMethodDownloadHarvester#getData()} against a fixed list of
 * SoBigData VRE contexts.
 *
 * The time window starts at the calendar returned by
 * {@code DateUtils.getStartCalendar(2016, 12, 1)} and its end is derived through
 * {@code DateUtils.getEndDateFromStartDate(MONTHLY, start, 18)}.
 *
 * Like the other tests of this class, any exception is only logged, so this test
 * never fails by itself: inspect the log output to check the harvested data.
 */
@Test
public void testDataMethodDownloadHarvester() {
	try {
		org.gcube.dataharvest.utils.Utils.setContext(RESOURCE_CATALOGUE);

		MeasureType measureType = MeasureType.MONTHLY;
		// Plain decimal day-of-month: the former literal "01" was an octal literal with the same value.
		Date start = DateUtils.getStartCalendar(2016, 12, 1).getTime();
		Date end = DateUtils.getEndDateFromStartDate(measureType, start, 18);

		// getConfigParameters() is invoked before the properties are read below
		// (presumably it loads them -- TODO confirm).
		AccountingDataHarvesterPlugin accountingDataHarvesterPlugin = new AccountingDataHarvesterPlugin(null);
		accountingDataHarvesterPlugin.getConfigParameters();

		// Added by Francesco: the catalogue context is read from the plugin properties
		Properties properties = AccountingDataHarvesterPlugin.getProperties().get();
		String catalogueContext = (String) properties.get(AccountingDataHarvesterPlugin.RESOURCE_CATALOGUE_CONTEXT);
		logger.debug("Read from properties {} value: {}", AccountingDataHarvesterPlugin.RESOURCE_CATALOGUE_CONTEXT, catalogueContext);

		//TODO @LUCA FROSINI MUST PASS SoBigData VREs
		TreeSet<String> contexts = new TreeSet<>();
		contexts.add("/d4science.research-infrastructures.eu/SoBigData/ResourceCatalogue");
		contexts.add("/d4science.research-infrastructures.eu/SoBigData/TagMe");
		contexts.add("/d4science.research-infrastructures.eu/SoBigData/WellBeingAndEconomy");
		contexts.add("/d4science.research-infrastructures.eu/SoBigData/CityOfCitizens");
		contexts.add("/d4science.research-infrastructures.eu/SoBigData/SocietalDebates");
		contexts.add("/d4science.research-infrastructures.eu/SoBigData/SportsDataScience");
		contexts.add("/d4science.research-infrastructures.eu/SoBigData/SMAPH");

		DataMethodDownloadHarvester dataMethodDownloadHarvester = new DataMethodDownloadHarvester(start, end, catalogueContext, contexts);
		List<HarvestedData> data = dataMethodDownloadHarvester.getData();

		// The result used to be discarded; log it so the outcome of the run can be inspected.
		logger.debug("Harvested data: {}", data);

	} catch(Exception e) {
		logger.error("", e);
	}
}
} }