2018-05-21 14:05:15 +02:00
|
|
|
package org.gcube.dataharvest.harvester.sobigdata;
|
|
|
|
|
|
|
|
import java.text.ParseException;
|
2018-05-24 17:10:45 +02:00
|
|
|
import java.util.ArrayList;
|
2018-05-21 14:05:15 +02:00
|
|
|
import java.util.Date;
|
2018-05-24 17:10:45 +02:00
|
|
|
import java.util.HashMap;
|
2019-09-18 16:03:40 +02:00
|
|
|
import java.util.Iterator;
|
2018-05-21 14:49:01 +02:00
|
|
|
import java.util.List;
|
2018-05-24 17:10:45 +02:00
|
|
|
import java.util.Map;
|
|
|
|
import java.util.Properties;
|
2018-05-21 14:49:01 +02:00
|
|
|
import java.util.Set;
|
2018-05-21 14:15:09 +02:00
|
|
|
import java.util.SortedSet;
|
2018-05-28 12:00:15 +02:00
|
|
|
import java.util.TreeSet;
|
2018-05-21 14:05:15 +02:00
|
|
|
|
2018-05-28 12:00:15 +02:00
|
|
|
import org.gcube.common.authorization.client.exceptions.ObjectNotFound;
|
2018-05-24 17:10:45 +02:00
|
|
|
import org.gcube.dataharvest.AccountingDataHarvesterPlugin;
|
|
|
|
import org.gcube.dataharvest.datamodel.HarvestedDataKey;
|
2018-05-21 14:05:15 +02:00
|
|
|
import org.gcube.dataharvest.harvester.BasicHarvester;
|
2018-05-28 12:00:15 +02:00
|
|
|
import org.gcube.dataharvest.utils.Utils;
|
2019-09-18 16:03:40 +02:00
|
|
|
import org.gcube.gcat.client.Group;
|
2018-05-21 15:37:57 +02:00
|
|
|
import org.slf4j.Logger;
|
|
|
|
import org.slf4j.LoggerFactory;
|
2018-05-21 14:05:15 +02:00
|
|
|
|
2019-09-18 16:03:40 +02:00
|
|
|
import com.fasterxml.jackson.databind.JsonNode;
|
|
|
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
2018-05-24 17:10:45 +02:00
|
|
|
|
|
|
|
/**
|
|
|
|
* The Class SoBigDataHarvester.
|
|
|
|
*
|
|
|
|
* @author Luca Frosini (ISTI-CNR)
|
|
|
|
* @author Francesco Mangiacrapa at ISTI-CNR (francesco.mangiacrapa@isti.cnr.it)
|
|
|
|
* May 24, 2018
|
|
|
|
*/
|
2018-05-21 14:05:15 +02:00
|
|
|
public abstract class SoBigDataHarvester extends BasicHarvester {
|
2018-05-28 12:00:15 +02:00
|
|
|
|
2018-05-21 15:37:57 +02:00
|
|
|
private static Logger logger = LoggerFactory.getLogger(SoBigDataHarvester.class);
|
2018-05-28 12:00:15 +02:00
|
|
|
|
2018-05-24 17:10:45 +02:00
|
|
|
//Added by Francesco
|
|
|
|
private static final String GROUP_LABEL = "group";
|
2018-05-28 12:00:15 +02:00
|
|
|
|
2018-05-24 17:10:45 +02:00
|
|
|
//Added by Francesco
|
2018-05-28 12:00:15 +02:00
|
|
|
protected HashMap<String,String> mapSystemTypeToDBEntry;
|
|
|
|
|
2018-05-24 17:10:45 +02:00
|
|
|
//Added by Francesco
|
2018-05-28 12:00:15 +02:00
|
|
|
protected HashMap<String,String> mapCatalogueGroupToVRE;
|
|
|
|
|
2018-05-24 17:10:45 +02:00
|
|
|
//Added by Francesco
|
2018-05-28 12:00:15 +02:00
|
|
|
protected HashMap<String,String> mapWsFolderNameToVRE;
|
|
|
|
|
2018-05-21 14:15:09 +02:00
|
|
|
protected SortedSet<String> contexts;
|
2018-05-28 12:00:15 +02:00
|
|
|
|
2018-05-24 17:10:45 +02:00
|
|
|
/**
|
|
|
|
* Instantiates a new so big data harvester.
|
|
|
|
*
|
|
|
|
* @param start the start
|
|
|
|
* @param end the end
|
|
|
|
* @param catalogueContext the catalogue context
|
|
|
|
* @param vreScopes the contexts
|
|
|
|
* @throws ParseException the parse exception
|
|
|
|
*/
|
2018-05-28 12:00:15 +02:00
|
|
|
public SoBigDataHarvester(Date start, Date end, SortedSet<String> contexts) throws Exception {
|
2018-05-21 14:05:15 +02:00
|
|
|
super(start, end);
|
2018-05-28 12:00:15 +02:00
|
|
|
|
|
|
|
String currentContext = Utils.getCurrentContext();
|
|
|
|
|
2018-05-28 15:22:59 +02:00
|
|
|
// Truncating the context to the last / (the last / is retained for filtering issues)
|
2018-05-28 12:00:15 +02:00
|
|
|
String baseContext = currentContext.substring(0, currentContext.lastIndexOf("/")+1);
|
|
|
|
|
|
|
|
this.contexts = getValidContexts(contexts, baseContext);
|
2018-05-28 15:22:59 +02:00
|
|
|
logger.trace("Valid contexts are {}", this.contexts);
|
2018-05-28 12:00:15 +02:00
|
|
|
|
2018-05-24 17:10:45 +02:00
|
|
|
initMappingMaps();
|
2018-05-28 12:00:15 +02:00
|
|
|
|
|
|
|
|
2018-05-21 14:49:01 +02:00
|
|
|
}
|
2018-05-28 12:00:15 +02:00
|
|
|
|
2018-05-24 17:10:45 +02:00
|
|
|
/**
|
|
|
|
* Inits the mapping maps.
|
2018-05-28 12:00:15 +02:00
|
|
|
* @throws Exception
|
|
|
|
* @throws ObjectNotFound
|
2018-05-24 17:10:45 +02:00
|
|
|
*/
|
2018-05-28 12:00:15 +02:00
|
|
|
protected void initMappingMaps() throws ObjectNotFound, Exception {
|
2018-05-24 17:10:45 +02:00
|
|
|
Properties properties = AccountingDataHarvesterPlugin.getProperties().get();
|
2018-05-28 12:00:15 +02:00
|
|
|
Set<String> keys = properties.stringPropertyNames();
|
|
|
|
|
|
|
|
mapSystemTypeToDBEntry = new HashMap<String,String>();
|
|
|
|
for(String key : keys) {
|
|
|
|
try {
|
|
|
|
HarvestedDataKey valueEnum = HarvestedDataKey.valueOf(key);
|
|
|
|
mapSystemTypeToDBEntry.put(properties.getProperty(key), valueEnum.name());
|
|
|
|
} catch(Exception e) {
|
|
|
|
//silent
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-05-28 15:22:59 +02:00
|
|
|
logger.info("Built from properties the mapping 'SystemType' to 'DB entry' {}", mapSystemTypeToDBEntry);
|
2018-05-28 12:00:15 +02:00
|
|
|
|
2018-05-24 17:10:45 +02:00
|
|
|
//GET CATALOGUE'S GROUPS
|
2019-09-18 16:03:40 +02:00
|
|
|
List<String> groups = listGroup();
|
2018-05-24 17:10:45 +02:00
|
|
|
//NORMALIZING THE GROUP NAME TO MATCH WITH VRE NAME
|
|
|
|
Map<String,String> mapNormalizedGroups = normalizeGroups(groups);
|
2018-05-28 15:22:59 +02:00
|
|
|
logger.debug("Map of Normalized Groups is {} ", mapNormalizedGroups);
|
2018-05-28 12:00:15 +02:00
|
|
|
|
2018-05-24 17:10:45 +02:00
|
|
|
//CREATING MAPPING BETWEEN (CATALOGUE GROUP NAME TO VRE NAME)
|
2018-05-28 12:00:15 +02:00
|
|
|
mapCatalogueGroupToVRE = new HashMap<String,String>();
|
2018-05-24 17:10:45 +02:00
|
|
|
//CREATING MAPPING BETWEEN (WS FOLDER NAME TO VRE NAME)
|
2018-05-28 12:00:15 +02:00
|
|
|
mapWsFolderNameToVRE = new HashMap<String,String>();
|
2018-05-24 17:10:45 +02:00
|
|
|
Set<String> normalizedGroups = mapNormalizedGroups.keySet();
|
2018-05-28 12:00:15 +02:00
|
|
|
for(String context : contexts) {
|
2018-05-24 17:10:45 +02:00
|
|
|
String loweredVREName = context.substring(context.lastIndexOf("/") + 1, context.length()).toLowerCase();
|
|
|
|
try {
|
2018-05-28 12:00:15 +02:00
|
|
|
if(normalizedGroups.contains(loweredVREName)) {
|
2018-05-28 15:22:59 +02:00
|
|
|
logger.debug("Normalized Groups matching the lowered VRE name {}", loweredVREName);
|
2018-05-24 17:10:45 +02:00
|
|
|
// Creating the map with couple (catalogue group name, scope)
|
|
|
|
mapCatalogueGroupToVRE.put(mapNormalizedGroups.get(loweredVREName), context);
|
|
|
|
}
|
2018-05-28 12:00:15 +02:00
|
|
|
|
2018-05-24 17:10:45 +02:00
|
|
|
mapWsFolderNameToVRE.put(loweredVREName, context);
|
2018-05-28 12:00:15 +02:00
|
|
|
} catch(Exception e) {
|
2018-05-24 17:10:45 +02:00
|
|
|
// silent
|
|
|
|
}
|
2018-05-21 15:37:57 +02:00
|
|
|
}
|
2018-05-28 12:00:15 +02:00
|
|
|
|
2018-05-28 15:22:59 +02:00
|
|
|
logger.info("Map of Catalogue Groups To VRE is {} ", mapCatalogueGroupToVRE);
|
|
|
|
logger.info("Map of (lowered) Ws Folder Name To VRE is {}", mapWsFolderNameToVRE);
|
2018-05-28 12:00:15 +02:00
|
|
|
|
2018-05-21 15:37:57 +02:00
|
|
|
}
|
2018-05-28 12:00:15 +02:00
|
|
|
|
2018-05-24 17:10:45 +02:00
|
|
|
/**
|
|
|
|
* Normalize groups.
|
|
|
|
*
|
|
|
|
* @author Francesco Mangiacrapa
|
|
|
|
* @param groups the groups
|
|
|
|
* @return the map with couples (normalized group name, group name)
|
|
|
|
*/
|
|
|
|
private Map<String,String> normalizeGroups(List<String> groups) {
|
|
|
|
Map<String,String> listNGroups = new HashMap<String,String>(groups.size());
|
2018-05-28 12:00:15 +02:00
|
|
|
for(String group : groups) {
|
2018-05-24 17:10:45 +02:00
|
|
|
String normalizedGroup = group;
|
2018-05-28 12:00:15 +02:00
|
|
|
if(normalizedGroup.endsWith(GROUP_LABEL)) {
|
|
|
|
normalizedGroup = normalizedGroup.substring(0, normalizedGroup.length() - GROUP_LABEL.length());
|
2018-05-21 16:23:14 +02:00
|
|
|
}
|
2018-05-28 12:00:15 +02:00
|
|
|
normalizedGroup = normalizedGroup.replaceAll("-", "");
|
2018-05-24 17:10:45 +02:00
|
|
|
listNGroups.put(normalizedGroup.toLowerCase(), group);
|
2018-05-21 16:23:14 +02:00
|
|
|
}
|
2018-05-24 17:10:45 +02:00
|
|
|
return listNGroups;
|
|
|
|
}
|
2018-05-28 12:00:15 +02:00
|
|
|
|
2019-09-18 16:03:40 +02:00
|
|
|
public static List<String> listGroup() throws Exception {
|
|
|
|
List<String> groupList = new ArrayList<>();
|
|
|
|
Group group = new Group();
|
|
|
|
String groups = group.list(1000, 0);
|
|
|
|
ObjectMapper objectMapper = new ObjectMapper();
|
|
|
|
JsonNode jsonNodeGroups = objectMapper.readTree(groups);
|
|
|
|
Iterator<JsonNode> iterator = jsonNodeGroups.elements();
|
|
|
|
while(iterator.hasNext()){
|
|
|
|
JsonNode jsonNode = iterator.next();
|
|
|
|
groupList.add(jsonNode.asText());
|
2018-05-21 16:23:14 +02:00
|
|
|
}
|
2019-09-18 16:03:40 +02:00
|
|
|
return groupList;
|
2018-05-21 14:49:01 +02:00
|
|
|
}
|
2018-05-28 12:00:15 +02:00
|
|
|
|
2018-05-24 17:10:45 +02:00
|
|
|
/**
|
|
|
|
* Gets the map catalogue group to vre.
|
|
|
|
*
|
|
|
|
* @return the map catalogue group to vre
|
|
|
|
*/
|
2018-05-28 12:00:15 +02:00
|
|
|
public HashMap<String,String> getMapCatalogueGroupToVRE() {
|
2018-05-24 17:10:45 +02:00
|
|
|
return mapCatalogueGroupToVRE;
|
2018-05-21 14:49:01 +02:00
|
|
|
}
|
2018-05-28 12:00:15 +02:00
|
|
|
|
2018-05-24 17:10:45 +02:00
|
|
|
/**
|
|
|
|
* @return the mapSystemTypeToDBEntry
|
|
|
|
*/
|
2018-05-28 12:00:15 +02:00
|
|
|
public HashMap<String,String> getMapSystemTypeToDBEntry() {
|
2018-05-24 17:10:45 +02:00
|
|
|
return mapSystemTypeToDBEntry;
|
|
|
|
}
|
2018-05-28 12:00:15 +02:00
|
|
|
|
2018-05-24 17:10:45 +02:00
|
|
|
/**
|
|
|
|
* @return the mapWsFolderNameToVRE
|
|
|
|
*/
|
2018-05-28 12:00:15 +02:00
|
|
|
public HashMap<String,String> getMapWsFolderNameToVRE() {
|
2018-05-24 17:10:45 +02:00
|
|
|
return mapWsFolderNameToVRE;
|
2018-05-21 14:06:27 +02:00
|
|
|
}
|
2018-05-28 12:00:15 +02:00
|
|
|
|
|
|
|
/**
|
|
|
|
* Gets the so big data contexts.
|
|
|
|
*
|
|
|
|
* @param contexts the contexts
|
|
|
|
* @param base the base
|
|
|
|
* @return the so big data contexts
|
|
|
|
*/
|
|
|
|
public SortedSet<String> getValidContexts(Set<String> contexts, String base) {
|
|
|
|
SortedSet<String> filteredContext = new TreeSet<>();
|
|
|
|
for(String context : contexts) {
|
2018-05-28 15:22:59 +02:00
|
|
|
if(context.startsWith(base)) {
|
2018-05-28 12:00:15 +02:00
|
|
|
filteredContext.add(context);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return filteredContext;
|
|
|
|
}
|
|
|
|
|
2018-05-21 14:05:15 +02:00
|
|
|
}
|