2018-05-21 14:05:15 +02:00
|
|
|
package org.gcube.dataharvest.harvester.sobigdata;
|
|
|
|
|
|
|
|
import java.text.ParseException;
|
|
|
|
import java.util.Date;
|
2018-05-21 14:49:01 +02:00
|
|
|
import java.util.List;
|
|
|
|
import java.util.Set;
|
2018-05-21 14:15:09 +02:00
|
|
|
import java.util.SortedSet;
|
2018-05-21 14:49:01 +02:00
|
|
|
import java.util.TreeSet;
|
2018-05-21 14:05:15 +02:00
|
|
|
|
2018-05-21 14:49:01 +02:00
|
|
|
import org.gcube.common.resources.gcore.GenericResource;
|
2018-05-21 14:05:15 +02:00
|
|
|
import org.gcube.dataharvest.harvester.BasicHarvester;
|
2018-05-21 14:49:01 +02:00
|
|
|
import org.gcube.resources.discovery.client.queries.api.SimpleQuery;
|
|
|
|
import org.gcube.resources.discovery.icclient.ICFactory;
|
2018-05-21 15:37:57 +02:00
|
|
|
import org.slf4j.Logger;
|
|
|
|
import org.slf4j.LoggerFactory;
|
|
|
|
import org.w3c.dom.Element;
|
|
|
|
import org.w3c.dom.Node;
|
|
|
|
import org.w3c.dom.NodeList;
|
2018-05-21 14:05:15 +02:00
|
|
|
|
|
|
|
public abstract class SoBigDataHarvester extends BasicHarvester {
|
2018-05-21 15:37:57 +02:00
|
|
|
|
|
|
|
private static Logger logger = LoggerFactory.getLogger(SoBigDataHarvester.class);
|
|
|
|
|
2018-05-21 14:49:01 +02:00
|
|
|
public static final String SO_BIG_DATA_CONTEXT = "/d4science.research-infrastructures.eu/SoBigData";
|
|
|
|
|
2018-05-21 15:37:57 +02:00
|
|
|
protected SortedSet<String> excludedContexts;
|
2018-05-21 14:15:09 +02:00
|
|
|
protected SortedSet<String> contexts;
|
2018-05-21 14:06:27 +02:00
|
|
|
|
2018-05-21 15:52:19 +02:00
|
|
|
public SoBigDataHarvester(Date start, Date end, SortedSet<String> contexts) throws ParseException {
|
2018-05-21 14:05:15 +02:00
|
|
|
super(start, end);
|
2018-05-21 15:37:57 +02:00
|
|
|
this.excludedContexts = getExcludedContexts();
|
2018-05-21 14:49:01 +02:00
|
|
|
// Adding trailing slash to SO_BIG_DATA_CONTEXT to avoid to get VO
|
|
|
|
this.contexts = getSoBigDataContexts(contexts, SO_BIG_DATA_CONTEXT + "/");
|
2018-05-21 15:37:57 +02:00
|
|
|
logger.trace("Valid contexts are {}", contexts);
|
2018-05-21 14:49:01 +02:00
|
|
|
}
|
|
|
|
|
2018-05-21 15:52:19 +02:00
|
|
|
public SortedSet<String> getFilteredContexts() {
|
|
|
|
return contexts;
|
|
|
|
}
|
|
|
|
|
2018-05-21 14:49:01 +02:00
|
|
|
public static String SECONDARY_TYPE_FORMAT = "$resource/Profile/SecondaryType/text() eq '%1s'";
|
|
|
|
public static String NAME_FORMAT = "$resource/Profile/Name/text() eq '%1s'";
|
|
|
|
|
|
|
|
public static String SECONDARY_TYPE = "ExcludingVREs";
|
|
|
|
public static String NAME = "AccountingHarvesters";
|
|
|
|
|
|
|
|
protected SimpleQuery getFilteringGenericResource() {
|
|
|
|
return ICFactory.queryFor(GenericResource.class)
|
|
|
|
.addCondition(String.format(SECONDARY_TYPE_FORMAT, SECONDARY_TYPE))
|
|
|
|
.addCondition(String.format(NAME_FORMAT, NAME));
|
|
|
|
}
|
|
|
|
|
2018-05-21 15:37:57 +02:00
|
|
|
protected GenericResource getGenericResource() {
|
2018-05-21 14:49:01 +02:00
|
|
|
SimpleQuery simpleQuery = getFilteringGenericResource();
|
2018-05-21 15:37:57 +02:00
|
|
|
List<GenericResource> res = ICFactory.clientFor(GenericResource.class).submit(simpleQuery);
|
|
|
|
if(res.size()==0) {
|
|
|
|
// At time of writing it should be an error but it can change in the future
|
|
|
|
logger.info("No {} for filtering contexts.", GenericResource.class.getSimpleName());
|
|
|
|
return null;
|
|
|
|
}
|
|
|
|
return res.get(0);
|
|
|
|
}
|
|
|
|
|
2018-05-21 15:52:19 +02:00
|
|
|
protected SortedSet<String> getExcludedContexts() {
|
2018-05-21 15:37:57 +02:00
|
|
|
SortedSet<String> excludedContexts = new TreeSet<>();
|
|
|
|
|
|
|
|
GenericResource genericResource = getGenericResource();
|
|
|
|
if(genericResource==null) {
|
|
|
|
return excludedContexts;
|
|
|
|
}
|
|
|
|
|
|
|
|
Element body = genericResource.profile().body();
|
|
|
|
|
|
|
|
/*
|
|
|
|
* The following code parse an XML formatted as this
|
|
|
|
*
|
|
|
|
* <ResourceCatalogueHarvester>
|
|
|
|
* <vres>
|
|
|
|
* <vre>/d4science.research-infrastructures.eu/SoBigData/ResourceCatalogue</vre>
|
|
|
|
* <vre>/d4science.research-infrastructures.eu/SoBigData/TagMe</vre>
|
|
|
|
* </vres>
|
|
|
|
* </ResourceCatalogueHarvester>
|
|
|
|
*
|
|
|
|
*/
|
|
|
|
|
|
|
|
NodeList nodeList = body.getElementsByTagName(this.getClass().getSimpleName());
|
|
|
|
if(nodeList.getLength()==0) {
|
|
|
|
// At time of writing it should be an error but it can change in the future
|
|
|
|
logger.info("The body of the {} does not contains any information to filter contexts.", GenericResource.class.getSimpleName());
|
|
|
|
}
|
|
|
|
|
|
|
|
Node node = nodeList.item(0).getChildNodes().item(1);
|
|
|
|
NodeList contexts = node.getChildNodes();
|
|
|
|
|
|
|
|
for(int i=1; i<contexts.getLength()-1; i++){
|
|
|
|
Node context = contexts.item(i);
|
2018-05-21 15:56:52 +02:00
|
|
|
NodeList nList = context.getChildNodes();
|
|
|
|
if(nList.item(0)!=null) {
|
|
|
|
String contextToExclude = nList.item(0).getNodeValue();
|
|
|
|
excludedContexts.add(contextToExclude);
|
|
|
|
}
|
2018-05-21 15:37:57 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
return excludedContexts;
|
2018-05-21 14:49:01 +02:00
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
protected boolean filterContext(String context) {
|
2018-05-21 15:37:57 +02:00
|
|
|
if(excludedContexts.contains(context)) {
|
|
|
|
return true;
|
|
|
|
}
|
2018-05-21 14:49:01 +02:00
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2018-05-21 15:37:57 +02:00
|
|
|
protected SortedSet<String> getSoBigDataContexts(Set<String> contexts, String base) {
|
2018-05-21 14:49:01 +02:00
|
|
|
SortedSet<String> filteredContext = new TreeSet<>();
|
|
|
|
for(String context : contexts) {
|
2018-05-21 15:37:57 +02:00
|
|
|
if(context.startsWith(SO_BIG_DATA_CONTEXT)) {
|
2018-05-21 14:49:01 +02:00
|
|
|
if(!filterContext(context)) {
|
|
|
|
filteredContext.add(context);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return filteredContext;
|
2018-05-21 14:06:27 +02:00
|
|
|
}
|
|
|
|
|
2018-05-21 14:05:15 +02:00
|
|
|
}
|