accounting-dashboard-harves.../src/main/java/org/gcube/dataharvest/harvester/sobigdata/SoBigDataHarvester.java

149 lines
4.7 KiB
Java

package org.gcube.dataharvest.harvester.sobigdata;
import java.text.ParseException;
import java.util.Date;
import java.util.List;
import java.util.Set;
import java.util.SortedSet;
import java.util.TreeSet;
import org.gcube.common.resources.gcore.GenericResource;
import org.gcube.dataharvest.harvester.BasicHarvester;
import org.gcube.resources.discovery.client.queries.api.SimpleQuery;
import org.gcube.resources.discovery.icclient.ICFactory;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
/**
 * Base class for harvesters that operate only on the contexts below
 * {@link #SO_BIG_DATA_CONTEXT}.
 *
 * <p>At construction time the candidate contexts are reduced to those that
 * start with the SoBigData prefix and that are not listed as excluded in a
 * {@link GenericResource} looked up through the information system (see
 * {@link #getExcludedContexts()}).</p>
 */
public abstract class SoBigDataHarvester extends BasicHarvester {

    private static final Logger logger = LoggerFactory.getLogger(SoBigDataHarvester.class);

    /** Context of the SoBigData VO; the accepted contexts are the ones below it. */
    public static final String SO_BIG_DATA_CONTEXT = "/d4science.research-infrastructures.eu/SoBigData";

    /** Contexts that must be skipped, as read by {@link #getExcludedContexts()}. */
    protected SortedSet<String> excludedContexts;

    /** Contexts that survived the filtering performed in the constructor. */
    protected SortedSet<String> contexts;

    /**
     * Creates the harvester and computes the filtered set of contexts.
     *
     * @param start    start date, forwarded to the superclass constructor
     * @param end      end date, forwarded to the superclass constructor
     * @param contexts candidate contexts to be filtered
     * @throws ParseException declared by this constructor; nothing in this class throws it directly
     */
    public SoBigDataHarvester(Date start, Date end, SortedSet<String> contexts) throws ParseException {
        super(start, end);

        // Must be assigned before getSoBigDataContexts(...) because
        // filterContext(...) reads this field.
        this.excludedContexts = getExcludedContexts();

        // Adding trailing slash to SO_BIG_DATA_CONTEXT to avoid to get VO
        this.contexts = getSoBigDataContexts(contexts, SO_BIG_DATA_CONTEXT + "/");

        // Log the filtered result, not the raw constructor argument.
        logger.trace("Valid contexts are {}", this.contexts);
    }

    /**
     * @return the contexts accepted by the filtering done in the constructor
     */
    public SortedSet<String> getFilteredContexts() {
        return contexts;
    }

    // NOTE: the following fields are intentionally left non-final so that
    // existing code assigning them keeps compiling.

    /** Query condition template matching the SecondaryType of a resource profile. */
    public static String SECONDARY_TYPE_FORMAT = "$resource/Profile/SecondaryType/text() eq '%1s'";

    /** Query condition template matching the Name of a resource profile. */
    public static String NAME_FORMAT = "$resource/Profile/Name/text() eq '%1s'";

    /** SecondaryType of the generic resource holding the excluded VREs. */
    public static String SECONDARY_TYPE = "ExcludingVREs";

    /** Name of the generic resource holding the excluded VREs. */
    public static String NAME = "AccountingHarvesters";

    /**
     * Builds the query selecting the {@link GenericResource} whose SecondaryType
     * is {@link #SECONDARY_TYPE} and whose Name is {@link #NAME}.
     *
     * @return the query to be submitted to the information system client
     */
    protected SimpleQuery getFilteringGenericResource() {
        return ICFactory.queryFor(GenericResource.class)
                .addCondition(String.format(SECONDARY_TYPE_FORMAT, SECONDARY_TYPE))
                .addCondition(String.format(NAME_FORMAT, NAME));
    }

    /**
     * Submits the query built by {@link #getFilteringGenericResource()}.
     *
     * @return the first matching resource, or {@code null} when the query
     *         returns no result
     */
    protected GenericResource getGenericResource() {
        SimpleQuery simpleQuery = getFilteringGenericResource();
        List<GenericResource> res = ICFactory.clientFor(GenericResource.class).submit(simpleQuery);
        if (res.isEmpty()) {
            // At time of writing it should be an error but it can change in the future
            logger.info("No {} for filtering contexts.", GenericResource.class.getSimpleName());
            return null;
        }
        return res.get(0);
    }

    /**
     * Reads the contexts to exclude from the body of the filtering
     * {@link GenericResource}.
     *
     * <p>The body is expected to contain an element named after the simple
     * name of the concrete harvester class, formatted as follows:</p>
     *
     * <pre>
     * &lt;ResourceCatalogueHarvester&gt;
     *   &lt;vres&gt;
     *     &lt;vre&gt;/d4science.research-infrastructures.eu/SoBigData/ResourceCatalogue&lt;/vre&gt;
     *     &lt;vre&gt;/d4science.research-infrastructures.eu/SoBigData/TagMe&lt;/vre&gt;
     *   &lt;/vres&gt;
     * &lt;/ResourceCatalogueHarvester&gt;
     * </pre>
     *
     * <p>When several elements with the same name are present, the last one
     * is used. Values are trimmed and empty values are ignored.</p>
     *
     * @return the excluded contexts; an empty set (never {@code null}) when the
     *         resource is missing or holds no section for this harvester
     */
    public SortedSet<String> getExcludedContexts() {
        SortedSet<String> excluded = new TreeSet<>();

        GenericResource genericResource = getGenericResource();
        if (genericResource == null) {
            return excluded;
        }

        Element body = genericResource.profile().body();

        Element classNameElement = lastElementByTagName(body, this.getClass().getSimpleName());
        if (classNameElement == null) {
            // At time of writing it should be an error but it can change in the future
            logger.info("The body of the {} does not contains any information to filter contexts.", GenericResource.class.getSimpleName());
            // Nothing to exclude: returning here avoids dereferencing a null element.
            return excluded;
        }

        Element vresElement = lastElementByTagName(classNameElement, "vres");
        if (vresElement == null) {
            // Section present but without a <vres> child: nothing to exclude.
            return excluded;
        }

        NodeList vreNodeList = vresElement.getElementsByTagName("vre");
        for (int v = 0; v < vreNodeList.getLength(); v++) {
            NodeList children = vreNodeList.item(v).getChildNodes();
            for (int i = 0; i < children.getLength(); i++) {
                Node child = children.item(i);
                if (child.getNodeType() != Node.TEXT_NODE) {
                    continue;
                }
                String value = child.getNodeValue();
                if (value == null) {
                    continue;
                }
                // Surrounding whitespace (e.g. from pretty-printed XML) would
                // prevent the value from matching a context.
                value = value.trim();
                if (!value.isEmpty()) {
                    excluded.add(value);
                }
            }
        }

        return excluded;
    }

    /**
     * Returns the last descendant element of {@code parent} having the given tag name.
     *
     * @param parent  element whose descendants are searched
     * @param tagName tag name to look for
     * @return the last matching element, or {@code null} when there is none
     */
    private static Element lastElementByTagName(Element parent, String tagName) {
        NodeList matches = parent.getElementsByTagName(tagName);
        Element last = null;
        for (int i = 0; i < matches.getLength(); i++) {
            Node node = matches.item(i);
            if (node.getNodeType() == Node.ELEMENT_NODE) {
                last = (Element) node;
            }
        }
        return last;
    }

    /**
     * Tells whether a context must be filtered out.
     *
     * @param context the context to check
     * @return {@code true} when the context is listed in {@link #excludedContexts}
     */
    protected boolean filterContext(String context) {
        return excludedContexts.contains(context);
    }

    /**
     * Selects the contexts starting with {@code base} that are not filtered
     * out by {@link #filterContext(String)}.
     *
     * @param contexts candidate contexts
     * @param base     prefix an accepted context must start with; when
     *                 {@code null}, {@link #SO_BIG_DATA_CONTEXT} is used
     * @return the accepted contexts, sorted
     */
    protected SortedSet<String> getSoBigDataContexts(Set<String> contexts, String base) {
        // Fall back to the previous hard-coded prefix if no base is provided.
        String prefix = (base != null) ? base : SO_BIG_DATA_CONTEXT;

        SortedSet<String> accepted = new TreeSet<>();
        for (String context : contexts) {
            if (context.startsWith(prefix) && !filterContext(context)) {
                accepted.add(context);
            }
        }
        return accepted;
    }

}