package org.gcube.dataharvest.harvester.sobigdata;

import java.text.ParseException;
import java.util.Date;
import java.util.List;
import java.util.Set;
import java.util.SortedSet;
import java.util.TreeSet;

import org.gcube.common.resources.gcore.GenericResource;
import org.gcube.dataharvest.harvester.BasicHarvester;
import org.gcube.resources.discovery.client.queries.api.SimpleQuery;
import org.gcube.resources.discovery.icclient.ICFactory;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;

/**
 * Base class for harvesters that only work on contexts located under
 * {@link #SO_BIG_DATA_CONTEXT}.
 *
 * <p>At construction time the harvester looks up a {@link GenericResource}
 * (selected by {@link #SECONDARY_TYPE} and {@link #NAME}) listing contexts to
 * exclude, then filters the contexts it was given against that list.</p>
 */
public abstract class SoBigDataHarvester extends BasicHarvester {

    private static final Logger logger = LoggerFactory.getLogger(SoBigDataHarvester.class);

    /** Context prefix shared by every context this harvester may consider. */
    public static final String SO_BIG_DATA_CONTEXT = "/d4science.research-infrastructures.eu/SoBigData";

    /** Contexts read from the filtering generic resource; never {@code null} after construction. */
    protected SortedSet<String> excludedContexts;

    /** Contexts that passed the prefix and exclusion filters. */
    protected SortedSet<String> contexts;

    /**
     * Creates the harvester and computes the filtered set of contexts.
     *
     * @param start    start of the harvesting interval, forwarded to the superclass
     * @param end      end of the harvesting interval, forwarded to the superclass
     * @param contexts candidate contexts to filter
     * @throws ParseException propagated from the superclass constructor
     */
    public SoBigDataHarvester(Date start, Date end, SortedSet<String> contexts) throws ParseException {
        super(start, end);
        // Must be assigned before getSoBigDataContexts(): filterContext() reads this field.
        this.excludedContexts = getExcludedContexts();
        // Adding trailing slash to SO_BIG_DATA_CONTEXT to avoid to get VO
        this.contexts = getSoBigDataContexts(contexts, SO_BIG_DATA_CONTEXT + "/");
        // Log the filtered field, not the unfiltered constructor argument that shadows it.
        logger.trace("Valid contexts are {}", this.contexts);
    }

    /**
     * @return the contexts that passed filtering (the internal set, not a copy)
     */
    public SortedSet<String> getFilteredContexts() {
        return contexts;
    }

    public static String SECONDARY_TYPE_FORMAT = "$resource/Profile/SecondaryType/text() eq '%1s'";
    public static String NAME_FORMAT = "$resource/Profile/Name/text() eq '%1s'";

    public static String SECONDARY_TYPE = "ExcludingVREs";
    public static String NAME = "AccountingHarvesters";

    /**
     * Builds the query selecting the generic resource whose secondary type is
     * {@link #SECONDARY_TYPE} and whose name is {@link #NAME}.
     *
     * @return the query to submit to the discovery client
     */
    protected SimpleQuery getFilteringGenericResource() {
        return ICFactory.queryFor(GenericResource.class)
                .addCondition(String.format(SECONDARY_TYPE_FORMAT, SECONDARY_TYPE))
                .addCondition(String.format(NAME_FORMAT, NAME));
    }

    /**
     * Submits the filtering query and returns the first matching resource.
     *
     * @return the first matching {@link GenericResource}, or {@code null} when
     *         the query returns nothing
     */
    protected GenericResource getGenericResource() {
        SimpleQuery simpleQuery = getFilteringGenericResource();
        List<GenericResource> res = ICFactory.clientFor(GenericResource.class).submit(simpleQuery);
        if (res == null || res.isEmpty()) {
            // At time of writing it should be an error but it can change in the future
            logger.info("No {} for filtering contexts.", GenericResource.class.getSimpleName());
            return null;
        }
        return res.get(0);
    }

    /**
     * Reads the contexts to exclude for this concrete harvester from the body
     * of the filtering generic resource.
     *
     * @return the excluded contexts; empty (never {@code null}) when the
     *         resource is missing or carries no section for this class
     */
    public SortedSet<String> getExcludedContexts() {
        SortedSet<String> excluded = new TreeSet<>();

        GenericResource genericResource = getGenericResource();
        if (genericResource == null) {
            return excluded;
        }

        Element body = genericResource.profile().body();

        /*
         * The following code parses an XML section whose root element is named
         * after the simple class name of the concrete harvester and which
         * contains the context paths as element text, e.g.
         *
         *   /d4science.research-infrastructures.eu/SoBigData/ResourceCatalogue
         *   /d4science.research-infrastructures.eu/SoBigData/TagMe
         *
         * NOTE(review): the XML tags of this example were lost in the text
         * under review; the layout appears to be
         * <ClassName><vres><vre>...</vre></vres></ClassName> -- confirm the
         * tag names against version control.
         */
        NodeList nodeList = body.getElementsByTagName(this.getClass().getSimpleName());
        if (nodeList.getLength() == 0) {
            // At time of writing it should be an error but it can change in the future
            logger.info("The body of the {} does not contains any information to filter contexts.",
                    GenericResource.class.getSimpleName());
        }

        // NOTE(review): from the loop condition down to the end of
        // filterContext() the original text was truncated in the reviewed
        // source; this is a reconstruction -- confirm against version control.
        Element classNameElement = null;
        for (int c = 0; c < nodeList.getLength(); c++) {
            Node node = nodeList.item(c);
            if (node instanceof Element) {
                // No break: when several sections match, the last one is used.
                classNameElement = (Element) node;
            }
        }

        if (classNameElement == null) {
            // Nothing configured for this harvester: exclude nothing.
            return excluded;
        }

        // NOTE(review): tag name "vre" is assumed -- confirm.
        NodeList vreNodes = classNameElement.getElementsByTagName("vre");
        for (int i = 0; i < vreNodes.getLength(); i++) {
            String text = vreNodes.item(i).getTextContent();
            if (text == null) {
                continue;
            }
            String excludedContext = text.trim();
            if (!excludedContext.isEmpty()) {
                excluded.add(excludedContext);
            }
        }

        return excluded;
    }

    /**
     * Tells whether a context must be discarded.
     *
     * <p>NOTE(review): reconstructed; only the call site
     * {@code filterContext(context)} is visible in the reviewed source --
     * confirm visibility and body.</p>
     *
     * @param context the context to check
     * @return {@code true} when the context is listed in {@link #excludedContexts}
     */
    protected boolean filterContext(String context) {
        return excludedContexts.contains(context);
    }

    /**
     * Selects the contexts that start with {@code base} and are not excluded
     * by {@link #filterContext(String)}.
     *
     * <p>NOTE(review): the modifiers and return type of this method were lost
     * in the reviewed source; {@code protected SortedSet<String>} is assumed.</p>
     *
     * @param contexts candidate contexts
     * @param base     required prefix; when {@code null},
     *                 {@link #SO_BIG_DATA_CONTEXT} is used
     * @return a new sorted set with the accepted contexts
     */
    protected SortedSet<String> getSoBigDataContexts(Set<String> contexts, String base) {
        // Honour the caller-supplied prefix: the constructor passes the
        // trailing-slash form precisely so that the VO itself is not accepted.
        String prefix = (base != null) ? base : SO_BIG_DATA_CONTEXT;
        SortedSet<String> filteredContext = new TreeSet<>();
        for (String context : contexts) {
            if (context.startsWith(prefix) && !filterContext(context)) {
                filteredContext.add(context);
            }
        }
        return filteredContext;
    }
}