package eu.dnetlib.data.utility.cleaner;

import java.io.StringReader;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

import org.dom4j.Document;
import org.dom4j.Element;
import org.dom4j.io.SAXReader;
import org.springframework.beans.factory.annotation.Required;

import com.google.common.base.Splitter;
import com.google.common.collect.Lists;
import com.google.common.collect.Sets;

import eu.dnetlib.data.utility.cleaner.rmi.CleanerException;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
import eu.dnetlib.enabling.locators.UniqueServiceLocator;

/**
 * Builds {@link CleaningRule} instances from the configuration of resource profiles
 * fetched through the {@link ISLookUpService} provided by the injected
 * {@link UniqueServiceLocator}.
 */
public class CleaningRuleFactory {

	private UniqueServiceLocator serviceLocator;

	/**
	 * Loads the profile matching the given id and converts each of its {@code RULE}
	 * elements into an xpath rule of the returned {@link CleaningRule}.
	 *
	 * <p>
	 * The profile is selected by a query matching {@code ruleId} either against
	 * {@code RESOURCE_IDENTIFIER/@value} or against {@code CLEANER_NAME}. For every
	 * {@code RULE} element, a non-empty {@code vocabularies} attribute (comma separated,
	 * tokens trimmed, empty tokens dropped) produces a {@link VocabularyRule}; otherwise a
	 * {@link GroovyRule} is built from the {@code groovy} attribute. The {@code xpath}
	 * attribute is copied to the rule and the rule is strict only when the {@code strict}
	 * attribute is exactly {@code "true"}.
	 * </p>
	 *
	 * @param ruleId
	 *            resource identifier or cleaner name of the profile; must not be null and
	 *            must not contain an apostrophe
	 * @return the cleaning rule assembled from the profile configuration
	 * @throws CleanerException
	 *             if the id is rejected, or if looking up, parsing or converting the
	 *             profile fails for any reason (the original failure is kept as cause)
	 */
	public CleaningRule obtainCleaningRule(final String ruleId) throws CleanerException {
		try {
			// Validated inside the try so that a rejected id surfaces as a CleanerException,
			// like every other failure of this method.
			checkRuleId(ruleId);

			// Resolve the lookup service once; it is needed both for the profile query and
			// for the vocabulary rules.
			final ISLookUpService lookup = serviceLocator.getService(ISLookUpService.class);

			final String profile = lookup.getResourceProfileByQuery(
					"/RESOURCE_PROFILE[.//RESOURCE_IDENTIFIER/@value='" + ruleId + "' or .//CLEANER_NAME='" + ruleId + "']//CONFIGURATION");

			// NOTE(review): the profile is parsed with the default SAXReader settings. If the
			// lookup store can contain untrusted XML, consider disabling DTDs/external
			// entities on the reader - confirm against the deployment before changing it.
			final SAXReader reader = new SAXReader();
			final Document doc = reader.read(new StringReader(profile));

			final CleaningRule rule = new CleaningRule();

			for (Object o : doc.selectNodes("//RULE")) {
				final Element node = (Element) o;

				final String xpath = node.valueOf("@xpath");
				final String vocabularies = node.valueOf("@vocabularies");
				final String groovyRule = node.valueOf("@groovy");
				final String strict = node.valueOf("@strict");

				final XPATHCleaningRule xpathRule;
				if (vocabularies != null && vocabularies.length() > 0) {
					final Set<String> vocabularyNames = Sets.newHashSet(Splitter.on(",").omitEmptyStrings().trimResults().split(vocabularies));
					xpathRule = new VocabularyRule(vocabularyNames, lookup);
				} else {
					xpathRule = new GroovyRule(groovyRule);
				}
				xpathRule.setXpath(xpath);
				xpathRule.setStrict("true".equals(strict));
				rule.getXpathRules().add(xpathRule);
			}
			return rule;
		} catch (Exception e) {
			throw new CleanerException("Error obtaining cleaner rule " + ruleId, e);
		}
	}

	/**
	 * Rejects ids that cannot be embedded safely in the profile query.
	 *
	 * <p>
	 * The id is placed between single quotes in the query string, so an apostrophe would
	 * terminate the literal and let the caller alter the query.
	 * </p>
	 *
	 * @param ruleId
	 *            the id to validate
	 * @throws IllegalArgumentException
	 *             if the id is null or contains an apostrophe
	 */
	private static void checkRuleId(final String ruleId) {
		if (ruleId == null) {
			throw new IllegalArgumentException("Cleaner rule id is null");
		}
		if (ruleId.indexOf('\'') >= 0) {
			throw new IllegalArgumentException("Cleaner rule id must not contain an apostrophe");
		}
	}

	/**
	 * Lists the distinct results of a quick search for {@code //CLEANER_NAME}.
	 *
	 * <p>
	 * Duplicates are removed through a {@link HashSet}, so the order of the returned list
	 * is unspecified. A null search result yields an empty list.
	 * </p>
	 *
	 * @return a new list with the distinct search results
	 * @throws CleanerException
	 *             if the lookup service reports an {@link ISLookUpException}
	 */
	public List<String> getRuleIds() throws CleanerException {
		try {
			final Set<String> response = new HashSet<String>();

			final List<String> found = serviceLocator.getService(ISLookUpService.class).quickSearchProfile("//CLEANER_NAME");
			if (found != null) {
				response.addAll(found);
			}
			return Lists.newArrayList(response);
		} catch (ISLookUpException e) {
			throw new CleanerException("Error obtaining IDs of cleaner DSs", e);
		}
	}

	/**
	 * @return the service locator used to obtain the lookup service
	 */
	public UniqueServiceLocator getServiceLocator() {
		return serviceLocator;
	}

	/**
	 * Sets the service locator used to obtain the lookup service (required property).
	 *
	 * @param serviceLocator
	 *            the locator to use
	 */
	@Required
	public void setServiceLocator(final UniqueServiceLocator serviceLocator) {
		this.serviceLocator = serviceLocator;
	}
}