dnet-core/dnet-data-services/src/main/java/eu/dnetlib/data/utility/cleaner/CleaningRuleFactory.java

87 lines
2.8 KiB
Java

package eu.dnetlib.data.utility.cleaner;
import java.io.StringReader;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import org.dom4j.Document;
import org.dom4j.Element;
import org.dom4j.io.SAXReader;
import org.springframework.beans.factory.annotation.Required;
import com.google.common.base.Splitter;
import com.google.common.collect.Lists;
import com.google.common.collect.Sets;
import eu.dnetlib.data.utility.cleaner.rmi.CleanerException;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
import eu.dnetlib.enabling.locators.UniqueServiceLocator;
public class CleaningRuleFactory {
private UniqueServiceLocator serviceLocator;
public CleaningRule obtainCleaningRule(final String ruleId) throws CleanerException {
try {
final String prof = serviceLocator.getService(ISLookUpService.class).getResourceProfileByQuery(
"/RESOURCE_PROFILE[.//RESOURCE_IDENTIFIER/@value='" + ruleId + "' or .//CLEANER_NAME='" + ruleId + "']//CONFIGURATION");
final SAXReader reader = new SAXReader();
final Document doc = reader.read(new StringReader(prof));
final CleaningRule rule = new CleaningRule();
final ISLookUpService lookup = serviceLocator.getService(ISLookUpService.class);
for (Object o : doc.selectNodes("//RULE")) {
final Element node = (Element) o;
final String xpath = node.valueOf("@xpath");
final String vocabularies = node.valueOf("@vocabularies");
final String groovyRule = node.valueOf("@groovy");
final String strict = node.valueOf("@strict");
final XPATHCleaningRule xpathRule;
if (vocabularies != null && vocabularies.length() > 0) {
final Set<String> list = Sets.newHashSet(Splitter.on(",").omitEmptyStrings().trimResults().split(vocabularies));
xpathRule = new VocabularyRule(list, lookup);
} else {
xpathRule = new GroovyRule(groovyRule);
}
xpathRule.setXpath(xpath);
xpathRule.setStrict("true".equals(strict));
rule.getXpathRules().add(xpathRule);
}
return rule;
} catch (Exception e) {
throw new CleanerException("Error obtaing cleaner rule " + ruleId, e);
}
}
public List<String> getRuleIds() throws CleanerException {
try {
final HashSet<String> response = new HashSet<String>();
final List<String> list = serviceLocator.getService(ISLookUpService.class).quickSearchProfile("//CLEANER_NAME");
if (list != null) {
response.addAll(list);
}
return Lists.newArrayList(response);
} catch (ISLookUpException e) {
throw new CleanerException("Error obtaining IDs of cleaner DSs", e);
}
}
public UniqueServiceLocator getServiceLocator() {
return serviceLocator;
}
@Required
public void setServiceLocator(final UniqueServiceLocator serviceLocator) {
this.serviceLocator = serviceLocator;
}
}