87 lines
2.8 KiB
Java
87 lines
2.8 KiB
Java
package eu.dnetlib.data.utility.cleaner;
|
|
|
|
import java.io.StringReader;
|
|
import java.util.HashSet;
|
|
import java.util.List;
|
|
import java.util.Set;
|
|
|
|
import org.dom4j.Document;
|
|
import org.dom4j.Element;
|
|
import org.dom4j.io.SAXReader;
|
|
import org.springframework.beans.factory.annotation.Required;
|
|
|
|
import com.google.common.base.Splitter;
|
|
import com.google.common.collect.Lists;
|
|
import com.google.common.collect.Sets;
|
|
|
|
import eu.dnetlib.data.utility.cleaner.rmi.CleanerException;
|
|
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
|
|
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
|
|
import eu.dnetlib.enabling.locators.UniqueServiceLocator;
|
|
|
|
public class CleaningRuleFactory {
|
|
|
|
private UniqueServiceLocator serviceLocator;
|
|
|
|
public CleaningRule obtainCleaningRule(final String ruleId) throws CleanerException {
|
|
try {
|
|
final String prof = serviceLocator.getService(ISLookUpService.class).getResourceProfileByQuery(
|
|
"/RESOURCE_PROFILE[.//RESOURCE_IDENTIFIER/@value='" + ruleId + "' or .//CLEANER_NAME='" + ruleId + "']//CONFIGURATION");
|
|
|
|
final SAXReader reader = new SAXReader();
|
|
final Document doc = reader.read(new StringReader(prof));
|
|
|
|
final CleaningRule rule = new CleaningRule();
|
|
|
|
final ISLookUpService lookup = serviceLocator.getService(ISLookUpService.class);
|
|
|
|
for (Object o : doc.selectNodes("//RULE")) {
|
|
final Element node = (Element) o;
|
|
|
|
final String xpath = node.valueOf("@xpath");
|
|
final String vocabularies = node.valueOf("@vocabularies");
|
|
final String groovyRule = node.valueOf("@groovy");
|
|
final String strict = node.valueOf("@strict");
|
|
|
|
final XPATHCleaningRule xpathRule;
|
|
if (vocabularies != null && vocabularies.length() > 0) {
|
|
final Set<String> list = Sets.newHashSet(Splitter.on(",").omitEmptyStrings().trimResults().split(vocabularies));
|
|
xpathRule = new VocabularyRule(list, lookup);
|
|
} else {
|
|
xpathRule = new GroovyRule(groovyRule);
|
|
}
|
|
xpathRule.setXpath(xpath);
|
|
xpathRule.setStrict("true".equals(strict));
|
|
rule.getXpathRules().add(xpathRule);
|
|
}
|
|
return rule;
|
|
} catch (Exception e) {
|
|
throw new CleanerException("Error obtaing cleaner rule " + ruleId, e);
|
|
}
|
|
}
|
|
|
|
public List<String> getRuleIds() throws CleanerException {
|
|
try {
|
|
final HashSet<String> response = new HashSet<String>();
|
|
|
|
final List<String> list = serviceLocator.getService(ISLookUpService.class).quickSearchProfile("//CLEANER_NAME");
|
|
if (list != null) {
|
|
response.addAll(list);
|
|
}
|
|
|
|
return Lists.newArrayList(response);
|
|
} catch (ISLookUpException e) {
|
|
throw new CleanerException("Error obtaining IDs of cleaner DSs", e);
|
|
}
|
|
}
|
|
|
|
public UniqueServiceLocator getServiceLocator() {
|
|
return serviceLocator;
|
|
}
|
|
|
|
@Required
|
|
public void setServiceLocator(final UniqueServiceLocator serviceLocator) {
|
|
this.serviceLocator = serviceLocator;
|
|
}
|
|
}
|