package eu.dnetlib.data.utility.cleaner;

import java.util.List;
import java.util.Map;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.dom4j.Document;
import org.dom4j.Node;
import org.springframework.beans.factory.annotation.Required;

import com.google.common.collect.Lists;

import eu.dnetlib.data.utility.cleaner.rmi.CleanerException;

/**
 * Base class for cleaning rules that rewrite the text of every node selected by an XPath
 * expression.
 *
 * <p>Subclasses supply the replacement value ({@link #calculateNewValue(String)}) and, for strict
 * rules, the validation of that value ({@link #verifyValue(String)}).
 */
public abstract class XPATHCleaningRule {

    /** XPath expression selecting the nodes this rule is applied to. */
    private String xpath;

    /** When {@code true}, every computed value is also passed to {@link #verifyValue(String)}. */
    private boolean strict = false;

    private static final Log logCleaningRules = LogFactory.getLog("VOCABULARY_RULES");

    /**
     * Applies this rule to every node of {@code doc} matched by the configured XPath.
     *
     * <p>For each matched node the trimmed text is passed to {@link #calculateNewValue(String)} and
     * the result is written back to the node. In strict mode the new value is additionally passed
     * to {@link #verifyValue(String)}; every non-null result is collected as an error. The new value
     * is written to the node even when it was reported as invalid.
     *
     * @param doc the document to clean; its nodes are modified in place
     * @return the errors reported by {@link #verifyValue(String)}; empty when the rule is not strict
     *         or when no value was rejected
     * @throws CleanerException if thrown by {@link #calculateNewValue(String)} or
     *         {@link #verifyValue(String)}
     */
    // NOTE(review): the type arguments below were missing from the file ("List>"); they are
    // restored here as Map<String, String> - confirm against the subclasses of this rule.
    public List<Map<String, String>> applyXpathRule(final Document doc) throws CleanerException {
        final List<Map<String, String>> errors = Lists.newArrayList();

        // The record identifier is only used to give context to the log messages.
        final String id = doc.valueOf("//*[local-name()='objIdentifier']");

        for (Object o : doc.selectNodes(xpath)) {
            final Node node = (Node) o;

            final String oldValue = node.getText().trim();
            final String newValue = calculateNewValue(oldValue);

            if (strict) {
                final Map<String, String> err = verifyValue(newValue);
                // A null result means the value was accepted.
                if (err != null) {
                    errors.add(err);
                    if (logCleaningRules.isInfoEnabled()) {
                        logCleaningRules.info("[" + newValue + "] is INVALID, " + "RULE: " + toString() + ", " + "RECORD: " + id + ", " + "XPATH: "
                                + this.getXpath());
                    }
                }
            }

            // Only values that actually changed are logged.
            if (logCleaningRules.isInfoEnabled() && !newValue.equals(oldValue)) {
                logCleaningRules.info("[" + oldValue + "] => [" + newValue + "], " + toString() + ", " + "RECORD: " + id + ", " + "XPATH: "
                        + this.getXpath());
            }

            node.setText(newValue);
        }

        return errors;
    }

    /**
     * Validates a value produced by {@link #calculateNewValue(String)}. Called only for strict
     * rules.
     *
     * @param value the value to validate
     * @return {@code null} if the value is accepted, otherwise a description of the error (its
     *         content is defined by the subclass)
     * @throws CleanerException if the validation cannot be performed
     */
    protected abstract Map<String, String> verifyValue(final String value) throws CleanerException;

    /**
     * Computes the replacement for the text of a matched node.
     *
     * @param oldValue the current, trimmed text of the node
     * @return the text to store in the node
     * @throws CleanerException if the new value cannot be computed
     */
    protected abstract String calculateNewValue(final String oldValue) throws CleanerException;

    public String getXpath() {
        return xpath;
    }

    @Required
    public void setXpath(final String xpath) {
        this.xpath = xpath;
    }

    public boolean isStrict() {
        return strict;
    }

    public void setStrict(final boolean strict) {
        this.strict = strict;
    }
}