78 lines
2.0 KiB
Java
78 lines
2.0 KiB
Java
package eu.dnetlib.data.utility.cleaner;
|
|
|
|
import java.util.List;
|
|
import java.util.Map;
|
|
|
|
import org.apache.commons.logging.Log;
|
|
import org.apache.commons.logging.LogFactory;
|
|
import org.dom4j.Document;
|
|
import org.dom4j.Node;
|
|
import org.springframework.beans.factory.annotation.Required;
|
|
|
|
import com.google.common.collect.Lists;
|
|
|
|
import eu.dnetlib.data.utility.cleaner.rmi.CleanerException;
|
|
|
|
public abstract class XPATHCleaningRule {
|
|
|
|
private String xpath;
|
|
private boolean strict = false;
|
|
|
|
private static final Log logCleaningRules = LogFactory.getLog("VOCABULARY_RULES");
|
|
|
|
public List<Map<String, String>> applyXpathRule(final Document doc) throws CleanerException {
|
|
final List<Map<String, String>> errors = Lists.newArrayList();
|
|
|
|
final String id = doc.valueOf("//*[local-name()='objIdentifier']");
|
|
|
|
for (Object o : doc.selectNodes(xpath)) {
|
|
final Node node = (Node) o;
|
|
|
|
final String oldValue = node.getText().trim();
|
|
|
|
final String newValue = calculateNewValue(oldValue);
|
|
if (strict) {
|
|
final Map<String, String> err = verifyValue(newValue);
|
|
if (err != null) {
|
|
errors.add(err);
|
|
|
|
if (logCleaningRules.isInfoEnabled()) {
|
|
logCleaningRules.info("[" + newValue + "] is INVALID, " + "RULE: " + toString() + ", " + "RECORD: " + id + ", " + "XPATH: "
|
|
+ this.getXpath());
|
|
}
|
|
}
|
|
}
|
|
|
|
if (logCleaningRules.isInfoEnabled() && !newValue.equals(oldValue)) {
|
|
logCleaningRules.info("[" + oldValue + "] => [" + newValue + "], " + toString() + ", " + "RECORD: " + id + ", " + "XPATH: " + this.getXpath());
|
|
}
|
|
|
|
node.setText(newValue);
|
|
}
|
|
|
|
return errors;
|
|
}
|
|
|
|
protected abstract Map<String, String> verifyValue(final String value) throws CleanerException;
|
|
|
|
protected abstract String calculateNewValue(final String oldValue) throws CleanerException;
|
|
|
|
public String getXpath() {
|
|
return xpath;
|
|
}
|
|
|
|
@Required
|
|
public void setXpath(final String xpath) {
|
|
this.xpath = xpath;
|
|
}
|
|
|
|
public boolean isStrict() {
|
|
return strict;
|
|
}
|
|
|
|
public void setStrict(final boolean strict) {
|
|
this.strict = strict;
|
|
}
|
|
|
|
}
|