imported cnr-data-utility-cleaner-rmi and cnr-data-utility-cleaner-service in dnet-data-services

This commit is contained in:
Claudio Atzori 2019-06-13 17:58:17 +02:00
parent 22d126ffa8
commit 337b42e945
19 changed files with 1090 additions and 0 deletions

View File

@ -28,6 +28,11 @@
<artifactId>commons-beanutils</artifactId>
</dependency>
<dependency>
<groupId>org.codehaus.groovy</groupId>
<artifactId>groovy</artifactId>
</dependency>
<dependency>
<groupId>org.json</groupId>
<artifactId>json</artifactId>

View File

@ -0,0 +1,45 @@
package eu.dnetlib.data.utility.cleaner;
import javax.xml.ws.wsaddressing.W3CEndpointReference;
import org.springframework.beans.factory.annotation.Required;
import eu.dnetlib.data.utility.cleaner.rmi.CleanerException;
import eu.dnetlib.data.utility.cleaner.rmi.CleanerService;
import eu.dnetlib.enabling.resultset.MappedResultSetFactory;
import eu.dnetlib.enabling.tools.AbstractBaseService;
/**
 * Implementation of {@link CleanerService}: obtains the cleaning rule identified by a rule id and
 * asks the configured {@link MappedResultSetFactory} for a resultset built from the input epr
 * and that rule.
 */
public class CleanerServiceImpl extends AbstractBaseService implements CleanerService {

    /** Produces the {@link CleaningRule} for a given rule id. */
    private CleaningRuleFactory cleaningRuleFactory;

    /** Creates the output resultset from the input epr and the cleaning rule. */
    private MappedResultSetFactory mappedResultSetFactory;

    /**
     * Returns the epr of a resultset obtained by mapping the given one through a cleaning rule.
     *
     * @param epr
     *            epr of the resultset to clean, must not be null
     * @param ruleId
     *            identifier of the rule to apply, must not be null or empty
     * @return the epr created by the mapped resultset factory
     * @throws CleanerException
     *             if an argument is missing or the rule cannot be obtained
     */
    @Override
    public W3CEndpointReference clean(final W3CEndpointReference epr, final String ruleId) throws CleanerException {
        if ((ruleId == null) || ruleId.isEmpty()) {
            throw new CleanerException("Invalid ruleId: id is empty");
        }
        if (epr == null) {
            throw new CleanerException("Passed epr is empty");
        }
        return mappedResultSetFactory.createMappedResultSet(epr, cleaningRuleFactory.obtainCleaningRule(ruleId));
    }

    // @Required only makes sense on setters: it was removed from this getter.
    public MappedResultSetFactory getMappedResultSetFactory() {
        return mappedResultSetFactory;
    }

    @Required
    public void setMappedResultSetFactory(final MappedResultSetFactory mappedResultSetFactory) {
        this.mappedResultSetFactory = mappedResultSetFactory;
    }

    public CleaningRuleFactory getCleaningRuleFactory() {
        return cleaningRuleFactory;
    }

    @Required
    public void setCleaningRuleFactory(final CleaningRuleFactory cleaningRuleFactory) {
        this.cleaningRuleFactory = cleaningRuleFactory;
    }
}

View File

@ -0,0 +1,67 @@
package eu.dnetlib.data.utility.cleaner;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.dom4j.Document;
import org.dom4j.Element;
import org.dom4j.Namespace;
import org.dom4j.QName;
import org.dom4j.io.SAXReader;
import org.springframework.beans.factory.annotation.Required;
import eu.dnetlib.miscutils.functional.UnaryFunction;
public class CleaningRule implements UnaryFunction<String, String> {
private static final Log log = LogFactory.getLog(CleaningRule.class); // NOPMD by marko on 11/24/08 5:02 PM
private List<XPATHCleaningRule> xpathRules = new ArrayList<XPATHCleaningRule>();
@Override
public String evaluate(final String text) {
try {
final List<Map<String, String>> errors = new ArrayList<Map<String, String>>();
final Document doc = (new SAXReader()).read(new StringReader(text));
for (final XPATHCleaningRule r : xpathRules) {
errors.addAll(r.applyXpathRule(doc));
}
if (errors.size() > 0) {
markAsInvalid(doc, errors);
}
return doc.asXML();
} catch (final Exception e) {
log.error("Error evaluating rule", e);
}
return "";
}
private void markAsInvalid(final Document doc, final List<Map<String, String>> errors) {
final Element element = (Element) doc.selectSingleNode("//*[local-name()='header']");
if (element != null) {
final Element inv = element.addElement(new QName("invalid", new Namespace("dri", "http://www.driver-repository.eu/namespace/dri")));
for (final Map<String, String> e : errors) {
final Element err = inv.addElement(new QName("error", new Namespace("dri", "http://www.driver-repository.eu/namespace/dri")));
for (final Map.Entry<String, String> entry : e.entrySet()) {
err.addAttribute(entry.getKey(), entry.getValue());
}
}
inv.addAttribute("value", "true");
}
}
public List<XPATHCleaningRule> getXpathRules() {
return xpathRules;
}
@Required
public void setXpathRules(final List<XPATHCleaningRule> xpathRules) {
this.xpathRules = xpathRules;
}
}

View File

@ -0,0 +1,86 @@
package eu.dnetlib.data.utility.cleaner;
import java.io.StringReader;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import org.dom4j.Document;
import org.dom4j.Element;
import org.dom4j.io.SAXReader;
import org.springframework.beans.factory.annotation.Required;
import com.google.common.base.Splitter;
import com.google.common.collect.Lists;
import com.google.common.collect.Sets;
import eu.dnetlib.data.utility.cleaner.rmi.CleanerException;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
import eu.dnetlib.enabling.locators.UniqueServiceLocator;
public class CleaningRuleFactory {
private UniqueServiceLocator serviceLocator;
public CleaningRule obtainCleaningRule(final String ruleId) throws CleanerException {
try {
final String prof = serviceLocator.getService(ISLookUpService.class).getResourceProfileByQuery(
"/RESOURCE_PROFILE[.//RESOURCE_IDENTIFIER/@value='" + ruleId + "' or .//CLEANER_NAME='" + ruleId + "']//CONFIGURATION");
final SAXReader reader = new SAXReader();
final Document doc = reader.read(new StringReader(prof));
final CleaningRule rule = new CleaningRule();
final ISLookUpService lookup = serviceLocator.getService(ISLookUpService.class);
for (Object o : doc.selectNodes("//RULE")) {
final Element node = (Element) o;
final String xpath = node.valueOf("@xpath");
final String vocabularies = node.valueOf("@vocabularies");
final String groovyRule = node.valueOf("@groovy");
final String strict = node.valueOf("@strict");
final XPATHCleaningRule xpathRule;
if (vocabularies != null && vocabularies.length() > 0) {
final Set<String> list = Sets.newHashSet(Splitter.on(",").omitEmptyStrings().trimResults().split(vocabularies));
xpathRule = new VocabularyRule(list, lookup);
} else {
xpathRule = new GroovyRule(groovyRule);
}
xpathRule.setXpath(xpath);
xpathRule.setStrict("true".equals(strict));
rule.getXpathRules().add(xpathRule);
}
return rule;
} catch (Exception e) {
throw new CleanerException("Error obtaing cleaner rule " + ruleId, e);
}
}
public List<String> getRuleIds() throws CleanerException {
try {
final HashSet<String> response = new HashSet<String>();
final List<String> list = serviceLocator.getService(ISLookUpService.class).quickSearchProfile("//CLEANER_NAME");
if (list != null) {
response.addAll(list);
}
return Lists.newArrayList(response);
} catch (ISLookUpException e) {
throw new CleanerException("Error obtaining IDs of cleaner DSs", e);
}
}
public UniqueServiceLocator getServiceLocator() {
return serviceLocator;
}
@Required
public void setServiceLocator(final UniqueServiceLocator serviceLocator) {
this.serviceLocator = serviceLocator;
}
}

View File

@ -0,0 +1,56 @@
package eu.dnetlib.data.utility.cleaner;
import eu.dnetlib.data.utility.cleaner.rmi.CleanerException;
import groovy.lang.Closure;
import groovy.lang.GroovyShell;
import java.util.Map;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
/**
 * Xpath rule whose new value is computed by a Groovy expression; the old value is available to
 * the expression as {@code input}.
 *
 * Groovy rules must be declared in a CleanerDS profile, some examples:
 *
 * <pre>
 * {@code <RULE xpath="..." groovy="(input =~ /X/).replaceAll('Y')" />}
 * {@code <RULE xpath="..." groovy="'CONSTANT'" />}
 * {@code <RULE xpath="..." groovy="input.toUpperCase()" />}
 * </pre>
 *
 * @author michele
 */
public class GroovyRule extends XPATHCleaningRule {

    private static final Log log = LogFactory.getLog(GroovyRule.class); // NOPMD by marko on 11/24/08 5:02 PM

    private final String groovyRule;

    // Declared with a wildcard: the expression may evaluate to something that is not a String
    // (e.g. a GString), see calculateNewValue.
    private final Closure<?> closure;

    private final GroovyShell groovyShell = new GroovyShell();

    /**
     * Compiles the expression into a one-argument closure.
     *
     * @param groovyRule
     *            the Groovy expression, it can refer to the old value as {@code input}
     */
    public GroovyRule(final String groovyRule) {
        this.groovyRule = groovyRule;
        this.closure = (Closure<?>) groovyShell.evaluate("{ input -> " + groovyRule + "}");
    }

    /**
     * Evaluates the closure on the old value.
     *
     * @param oldValue
     *            the current value of the node
     * @return the string form of the closure result, or null when the closure returns null
     * @throws CleanerException
     *             if the Groovy execution fails
     */
    @Override
    protected String calculateNewValue(final String oldValue) throws CleanerException {
        try {
            log.info("Executing groovy closure on value " + oldValue);
            final Object result = closure.call(oldValue);
            // Results that are not Strings used to fail with a ClassCastException.
            return result == null ? null : result.toString();
        } catch (Exception e) {
            log.error("Failed Groovy execution, groovyRule: " + groovyRule + ", input: " + oldValue, e);
            throw new CleanerException("Error executing groovy", e);
        }
    }

    /** Groovy rules never report validation errors: always returns null. */
    @Override
    protected Map<String, String> verifyValue(final String value) throws CleanerException {
        return null;
    }

    @Override
    public String toString() {
        return "GROOVY: " + groovyRule;
    }
}

View File

@ -0,0 +1,113 @@
package eu.dnetlib.data.utility.cleaner;
import java.util.HashMap;
import java.util.Map;
import java.util.Set;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import com.google.common.base.Joiner;
import com.google.common.collect.Maps;
import com.google.common.collect.Sets;
import eu.dnetlib.data.utility.cleaner.rmi.CleanerException;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
/**
 * Xpath rule that replaces a value with the code of the vocabulary term it is a synonym of.
 *
 * Vocabulary rules must be declared in a CleanerDS profile; for each vocabulary the related
 * VocabularyDS profile must be present:
 *
 * <pre>
 * {@code <RULE xpath="..." vocabularies="VOC1" />}
 * {@code <RULE xpath="..." vocabularies="VOC1, VOC2, VOC3" />}
 * </pre>
 *
 * @author michele
 */
public class VocabularyRule extends XPATHCleaningRule {

    private static final Log log = LogFactory.getLog(VocabularyRule.class); // NOPMD by marko on 11/24/08 5:02 PM

    private Set<String> vocabularies;

    /** Lower-cased synonym (code, english name, native name, declared synonym) to term code. */
    private Map<String, String> synonyms = Maps.newHashMap();

    /** Lower-cased codes of the loaded terms. */
    private Set<String> validTerms = Sets.newHashSet();

    /**
     * Creates the rule and loads the terms of the given vocabularies.
     *
     * @param vocabularies
     *            codes of the vocabularies to use
     * @param lookup
     *            service used to query the vocabulary profiles
     * @throws CleanerException
     *             if a vocabulary cannot be loaded
     */
    public VocabularyRule(final Set<String> vocabularies, final ISLookUpService lookup) throws CleanerException {
        this.vocabularies = vocabularies;
        loadSynonymsAndTerms(lookup);
    }

    /**
     * Returns the term code registered for the old value (case-insensitive), or the old value
     * itself when it is not a known synonym.
     */
    @Override
    protected String calculateNewValue(final String oldValue) throws CleanerException {
        log.debug("calculating new value for: " + oldValue);

        if (synonyms.isEmpty()) {
            log.warn("Vocabulary terms is void, vocabularies: " + this.vocabularies);
        }

        final String newValue = synonyms.get(normalize(oldValue));
        if (newValue == null) {
            log.debug("Synonym " + oldValue + " not found in vocabulary");
            return oldValue;
        }
        return newValue;
    }

    /** Fills the synonyms and validTerms collections from the profiles of every vocabulary. */
    private void loadSynonymsAndTerms(final ISLookUpService lookup) throws CleanerException {
        for (final String vocabulary : vocabularies) {
            try {
                final String query = "for $x in collection('/db/DRIVER/VocabularyDSResources/VocabularyDSResourceType')"
                        + "//RESOURCE_PROFILE[.//VOCABULARY_NAME/@code='" + vocabulary + "']//TERM return "
                        + "( concat($x/@code,'|-:-|', $x/@code), concat($x/@english_name,'|-:-|', $x/@code), concat($x/@native_name,'|-:-|', $x/@code), "
                        + "for $y in $x//SYNONYM return concat($y/@term,'|-:-|', $x/@code) )";

                for (final String s : lookup.quickSearchProfile(query)) {
                    log.debug("SYNONYM : " + s);
                    final String[] arr = s.split("\\|-:-\\|");
                    // split() drops trailing empty strings: a row with an empty code has fewer
                    // than two parts and must be skipped instead of failing the whole vocabulary.
                    if (arr.length < 2 || arr[0].isEmpty()) {
                        continue;
                    }
                    synonyms.put(normalize(arr[0]), arr[1]);
                    validTerms.add(normalize(arr[1]));
                }

                log.info("VOCABULARY " + vocabulary.trim() + " - terms size " + synonyms.size());
            } catch (final Exception e) {
                throw new CleanerException("Error obtaining vocabulary " + vocabulary, e);
            }
        }
    }

    /**
     * Checks that the value is the code of a loaded term (case-insensitive).
     *
     * @return null when the value is valid, otherwise a map with the keys "term", "vocabularies"
     *         and "xpath" describing the error
     */
    @Override
    protected Map<String, String> verifyValue(final String value) throws CleanerException {
        if (synonyms.isEmpty()) {
            log.warn("Vocabulary terms is void, vocabularies: " + this.vocabularies);
        }

        if (validTerms.contains(normalize(value))) {
            return null;
        }

        final Map<String, String> error = new HashMap<String, String>();
        error.put("term", value);
        error.put("vocabularies", this.vocabularies.toString().replaceAll("\\[", "").replaceAll("\\]", ""));
        error.put("xpath", this.getXpath());
        return error;
    }

    /** Lower-cases a key independently of the default locale of the JVM. */
    private static String normalize(final String s) {
        return s.toLowerCase(java.util.Locale.ROOT);
    }

    public Map<String, String> getVocabularyTerms() {
        return synonyms;
    }

    @Override
    public String toString() {
        return "VOCABULARIES: [" + Joiner.on(", ").join(vocabularies) + "]";
    }
}

View File

@ -0,0 +1,77 @@
package eu.dnetlib.data.utility.cleaner;
import java.util.List;
import java.util.Map;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.dom4j.Document;
import org.dom4j.Node;
import org.springframework.beans.factory.annotation.Required;
import com.google.common.collect.Lists;
import eu.dnetlib.data.utility.cleaner.rmi.CleanerException;
/**
 * Base class of the rules that rewrite the text of the nodes selected by an xpath.
 */
public abstract class XPATHCleaningRule {

    private static final Log logCleaningRules = LogFactory.getLog("VOCABULARY_RULES");

    private String xpath;

    private boolean strict = false;

    /**
     * Replaces the trimmed text of every node selected by the xpath with the value computed by
     * {@link #calculateNewValue(String)}. In strict mode the new value is also verified and the
     * reported errors are collected.
     *
     * @param doc
     *            the record to clean, modified in place
     * @return the errors reported by {@link #verifyValue(String)}; empty when not strict
     * @throws CleanerException
     *             if a subclass fails to compute or verify a value
     */
    public List<Map<String, String>> applyXpathRule(final Document doc) throws CleanerException {
        final String recordId = doc.valueOf("//*[local-name()='objIdentifier']");
        final List<Map<String, String>> problems = Lists.newArrayList();

        for (final Object selected : doc.selectNodes(xpath)) {
            final Node target = (Node) selected;
            final String before = target.getText().trim();
            final String after = calculateNewValue(before);

            if (strict) {
                final Map<String, String> problem = verifyValue(after);
                if (problem != null) {
                    problems.add(problem);
                    if (logCleaningRules.isInfoEnabled()) {
                        logCleaningRules.info("[" + after + "] is INVALID, RULE: " + toString() + ", RECORD: " + recordId + ", XPATH: "
                                + this.getXpath());
                    }
                }
            }

            // The info check comes first so that the comparison is skipped when logging is off.
            if (logCleaningRules.isInfoEnabled() && !after.equals(before)) {
                logCleaningRules.info("[" + before + "] => [" + after + "], " + toString() + ", RECORD: " + recordId + ", XPATH: " + this.getXpath());
            }

            target.setText(after);
        }
        return problems;
    }

    /**
     * Verifies a value.
     *
     * @return null when the value is accepted, otherwise a description of the error
     */
    protected abstract Map<String, String> verifyValue(final String value) throws CleanerException;

    /** Computes the replacement of the given (trimmed) node text. */
    protected abstract String calculateNewValue(final String oldValue) throws CleanerException;

    public String getXpath() {
        return xpath;
    }

    @Required
    public void setXpath(final String xpath) {
        this.xpath = xpath;
    }

    public boolean isStrict() {
        return strict;
    }

    public void setStrict(final boolean strict) {
        this.strict = strict;
    }
}

View File

@ -0,0 +1,91 @@
package eu.dnetlib.data.utility.cleaner.inspector;
import java.util.List;
import javax.annotation.Resource;
import org.springframework.stereotype.Controller;
import org.springframework.ui.Model;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RequestParam;
import com.google.common.collect.Lists;
import eu.dnetlib.data.utility.cleaner.CleaningRule;
import eu.dnetlib.data.utility.cleaner.CleaningRuleFactory;
import eu.dnetlib.data.utility.cleaner.rmi.CleanerException;
import eu.dnetlib.enabling.inspector.AbstractInspectorController;
import eu.dnetlib.miscutils.collections.MappedCollection;
import eu.dnetlib.miscutils.functional.UnaryFunction;
/**
 * Inspector page that lists the available cleaner rules and applies the selected one to a
 * record typed by the user.
 */
@Controller
public class CleanerInspector extends AbstractInspectorController {

    @Resource
    private CleaningRuleFactory cleaningRuleFactory;

    /** An entry of the rule selector: its value and whether it is the selected one. */
    public static class SelectOption {

        private String value;
        private boolean selected;

        public SelectOption(final String value, final boolean selected) {
            this.value = value;
            this.selected = selected;
        }

        public String getValue() {
            return value;
        }

        public void setValue(final String value) {
            this.value = value;
        }

        public boolean isSelected() {
            return selected;
        }

        public void setSelected(final boolean selected) {
            this.selected = selected;
        }
    }

    /**
     * Puts the rule options in the model and, when both a rule and a text are given, also the
     * text and the result of evaluating the rule on it.
     *
     * @param model
     *            the model of the page
     * @param ruleId
     *            the selected rule, optional
     * @param dirtyText
     *            the record to clean, optional
     * @throws CleanerException
     *             if the rules cannot be listed or the selected rule cannot be obtained
     */
    @RequestMapping(value = "/inspector/cleaner.do")
    public void cleaner(final Model model,
            @RequestParam(value = "rule", required = false) final String ruleId,
            @RequestParam(value = "dirtyText", required = false) final String dirtyText) throws CleanerException {

        final List<String> availableRules = Lists.newArrayList(cleaningRuleFactory.getRuleIds());
        model.addAttribute("rules", selectOptions(availableRules, ruleId));

        if ((ruleId == null) || (dirtyText == null)) {
            return;
        }

        final CleaningRule selectedRule = cleaningRuleFactory.obtainCleaningRule(ruleId);
        model.addAttribute("dirtyText", dirtyText);
        model.addAttribute("cleanedText", selectedRule.evaluate(dirtyText));
    }

    /**
     * Converts the values into SelectOption instances; an option is flagged as selected only
     * when its value equals "current".
     *
     * @param input
     *            list of input strings
     * @param current
     *            current value to select, may be null
     * @return the list of options
     */
    private List<SelectOption> selectOptions(final List<String> input, final String current) {
        return Lists.newArrayList(new MappedCollection<SelectOption, String>(input, new UnaryFunction<SelectOption, String>() {

            @Override
            public SelectOption evaluate(final String value) {
                final boolean isCurrent = value.equals(current);
                return new SelectOption(value, isCurrent);
            }
        }));
    }
}

View File

@ -0,0 +1,24 @@
package eu.dnetlib.data.utility.cleaner.rmi;
import eu.dnetlib.common.rmi.RMIException;
/**
 * Exception thrown by the cleaner service and its helpers.
 */
public class CleanerException extends RMIException {

    private static final long serialVersionUID = -7889315488590536918L;

    /**
     * @param message
     *            description of the failure
     */
    public CleanerException(final String message) {
        super(message);
    }

    /**
     * @param message
     *            description of the failure
     * @param e
     *            the cause
     */
    public CleanerException(final String message, final Throwable e) {
        super(message, e);
    }

    /**
     * @param e
     *            the cause
     */
    public CleanerException(final Throwable e) {
        super(e);
    }
}

View File

@ -0,0 +1,25 @@
package eu.dnetlib.data.utility.cleaner.rmi;
import javax.jws.WebService;
import javax.xml.ws.wsaddressing.W3CEndpointReference;
import eu.dnetlib.common.rmi.BaseService;
/**
 * Web service interface of the cleaner service, published in the
 * "http://services.dnetlib.eu/" namespace.
 *
 * @author michele
 */
@WebService(targetNamespace = "http://services.dnetlib.eu/")
public interface CleanerService extends BaseService {
/**
 * Cleans the records of a resultset by applying a rule.
 *
 * @param epr
 *            an epr of a resultset with dirty records
 * @param ruleId
 *            the identifier of a rule
 * @return an epr of a resultset with clean records
 * @throws CleanerException
 *             if the cleaning cannot be set up
 */
W3CEndpointReference clean(final W3CEndpointReference epr, final String ruleId) throws CleanerException;
}

View File

@ -0,0 +1 @@
# Name of the bean wired as mappedResultSetFactory of the cleanerService bean
# (used as a placeholder in the cleaner service Spring configuration).
service.cleaner.mapped.resultset.factory=mappedResultSetFactory

View File

@ -0,0 +1,36 @@
<?xml version="1.0" encoding="UTF-8"?>
<beans xmlns="http://www.springframework.org/schema/beans"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:jaxws="http://cxf.apache.org/jaxws"
xmlns:sec="http://cxf.apache.org/configuration/security" xmlns:wsa="http://cxf.apache.org/ws/addressing"
xmlns:p="http://www.springframework.org/schema/p" xmlns:http="http://cxf.apache.org/transports/http/configuration"
xmlns:t="http://dnetlib.eu/springbeans/t" xmlns:template="http://dnetlib.eu/springbeans/template"
xsi:schemaLocation="http://www.springframework.org/schema/beans http://www.springframework.org/schema/beans/spring-beans.xsd
http://cxf.apache.org/ws/addressing http://cxf.apache.org/schemas/ws-addr-conf.xsd
http://cxf.apache.org/configuration/security http://cxf.apache.org/schemas/configuration/security.xsd
http://cxf.apache.org/transports/http/configuration http://cxf.apache.org/schemas/configuration/http-conf.xsd
http://cxf.apache.org/jaxws http://cxf.apache.org/schemas/jaxws.xsd
http://dnetlib.eu/springbeans/template http://dnetlib.eu/springbeans/template.xsd">
<!-- Service bean: started/stopped with the context; the mapped resultset factory
     bean name comes from the service.cleaner.mapped.resultset.factory property -->
<bean id="cleanerService"
class="eu.dnetlib.data.utility.cleaner.CleanerServiceImpl"
init-method="start" destroy-method="stop" p:cleaningRuleFactory-ref="cleaningRuleFactory"
p:mappedResultSetFactory-ref="${service.cleaner.mapped.resultset.factory}" />
<!-- Factory of the cleaning rules, it needs the service locator to reach the lookup service -->
<bean id="cleaningRuleFactory"
class="eu.dnetlib.data.utility.cleaner.CleaningRuleFactory"
p:serviceLocator-ref="uniqueServiceLocator">
</bean>
<!-- JAX-WS endpoint exposing the cleanerService bean at the /cleaner address -->
<jaxws:endpoint id="cleanerServiceEndpoint" implementor="#cleanerService"
implementorClass="eu.dnetlib.data.utility.cleaner.rmi.CleanerService"
address="/cleaner" />
<!-- Registration manager instance for the cleaner service and its endpoint
     (presumably registers the service profile; confirm against the template definition) -->
<template:instance name="serviceRegistrationManager"
t:serviceRegistrationManagerClass="eu.dnetlib.enabling.tools.registration.ValidatingServiceRegistrationManagerImpl"
t:name="cleanerServiceRegistrationManager" t:service="cleanerService"
t:endpoint="cleanerServiceEndpoint" t:jobScheduler="jobScheduler"
t:serviceRegistrator="blackboardServiceRegistrator" />
</beans>

View File

@ -0,0 +1,19 @@
<?xml version="1.0" encoding="UTF-8"?>
<beans xmlns="http://www.springframework.org/schema/beans"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xmlns:p="http://www.springframework.org/schema/p"
xsi:schemaLocation="http://www.springframework.org/schema/beans http://www.springframework.org/schema/beans/spring-beans.xsd">
<!-- Inspector entry point group "cleaner": a single entry pointing to cleaner.do
     (the page handled by CleanerInspector), flagged as hidden by default -->
<bean id="cleanerInspectorGroup"
class="eu.dnetlib.enabling.inspector.StaticEntryPointDescriptorGroup"
p:name="cleaner">
<property name="descriptors">
<list>
<bean class="eu.dnetlib.enabling.inspector.StaticEntryPointDescriptor"
p:name="cleaner" p:relativeUrl="cleaner.do"
p:hiddenAsDefault="true"/>
</list>
</property>
</bean>
</beans>

View File

@ -0,0 +1,36 @@
$inspector/master(it={
<style type="text/css">
#results {
width: 100%;
}
#results td:first-child {
width: 2em;
}
#results td {
border: 1px solid #cecece;
}
</style>
<h2>Test cleaner rules</h2>
<form method="POST">
Cleaner rules:
<select name="rule">
$rules:{<option $if(it.selected)$selected$endif$>$it.value$</option>}$
</select><br /><br />
Dirty Record:<br />
<textarea name="dirtyText" cols="80" rows="10">$dirtyText$</textarea>
<br /><br />
<input type="submit" value="submit"/>
</form>
<br />
Cleaned Record:<br />
<textarea readonly="readonly" cols="80" rows="10">$cleanedText$</textarea>
})$

View File

@ -0,0 +1,67 @@
package eu.dnetlib.data.utility.cleaner;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotNull;
import static org.mockito.Mockito.verify;
import static org.mockito.Mockito.when;
import javax.xml.ws.wsaddressing.W3CEndpointReference;
import org.junit.Before;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.mockito.Mock;
import org.mockito.runners.MockitoJUnit44Runner;
import eu.dnetlib.data.utility.cleaner.rmi.CleanerException;
import eu.dnetlib.enabling.resultset.MappedResultSetFactory;
import eu.dnetlib.test.utils.EPRTestUtil;
/**
 * Unit test of {@link CleanerServiceImpl} with mocked collaborators.
 */
@RunWith(MockitoJUnit44Runner.class)
public class CleanerServiceImplTest {

    private static final String RULE_ID = "RULE_01";

    private final W3CEndpointReference epr_IN = EPRTestUtil.getTestEpr("http://1");
    private final W3CEndpointReference epr_OUT = EPRTestUtil.getTestEpr("http://2");

    @Mock
    private CleaningRuleFactory cleaningRuleFactory;
    @Mock
    private MappedResultSetFactory mappedResultSetFactory;
    @Mock
    private CleaningRule cleaningRule;

    /**
     * Class under test.
     */
    private CleanerServiceImpl service;

    /** Wires the service with mocks that return the test rule and the output epr. */
    @Before
    public void setUp() throws Exception {
        when(cleaningRuleFactory.obtainCleaningRule(RULE_ID)).thenReturn(cleaningRule);
        when(mappedResultSetFactory.createMappedResultSet(epr_IN, cleaningRule)).thenReturn(epr_OUT);

        service = new CleanerServiceImpl();
        service.setCleaningRuleFactory(cleaningRuleFactory);
        service.setMappedResultSetFactory(mappedResultSetFactory);
    }

    /** The service returns the epr produced by the factory and asks for the rule. */
    @Test
    public void testClean() throws CleanerException {
        final W3CEndpointReference cleaned = service.clean(epr_IN, RULE_ID);

        assertNotNull(cleaned);
        assertEquals(epr_OUT, cleaned);
        verify(cleaningRuleFactory).obtainCleaningRule(RULE_ID);
    }

    /** A null rule id is rejected. */
    @Test(expected = CleanerException.class)
    public void testClean_null_1() throws CleanerException {
        service.clean(epr_IN, null);
    }

    /** A null epr is rejected. */
    @Test(expected = CleanerException.class)
    public void testClean_null_2() throws CleanerException {
        service.clean(null, RULE_ID);
    }
}

View File

@ -0,0 +1,135 @@
package eu.dnetlib.data.utility.cleaner;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import java.io.StringReader;
import java.util.List;
import org.dom4j.Document;
import org.dom4j.Node;
import org.dom4j.io.SAXReader;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.mockito.runners.MockitoJUnit44Runner;
/**
 * Tests of {@link GroovyRule} on element and attribute xpaths.
 */
@RunWith(MockitoJUnit44Runner.class)
public class GroovyRuleTest {

    private static final String ELEMENT_XPATH = "/a/b";
    private static final String ATTRIBUTE_XPATH = "/a/b/@value";

    @Test
    public void testApplyXpathRule_simple_constant() throws Exception {
        final Document cleaned = execute(ruleFor("'YYYY'", ELEMENT_XPATH), "<a><b>XXXX</b></a>");

        assertEquals("YYYY", cleaned.valueOf(ELEMENT_XPATH));
    }

    @Test
    public void testApplyXpathRule_simple_regex() throws Exception {
        final Document cleaned = execute(ruleFor("(input =~ /X/).replaceAll('Y')", ELEMENT_XPATH), "<a><b>aXaXa</b></a>");

        assertEquals("aYaYa", cleaned.valueOf(ELEMENT_XPATH));
    }

    @Test
    public void testApplyXpathRule_simple_upper() throws Exception {
        final Document cleaned = execute(ruleFor("input.toUpperCase()", ELEMENT_XPATH), "<a><b>xyz</b></a>");

        assertEquals("XYZ", cleaned.valueOf(ELEMENT_XPATH));
    }

    @Test
    public void testApplyXpathRule_multi() throws Exception {
        final Document cleaned = execute(ruleFor("'Y'", ELEMENT_XPATH), "<a><b>X</b><b>X</b><b>X</b></a>");

        final List<?> nodes = cleaned.selectNodes(ELEMENT_XPATH);
        assertEquals(3, nodes.size());
        for (final Object node : nodes) {
            assertEquals("Y", ((Node) node).getText());
        }
    }

    @Test
    public void testApplyXpathRule_singleAttr() throws Exception {
        final Document cleaned = execute(ruleFor("'BBBB'", ATTRIBUTE_XPATH), "<a><b value='AAAA'>XXXX</b></a>");

        assertEquals("BBBB", cleaned.valueOf(ATTRIBUTE_XPATH));
        assertEquals("XXXX", cleaned.valueOf("/a/b"));
    }

    @Test
    public void testApplyXpathRule_multiAttr() throws Exception {
        final Document cleaned = execute(ruleFor("'B'", ATTRIBUTE_XPATH), "<a><b value='a' /><b value='b' /><b value='c' /></a>");

        final List<?> attributes = cleaned.selectNodes(ATTRIBUTE_XPATH);
        assertEquals(3, attributes.size());
        for (final Object attribute : attributes) {
            assertEquals("B", ((Node) attribute).getText());
        }
    }

    @Test
    public void testApplyXpathRule_complex() throws Exception {
        final Document cleaned = execute(ruleFor("'B'", ELEMENT_XPATH), "<a><b>X<c>C</c></b></a>");

        assertTrue(cleaned.valueOf(ELEMENT_XPATH).contains("B"));
        assertEquals("C", cleaned.valueOf("/a/b/c"));
    }

    /** Creates a rule for the given Groovy expression, bound to the given xpath. */
    private GroovyRule ruleFor(final String expression, final String xpath) {
        final GroovyRule rule = new GroovyRule(expression);
        rule.setXpath(xpath);
        return rule;
    }

    /** Parses the xml, applies the rule and returns the modified document. */
    private Document execute(final GroovyRule rule, final String xml) throws Exception {
        final Document doc = new SAXReader().read(new StringReader(xml));

        System.out.println("BEFORE:\n" + doc.asXML() + "\n");
        rule.applyXpathRule(doc);
        System.out.println("AFTER:\n" + doc.asXML() + "\n");
        System.out.println("-----------------------------\n");

        return doc;
    }
}

View File

@ -0,0 +1,128 @@
package eu.dnetlib.data.utility.cleaner;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertNull;
import static org.mockito.Matchers.anyString;
import static org.mockito.Mockito.times;
import static org.mockito.Mockito.verify;
import static org.mockito.Mockito.when;
import java.io.StringReader;
import java.util.List;
import org.dom4j.Document;
import org.dom4j.io.SAXReader;
import org.junit.Before;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.mockito.Mock;
import org.mockito.runners.MockitoJUnit44Runner;
import com.google.common.collect.Lists;
import com.google.common.collect.Sets;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
/**
 * Tests of {@link VocabularyRule} with a mocked lookup service that returns three synonyms of
 * the term AAAA.
 */
@RunWith(MockitoJUnit44Runner.class)
public class VocabularyRuleTest {

    private static final String VOCABULARY_NAME_1 = "TEST VOCABULARY 1";
    private static final String VOCABULARY_NAME_2 = "TEST VOCABULARY 2";
    private static final List<String> VOCABULARY = Lists.newArrayList("XXXX|-:-|AAAA", "YYYY|-:-|AAAA", "ZZZZ|-:-|AAAA");

    private static final String ELEMENT_XPATH = "/a/b";
    private static final String ATTRIBUTE_XPATH = "/a/b/@value";

    @Mock
    private ISLookUpService lookup;

    /**
     * Class Under Test
     */
    private VocabularyRule rule;

    @Before
    public void setUp() throws Exception {
        when(lookup.quickSearchProfile(anyString())).thenReturn(VOCABULARY);

        rule = new VocabularyRule(Sets.newHashSet(VOCABULARY_NAME_1, VOCABULARY_NAME_2), lookup);
    }

    /** One lookup per vocabulary, and one synonym per returned row. */
    @Test
    public void testSetup() throws Exception {
        rule.setXpath(ELEMENT_XPATH);
        execute("<a><b>XXXX</b></a>");

        verify(lookup, times(2)).quickSearchProfile(anyString());
        assertEquals(VOCABULARY.size(), rule.getVocabularyTerms().size());
    }

    @Test
    public void testApplyXpathRule() throws Exception {
        rule.setXpath(ELEMENT_XPATH);
        final Document cleaned = execute("<a><b>XXXX</b></a>");

        assertEquals("AAAA", cleaned.valueOf(ELEMENT_XPATH));
        assertNull(rule.verifyValue("AAAA"));
        assertNotNull(rule.verifyValue("XXXX"));
    }

    @Test
    public void testApplyXpathRule_2() throws Exception {
        rule.setXpath(ELEMENT_XPATH);
        final Document cleaned = execute("<a><b>XXXX</b></a>");

        assertEquals("AAAA", cleaned.valueOf(ELEMENT_XPATH));
        assertNull(rule.verifyValue("AAAA"));
        assertNotNull(rule.verifyValue("XXXX"));
    }

    @Test
    public void testApplyXpathRule_3() throws Exception {
        rule.setXpath(ELEMENT_XPATH);
        final Document cleaned = execute("<a><b>XXXX</b></a>");

        assertEquals("AAAA", cleaned.valueOf(ELEMENT_XPATH));
    }

    @Test
    public void testApplyXpathRule_attr() throws Exception {
        rule.setXpath(ATTRIBUTE_XPATH);
        final Document cleaned = execute("<a><b value='XXXX' /></a>");

        assertEquals("AAAA", cleaned.valueOf(ATTRIBUTE_XPATH));
        assertNull(rule.verifyValue("AAAA"));
        assertNotNull(rule.verifyValue("XXXX"));
    }

    @Test
    public void testApplyXpathRule_with_spaces() throws Exception {
        rule.setXpath(ELEMENT_XPATH);
        final Document cleaned = execute("<a><b> XXXX </b></a>");

        assertEquals("AAAA", cleaned.valueOf(ELEMENT_XPATH));
        assertNull(rule.verifyValue("AAAA"));
        assertNotNull(rule.verifyValue(" XXXX "));
    }

    @Test
    public void testApplyXpathRule_case() throws Exception {
        rule.setXpath(ELEMENT_XPATH);
        final Document cleaned = execute("<a><b>Xxxx</b></a>");

        assertEquals("AAAA", cleaned.valueOf(ELEMENT_XPATH));
        assertNull(rule.verifyValue("AAAA"));
        assertNotNull(rule.verifyValue("Xxxx"));
    }

    /** Parses the xml, applies the rule under test and returns the modified document. */
    private Document execute(final String xml) throws Exception {
        final Document doc = new SAXReader().read(new StringReader(xml));

        System.out.println("BEFORE:\n" + doc.asXML() + "\n");
        rule.applyXpathRule(doc);
        System.out.println("AFTER:\n" + doc.asXML() + "\n");
        System.out.println("-----------------------------\n");

        return doc;
    }
}

View File

@ -0,0 +1,72 @@
package eu.dnetlib.data.utility.cleaner;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;
import java.util.HashMap;
import java.util.Map;
import org.junit.Before;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.mockito.runners.MockitoJUnit44Runner;
import com.google.common.collect.Lists;
import eu.dnetlib.data.utility.cleaner.rmi.CleanerException;
/**
 * Tests of {@link CleaningRule} with a strict xpath rule that turns HELLO into CIAO and accepts
 * only CIAO.
 */
@RunWith(MockitoJUnit44Runner.class)
public class XMLCleaningRuleTest {

    private static final String INPUT_VALID = "<record>"
            + " <header />"
            + " <metadata>"
            + " <a>HELLO</a>"
            + " </metadata>"
            + "</record>";

    private static final String INPUT_INVALID = "<record>"
            + " <header />"
            + " <metadata>"
            + " <a>GOOD BYE</a>"
            + " </metadata>"
            + "</record>";

    /**
     * Class under test.
     */
    private CleaningRule xmlRule;

    private XPATHCleaningRule mockXpathRule = new XPATHCleaningRule() {

        @Override
        protected Map<String, String> verifyValue(final String value) throws CleanerException {
            final boolean accepted = value.equals("CIAO");
            if (accepted) {
                return null;
            }
            final Map<String, String> error = new HashMap<String, String>();
            error.put("term", value);
            return error;
        }

        @Override
        protected String calculateNewValue(final String oldValue) throws CleanerException {
            return oldValue.equals("HELLO") ? "CIAO" : oldValue;
        }
    };

    @Before
    public void setUp() throws Exception {
        mockXpathRule.setStrict(true);
        mockXpathRule.setXpath("//a");

        xmlRule = new CleaningRule();
        xmlRule.setXpathRules(Lists.newArrayList(mockXpathRule));
    }

    /** A record with an accepted value is rewritten and not marked as invalid. */
    @Test
    public void testEvaluate_valid() {
        final String cleaned = xmlRule.evaluate(INPUT_VALID);

        assertTrue(cleaned.contains("CIAO"));
        assertFalse(cleaned.contains("invalid"));
    }

    /** A record with a rejected value is marked as invalid. */
    @Test
    public void testEvaluate_invalid() {
        final String cleaned = xmlRule.evaluate(INPUT_INVALID);
        System.out.println(cleaned);

        assertFalse(cleaned.contains("CIAO"));
        assertTrue(cleaned.contains("invalid"));
    }
}

View File

@ -155,6 +155,13 @@
</dependency>
<dependency>
<groupId>org.codehaus.groovy</groupId>
<artifactId>groovy</artifactId>
<version>2.1.6</version>
</dependency>
<dependency>
<groupId>com.jcraft</groupId>
<artifactId>jsch</artifactId>