forked from D-Net/dnet-hadoop
implementation of the test classes and minor changes
This commit is contained in:
parent
7a8d28991f
commit
0735f3a822
|
@ -74,7 +74,6 @@
|
|||
<artifactId>commons-math3</artifactId>
|
||||
</dependency>
|
||||
|
||||
|
||||
</dependencies>
|
||||
|
||||
</project>
|
||||
|
|
|
@ -23,8 +23,11 @@ public class JaroWinklerNormalizedName extends SecondStringDistanceAlgo {
|
|||
|
||||
private static Map<String,String> cityMap = AbstractPaceFunctions.loadMapFromClasspath("/eu/dnetlib/pace/config/city_map.csv");
|
||||
|
||||
/**
 * Comparator parameters, looked up by name when comparing (the "windowSize" entry is
 * read with a default of 4).
 *
 * Initialised to an empty map so that instances created through a constructor that does
 * not receive a parameter map still resolve lookups to their defaults instead of failing
 * with a NullPointerException on a null field.
 */
private Map<String, Number> params = java.util.Collections.emptyMap();

/**
 * Creates the comparator from a parameter map.
 *
 * @param params parameters forwarded to the superclass constructor and kept on this
 *               instance for later lookups
 */
public JaroWinklerNormalizedName(Map<String, Number> params){
    super(params, new com.wcohen.ss.JaroWinkler());
    // Replaces the empty default assigned by the field initializer.
    this.params = params;
}
|
||||
|
||||
public JaroWinklerNormalizedName(double weight) {
|
||||
|
@ -52,9 +55,8 @@ public class JaroWinklerNormalizedName extends SecondStringDistanceAlgo {
|
|||
// ca = norm.split("\\|\\|\\|")[0].trim();
|
||||
// cb = norm.split("\\|\\|\\|")[1].trim();
|
||||
|
||||
ca = normalizeCities2(ca, cityMap, 4);
|
||||
cb = normalizeCities2(cb, cityMap, 4);
|
||||
|
||||
ca = normalizeCities2(ca, cityMap, params.getOrDefault("windowSize", 4).intValue());
|
||||
cb = normalizeCities2(cb, cityMap, params.getOrDefault("windowSize", 4).intValue());
|
||||
|
||||
if (sameCity(ca,cb)){
|
||||
if (sameKeywords(ca,cb)){
|
||||
|
|
|
@ -17,9 +17,6 @@ public class UndefinedNode implements Comparator {
|
|||
final List<String> sa = ((FieldList) a).stringList();
|
||||
final List<String> sb = ((FieldList) b).stringList();
|
||||
|
||||
System.out.println("sa = " + sa.size());
|
||||
System.out.println("sb = " + sb.size());
|
||||
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,9 +1,21 @@
|
|||
package eu.dnetlib.pace.tree.support;
|
||||
|
||||
import eu.dnetlib.pace.util.PaceException;
|
||||
|
||||
public enum AggType {
|
||||
|
||||
AVG,
|
||||
SUM,
|
||||
MAX,
|
||||
MIN
|
||||
MIN;
|
||||
|
||||
public static AggType getEnum(String value) {
|
||||
|
||||
try {
|
||||
return AggType.valueOf(value);
|
||||
}
|
||||
catch (IllegalArgumentException e) {
|
||||
throw new PaceException("Undefined aggregation type", e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -76,7 +76,7 @@ public class BlockProcessor {
|
|||
}
|
||||
}
|
||||
|
||||
private MatchType navigateTree(final MapDocument doc1, final MapDocument doc2){
|
||||
public MatchType navigateTree(final MapDocument doc1, final MapDocument doc2){
|
||||
|
||||
final Map<String, TreeNodeDef> decisionTree = dedupConf.getPace().getDecisionTree();
|
||||
|
||||
|
|
|
@ -1,14 +1,15 @@
|
|||
package eu.dnetlib.pace;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.StringWriter;
|
||||
|
||||
import org.apache.commons.io.IOUtils;
|
||||
|
||||
import eu.dnetlib.pace.config.Type;
|
||||
import eu.dnetlib.pace.model.Field;
|
||||
import eu.dnetlib.pace.model.FieldListImpl;
|
||||
import eu.dnetlib.pace.model.FieldValueImpl;
|
||||
import org.junit.Test;
|
||||
import org.apache.commons.io.IOUtils;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.StringWriter;
|
||||
import java.util.List;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
public abstract class AbstractPaceTest {
|
||||
|
||||
|
@ -34,4 +35,14 @@ public abstract class AbstractPaceTest {
|
|||
return new FieldValueImpl(Type.URL, "url", s);
|
||||
}
|
||||
|
||||
protected Field createFieldList(List<String> strings, String fieldName){
|
||||
|
||||
List<FieldValueImpl> fieldValueStream = strings.stream().map(s -> new FieldValueImpl(Type.String, fieldName, s)).collect(Collectors.toList());
|
||||
|
||||
FieldListImpl a = new FieldListImpl();
|
||||
a.addAll(fieldValueStream);
|
||||
|
||||
return a;
|
||||
|
||||
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1,144 @@
|
|||
package eu.dnetlib.pace.tree;
|
||||
|
||||
import eu.dnetlib.pace.AbstractPaceTest;
|
||||
import eu.dnetlib.pace.config.Type;
|
||||
import eu.dnetlib.pace.model.Field;
|
||||
import eu.dnetlib.pace.model.FieldListImpl;
|
||||
import eu.dnetlib.pace.model.FieldValueImpl;
|
||||
import org.junit.Before;
|
||||
import org.junit.Test;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
import static junit.framework.Assert.assertEquals;
|
||||
import static junit.framework.Assert.assertTrue;
|
||||
|
||||
//test class for comparators (to be used into the tree nodes)
|
||||
public class ComparatorTest extends AbstractPaceTest {
|
||||
|
||||
private Map<String, Number> params;
|
||||
|
||||
@Before
|
||||
public void setup() {
|
||||
params = new HashMap<>();
|
||||
//to put all the needed parameters
|
||||
params.put("minCoauthors", 5);
|
||||
params.put("maxCoauthors", 200);
|
||||
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testCoauthorsMatch() {
|
||||
|
||||
final CoauthorsMatch coauthorsMatch = new CoauthorsMatch(params);
|
||||
|
||||
Field a = createFieldList(Arrays.asList("la bruzzo, sandro", "atzori, claudio", "artini, michele", "de bonis, michele", "bardi, alessia", "dell'amico, andrea", "baglioni, miriam"), "coauthors");
|
||||
Field b = createFieldList(Arrays.asList("la bruzzo, sandro"), "coauthors");
|
||||
|
||||
double result1 = coauthorsMatch.compare(a, b);
|
||||
double result2 = coauthorsMatch.compare(a, a);
|
||||
|
||||
System.out.println("a = " + a);
|
||||
System.out.println("b = " + b);
|
||||
|
||||
System.out.println("a vs b = " + result1);
|
||||
System.out.println("a vs a = " + result2);
|
||||
|
||||
assertEquals(result1, -1.0);
|
||||
assertEquals(result2, 7.0);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testExactMatch() {
|
||||
|
||||
final ExactMatch exactMatch = new ExactMatch(params);
|
||||
|
||||
Field a = new FieldValueImpl(Type.String, "doi", "10.1000/0000000000");
|
||||
Field b = new FieldValueImpl(Type.String, "doi", "10.1033/0000000000");
|
||||
Field c = new FieldValueImpl(Type.String, "doi", "");
|
||||
|
||||
double result1 = exactMatch.compare(a,a);
|
||||
double result2 = exactMatch.compare(a,b);
|
||||
double result3 = exactMatch.compare(a,c);
|
||||
|
||||
System.out.println("a = " + a);
|
||||
System.out.println("b = " + b);
|
||||
System.out.println("c = " + c);
|
||||
|
||||
System.out.println("a vs a = " + result1);
|
||||
System.out.println("a vs b = " + result2);
|
||||
System.out.println("a vs c = " + result3);
|
||||
|
||||
assertEquals(result1, 1.0);
|
||||
assertEquals(result2, 0.0);
|
||||
assertEquals(result3, -1.0);
|
||||
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testSimilarMatch() {
|
||||
|
||||
final SimilarMatch similarMatch = new SimilarMatch(params);
|
||||
|
||||
Field a = new FieldValueImpl(Type.String, "firstname", "sandro");
|
||||
Field b = new FieldValueImpl(Type.String, "firstname", "s.");
|
||||
Field c = new FieldValueImpl(Type.String, "firstname", "stefano");
|
||||
|
||||
double result1 = similarMatch.compare(a,b);
|
||||
double result2 = similarMatch.compare(a,c);
|
||||
double result3 = similarMatch.compare(b,c);
|
||||
|
||||
System.out.println("a = " + a);
|
||||
System.out.println("b = " + b);
|
||||
System.out.println("c = " + c);
|
||||
|
||||
System.out.println("a vs b = " + result1);
|
||||
System.out.println("a vs c = " + result2);
|
||||
System.out.println("b vs c = " + result3);
|
||||
|
||||
assertEquals(result1, 1.0);
|
||||
assertEquals(result3, 1.0);
|
||||
assertTrue(result2<0.7);
|
||||
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testTopicsMatch() {
|
||||
|
||||
final TopicsMatch topicsMatch = new TopicsMatch(params);
|
||||
|
||||
Field a = createFieldList(Arrays.asList("0.0", "1.0", "0.0"), "topics");
|
||||
Field b = createFieldList(Arrays.asList("0.0", "0.0", "1.0"), "topics");
|
||||
Field c = createFieldList(Arrays.asList("0.5", "0.5", "0.0"), "topics");
|
||||
|
||||
double result1 = topicsMatch.compare(a,a);
|
||||
double result2 = topicsMatch.compare(a,c);
|
||||
double result3 = topicsMatch.compare(b,c);
|
||||
|
||||
System.out.println("a = " + a);
|
||||
System.out.println("b = " + b);
|
||||
System.out.println("c = " + c);
|
||||
|
||||
System.out.println("a vs a = " + result1);
|
||||
System.out.println("a vs c = " + result2);
|
||||
System.out.println("b vs c = " + result3);
|
||||
|
||||
assertEquals(result1, 1.0);
|
||||
assertEquals(result2, 0.5);
|
||||
assertEquals(result3, 0.0);
|
||||
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testUndefinedNode() {
|
||||
|
||||
final UndefinedNode undefinedNode = new UndefinedNode();
|
||||
double result = undefinedNode.compare(new FieldListImpl(),new FieldListImpl());
|
||||
|
||||
assertEquals(result, 0.0);
|
||||
}
|
||||
|
||||
|
||||
}
|
Loading…
Reference in New Issue