Master branch updates from beta September 2023 #337

Manually merged
claudio.atzori merged 1271 commits from beta into master 2023-09-06 11:31:09 +02:00
7 changed files with 180 additions and 15 deletions
Showing only changes of commit 0735f3a822 - Show all commits

View File

@ -74,7 +74,6 @@
<artifactId>commons-math3</artifactId>
</dependency>
</dependencies>
</project>

View File

@ -23,8 +23,11 @@ public class JaroWinklerNormalizedName extends SecondStringDistanceAlgo {
private static Map<String,String> cityMap = AbstractPaceFunctions.loadMapFromClasspath("/eu/dnetlib/pace/config/city_map.csv");
private Map<String, Number> params;
/**
 * Creates the comparator from a map of named numeric parameters.
 *
 * The map is handed to the superclass constructor together with a fresh
 * com.wcohen.ss.JaroWinkler instance, and a reference to the same map is kept in
 * the params field of this class.
 *
 * NOTE(review): elsewhere in this class params is dereferenced to look up the
 * optional "windowSize" entry (falling back to 4), so a null map would fail there.
 * Confirm that callers never pass null, and that constructors which do not receive
 * a map also initialise the field.
 *
 * @param params named numeric parameters; stored by reference, not copied
 */
public JaroWinklerNormalizedName(Map<String, Number> params){
super(params, new com.wcohen.ss.JaroWinkler());
this.params = params;
}
public JaroWinklerNormalizedName(double weight) {
@ -52,9 +55,8 @@ public class JaroWinklerNormalizedName extends SecondStringDistanceAlgo {
// ca = norm.split("\\|\\|\\|")[0].trim();
// cb = norm.split("\\|\\|\\|")[1].trim();
ca = normalizeCities2(ca, cityMap, 4);
cb = normalizeCities2(cb, cityMap, 4);
ca = normalizeCities2(ca, cityMap, params.getOrDefault("windowSize", 4).intValue());
cb = normalizeCities2(cb, cityMap, params.getOrDefault("windowSize", 4).intValue());
if (sameCity(ca,cb)){
if (sameKeywords(ca,cb)){

View File

@ -17,9 +17,6 @@ public class UndefinedNode implements Comparator {
final List<String> sa = ((FieldList) a).stringList();
final List<String> sb = ((FieldList) b).stringList();
System.out.println("sa = " + sa.size());
System.out.println("sb = " + sb.size());
return 0;
}
}

View File

@ -1,9 +1,21 @@
package eu.dnetlib.pace.tree.support;
import eu.dnetlib.pace.util.PaceException;
public enum AggType {
AVG,
SUM,
MAX,
MIN
MIN;
/**
 * Resolves an aggregation type from the name of one of this enum's constants.
 *
 * The lookup is delegated to {@code valueOf}, so the name must match a constant
 * exactly (case-sensitive, no surrounding whitespace).
 *
 * @param value the name of the constant to look up
 * @return the constant whose name equals {@code value}
 * @throws PaceException if {@code value} is null or does not name a constant of this enum;
 *         the original exception is attached as the cause
 */
public static AggType getEnum(String value) {
    try {
        return AggType.valueOf(value);
    }
    catch (IllegalArgumentException | NullPointerException e) {
        // valueOf reports an unknown name with IllegalArgumentException but a null name
        // with NullPointerException; both mean "no such aggregation type", so both are
        // translated into the same domain exception. The offending value is included in
        // the message so the failing configuration entry can be identified.
        throw new PaceException("Undefined aggregation type: " + value, e);
    }
}
}

View File

@ -76,7 +76,7 @@ public class BlockProcessor {
}
}
private MatchType navigateTree(final MapDocument doc1, final MapDocument doc2){
public MatchType navigateTree(final MapDocument doc1, final MapDocument doc2){
final Map<String, TreeNodeDef> decisionTree = dedupConf.getPace().getDecisionTree();

View File

@ -1,14 +1,15 @@
package eu.dnetlib.pace;
import java.io.IOException;
import java.io.StringWriter;
import org.apache.commons.io.IOUtils;
import eu.dnetlib.pace.config.Type;
import eu.dnetlib.pace.model.Field;
import eu.dnetlib.pace.model.FieldListImpl;
import eu.dnetlib.pace.model.FieldValueImpl;
import org.junit.Test;
import org.apache.commons.io.IOUtils;
import java.io.IOException;
import java.io.StringWriter;
import java.util.List;
import java.util.stream.Collectors;
public abstract class AbstractPaceTest {
@ -34,4 +35,14 @@ public abstract class AbstractPaceTest {
return new FieldValueImpl(Type.URL, "url", s);
}
/**
 * Builds a list field from plain strings.
 *
 * Each input string is wrapped in a FieldValueImpl of type Type.String carrying the
 * given field name, and all wrapped values are added to a new FieldListImpl.
 *
 * @param strings the raw values, in the order they should appear in the field
 * @param fieldName the name assigned to every wrapped value
 * @return a new FieldListImpl containing one value per input string
 */
protected Field createFieldList(List<String> strings, String fieldName){
    final List<FieldValueImpl> wrappedValues = strings
            .stream()
            .map(raw -> new FieldValueImpl(Type.String, fieldName, raw))
            .collect(Collectors.toList());

    final FieldListImpl fieldList = new FieldListImpl();
    fieldList.addAll(wrappedValues);
    return fieldList;
}
}

View File

@ -0,0 +1,144 @@
package eu.dnetlib.pace.tree;
import eu.dnetlib.pace.AbstractPaceTest;
import eu.dnetlib.pace.config.Type;
import eu.dnetlib.pace.model.Field;
import eu.dnetlib.pace.model.FieldListImpl;
import eu.dnetlib.pace.model.FieldValueImpl;
import org.junit.Before;
import org.junit.Test;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Map;
import static junit.framework.Assert.assertEquals;
import static junit.framework.Assert.assertTrue;
/**
 * Tests for the comparators used inside the decision-tree nodes.
 *
 * Every test builds a comparator, feeds it hand-made fields, prints the inputs and the
 * scores to standard output, and checks the scores against fixed expected values.
 * Assertions follow the JUnit convention of passing the expected value first, so that a
 * failure message reports "expected" and "actual" the right way round.
 */
public class ComparatorTest extends AbstractPaceTest {

    /** Parameters shared by all comparators under test; rebuilt before every test. */
    private Map<String, Number> params;

    /** Creates a fresh parameter map holding the settings the comparators read. */
    @Before
    public void setup() {
        params = new HashMap<>();
        // all parameters needed by the comparators under test go here
        params.put("minCoauthors", 5);
        params.put("maxCoauthors", 200);
    }

    /**
     * With minCoauthors = 5, comparing a 7-name list against a 1-name list is expected
     * to score -1.0, while comparing the 7-name list against itself is expected to score 7.0.
     */
    @Test
    public void testCoauthorsMatch() {

        final CoauthorsMatch coauthorsMatch = new CoauthorsMatch(params);

        Field a = createFieldList(Arrays.asList("la bruzzo, sandro", "atzori, claudio", "artini, michele", "de bonis, michele", "bardi, alessia", "dell'amico, andrea", "baglioni, miriam"), "coauthors");
        Field b = createFieldList(Arrays.asList("la bruzzo, sandro"), "coauthors");

        double result1 = coauthorsMatch.compare(a, b);
        double result2 = coauthorsMatch.compare(a, a);

        System.out.println("a = " + a);
        System.out.println("b = " + b);

        System.out.println("a vs b = " + result1);
        System.out.println("a vs a = " + result2);

        assertEquals(-1.0, result1);
        assertEquals(7.0, result2);
    }

    /**
     * Identical DOIs are expected to score 1.0, different DOIs 0.0, and a comparison
     * against an empty value -1.0.
     */
    @Test
    public void testExactMatch() {

        final ExactMatch exactMatch = new ExactMatch(params);

        Field a = new FieldValueImpl(Type.String, "doi", "10.1000/0000000000");
        Field b = new FieldValueImpl(Type.String, "doi", "10.1033/0000000000");
        Field c = new FieldValueImpl(Type.String, "doi", "");

        double result1 = exactMatch.compare(a,a);
        double result2 = exactMatch.compare(a,b);
        double result3 = exactMatch.compare(a,c);

        System.out.println("a = " + a);
        System.out.println("b = " + b);
        System.out.println("c = " + c);

        System.out.println("a vs a = " + result1);
        System.out.println("a vs b = " + result2);
        System.out.println("a vs c = " + result3);

        assertEquals(1.0, result1);
        assertEquals(0.0, result2);
        assertEquals(-1.0, result3);
    }

    /**
     * An abbreviated first name ("s.") is expected to score 1.0 against both "sandro"
     * and "stefano", while the two full names are expected to score below 0.7.
     */
    @Test
    public void testSimilarMatch() {

        final SimilarMatch similarMatch = new SimilarMatch(params);

        Field a = new FieldValueImpl(Type.String, "firstname", "sandro");
        Field b = new FieldValueImpl(Type.String, "firstname", "s.");
        Field c = new FieldValueImpl(Type.String, "firstname", "stefano");

        double result1 = similarMatch.compare(a,b);
        double result2 = similarMatch.compare(a,c);
        double result3 = similarMatch.compare(b,c);

        System.out.println("a = " + a);
        System.out.println("b = " + b);
        System.out.println("c = " + c);

        System.out.println("a vs b = " + result1);
        System.out.println("a vs c = " + result2);
        System.out.println("b vs c = " + result3);

        assertEquals(1.0, result1);
        assertEquals(1.0, result3);
        assertTrue(result2<0.7);
    }

    /**
     * Topic vectors are given as lists of decimal strings; the expected scores are 1.0
     * for a vector against itself, 0.5 for a vs c and 0.0 for b vs c.
     */
    @Test
    public void testTopicsMatch() {

        final TopicsMatch topicsMatch = new TopicsMatch(params);

        Field a = createFieldList(Arrays.asList("0.0", "1.0", "0.0"), "topics");
        Field b = createFieldList(Arrays.asList("0.0", "0.0", "1.0"), "topics");
        Field c = createFieldList(Arrays.asList("0.5", "0.5", "0.0"), "topics");

        double result1 = topicsMatch.compare(a,a);
        double result2 = topicsMatch.compare(a,c);
        double result3 = topicsMatch.compare(b,c);

        System.out.println("a = " + a);
        System.out.println("b = " + b);
        System.out.println("c = " + c);

        System.out.println("a vs a = " + result1);
        System.out.println("a vs c = " + result2);
        System.out.println("b vs c = " + result3);

        assertEquals(1.0, result1);
        assertEquals(0.5, result2);
        assertEquals(0.0, result3);
    }

    /** Comparing two empty field lists with UndefinedNode is expected to score 0.0. */
    @Test
    public void testUndefinedNode() {

        final UndefinedNode undefinedNode = new UndefinedNode();

        double result = undefinedNode.compare(new FieldListImpl(),new FieldListImpl());

        assertEquals(0.0, result);
    }

}