2019-08-09 15:41:49 +02:00
package eu.dnetlib.pace.comparators ;
2018-10-02 10:37:54 +02:00
2019-08-06 12:09:34 +02:00
import eu.dnetlib.pace.clustering.NGramUtils ;
2019-11-20 10:45:00 +01:00
import eu.dnetlib.pace.tree.CityMatch ;
2019-11-21 09:37:56 +01:00
import eu.dnetlib.pace.tree.ContainsMatch ;
2019-08-09 15:41:49 +02:00
import eu.dnetlib.pace.tree.JaroWinklerNormalizedName ;
2019-10-08 14:53:52 +02:00
import eu.dnetlib.pace.config.DedupConfig ;
2019-11-21 09:37:56 +01:00
import eu.dnetlib.pace.tree.KeywordMatch ;
2018-10-02 10:37:54 +02:00
import org.junit.Before ;
2019-11-21 09:37:56 +01:00
import org.junit.Ignore ;
2018-10-02 10:37:54 +02:00
import org.junit.Test ;
import eu.dnetlib.pace.common.AbstractPaceFunctions ;
2018-11-05 17:22:59 +01:00
import java.util.HashMap ;
import java.util.Map ;
2018-11-16 16:11:03 +01:00
import static junit.framework.Assert.assertEquals ;
2019-03-21 14:27:27 +01:00
import static junit.framework.Assert.assertTrue ;
2018-11-16 16:11:03 +01:00
2019-11-07 12:47:12 +01:00
public class ComparatorTest extends AbstractPaceFunctions {
2018-10-02 10:37:54 +02:00
2019-11-21 09:37:56 +01:00
private Map < String , String > params ;
2019-10-08 14:53:52 +02:00
private DedupConfig conf ;
2018-10-02 10:37:54 +02:00
@Before
public void setup ( ) {
2018-11-05 17:22:59 +01:00
params = new HashMap < > ( ) ;
2019-11-21 09:37:56 +01:00
params . put ( " weight " , " 1.0 " ) ;
2019-11-07 12:47:12 +01:00
conf = DedupConfig . load ( readFromClasspath ( " /eu/dnetlib/pace/config/organization.current.conf " , ComparatorTest . class ) ) ;
2018-10-02 10:37:54 +02:00
}
2019-08-06 12:09:34 +02:00
@Test
public void testCleanForSorting ( ) {
NGramUtils utils = new NGramUtils ( ) ;
System . out . println ( " utils = " + utils . cleanupForOrdering ( " University of Pisa " ) ) ;
}
2018-11-05 17:22:59 +01:00
@Test
2019-11-21 09:37:56 +01:00
public void cityMatchTest ( ) {
final CityMatch cityMatch = new CityMatch ( params ) ;
2019-07-08 09:44:02 +02:00
2019-11-21 09:37:56 +01:00
//both names with no cities
assertEquals ( 1 . 0 , cityMatch . distance ( " Università " , " Centro di ricerca " , conf ) ) ;
2019-07-08 09:44:02 +02:00
2019-11-21 09:37:56 +01:00
//one of the two names with no cities
assertEquals ( - 1 . 0 , cityMatch . distance ( " Università di Bologna " , " Centro di ricerca " , conf ) ) ;
2019-07-08 09:44:02 +02:00
2019-11-21 09:37:56 +01:00
//both names with cities (same)
assertEquals ( 1 . 0 , cityMatch . distance ( " Universita di Bologna " , " Biblioteca di Bologna " , conf ) ) ;
2019-07-19 17:10:29 +02:00
2019-11-21 09:37:56 +01:00
//both names with cities (different)
assertEquals ( 0 . 0 , cityMatch . distance ( " Universita di Bologna " , " Universita di Torino " , conf ) ) ;
2019-07-19 17:10:29 +02:00
2019-11-21 09:37:56 +01:00
//particular cases
assertEquals ( 1 . 0 , cityMatch . distance ( " Free University of Bozen-Bolzano " , " Università di Bolzano " , conf ) ) ;
assertEquals ( 1 . 0 , cityMatch . distance ( " Politechniki Warszawskiej (Warsaw University of Technology) " , " Warsaw University of Technology " , conf ) ) ;
2019-07-19 17:10:29 +02:00
}
2019-11-21 09:37:56 +01:00
// @Test
// public void testJaroWinklerNormalizedName6() {
//
// final JaroWinklerNormalizedName jaroWinklerNormalizedName = new JaroWinklerNormalizedName(params);
// double result = jaroWinklerNormalizedName.distance("Fonds zur Förderung der wissenschaftlichen Forschung (Austrian Science Fund)", "Fonds zur Förderung der wissenschaftlichen Forschung", conf);
//
// System.out.println("result = " + result);
// assertTrue(result > 0.9);
//
// }
// @Test
// public void testJaroWinklerNormalizedName10(){
//
// final JaroWinklerNormalizedName jaroWinklerNormalizedName = new JaroWinklerNormalizedName(params);
//
// double result = jaroWinklerNormalizedName.distance("Firenze University Press", "University of Florence", conf);
//
// System.out.println("result = " + result);
// }
2019-08-06 17:06:05 +02:00
@Test
2019-11-21 09:37:56 +01:00
public void keywordMatchTest ( ) {
params . put ( " threshold " , " 0.4 " ) ;
2019-08-06 17:06:05 +02:00
2019-11-21 09:37:56 +01:00
final KeywordMatch keywordMatch = new KeywordMatch ( params ) ;
2019-08-06 17:06:05 +02:00
2019-11-21 09:37:56 +01:00
assertEquals ( 1 . 0 , keywordMatch . distance ( " Biblioteca dell'Universita di Bologna " , " Università di Bologna " , conf ) ) ;
assertEquals ( 1 . 0 , keywordMatch . distance ( " Universita degli studi di Pisa " , " Universita di Pisa " , conf ) ) ;
assertEquals ( 1 . 0 , keywordMatch . distance ( " Polytechnic University of Turin " , " POLITECNICO DI TORINO " , conf ) ) ;
assertEquals ( 1 . 0 , keywordMatch . distance ( " Istanbul Commerce University " , " İstanbul Ticarət Universiteti " , conf ) ) ;
2019-08-06 17:06:05 +02:00
}
2019-11-20 10:45:00 +01:00
@Test
2019-11-21 09:37:56 +01:00
public void containsMatchTest ( ) {
2019-11-20 10:45:00 +01:00
2019-11-21 09:37:56 +01:00
params . put ( " string " , " openorgs " ) ;
params . put ( " bool " , " XOR " ) ;
params . put ( " caseSensitive " , " false " ) ;
2019-11-20 10:45:00 +01:00
2019-11-21 09:37:56 +01:00
final ContainsMatch containsMatch = new ContainsMatch ( params ) ;
2019-11-20 10:45:00 +01:00
2019-11-21 09:37:56 +01:00
assertEquals ( 0 . 0 , containsMatch . distance ( " openorgs " , " openorgs " , conf ) ) ;
2019-11-20 10:45:00 +01:00
}
2018-10-02 10:37:54 +02:00
}