2019-08-09 15:41:49 +02:00
package eu.dnetlib.pace.comparators ;
2018-10-02 10:37:54 +02:00
2019-08-06 12:09:34 +02:00
import eu.dnetlib.pace.clustering.NGramUtils ;
2019-08-09 15:41:49 +02:00
import eu.dnetlib.pace.tree.JaroWinklerNormalizedName ;
2019-10-08 14:53:52 +02:00
import eu.dnetlib.pace.config.DedupConfig ;
2018-10-02 10:37:54 +02:00
import org.junit.Before ;
import org.junit.Test ;
import eu.dnetlib.pace.common.AbstractPaceFunctions ;
2018-11-05 17:22:59 +01:00
import java.util.HashMap ;
import java.util.Map ;
2018-11-16 16:11:03 +01:00
import static junit.framework.Assert.assertEquals ;
2019-03-21 14:27:27 +01:00
import static junit.framework.Assert.assertTrue ;
2018-11-16 16:11:03 +01:00
2019-11-07 12:47:12 +01:00
public class ComparatorTest extends AbstractPaceFunctions {
2018-10-02 10:37:54 +02:00
2018-11-05 17:22:59 +01:00
private Map < String , Number > params ;
2019-10-08 14:53:52 +02:00
private DedupConfig conf ;
2018-10-02 10:37:54 +02:00
@Before
public void setup ( ) {
2018-11-05 17:22:59 +01:00
params = new HashMap < > ( ) ;
params . put ( " weight " , 1 . 0 ) ;
2019-11-07 12:47:12 +01:00
conf = DedupConfig . load ( readFromClasspath ( " /eu/dnetlib/pace/config/organization.current.conf " , ComparatorTest . class ) ) ;
2018-10-02 10:37:54 +02:00
}
2019-08-06 12:09:34 +02:00
@Test
public void testCleanForSorting ( ) {
NGramUtils utils = new NGramUtils ( ) ;
System . out . println ( " utils = " + utils . cleanupForOrdering ( " University of Pisa " ) ) ;
}
2018-11-05 17:22:59 +01:00
@Test
public void testJaroWinklerNormalizedName ( ) {
final JaroWinklerNormalizedName jaroWinklerNormalizedName = new JaroWinklerNormalizedName ( params ) ;
2019-10-08 14:53:52 +02:00
double result = jaroWinklerNormalizedName . distance ( " Free University of Bozen-Bolzano " , " University of the Free State " , conf ) ;
2018-11-16 16:11:03 +01:00
2019-06-18 14:05:31 +02:00
System . out . println ( " result = " + result ) ;
2019-07-19 17:10:29 +02:00
assertEquals ( 0 . 0 , result ) ;
2018-11-16 16:11:03 +01:00
}
@Test
public void testJaroWinklerNormalizedName2 ( ) {
final JaroWinklerNormalizedName jaroWinklerNormalizedName = new JaroWinklerNormalizedName ( params ) ;
2019-10-08 14:53:52 +02:00
double result = jaroWinklerNormalizedName . distance ( " University of New York " , " Università di New York " , conf ) ;
2018-11-05 17:22:59 +01:00
2019-09-25 10:15:13 +02:00
assertEquals ( 1 . 0 , result ) ;
2018-11-05 17:22:59 +01:00
}
2019-03-21 14:27:27 +01:00
@Test
public void testJaroWinklerNormalizedName3 ( ) {
final JaroWinklerNormalizedName jaroWinklerNormalizedName = new JaroWinklerNormalizedName ( params ) ;
2019-10-08 14:53:52 +02:00
double result = jaroWinklerNormalizedName . distance ( " Biblioteca dell'Universita di Bologna " , " Università di Bologna " , conf ) ;
2019-03-21 14:27:27 +01:00
System . out . println ( " result = " + result ) ;
2019-09-25 10:15:13 +02:00
assertEquals ( 0 . 0 , result ) ;
2019-03-21 14:27:27 +01:00
}
@Test
public void testJaroWinklerNormalizedName4 ( ) {
final JaroWinklerNormalizedName jaroWinklerNormalizedName = new JaroWinklerNormalizedName ( params ) ;
2019-10-08 14:53:52 +02:00
double result = jaroWinklerNormalizedName . distance ( " Universita degli studi di Pisa " , " Universita di Pisa " , conf ) ;
2019-03-21 14:27:27 +01:00
System . out . println ( " result = " + result ) ;
2019-09-25 10:15:13 +02:00
assertEquals ( 1 . 0 , result ) ;
2019-03-21 14:27:27 +01:00
}
@Test
public void testJaroWinklerNormalizedName5 ( ) {
final JaroWinklerNormalizedName jaroWinklerNormalizedName = new JaroWinklerNormalizedName ( params ) ;
2019-10-08 14:53:52 +02:00
double result = jaroWinklerNormalizedName . distance ( " RESEARCH PROMOTION FOUNDATION " , " IDRYMA PROOTHISIS EREVNAS " , conf ) ;
2019-03-21 14:27:27 +01:00
System . out . println ( " result = " + result ) ;
2019-09-25 10:15:13 +02:00
assertEquals ( 1 . 0 , result ) ;
2019-03-21 14:27:27 +01:00
}
@Test
public void testJaroWinklerNormalizedName6 ( ) {
final JaroWinklerNormalizedName jaroWinklerNormalizedName = new JaroWinklerNormalizedName ( params ) ;
2019-10-08 14:53:52 +02:00
double result = jaroWinklerNormalizedName . distance ( " Fonds zur Förderung der wissenschaftlichen Forschung (Austrian Science Fund) " , " Fonds zur Förderung der wissenschaftlichen Forschung " , conf ) ;
2019-03-21 14:27:27 +01:00
System . out . println ( " result = " + result ) ;
2019-09-25 10:15:13 +02:00
assertTrue ( result > 0 . 9 ) ;
2019-03-21 14:27:27 +01:00
}
2018-12-20 09:54:41 +01:00
2019-07-08 09:44:02 +02:00
@Test
public void testJaroWinklerNormalizedName7 ( ) {
final JaroWinklerNormalizedName jaroWinklerNormalizedName = new JaroWinklerNormalizedName ( params ) ;
2019-10-08 14:53:52 +02:00
double result = jaroWinklerNormalizedName . distance ( " Polytechnic University of Turin " , " POLITECNICO DI TORINO " , conf ) ;
2019-07-08 09:44:02 +02:00
System . out . println ( " result = " + result ) ;
2019-09-25 10:15:13 +02:00
assertTrue ( result > 0 . 9 ) ;
2019-07-08 09:44:02 +02:00
}
2019-07-19 17:10:29 +02:00
@Test
public void testJaroWinklerNormalizedName8 ( ) {
final JaroWinklerNormalizedName jaroWinklerNormalizedName = new JaroWinklerNormalizedName ( params ) ;
2019-10-08 14:53:52 +02:00
double result = jaroWinklerNormalizedName . distance ( " Politechniki Warszawskiej (Warsaw University of Technology) " , " Warsaw University of Technology " , conf ) ;
2019-07-19 17:10:29 +02:00
System . out . println ( " result = " + result ) ;
}
2019-08-06 17:06:05 +02:00
@Test
public void testJaroWinklerNormalizedName9 ( ) {
final JaroWinklerNormalizedName jaroWinklerNormalizedName = new JaroWinklerNormalizedName ( params ) ;
2019-10-08 14:53:52 +02:00
double result = jaroWinklerNormalizedName . distance ( " Istanbul Commerce University " , " İstanbul Ticarət Universiteti " , conf ) ;
2019-08-06 17:06:05 +02:00
System . out . println ( " result = " + result ) ;
}
@Test
public void testJaroWinklerNormalizedName10 ( ) {
final JaroWinklerNormalizedName jaroWinklerNormalizedName = new JaroWinklerNormalizedName ( params ) ;
2019-10-08 14:53:52 +02:00
double result = jaroWinklerNormalizedName . distance ( " Firenze University Press " , " University of Florence " , conf ) ;
2019-08-06 17:06:05 +02:00
System . out . println ( " result = " + result ) ;
}
2018-10-02 10:37:54 +02:00
}