From 4502b4433761a59510bbd96c4b245f445d645ebe Mon Sep 17 00:00:00 2001 From: miconis Date: Tue, 6 Aug 2019 12:09:34 +0200 Subject: [PATCH] addition of the BlockUtils class for meta-blocking, implementation of a new local test with edge filtering example --- .../resources/eu/dnetlib/pace/config/translation_map.csv | 2 +- .../eu/dnetlib/pace/clustering/ClusteringFunctionTest.java | 2 +- .../java/eu/dnetlib/pace/distance/DistanceAlgoTest.java | 7 +++++++ 3 files changed, 9 insertions(+), 2 deletions(-) diff --git a/dnet-pace-core/src/main/resources/eu/dnetlib/pace/config/translation_map.csv b/dnet-pace-core/src/main/resources/eu/dnetlib/pace/config/translation_map.csv index bab68360d..4aad426f0 100644 --- a/dnet-pace-core/src/main/resources/eu/dnetlib/pace/config/translation_map.csv +++ b/dnet-pace-core/src/main/resources/eu/dnetlib/pace/config/translation_map.csv @@ -58,7 +58,7 @@ key::57;social;sociali;social;social;Sozial;sociaal;maatschappelijk;κοινων key::58;environmental;ambiente;medioambiental;ambiente;medioambiente;meioambiente;Umwelt;milieu;milieuwetenschap;milieukunde;περιβαλλοντικός;περιβαλλοντική;περιβαλλοντικό;περιβαλλοντικά;çevre;környezeti;okoliški;keskonna;; key::59;business;economia;economiche;economica;negocio;empresa;negócio;Unternehmen;bedrijf;bedrijfskunde;επιχείρηση;iş;üzleti;posel;ettevõte/äri; key::60;pharmaceuticals;pharmacy;farmacia;farmaceutica;farmacéutica;farmacia;farmacêutica;farmácia;Pharmazeutika;Arzneimittelkunde;farmaceutica;geneesmiddelen;apotheek;φαρμακευτικός;φαρμακευτική;φαρμακευτικό;φαρμακευτικά;φαρμακείο;ilaç;eczane;gyógyszerészeti;gyógyszertár;farmacevtika;lekarništvo;farmaatsia;farmatseutiline; -key::61;healthcare;salute;atenciónmédica;cuidadodelasalud;cuidadoscomasaúde;Gesundheitswesen;gezondheidszorg;ιατροφαρμακευτικήπερίθαλψη;sağlıkhizmeti;egészségügy;zdravstvo;tervishoid;tervishoiu; +key::61;healthcare;health services;salute;atenciónmédica;cuidadodelasalud;cuidadoscomasaúde;Gesundheitswesen;gezondheidszorg;ιατροφαρμακευτικήπερίθαλψη;sağlıkhizmeti;egészségügy;zdravstvo;tervishoid;tervishoiu; key::62;history;storia;historia;história;Geschichte;geschiedenis;geschiedkunde;ιστορία;tarih;történelem;zgodovina;ajalugu; key::63;materials;materiali;materia;materiales;materiais;materialen;υλικά;τεκμήρια;malzemeler;anyagok;materiali;materjalid;vahendid; key::64;economics;economia;economiche;economica;economía;economia;Wirtschaft;economie;οικονομικά;οικονομικέςεπιστήμες;ekonomi;közgazdaságtan;gospodarstvo;ekonomija;majanduslik;majandus; diff --git a/dnet-pace-core/src/test/java/eu/dnetlib/pace/clustering/ClusteringFunctionTest.java b/dnet-pace-core/src/test/java/eu/dnetlib/pace/clustering/ClusteringFunctionTest.java index 58f86d01d..265f3973f 100644 --- a/dnet-pace-core/src/test/java/eu/dnetlib/pace/clustering/ClusteringFunctionTest.java +++ b/dnet-pace-core/src/test/java/eu/dnetlib/pace/clustering/ClusteringFunctionTest.java @@ -46,7 +46,7 @@ public class ClusteringFunctionTest extends AbstractPaceTest { @Test public void testNgramPairs() { params.put("ngramLen", 3); - params.put("max", 3); + params.put("max", 1); final ClusteringFunction np = new NgramPairs(params); diff --git a/dnet-pace-core/src/test/java/eu/dnetlib/pace/distance/DistanceAlgoTest.java b/dnet-pace-core/src/test/java/eu/dnetlib/pace/distance/DistanceAlgoTest.java index 11ae4183a..1cce9a65b 100644 --- a/dnet-pace-core/src/test/java/eu/dnetlib/pace/distance/DistanceAlgoTest.java +++ b/dnet-pace-core/src/test/java/eu/dnetlib/pace/distance/DistanceAlgoTest.java @@ -1,5 +1,6 @@ package eu.dnetlib.pace.distance; +import eu.dnetlib.pace.clustering.NGramUtils; import eu.dnetlib.pace.distance.algo.JaroWinklerNormalizedName; import org.junit.Before; import org.junit.Test; @@ -25,6 +26,12 @@ public class DistanceAlgoTest extends AbstractPaceFunctions { params.put("weight", 1.0); } + @Test + public void testCleanForSorting() { + NGramUtils utils = new NGramUtils(); + System.out.println("utils = " + utils.cleanupForOrdering("University of Pisa")); + } + @Test public void testGetNumbers() { System.out.println("Numbers : " + getNumbers(TEST_STRING));