dnet-dedup/dnet-pace-core/src/test/java/eu/dnetlib/pace/config/ConfigTest.java

166 lines
4.8 KiB
Java

package eu.dnetlib.pace.config;
import eu.dnetlib.pace.AbstractPaceTest;
import eu.dnetlib.pace.clustering.BlacklistAwareClusteringCombiner;
import eu.dnetlib.pace.clustering.ClusteringClass;
import eu.dnetlib.pace.clustering.ClusteringCombiner;
import eu.dnetlib.pace.model.Field;
import eu.dnetlib.pace.model.FieldList;
import eu.dnetlib.pace.model.MapDocument;
import eu.dnetlib.pace.tree.JsonListMatch;
import eu.dnetlib.pace.tree.support.AggType;
import eu.dnetlib.pace.tree.support.FieldConf;
import eu.dnetlib.pace.tree.support.TreeNodeDef;
import eu.dnetlib.pace.tree.support.TreeNodeStats;
import eu.dnetlib.pace.util.MapDocumentUtil;
import org.junit.jupiter.api.*;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.junit.jupiter.api.Assertions.assertTrue;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
/**
 * Tests that load {@link DedupConfig} instances from JSON classpath resources and
 * exercise the utilities that consume them: translation maps, JSONPath-based document
 * mapping, clustering-key combination and decision-tree evaluation.
 *
 * <p>Resources are read through {@code readFromClasspath}, which is not defined in this
 * class (presumably inherited from {@link AbstractPaceTest}).
 */
public class ConfigTest extends AbstractPaceTest {

    /**
     * JSONPath parameters prepared once for the whole class. Within this class they are
     * only referenced by the disabled {@code JsonListMatch} snippet in
     * {@link #asMapDocumentTest()}.
     */
    private static Map<String, String> params;

    /** Populates {@link #params} before any test runs. */
    @BeforeAll
    public static void setup() {
        params = new HashMap<>();
        params.put("jpath_value", "$.value");
        params.put("jpath_classid", "$.qualifier.classid");
    }

    /**
     * Checks that a configuration re-loaded from its own {@code toString()} output
     * renders to the same string as the configuration it was produced from.
     */
    @Test
    public void dedupConfigSerializationTest() {
        final DedupConfig cfgFromClasspath = DedupConfig.load(readFromClasspath("organization.current.conf.json"));
        // Null checks come before the first dereference so that they can actually fail.
        assertNotNull(cfgFromClasspath);

        final String conf = cfgFromClasspath.toString();
        final DedupConfig cfgFromSerialization = DedupConfig.load(conf);
        assertNotNull(cfgFromSerialization);

        assertEquals(cfgFromClasspath.toString(), cfgFromSerialization.toString());
    }

    /** Loads the organization configuration and prints its string form. */
    @Test
    public void dedupConfigTest() {
        DedupConfig load = DedupConfig.load(readFromClasspath("organization.current.conf.json"));
        assertNotNull(load);
        System.out.println(load.toString());
    }

    /**
     * Loads the organization configuration, prints the size of its translation map and
     * every key whose mapped value is {@code "key::1"}.
     */
    @Test
    public void initTranslationMapTest() {
        DedupConfig load = DedupConfig.load(readFromClasspath("organization.current.conf.json"));
        Map<String, String> translationMap = load.translationMap();
        assertNotNull(translationMap);
        System.out.println("translationMap = " + translationMap.size());

        // entrySet() avoids a second lookup per key; the constant-first equals tolerates null values.
        for (Map.Entry<String, String> entry : translationMap.entrySet()) {
            if ("key::1".equals(entry.getValue())) {
                System.out.println("key = " + entry.getKey());
            }
        }
    }

    /** Expects the translation map of the {@code no_synonyms} configuration to have no keys. */
    @Test
    public void emptyTranslationMapTest() {
        DedupConfig load = DedupConfig.load(readFromClasspath("organization.no_synonyms.conf.json"));
        assertEquals(0, load.getPace().translationMap().keySet().size());
    }

    /**
     * Maps the sample publication JSON to a {@link MapDocument} using the publication
     * configuration and prints the string value of its {@code title} field.
     */
    @Test
    public void asMapDocumentTest() {
        DedupConfig dedupConf = DedupConfig.load(readFromClasspath("publication.current.conf.json"));
        final String json = readFromClasspath("publication.json");
        final MapDocument mapDocument = MapDocumentUtil.asMapDocumentWithJPath(dedupConf, json);
        assertNotNull(mapDocument);

        // Disabled JsonListMatch check, kept for reference:
        // System.out.println("mapDocument = " + mapDocument.getFieldMap());
        // JsonListMatch jsonListMatch = new JsonListMatch(params);
        //
        // jsonListMatch.compare(mapDocument.getFieldMap().get("pid"), mapDocument.getFieldMap().get("pid"), null);

        // Fail with an assertion rather than a NullPointerException when the field is missing.
        assertNotNull(mapDocument.getFieldMap().get("title"));
        System.out.println("mapDocument = " + mapDocument.getFieldMap().get("title").stringValue());
    }

    /** Prints the result of evaluating the JSONPath {@code $.id} on the sample organization JSON. */
    @Test
    public void testJPath() {
        final String json = readFromClasspath("organization.json");
        final String jpath = "$.id";
        System.out.println("result = " + MapDocumentUtil.getJPathString(jpath, json));
    }

    /**
     * Combines the clustering keys of the sample publication and checks the second
     * {@code ':'}-separated token of the first three keys.
     */
    @Test
    public void clusteringCombinerTest() {
        DedupConfig dedupConf = DedupConfig.load(readFromClasspath("publication.current.conf.json"));
        final String json = readFromClasspath("publication.json");
        final MapDocument mapDocument = MapDocumentUtil.asMapDocumentWithJPath(dedupConf, json);

        // A zero-length seed array yields an array sized to the collection. Seeding with
        // new String[3] would pad a shorter result with nulls and surface as a
        // NullPointerException below instead of a readable assertion failure.
        final String[] combine = ClusteringCombiner.combine(mapDocument, dedupConf).toArray(new String[0]);
        assertTrue(combine.length >= 3, "expected at least 3 clustering keys but got " + combine.length);

        assertEquals("test", combine[0].split(":")[1]);
        assertEquals("title", combine[1].split(":")[1]);
        assertEquals("doi", combine[2].split(":")[1]);
    }

    /**
     * Runs {@link BlacklistAwareClusteringCombiner#filterAndCombine} on the example
     * publication with the production publication configuration and prints each
     * returned string.
     */
    @Test
    public void filterAndCombineTest() {
        DedupConfig dedupConf = DedupConfig.load(readFromClasspath("pub.prod.conf.json"));
        final String json = readFromClasspath("publication.example.json");
        final MapDocument mapDocument = MapDocumentUtil.asMapDocumentWithJPath(dedupConf, json);

        Collection<String> strings = BlacklistAwareClusteringCombiner.filterAndCombine(mapDocument, dedupConf);
        assertNotNull(strings);
        for (String s : strings) {
            System.out.println("s = " + s);
        }
    }

    /**
     * Evaluates the {@code "start"} node of the cross-compare decision tree on a document
     * compared with itself and expects the {@link AggType#MAX} final score to exceed 0.7.
     */
    @Test
    public void crossCompareTest() {
        DedupConfig dedupConf = DedupConfig.load(readFromClasspath("organization.cross.compare.conf.json"));
        TreeNodeDef treeNode = dedupConf.decisionTree().get("start");

        final String json = readFromClasspath("organization.json");
        final MapDocument doc = MapDocumentUtil.asMapDocumentWithJPath(dedupConf, json);

        TreeNodeStats nodeStats = treeNode.evaluate(doc, doc, dedupConf);
        assertTrue(nodeStats.getFinalScore(AggType.MAX) > 0.7);
    }
}