Implement the test for inspecting dedup results

This commit is contained in:
miconis 2021-10-22 11:21:09 +02:00
parent fb314e3441
commit 2c1488b91f
2 changed files with 17 additions and 9 deletions

View File

@@ -177,11 +177,19 @@ public class DedupLocalTest extends DedupTestUtils {
@Ignore @Ignore
public void deduplicationTest() throws IOException { public void deduplicationTest() throws IOException {
//custom parameters for this test
DedupConfig dedupConfig = DedupConfig.load(readFileFromHDFS("/Users/miconis/IdeaProjects/DnetDedup/dnet-dedup/dnet-dedup-test/src/test/resources/eu/dnetlib/pace/config/ds.tree.conf.json"));
String inputPath = "/Users/miconis/Desktop/Fairsharing dedup/datasources";
String workingPath = "/tmp/fairsharing_working_dir";
String simRelsPath = workingPath + "/simrels";
String mergeRelsPath = workingPath + "/mergerels";
String outputPath = workingPath + "/dedup";
long before_simrels = System.currentTimeMillis(); long before_simrels = System.currentTimeMillis();
Deduper.createSimRels( Deduper.createSimRels(
config, dedupConfig,
spark, spark,
entitiesPath, inputPath,
simRelsPath, simRelsPath,
true true
); );
@@ -191,8 +199,8 @@ public class DedupLocalTest extends DedupTestUtils {
long before_mergerels = System.currentTimeMillis(); long before_mergerels = System.currentTimeMillis();
Deduper.createMergeRels( Deduper.createMergeRels(
config, dedupConfig,
entitiesPath, inputPath,
mergeRelsPath, mergeRelsPath,
simRelsPath, simRelsPath,
spark spark
@@ -203,15 +211,15 @@ public class DedupLocalTest extends DedupTestUtils {
long before_dedupentity = System.currentTimeMillis(); long before_dedupentity = System.currentTimeMillis();
Deduper.createDedupEntity( Deduper.createDedupEntity(
config, dedupConfig,
mergeRelsPath, mergeRelsPath,
entitiesPath, inputPath,
spark, spark,
dedupEntityPath outputPath
); );
long dedupentity_time = System.currentTimeMillis() - before_dedupentity; long dedupentity_time = System.currentTimeMillis() - before_dedupentity;
long dedupentity_number = context.textFile(dedupEntityPath).count(); long dedupentity_number = context.textFile(outputPath).count();
System.out.println("Number of simrels : " + simrels_number); System.out.println("Number of simrels : " + simrels_number);
System.out.println("Number of mergerels : " + mergerels_number); System.out.println("Number of mergerels : " + mergerels_number);

View File

@@ -40,7 +40,7 @@
"fields": [ "fields": [
{ {
"field": "name", "field": "name",
"comparator": "jaroWinkler", "comparator": "levensteinTitle",
"weight": 1.0, "weight": 1.0,
"countIfUndefined": "true", "countIfUndefined": "true",
"params": { "params": {