Implementation of the local test used to inspect dedup results

optimized-clustering
miconis 3 years ago
parent fb314e3441
commit 2c1488b91f

@ -177,11 +177,19 @@ public class DedupLocalTest extends DedupTestUtils {
@Ignore
public void deduplicationTest() throws IOException {
//custom parameters for this test
DedupConfig dedupConfig = DedupConfig.load(readFileFromHDFS("/Users/miconis/IdeaProjects/DnetDedup/dnet-dedup/dnet-dedup-test/src/test/resources/eu/dnetlib/pace/config/ds.tree.conf.json"));
String inputPath = "/Users/miconis/Desktop/Fairsharing dedup/datasources";
String workingPath = "/tmp/fairsharing_working_dir";
String simRelsPath = workingPath + "/simrels";
String mergeRelsPath = workingPath + "/mergerels";
String outputPath = workingPath + "/dedup";
long before_simrels = System.currentTimeMillis();
Deduper.createSimRels(
config,
dedupConfig,
spark,
entitiesPath,
inputPath,
simRelsPath,
true
);
@ -191,8 +199,8 @@ public class DedupLocalTest extends DedupTestUtils {
long before_mergerels = System.currentTimeMillis();
Deduper.createMergeRels(
config,
entitiesPath,
dedupConfig,
inputPath,
mergeRelsPath,
simRelsPath,
spark
@ -203,15 +211,15 @@ public class DedupLocalTest extends DedupTestUtils {
long before_dedupentity = System.currentTimeMillis();
Deduper.createDedupEntity(
config,
dedupConfig,
mergeRelsPath,
entitiesPath,
inputPath,
spark,
dedupEntityPath
outputPath
);
long dedupentity_time = System.currentTimeMillis() - before_dedupentity;
long dedupentity_number = context.textFile(dedupEntityPath).count();
long dedupentity_number = context.textFile(outputPath).count();
System.out.println("Number of simrels : " + simrels_number);
System.out.println("Number of mergerels : " + mergerels_number);

@ -40,7 +40,7 @@
"fields": [
{
"field": "name",
"comparator": "jaroWinkler",
"comparator": "levensteinTitle",
"weight": 1.0,
"countIfUndefined": "true",
"params": {

Loading…
Cancel
Save