Implementation of the test to inspect deduplication results
This commit is contained in:
parent
fb314e3441
commit
2c1488b91f
|
@ -177,11 +177,19 @@ public class DedupLocalTest extends DedupTestUtils {
|
||||||
@Ignore
|
@Ignore
|
||||||
public void deduplicationTest() throws IOException {
|
public void deduplicationTest() throws IOException {
|
||||||
|
|
||||||
|
//custom parameters for this test
|
||||||
|
DedupConfig dedupConfig = DedupConfig.load(readFileFromHDFS("/Users/miconis/IdeaProjects/DnetDedup/dnet-dedup/dnet-dedup-test/src/test/resources/eu/dnetlib/pace/config/ds.tree.conf.json"));
|
||||||
|
String inputPath = "/Users/miconis/Desktop/Fairsharing dedup/datasources";
|
||||||
|
String workingPath = "/tmp/fairsharing_working_dir";
|
||||||
|
String simRelsPath = workingPath + "/simrels";
|
||||||
|
String mergeRelsPath = workingPath + "/mergerels";
|
||||||
|
String outputPath = workingPath + "/dedup";
|
||||||
|
|
||||||
long before_simrels = System.currentTimeMillis();
|
long before_simrels = System.currentTimeMillis();
|
||||||
Deduper.createSimRels(
|
Deduper.createSimRels(
|
||||||
config,
|
dedupConfig,
|
||||||
spark,
|
spark,
|
||||||
entitiesPath,
|
inputPath,
|
||||||
simRelsPath,
|
simRelsPath,
|
||||||
true
|
true
|
||||||
);
|
);
|
||||||
|
@ -191,8 +199,8 @@ public class DedupLocalTest extends DedupTestUtils {
|
||||||
|
|
||||||
long before_mergerels = System.currentTimeMillis();
|
long before_mergerels = System.currentTimeMillis();
|
||||||
Deduper.createMergeRels(
|
Deduper.createMergeRels(
|
||||||
config,
|
dedupConfig,
|
||||||
entitiesPath,
|
inputPath,
|
||||||
mergeRelsPath,
|
mergeRelsPath,
|
||||||
simRelsPath,
|
simRelsPath,
|
||||||
spark
|
spark
|
||||||
|
@ -203,15 +211,15 @@ public class DedupLocalTest extends DedupTestUtils {
|
||||||
|
|
||||||
long before_dedupentity = System.currentTimeMillis();
|
long before_dedupentity = System.currentTimeMillis();
|
||||||
Deduper.createDedupEntity(
|
Deduper.createDedupEntity(
|
||||||
config,
|
dedupConfig,
|
||||||
mergeRelsPath,
|
mergeRelsPath,
|
||||||
entitiesPath,
|
inputPath,
|
||||||
spark,
|
spark,
|
||||||
dedupEntityPath
|
outputPath
|
||||||
);
|
);
|
||||||
long dedupentity_time = System.currentTimeMillis() - before_dedupentity;
|
long dedupentity_time = System.currentTimeMillis() - before_dedupentity;
|
||||||
|
|
||||||
long dedupentity_number = context.textFile(dedupEntityPath).count();
|
long dedupentity_number = context.textFile(outputPath).count();
|
||||||
|
|
||||||
System.out.println("Number of simrels : " + simrels_number);
|
System.out.println("Number of simrels : " + simrels_number);
|
||||||
System.out.println("Number of mergerels : " + mergerels_number);
|
System.out.println("Number of mergerels : " + mergerels_number);
|
||||||
|
|
|
@ -40,7 +40,7 @@
|
||||||
"fields": [
|
"fields": [
|
||||||
{
|
{
|
||||||
"field": "name",
|
"field": "name",
|
||||||
"comparator": "jaroWinkler",
|
"comparator": "levensteinTitle",
|
||||||
"weight": 1.0,
|
"weight": 1.0,
|
||||||
"countIfUndefined": "true",
|
"countIfUndefined": "true",
|
||||||
"params": {
|
"params": {
|
||||||
|
|
Loading…
Reference in New Issue