{ "wf" : { "threshold" : "0.99", "dedupRun" : "001", "entityType" : "datasource", "orderField" : "englishname", "queueMaxSize" : "2000", "groupMaxSize" : "50", "slidingWindowSize" : "200", "idPath":"$.id", "rootBuilder" : [ "organization", "projectOrganization_participation_isParticipant", "datasourceOrganization_provision_isProvidedBy" ], "includeChildren" : "true", "maxIterations": "20" }, "pace" : { "clustering" : [ { "name" : "sortedngrampairs", "fields" : [ "officialname", "englishname" ], "params" : { "max" : 2, "ngramLen" : "3", "collapseOn:name": "0"} }, { "name" : "suffixprefix", "fields" : [ "officialname", "englishname" ], "params" : { "max" : 1, "len" : "3", "collapseOn:name": "0" } }, {"name" : "ngrams", "fields" : ["officialname", "englishname"], "params" : {"ngramLen": 4, "max" : 2, "maxPerToken": 2, "minNgramLen": 1, "collapseOn:name": "0"}}, { "name" : "urlclustering", "fields" : [ "websiteurl" ], "params" : { } } ], "decisionTree" : { "start": { "fields": [ { "field": "websiteurl", "comparator": "domainExactMatch", "weight": 1, "countIfUndefined": "false", "params": {} } ], "threshold": 1, "aggregation": "AVG", "positive": "layer2", "negative": "NO_MATCH", "undefined": "layer2", "ignoreUndefined": "true" }, "layer2": { "fields": [ { "field": "officialname", "comparator": "levensteinTitle", "weight": 1.0, "countIfUndefined": "true", "params": { "threshold": 0.9 } }, { "field": "englishname", "comparator": "levensteinTitle", "weight": 1.0, "countIfUndefined": "true", "params": { "threshold": 0.9 } }, { "field": "officialname", "comparator": "levensteinTitle", "weight": 1.0, "countIfUndefined": "true", "params": { "crossCompare": "englishname", "threshold": 0.9 } } ], "threshold": 0.9, "aggregation": "MAX", "positive": "MATCH", "negative": "NO_MATCH", "undefined": "NO_MATCH", "ignoreUndefined": "true" } }, "model" : [ { "name" : "englishname", "type" : "String", "path" : "$.englishname" }, { "name" : "officialname", "type" : "String", "path" : "$.officialname" }, { "name" : "websiteurl", "type" : "URL", "path" : "$.websiteurl" } ], "blacklists" : {}, "synonyms": {} } }