{
  "wf" : {
    "threshold" : "0.99",
    "dedupRun" : "001",
    "entityType" : "datasource",
    "orderField" : "officialname",
    "queueMaxSize" : "2000",
    "groupMaxSize" : "50",
    "slidingWindowSize" : "200",
    "idPath":"$.id",
    "rootBuilder" : [ "organization", "projectOrganization_participation_isParticipant", "datasourceOrganization_provision_isProvidedBy" ],
    "includeChildren" : "true",
    "maxIterations": "20"
  },
  "pace" : {
    "clustering" : [
      { "name" : "sortedngrampairs", "fields" : [ "officialname", "englishname" ], "params" : { "max" : 2, "ngramLen" : "3", "collapseOn:name": "0"} },
      { "name" : "suffixprefix", "fields" : [ "officialname", "englishname" ], "params" : { "max" : 1, "len" : "3", "collapseOn:name": "0" } },
      { "name" : "urlclustering", "fields" : [ "websiteurl" ], "params" : { } }
    ],
    "decisionTree" : {
      "start": {
        "fields": [
          {
            "field": "websiteurl",
            "comparator": "domainExactMatch",
            "weight": 1,
            "countIfUndefined": "false",
            "params": {}
          }
        ],
        "threshold": 1,
        "aggregation": "AVG",
        "positive": "layer2",
        "negative": "NO_MATCH",
        "undefined": "layer2",
        "ignoreUndefined": "true"
      },
      "layer2": {
        "fields": [
          {
            "field": "officialname",
            "comparator": "levensteinTitle",
            "weight": 1.0,
            "countIfUndefined": "true",
            "params": {
              "threshold": 0.9
            }
          },
          {
            "field": "englishname",
            "comparator": "levensteinTitle",
            "weight": 1.0,
            "countIfUndefined": "true",
            "params": {
              "threshold": 0.9
            }
          },
          {
            "field": "officialname",
            "comparator": "levensteinTitle",
            "weight": 1.0,
            "countIfUndefined": "true",
            "params": {
              "crossCompare": "englishname",
              "threshold": 0.9
            }
          }
        ],
        "threshold": 0.9,
        "aggregation": "MAX",
        "positive": "MATCH",
        "negative": "NO_MATCH",
        "undefined": "NO_MATCH",
        "ignoreUndefined": "true"
      }
    },
    "model" : [
      { "name" : "englishname", "type" : "String", "path" : "$.englishname" },
      { "name" : "officialname", "type" : "String", "path" : "$.officialname" },
      { "name" : "websiteurl", "type" : "URL", "path" : "$.websiteurl" }
    ],
    "blacklists" : {
      "legalname" : []
    },
    "synonyms": {}
  }
}