{
  "wf" : {
    "threshold" : "0.99",
    "dedupRun" : "001",
    "entityType" : "person",
    "orderField" : "fullname",
    "queueMaxSize" : "2000",
    "groupMaxSize" : "10",
    "slidingWindowSize" : "200",
    "rootBuilder" : [ "person" ],
    "includeChildren" : "true"
  },
  "pace": {
    "clustering": [
      {"name": "personClustering", "fields": ["fullname"], "params": {}}
    ],
    "conditions": [],
    "decisionTree": {
      "start": {"fields": [{"field":"pubID", "comparator":"exactMatch", "weight":1.0, "params":{}}], "threshold":1.0, "aggregation": "SUM", "positive":"NO_MATCH", "negative":"layer2", "undefined": "layer2", "ignoreMissing": "false"},
      "layer2": {"fields": [{"field":"orcid", "comparator":"exactMatch", "weight":1.0, "params":{}}], "threshold":1.0, "aggregation": "SUM", "positive":"ORCID_MATCH", "negative":"NO_MATCH", "undefined": "layer3", "ignoreMissing": "false"},
      "layer3": {"fields": [{"field":"firstname", "comparator":"similar", "weight":1.0, "params":{}}], "threshold":0.7, "aggregation": "SUM", "positive":"layer4", "negative":"NO_MATCH", "undefined": "layer4", "ignoreMissing": "false"},
      "layer4": {"fields": [{"field":"coauthors", "comparator":"coauthorsMatch", "weight":1.0, "params":{"minCoauthors":6, "maxCoauthors": 200}}], "threshold":5.0, "aggregation": "SUM", "positive":"COAUTHORS_MATCH", "negative":"NO_MATCH", "undefined": "layer5", "ignoreMissing": "false"},
      "layer5": {"fields": [{"field":"area", "comparator":"exactMatch", "weight":1.0, "params":{}}], "threshold":1.0, "aggregation": "SUM", "positive":"layer6", "negative":"NO_MATCH", "undefined": "NO_MATCH", "ignoreMissing": "false"},
      "layer6": {"fields": [{"field":"topics", "comparator":"topicsMatch", "weight":1.0, "params":{}}], "threshold":0.7, "aggregation": "SUM", "positive":"TOPICS_MATCH", "negative":"NO_MATCH", "undefined": "NO_MATCH", "ignoreMissing": "false"}
    },
    "model": [
      {"name": "fullname", "algo": "Null", "type": "String", "weight": "0", "ignoreMissing": "false", "path": "person/metadata/fullname"},
      {"name": "firstname", "algo": "Null", "type": "String", "weight": "0", "ignoreMissing": "false", "path": "person/metadata/firstname"},
      {"name": "lastname", "algo": "Null", "type": "String", "weight": "0", "ignoreMissing": "false", "path": "person/metadata/lastname"},
      {"name": "coauthors", "algo": "Null", "type": "String", "weight": "0", "ignoreMissing": "false", "path": "person/metadata/coauthors"},
      {"name": "orcid", "algo": "Null", "type": "String", "weight": "0", "ignoreMissing": "false", "path": "person/metadata/orcid"},
      {"name": "topics", "algo": "Null", "type": "String", "weight": "0", "ignoreMissing": "false", "path": "person/metadata/topics"},
      {"name": "pubID", "algo": "Null", "type": "String", "weight": "0", "ignoreMissing": "false", "path": "person/metadata/pubID"},
      {"name": "pubDOI", "algo": "Null", "type": "String", "weight": "0", "ignoreMissing": "false", "path": "person/metadata/pubDOI"},
      {"name": "rank", "algo": "Null", "type": "Int", "weight": "0", "ignoreMissing": "false", "path": "person/metadata/rank"},
      {"name": "area", "algo": "Null", "type": "String", "weight": "0", "ignoreMissing": "false", "path": "person/metadata/area"}
    ],
    "blacklists": {}
  }
}