dnet-dedup/dnet-dedup-test/src/test/resources/eu/dnetlib/pace/authors.test.pace.conf

40 lines
3.3 KiB
Plaintext

{
"wf" : {
"threshold" : "0.99",
"dedupRun" : "001",
"entityType" : "person",
"orderField" : "fullname",
"queueMaxSize" : "2000",
"groupMaxSize" : "10",
"slidingWindowSize" : "200",
"rootBuilder" : [ "person" ],
"includeChildren" : "true"
},
"pace": {
"clustering": [
{"name": "personClustering", "fields": ["fullname"], "params": {}}
],
"necessaryConditions": [],
"decisionTree": {
"start": {"fields": [{"field":"pubID", "comparator":"exactMatch", "weight":1.0, "params":{}}], "threshold":1.0, "aggregation": "SUM", "positive":"NO_MATCH", "negative":"layer2", "undefined": "layer2", "ignoreMissing": "false"},
"layer2": {"fields": [{"field":"orcid", "comparator":"exactMatch", "weight":1.0, "params":{}}], "threshold":1.0, "aggregation": "SUM", "positive":"ORCID_MATCH", "negative":"NO_MATCH", "undefined": "layer3", "ignoreMissing": "false"},
"layer3": {"fields": [{"field":"firstname", "comparator":"similar", "weight":1.0, "params":{}}], "threshold":0.7, "aggregation": "SUM", "positive":"layer4", "negative":"NO_MATCH", "undefined": "layer4", "ignoreMissing": "false"},
"layer4": {"fields": [{"field":"coauthors", "comparator":"coauthorsMatch", "weight":1.0, "params":{"minCoauthors":6, "maxCoauthors": 200}}], "threshold":5.0, "aggregation": "SUM", "positive":"COAUTHORS_MATCH", "negative":"NO_MATCH", "undefined": "layer5", "ignoreMissing": "false"},
"layer5": {"fields": [{"field":"area", "comparator":"exactMatch", "weight":1.0, "params":{}}], "threshold":1.0, "aggregation": "SUM", "positive":"layer6", "negative":"NO_MATCH", "undefined": "NO_MATCH", "ignoreMissing": "false"},
"layer6": {"fields": [{"field":"topics", "comparator":"topicsMatch", "weight":1.0, "params":{}}], "threshold":0.7, "aggregation": "SUM", "positive":"TOPICS_MATCH", "negative":"NO_MATCH", "undefined": "NO_MATCH", "ignoreMissing": "false"}
},
"model": [
{"name": "fullname", "algo": "Null", "type": "String", "weight": "0", "ignoreMissing": "false", "path": "person/metadata/fullname"},
{"name": "firstname", "algo": "Null", "type": "String", "weight": "0", "ignoreMissing": "false", "path": "person/metadata/firstname"},
{"name": "lastname", "algo": "Null", "type": "String", "weight": "0", "ignoreMissing": "false", "path": "person/metadata/lastname"},
{"name": "coauthors", "algo": "Null", "type": "String", "weight": "0", "ignoreMissing": "false", "path": "person/metadata/coauthors"},
{"name": "orcid", "algo": "Null", "type": "String", "weight": "0", "ignoreMissing": "false", "path": "person/metadata/orcid"},
{"name": "topics", "algo": "Null", "type": "String", "weight": "0", "ignoreMissing": "false", "path": "person/metadata/topics"},
{"name": "pubID", "algo": "Null", "type": "String", "weight": "0", "ignoreMissing": "false", "path": "person/metadata/pubID"},
{"name": "pubDOI", "algo": "Null", "type": "String", "weight": "0", "ignoreMissing": "false", "path": "person/metadata/pubDOI"},
{"name": "rank", "algo": "Null", "type": "Int", "weight": "0", "ignoreMissing": "false", "path": "person/metadata/rank"},
{"name": "area", "algo": "Null", "type": "String", "weight": "0", "ignoreMissing": "false", "path": "person/metadata/area"}
],
"blacklists": {}
}
}