dnet-hadoop/dnet-pace-core/src/test/resources/eu/dnetlib/pace/config/result.test.conf

52 lines
2.9 KiB
Plaintext

{
"wf" : {
"threshold" : "0.99",
"dedupRun" : "001",
"entityType" : "result",
"orderField" : "title",
"queueMaxSize" : "2000",
"groupMaxSize" : "10",
"slidingWindowSize" : "200",
"rootBuilder" : [ "result" ],
"includeChildren" : "true"
},
"pace" : {
"clustering" : [
{ "name" : "acronyms", "fields" : [ "title" ], "params" : { "max" : "1", "minLen" : "2", "maxLen" : "4"} },
{ "name" : "ngrampairs", "fields" : [ "title" ], "params" : { "max" : "1", "ngramLen" : "3"} },
{ "name" : "suffixprefix", "fields" : [ "title" ], "params" : { "max" : "1", "len" : "3" } }
],
"decisionTree": {
"start": {"fields": [{"field":"pid", "comparator":"exactMatch", "weight":1.0, "countIfUndefined":"true", "params": {}}], "threshold":1.0, "aggregation": "MAX", "positive":"MATCH", "negative":"layer2", "undefined":"layer2", "ignoreUndefined": "true"},
"layer2": {"fields": [{"field":"dateofacceptance", "comparator":"yearMatch", "weight":1.0, "countIfUndefined":"true", "params" : {}}, {"field":"title", "comparator":"titleVersionMatch", "weight":1.0, "countIfUndefined":"false", "params": {}}, {"field":"authors", "comparator":"sizeMatch", "weight":1.0, "countIfUndefined":"false", "params": {}}], "threshold":1.0, "aggregation": "MIN", "positive":"layer3", "negative":"NO_MATCH", "undefined":"layer3", "ignoreUndefined": "false"},
"layer3": {"fields": [{"field":"title", "comparator":"JaroWinkler", "weight":1.0, "countIfUndefined":"false", "params":{}}], "threshold": 0.99, "aggregation": "MAX", "positive":"MATCH", "negative":"NO_MATCH", "undefined":"NO_MATCH", "ignoreUndefined": "false"}
},
"model" : [
{ "name" : "pid", "type" : "String", "path" : "pid[qualifier#classid = {doi}]/value", "overrideMatch" : "true" },
{ "name" : "title", "type" : "String", "path" : "result/metadata/title[qualifier#classid = {main title}]/value" },
{ "name" : "dateofacceptance", "type" : "String", "path" : "result/metadata/dateofacceptance/value" },
{ "name" : "authors", "type" : "List", "path" : "result/author/metadata/fullname/value" }
],
"blacklists" : {
"title" : [
"^(Corpus Oral Dialectal \\(COD\\)\\.).*$",
"^(Kiri Karl Morgensternile).*$",
"^(\\[Eksliibris Aleksandr).*\\]$",
"^(\\[Eksliibris Aleksandr).*$",
"^(Eksliibris Aleksandr).*$",
"^(Kiri A\\. de Vignolles).*$",
"^(2 kirja Karl Morgensternile).*$",
"^(Pirita kloostri idaosa arheoloogilised).*$",
"^(Kiri tundmatule).*$",
"^(Kiri Jenaer Allgemeine Literaturzeitung toimetusele).*$",
"^(Eksliibris Nikolai Birukovile).*$",
"^(Eksliibris Nikolai Issakovile).*$",
"^(WHP Cruise Summary Information of section).*$",
"^(Measurement of the top quark\\-pair production cross section with ATLAS in pp collisions at).*$",
"^(Measurement of the spin\\-dependent structure function).*$"
] },
"synonyms": {}
}
}