configuration updated for testing

This commit is contained in:
Michele De Bonis 2023-02-02 12:05:06 +01:00
parent 66472ce408
commit b4b6a61576
4 changed files with 193 additions and 35 deletions

View File

@ -5,7 +5,7 @@
"entityType": "author", "entityType": "author",
"subEntityType": "author", "subEntityType": "author",
"subEntityValue": "author", "subEntityValue": "author",
"orderField": "fullname", "orderField": "name",
"queueMaxSize": "200", "queueMaxSize": "200",
"groupMaxSize": "100", "groupMaxSize": "100",
"maxChildren": "100", "maxChildren": "100",

View File

@ -5,7 +5,7 @@
"entityType": "author", "entityType": "author",
"subEntityType": "author", "subEntityType": "author",
"subEntityValue": "author", "subEntityValue": "author",
"orderField": "fullname", "orderField": "name",
"queueMaxSize": "200", "queueMaxSize": "200",
"groupMaxSize": "100", "groupMaxSize": "100",
"maxChildren": "100", "maxChildren": "100",

View File

@ -6,9 +6,9 @@
"subEntityType": "resulttype", "subEntityType": "resulttype",
"subEntityValue": "publication", "subEntityValue": "publication",
"orderField": "title", "orderField": "title",
"queueMaxSize": "5000", "queueMaxSize": "200",
"groupMaxSize": "2000", "groupMaxSize": "100",
"maxChildren": "1000", "maxChildren": "100",
"slidingWindowSize": "50", "slidingWindowSize": "50",
"rootBuilder": [ "rootBuilder": [
"result", "result",
@ -28,9 +28,26 @@
"idPath": "$.id" "idPath": "$.id"
}, },
"pace": { "pace": {
"clustering" : [ "clustering": [
{ "name" : "wordsStatsSuffixPrefixChain", "fields" : [ "title" ], "params" : { "mod" : "10" } }, {
{ "name" : "lowercase", "fields" : [ "doi" ], "params" : { } } "name": "wordsStatsSuffixPrefixChain",
"fields": [
"title"
],
"params": {
"mod": "10"
}
},
{
"name": "lowercase",
"fields": [
"doi",
"altdoi"
],
"params": {
"collapseOn:pid": "0"
}
}
], ],
"decisionTree": { "decisionTree": {
"start": { "start": {
@ -42,18 +59,75 @@
"countIfUndefined": "false", "countIfUndefined": "false",
"params": { "params": {
"jpath_value": "$.value", "jpath_value": "$.value",
"jpath_classid": "$.qualifier.classid" "jpath_classid": "$.qualifier.classid",
"mode": "count"
} }
} }
], ],
"threshold": 0.5, "threshold": 1.0,
"aggregation": "AVG", "aggregation": "MAX",
"positive": "MATCH", "positive": "MATCH",
"negative": "layer2", "negative": "instanceTypeCheck",
"undefined": "layer2", "undefined": "instanceTypeCheck",
"ignoreUndefined": "false"
},
"instanceTypeCheck": {
"fields": [
{
"field": "instance",
"comparator": "instanceTypeMatch",
"weight": 1.0,
"countIfUndefined": "false",
"params": {}
}
],
"threshold": 0.5,
"aggregation": "MAX",
"positive": "pidVSaltid",
"negative": "NO_MATCH",
"undefined": "pidVSaltid",
"ignoreUndefined": "true" "ignoreUndefined": "true"
}, },
"layer2": { "pidVSaltid": {
"fields": [
{
"field": "pid",
"comparator": "jsonListMatch",
"weight": 1.0,
"countIfUndefined": "false",
"params": {
"jpath_value": "$.value",
"jpath_classid": "$.qualifier.classid",
"crossCompare": "alternateid",
"mode": "count"
}
}
],
"threshold": 1.0,
"aggregation": "MAX",
"positive": "softCheck",
"negative": "earlyExits",
"undefined": "earlyExits",
"ignoreUndefined": "true"
},
"softCheck": {
"fields": [
{
"field": "title",
"comparator": "levensteinTitle",
"weight": 1.0,
"countIfUndefined": "true",
"params": {}
}
],
"threshold": 0.9,
"aggregation": "AVG",
"positive": "MATCH",
"negative": "NO_MATCH",
"undefined": "NO_MATCH",
"ignoreUndefined": "true"
},
"earlyExits": {
"fields": [ "fields": [
{ {
"field": "title", "field": "title",
@ -72,12 +146,12 @@
], ],
"threshold": 1.0, "threshold": 1.0,
"aggregation": "AND", "aggregation": "AND",
"positive": "layer3", "positive": "strongCheck",
"negative": "NO_MATCH", "negative": "NO_MATCH",
"undefined": "layer3", "undefined": "strongCheck",
"ignoreUndefined": "false" "ignoreUndefined": "false"
}, },
"layer3": { "strongCheck": {
"fields": [ "fields": [
{ {
"field": "title", "field": "title",
@ -89,28 +163,60 @@
], ],
"threshold": 0.99, "threshold": 0.99,
"aggregation": "AVG", "aggregation": "AVG",
"positive": "MATCH", "positive": "surnames",
"negative": "NO_MATCH", "negative": "NO_MATCH",
"undefined": "NO_MATCH", "undefined": "NO_MATCH",
"ignoreUndefined": "true" "ignoreUndefined": "true"
},
"surnames": {
"fields": [
{
"field": "authors",
"comparator": "authorsMatch",
"weight": 1.0,
"countIfUndefined": "false",
"params": {
"surname_th": 0.75,
"fullname_th": 0.75,
"mode": "full"
}
}
],
"threshold": 0.6,
"aggregation": "MAX",
"positive": "MATCH",
"negative": "NO_MATCH",
"undefined": "MATCH",
"ignoreUndefined": "true"
} }
}, },
"model": [ "model": [
{ {
"name": "doi", "name": "doi",
"type": "String", "type": "String",
"path": "$.pid[?(@.qualifier.classid == 'doi')].value" "path": "$.instance[*].pid[?(@.qualifier.classid == 'doi')].value"
},
{
"name": "altdoi",
"type": "String",
"path": "$.instance[*].alternateIdentifier[?(@.qualifier.classid == 'doi')].value"
}, },
{ {
"name": "pid", "name": "pid",
"type": "JSON", "type": "JSON",
"path": "$.pid", "path": "$.instance[*].pid[*]",
"overrideMatch": "true"
},
{
"name": "alternateid",
"type": "JSON",
"path": "$.instance[*].alternateIdentifier[*]",
"overrideMatch": "true" "overrideMatch": "true"
}, },
{ {
"name": "title", "name": "title",
"type": "String", "type": "StringConcat",
"path": "$.title[?(@.qualifier.classid == 'main title')].value", "path": "$.title[?(@.qualifier.classid == 'main title')].value|||$.title[?(@.qualifier.classid == 'subtitle')].value",
"length": 250, "length": 250,
"size": 5 "size": 5
}, },
@ -124,6 +230,11 @@
"name": "resulttype", "name": "resulttype",
"type": "String", "type": "String",
"path": "$.resulttype.classid" "path": "$.resulttype.classid"
},
{
"name": "instance",
"type": "List",
"path": "$.instance[*].instancetype.classname"
} }
], ],
"blacklists": { "blacklists": {
@ -354,7 +465,16 @@
"^(Measurement of the top quark\\-pair production cross section with ATLAS in pp collisions at).*$", "^(Measurement of the top quark\\-pair production cross section with ATLAS in pp collisions at).*$",
"^(Measurement of the spin\\-dependent structure function).*", "^(Measurement of the spin\\-dependent structure function).*",
"(?i)^.*authors[']? reply\\.?$", "(?i)^.*authors[']? reply\\.?$",
"(?i)^.*authors[']? response\\.?$" "(?i)^.*authors[']? response\\.?$",
"^Data [mM]anagement [sS]ervices\\.$",
"Research and Advanced Technology for Digital Libraries",
"(?i)^risky business$",
"(?i)^great expectations\\.?$",
"(?i)^what's in a name\\?$",
"(?i)^decisions, decisions\\.?$",
"(?i)^update to our reader, reviewer, and author communities.*",
"(?i)^lest we forget$",
"(?i)^measure for measure$"
] ]
}, },
"synonyms": {} "synonyms": {}

File diff suppressed because one or more lines are too long