{
  "wf": {
    "threshold" : "0.99",
    "dedupRun" : "001",
    "entityType" : "result",
    "subEntityType" : "resulttype",
    "subEntityValue" : "otherresearchproduct",
    "orderField" : "title",
    "queueMaxSize" : "100",
    "groupMaxSize" : "100",
    "maxChildren" : "100",
    "slidingWindowSize" : "100",
    "rootBuilder" : [ "result", "resultProject_outcome_isProducedBy", "resultResult_publicationDataset_isRelatedTo", "resultResult_similarity_isAmongTopNSimilarDocuments", "resultResult_similarity_hasAmongTopNSimilarDocuments", "resultOrganization_affiliation_hasAuthorInstitution", "resultResult_part_hasPart", "resultResult_part_isPartOf", "resultResult_supplement_isSupplementTo", "resultResult_supplement_isSupplementedBy", "resultResult_version_isVersionOf" ],
    "includeChildren" : "true",
    "idPath" : "$.id",
    "maxIterations" : 20
  },
  "pace": {
    "clustering": [
      {
        "name": "wordsStatsSuffixPrefixChain",
        "fields": [
          "title"
        ],
        "params": {
          "mod": "10"
        }
      },
      {
        "name": "lowercase",
        "fields": [
          "doi",
          "altdoi"
        ],
        "params": {
          "collapseOn:pid": "0"
        }
      }
    ],
    "decisionTree": {
      "start": {
        "fields": [
          {
            "field": "pid",
            "comparator": "jsonListMatch",
            "weight": 1.0,
            "countIfUndefined": "false",
            "params": {
              "jpath_value": "$.value",
              "jpath_classid": "$.qualifier.classid",
              "mode": "count"
            }
          }
        ],
        "threshold": 1.0,
        "aggregation": "MAX",
        "positive": "MATCH",
        "negative": "pidVSaltid",
        "undefined": "pidVSaltid",
        "ignoreUndefined": "false"
      },
      "pidVSaltid": {
        "fields": [
          {
            "field": "pid",
            "comparator": "jsonListMatch",
            "weight": 1.0,
            "countIfUndefined": "false",
            "params": {
              "jpath_value": "$.value",
              "jpath_classid": "$.qualifier.classid",
              "crossCompare": "alternateid",
              "mode": "count"
            }
          }
        ],
        "threshold": 1.0,
        "aggregation": "MAX",
        "positive": "softCheck",
        "negative": "earlyExits",
        "undefined": "earlyExits",
        "ignoreUndefined": "true"
      },
      "softCheck": {
        "fields": [
          {
            "field": "title",
            "comparator": "levensteinTitle",
            "weight": 1.0,
            "countIfUndefined": "true",
            "params": {}
          }
        ],
        "threshold": 0.9,
        "aggregation": "AVG",
        "positive": "MATCH",
        "negative": "NO_MATCH",
        "undefined": "NO_MATCH",
        "ignoreUndefined": "true"
      },
      "earlyExits": {
        "fields": [
          {
            "field": "title",
            "comparator": "titleVersionMatch",
            "weight": 1.0,
            "countIfUndefined": "false",
            "params": {}
          },
          {
            "field": "authors",
            "comparator": "sizeMatch",
            "weight": 1.0,
            "countIfUndefined": "false",
            "params": {}
          }
        ],
        "threshold": 1.0,
        "aggregation": "AND",
        "positive": "strongCheck",
        "negative": "NO_MATCH",
        "undefined": "strongCheck",
        "ignoreUndefined": "false"
      },
      "strongCheck": {
        "fields": [
          {
            "field": "title",
            "comparator": "levensteinTitle",
            "weight": 1.0,
            "countIfUndefined": "true",
            "params": {}
          }
        ],
        "threshold": 0.99,
        "aggregation": "AVG",
        "positive": "surnames",
        "negative": "NO_MATCH",
        "undefined": "NO_MATCH",
        "ignoreUndefined": "true"
      },
      "surnames": {
        "fields": [
          {
            "field": "authors",
            "comparator": "authorsMatch",
            "weight": 1.0,
            "countIfUndefined": "false",
            "params": {
              "surname_th": 0.75,
              "fullname_th": 0.75,
              "mode": "surname"
            }
          }
        ],
        "threshold": 0.6,
        "aggregation": "MAX",
        "positive": "MATCH",
        "negative": "NO_MATCH",
        "undefined": "MATCH",
        "ignoreUndefined": "true"
      }
    },
    "model": [
      {
        "name": "doi",
        "type": "String",
        "path": "$.instance[*].pid[?(@.qualifier.classid == 'doi')].value"
      },
      {
        "name": "altdoi",
        "type": "String",
        "path": "$.instance[*].alternateIdentifier[?(@.qualifier.classid == 'doi')].value"
      },
      {
        "name": "pid",
        "type": "JSON",
        "path": "$.instance[*].pid[*]",
        "overrideMatch": "true"
      },
      {
        "name": "alternateid",
        "type": "JSON",
        "path": "$.instance[*].alternateIdentifier[*]",
        "overrideMatch": "true"
      },
      {
        "name": "title",
        "type": "String",
        "path": "$.title[?(@.qualifier.classid == 'main title')].value",
        "length": 250,
        "size": 5
      },
      {
        "name": "authors",
        "type": "List",
        "path": "$.author[*].fullname",
        "size": 200
      },
      {
        "name": "resulttype",
        "type": "String",
        "path": "$.resulttype.classid"
      },
      {
        "name": "instance",
        "type": "List",
        "path": "$.instance[*].instancetype.classname"
      }
    ],
    "blacklists": {},
    "synonyms": {}
  }
}