{
  "wf" : {
    "threshold" : "0.99",
    "dedupRun" : "001",
    "entityType" : "result",
    "subEntityType" : "resulttype",
    "subEntityValue" : "dataset",
    "orderField" : "title",
    "queueMaxSize" : "100",
    "groupMaxSize" : "100",
    "maxChildren" : "100",
    "slidingWindowSize" : "100",
    "rootBuilder" : ["result", "resultProject_outcome_isProducedBy", "resultResult_publicationDataset_isRelatedTo", "resultResult_similarity_isAmongTopNSimilarDocuments", "resultResult_similarity_hasAmongTopNSimilarDocuments", "resultOrganization_affiliation_hasAuthorInstitution", "resultResult_part_hasPart", "resultResult_part_isPartOf", "resultResult_supplement_isSupplementTo", "resultResult_supplement_isSupplementedBy", "resultResult_version_isVersionOf" ],
    "includeChildren" : "true",
    "idPath" : "$.id",
    "maxIterations" : 20
  },
  "pace" : {
    "clustering" : [
      { "name" : "ngrampairs", "fields" : [ "title" ], "params" : { "max" : "1", "ngramLen" : "3"} },
      { "name" : "suffixprefix", "fields" : [ "title" ], "params" : { "max" : "1", "len" : "3" } },
      { "name" : "lowercase", "fields" : [ "doi" ], "params" : { } }
    ],
    "decisionTree" : {
      "start" : {
        "fields": [
          {
            "field": "pid",
            "comparator": "jsonListMatch",
            "weight": 1.0,
            "countIfUndefined": "false",
            "params": {
              "jpath_value": "$.value",
              "jpath_classid": "$.qualifier.classid"
            }
          }
        ],
        "threshold": 0.5,
        "aggregation": "AVG",
        "positive": "MATCH",
        "negative": "layer2",
        "undefined": "layer2",
        "ignoreUndefined": "true"
      },
      "layer2" : {
        "fields": [
          {
            "field": "title",
            "comparator": "titleVersionMatch",
            "weight": 1.0,
            "countIfUndefined": "false",
            "params": {}
          },
          {
            "field": "authors",
            "comparator": "sizeMatch",
            "weight": 1.0,
            "countIfUndefined": "false",
            "params": {}
          }
        ],
        "threshold": 1.0,
        "aggregation": "AND",
        "positive": "layer3",
        "negative": "NO_MATCH",
        "undefined": "layer3",
        "ignoreUndefined": "false"
      },
      "layer3" : {
        "fields": [
          {
            "field": "title",
            "comparator": "levensteinTitle",
            "weight": 1.0,
            "countIfUndefined": "true",
            "params": {}
          }
        ],
        "threshold": 0.99,
        "aggregation": "AVG",
        "positive": "MATCH",
        "negative": "NO_MATCH",
        "undefined": "NO_MATCH",
        "ignoreUndefined": "true"
      }
    },
    "model" : [
      {
        "name" : "doi",
        "type" : "String",
        "path" : "$.pid[?(@.qualifier.classid == 'doi')].value"
      },
      {
        "name" : "pid",
        "type" : "JSON",
        "path" : "$.pid",
        "overrideMatch" : "true"
      },
      {
        "name" : "title",
        "type" : "String",
        "path" : "$.title[?(@.qualifier.classid == 'main title')].value",
        "length" : 250,
        "size" : 5
      },
      {
        "name" : "authors",
        "type" : "String",
        "path" : "$.author[*].fullname",
        "size" : 200
      },
      {
        "name" : "resulttype",
        "type" : "String",
        "path" : "$.resulttype.classid"
      }
    ],
    "blacklists" : {},
    "synonyms" : {}
  }
}