minor changes

This commit is contained in:
miconis 2019-12-18 16:20:35 +01:00
parent b21b1b8f61
commit b3748b8d77
2 changed files with 45 additions and 30 deletions

View File

@ -7,10 +7,10 @@
"queueMaxSize" : "2000", "queueMaxSize" : "2000",
"groupMaxSize" : "50", "groupMaxSize" : "50",
"slidingWindowSize" : "200", "slidingWindowSize" : "200",
"idPath":"$.id",
"rootBuilder" : [ "organization", "projectOrganization_participation_isParticipant", "datasourceOrganization_provision_isProvidedBy" ], "rootBuilder" : [ "organization", "projectOrganization_participation_isParticipant", "datasourceOrganization_provision_isProvidedBy" ],
"includeChildren" : "true", "includeChildren" : "true",
"maxIterations": "20", "maxIterations": "20"
"idPath": "$.id"
}, },
"pace" : { "pace" : {
"clustering" : [ "clustering" : [
@ -31,7 +31,7 @@
} }
], ],
"threshold": 1, "threshold": 1,
"aggregation": "SC", "aggregation": "AVG",
"positive": "MATCH", "positive": "MATCH",
"negative": "NO_MATCH", "negative": "NO_MATCH",
"undefined": "layer2", "undefined": "layer2",
@ -69,7 +69,7 @@
} }
], ],
"threshold": 1, "threshold": 1,
"aggregation": "NC", "aggregation": "AND",
"positive": "layer3", "positive": "layer3",
"negative": "NO_MATCH", "negative": "NO_MATCH",
"undefined": "layer3", "undefined": "layer3",
@ -87,7 +87,7 @@
} }
} }
], ],
"threshold": 0.1, "threshold": 0.7,
"aggregation": "W_MEAN", "aggregation": "W_MEAN",
"positive": "layer4", "positive": "layer4",
"negative": "NO_MATCH", "negative": "NO_MATCH",
@ -100,18 +100,18 @@
"field": "legalname", "field": "legalname",
"comparator": "keywordMatch", "comparator": "keywordMatch",
"weight": 1.0, "weight": 1.0,
"countIfUndefined": "false", "countIfUndefined": "true",
"params": { "params": {
"windowSize": "4" "windowSize": "4"
} }
} }
], ],
"threshold": 0.7, "threshold": 0.9,
"aggregation": "W_MEAN", "aggregation": "AVG",
"positive": "layer5", "positive": "layer5",
"negative": "NO_MATCH", "negative": "NO_MATCH",
"undefined": "layer5", "undefined": "layer5",
"ignoreUndefined": "false" "ignoreUndefined": "true"
}, },
"layer5": { "layer5": {
"fields": [ "fields": [
@ -132,7 +132,7 @@
"params": {} "params": {}
} }
], ],
"threshold": 0.9, "threshold": 0.99,
"aggregation": "W_MEAN", "aggregation": "W_MEAN",
"positive": "MATCH", "positive": "MATCH",
"negative": "NO_MATCH", "negative": "NO_MATCH",
@ -145,7 +145,8 @@
{ "name" : "legalshortname", "type" : "String", "path" : "$.organization.metadata.legalshortname.value"}, { "name" : "legalshortname", "type" : "String", "path" : "$.organization.metadata.legalshortname.value"},
{ "name" : "legalname", "type" : "String", "path" : "$.organization.metadata.legalname.value" }, { "name" : "legalname", "type" : "String", "path" : "$.organization.metadata.legalname.value" },
{ "name" : "websiteurl", "type" : "URL", "path" : "$.organization.metadata.websiteurl.value" }, { "name" : "websiteurl", "type" : "URL", "path" : "$.organization.metadata.websiteurl.value" },
{ "name" : "gridid", "type" : "String", "path" : "$.pid[?(@.qualifier.classid=='grid.ac')].value"} { "name" : "gridid", "type" : "String", "path" : "$.pid[?(@.qualifier.classid =='grid.ac')].value"},
{ "name" : "originalId", "type" : "String", "path" : "$.id" }
], ],
"blacklists" : { "blacklists" : {
"legalname" : [] "legalname" : []
@ -257,8 +258,8 @@
"key::104": ["commerce","ticaret","ticarət","commercio","trade","handel","comercio"], "key::104": ["commerce","ticaret","ticarət","commercio","trade","handel","comercio"],
"key::105" : ["state", "stato", "etade", "estado", "statale", "etat", "zustand", "estado"], "key::105" : ["state", "stato", "etade", "estado", "statale", "etat", "zustand", "estado"],
"key::106" : ["seminary", "seminario", "seminaire", "seminar"], "key::106" : ["seminary", "seminario", "seminaire", "seminar"],
"key::107" : ["agricultural forestry", "af", "a f", "a&f"], "key::107" : ["agricultural forestry", "af", "a f"],
"key::108" : ["agricultural mechanical", "am", "a m", "a&m"] "key::108" : ["agricultural mechanical", "am", "a m"]
} }
} }
} }

View File

@ -7,6 +7,7 @@
"queueMaxSize" : "2000", "queueMaxSize" : "2000",
"groupMaxSize" : "50", "groupMaxSize" : "50",
"slidingWindowSize" : "200", "slidingWindowSize" : "200",
"idPath":"$.id",
"rootBuilder" : [ "organization", "projectOrganization_participation_isParticipant", "datasourceOrganization_provision_isProvidedBy" ], "rootBuilder" : [ "organization", "projectOrganization_participation_isParticipant", "datasourceOrganization_provision_isProvidedBy" ],
"includeChildren" : "true", "includeChildren" : "true",
"maxIterations": "20" "maxIterations": "20"
@ -30,7 +31,7 @@
} }
], ],
"threshold": 1, "threshold": 1,
"aggregation": "SC", "aggregation": "AVG",
"positive": "MATCH", "positive": "MATCH",
"negative": "NO_MATCH", "negative": "NO_MATCH",
"undefined": "layer2", "undefined": "layer2",
@ -51,10 +52,24 @@
"weight": 1, "weight": 1,
"countIfUndefined": "true", "countIfUndefined": "true",
"params": {} "params": {}
},
{
"field": "legalname",
"comparator": "numbersMatch",
"weight": 1,
"countIfUndefined": "true",
"params": {}
},
{
"field": "legalname",
"comparator": "romansMatch",
"weight": 1,
"countIfUndefined": "true",
"params": {}
} }
], ],
"threshold": 1, "threshold": 1,
"aggregation": "NC", "aggregation": "AND",
"positive": "layer3", "positive": "layer3",
"negative": "NO_MATCH", "negative": "NO_MATCH",
"undefined": "layer3", "undefined": "layer3",
@ -68,12 +83,11 @@
"weight": 1.0, "weight": 1.0,
"countIfUndefined": "true", "countIfUndefined": "true",
"params": { "params": {
"windowSize": "4", "windowSize": "4"
"threshold": "0.0"
} }
} }
], ],
"threshold": 1.0, "threshold": 0.7,
"aggregation": "W_MEAN", "aggregation": "W_MEAN",
"positive": "layer4", "positive": "layer4",
"negative": "NO_MATCH", "negative": "NO_MATCH",
@ -86,19 +100,18 @@
"field": "legalname", "field": "legalname",
"comparator": "keywordMatch", "comparator": "keywordMatch",
"weight": 1.0, "weight": 1.0,
"countIfUndefined": "false", "countIfUndefined": "true",
"params": { "params": {
"windowSize": "4", "windowSize": "4"
"threshold": "0.7"
} }
} }
], ],
"threshold": 1.0, "threshold": 0.9,
"aggregation": "W_MEAN", "aggregation": "AVG",
"positive": "layer5", "positive": "layer5",
"negative": "NO_MATCH", "negative": "NO_MATCH",
"undefined": "layer5", "undefined": "layer5",
"ignoreUndefined": "false" "ignoreUndefined": "true"
}, },
"layer5": { "layer5": {
"fields": [ "fields": [
@ -119,7 +132,7 @@
"params": {} "params": {}
} }
], ],
"threshold": 0.9, "threshold": 0.99,
"aggregation": "W_MEAN", "aggregation": "W_MEAN",
"positive": "MATCH", "positive": "MATCH",
"negative": "NO_MATCH", "negative": "NO_MATCH",
@ -128,11 +141,12 @@
} }
}, },
"model" : [ "model" : [
{ "name" : "country", "type" : "String", "path" : "organization/metadata/country/classid"}, { "name" : "country", "type" : "String", "path" : "$.organization.metadata.country.classid"},
{ "name" : "legalshortname", "type" : "String", "path" : "organization/metadata/legalshortname/value"}, { "name" : "legalshortname", "type" : "String", "path" : "$.organization.metadata.legalshortname.value"},
{ "name" : "legalname", "type" : "String", "path" : "organization/metadata/legalname/value" }, { "name" : "legalname", "type" : "String", "path" : "$.organization.metadata.legalname.value" },
{ "name" : "websiteurl", "type" : "URL", "path" : "organization/metadata/websiteurl/value" }, { "name" : "websiteurl", "type" : "URL", "path" : "$.organization.metadata.websiteurl.value" },
{ "name" : "gridid", "type" : "String", "path" : "pid[qualifier#classid = {grid}]/value"} { "name" : "gridid", "type" : "String", "path" : "$.pid[?(@.qualifier.classid =='grid.ac')].value"},
{ "name" : "originalId", "type" : "String", "path" : "$.id" }
], ],
"blacklists" : { "blacklists" : {
"legalname" : [] "legalname" : []