ccp.docs/source/methods_ccp/Gate Cloud Measurement Expr...

234 lines
8.2 KiB
JSON

{
"title": "Gate Cloud Measurement Expression Annotator",
"description": "Annotates numbers and measurement expressions in text. This method recognises many types of measurements including length, temperature, time and speed, and calculates their normalised values in the SI system of units - for full details see https://cloud.gate.ac.uk/shopfront/displayItem/measurement-expression-annotator",
"version": "1.0.0",
"jobControlOptions": "async-execute",
"metadata": [
{
"title": "Marco Lettere",
"role": "author",
"href": "https://accounts.d4science.org/auth/admin/realms/d4science/users/09138708-9a19-4724-93d1-8c721d591da2"
},
{
"role": "category",
"title": "Text_Analytics"
}
],
"inputs": {
"ccpimage": {
"id": "ccpimage",
"title": "Runtime",
"description": "The image of the runtime to use for method execution. This depends on the infrastructure specific protocol for interacting with registries.",
"minOccurs": 1,
"maxOccurs": 1,
"schema": {
"type": "string",
"format": "url",
"contentMediaType": "text/plain",
"default": "hub.dev.d4science.org/ccp-runtimes/gatecloud-base:latest",
"readOnly": true
}
},
"inputFile": {
"id": "inputFile",
"title": "inputFile",
"description": "Input CSV file()",
"minOccurs": 1,
"maxOccurs": 1,
"schema": {
"type": "string",
"format": "remotefile",
"default": "",
"contentMediaType": "text/csv"
}
},
"columnSeparator": {
"id": "columnSeparator",
"title": "columnSeparator",
"description": "Separator between columns - comma for a normal CSV file but tab is also supported()",
"minOccurs": 1,
"maxOccurs": 1,
"schema": {
"type": "string",
"enum": [
"comma",
"tab"
],
"default": "comma"
}
},
"hasHeaders": {
"id": "hasHeaders",
"title": "hasHeaders",
"description": "Does the CSV have a header row?()",
"minOccurs": 1,
"maxOccurs": 1,
"schema": {
"type": "string",
"format": "boolean",
"default": "false"
}
},
"textColumn": {
"id": "textColumn",
"title": "textColumn",
"description": "Column name (if hasHeaders) or index (1-based) that contains the text to process()",
"minOccurs": 1,
"maxOccurs": 1,
"schema": {
"type": "string",
"format": "none",
"default": "1"
}
},
"copyColumns": {
"id": "copyColumns",
"title": "copyColumns",
"description": "Comma-separated list of column names (if hasHeaders is true) or indexes (1-based) that should be copied from the input to output, for example a column representing the document identifier. 0 means do not copy any columns to the output.()",
"minOccurs": 1,
"maxOccurs": 1,
"schema": {
"type": "string",
"format": "none",
"default": "0"
}
},
"output1": {
"id": "output1",
"title": "output1",
"description": "Output column definition (8 pre-configured options available, select as many as you wish, or if you prefer you can also add custom output column definitions below)()",
"minOccurs": 1,
"maxOccurs": 1,
"schema": {
"type": "string",
"enum": [
"none",
"Measurement",
"Measurement value",
"Measurement unit",
"Measurement normalizedValue normalizedUnit",
"Measurement normalizedValue",
"Measurement normalizedUnit",
"Measurement dimension",
"Ratio"
],
"default": "none"
}
},
"output2": {
"id": "output2",
"title": "output2",
"description": "Output column definition()",
"minOccurs": 1,
"maxOccurs": 1,
"schema": {
"type": "string",
"enum": [
"none",
"Measurement",
"Measurement value",
"Measurement unit",
"Measurement normalizedValue normalizedUnit",
"Measurement normalizedValue",
"Measurement normalizedUnit",
"Measurement dimension",
"Ratio"
],
"default": "none"
}
},
"output3": {
"id": "output3",
"title": "output3",
"description": "Output column definition()",
"minOccurs": 1,
"maxOccurs": 1,
"schema": {
"type": "string",
"enum": [
"none",
"Measurement",
"Measurement value",
"Measurement unit",
"Measurement normalizedValue normalizedUnit",
"Measurement normalizedValue",
"Measurement normalizedUnit",
"Measurement dimension",
"Ratio"
],
"default": "none"
}
},
"moreOutput": {
"id": "moreOutput",
"title": "moreOutput",
"description": "More output column definitions, using the output specification language. Enter none if you do not require any extra outputs [a sequence of values separated by #](https://cloud.gate.ac.uk/info/help/sobigdata/#output-spec)",
"minOccurs": 1,
"maxOccurs": 1,
"schema": {
"type": "string",
"format": "none",
"default": "none"
}
}
},
"outputs": {
"result": {
"id": "result",
"title": "result",
"description": "Result CSV file",
"minOccurs": 1,
"maxOccurs": 1,
"metadata": [
{
"title": "result.csv",
"role": "file",
"href": "/ccp_data/output/result.csv"
}
],
"schema": {
"type": "string",
"contentEncoding": "binary",
"contentMediaType": "text/csv"
}
}
},
"additionalParameters": {
"parameters": [
{
"name": "deploy-script",
"value": [
"./download.sh {{inputFile}}"
]
},
{
"name": "execute-script",
"value": [
"python build/measurement-expression-annotator/standard_service.py /ccp_data/inputFile.csv {{columnSeparator}} {{hasHeaders}} '{{textColumn}}' '{{copyColumns}}' '{{output1}}' '{{output2}}' '{{output3}}' '{{moreOutput}}'",
"cp -f result.csv /ccp_data/"
]
},
{
"name": "undeploy-script",
"value": []
}
]
},
"links": [
{
"rel": "compatibleWith",
"title": "D4Science development Infrastructure",
"href": "infrastructures/d4science-dev-swarm"
},
{
"rel": "compatibleWith",
"title": "D4Science production Infrastructure",
"href": "infrastructures/d4science-prod-swarm"
}
],
"keywords": [
"gatecloud"
],
"id": "49af3c8e-31fb-495c-93f9-96fe603eb7ee"
}