224 lines
7.6 KiB
JSON
224 lines
7.6 KiB
JSON
{
|
|
"title": "Gate Cloud Chemical Entity Recognizer",
|
|
"description": "This service annotates chemical named entities using the open source OSCAR4 tagger. As well as the names of the detected entities the tagger also returns their structure in either the SMILES or InChI nomenclature - for full details see https://cloud.gate.ac.uk/shopfront/displayItem/oscar4",
|
|
"version": "1.0.0",
|
|
"jobControlOptions": "async-execute",
|
|
"metadata": [
|
|
{
|
|
"title": "Marco Lettere",
|
|
"role": "author",
|
|
"href": "https://accounts.d4science.org/auth/admin/realms/d4science/users/09138708-9a19-4724-93d1-8c721d591da2"
|
|
},
|
|
{
|
|
"title": "Marco Lettere",
|
|
"role": "author",
|
|
"href": "https://accounts.d4science.org/auth/admin/realms/d4science/users/09138708-9a19-4724-93d1-8c721d591da2"
|
|
},
|
|
{
|
|
"role": "category",
|
|
"title": "Chemical_Text_Processing"
|
|
}
|
|
],
|
|
"inputs": {
|
|
"ccpimage": {
|
|
"id": "ccpimage",
|
|
"title": "Runtime",
|
|
"description": "The image of the runtime to use for method execution. This depends on the infrastructure specific protocol for interacting with registries.",
|
|
"minOccurs": 1,
|
|
"maxOccurs": 1,
|
|
"schema": {
|
|
"type": "string",
|
|
"format": "url",
|
|
"contentMediaType": "text/plain",
|
|
"default": "hub.dev.d4science.org/ccp-runtimes/gatecloud-base:latest",
|
|
"readOnly": true
|
|
}
|
|
},
|
|
"inputFile": {
|
|
"id": "inputFile",
|
|
"title": "inputFile",
|
|
"description": "Input CSV file()",
|
|
"minOccurs": 1,
|
|
"maxOccurs": 1,
|
|
"schema": {
|
|
"type": "string",
|
|
"format": "remotefile",
|
|
"default": "",
|
|
"contentMediaType": "text/csv"
|
|
}
|
|
},
|
|
"columnSeparator": {
|
|
"id": "columnSeparator",
|
|
"title": "columnSeparator",
|
|
"description": "Separator between columns - comma for a normal CSV file but tab is also supported()",
|
|
"minOccurs": 1,
|
|
"maxOccurs": 1,
|
|
"schema": {
|
|
"type": "string",
|
|
"enum": [
|
|
"comma",
|
|
"tab"
|
|
],
|
|
"default": "comma"
|
|
}
|
|
},
|
|
"hasHeaders": {
|
|
"id": "hasHeaders",
|
|
"title": "hasHeaders",
|
|
"description": "Does the CSV have a header row?()",
|
|
"minOccurs": 1,
|
|
"maxOccurs": 1,
|
|
"schema": {
|
|
"type": "string",
|
|
"format": "boolean",
|
|
"default": "false"
|
|
}
|
|
},
|
|
"textColumn": {
|
|
"id": "textColumn",
|
|
"title": "textColumn",
|
|
"description": "Column name (if hasHeaders) or index (1-based) that contains the text to process()",
|
|
"minOccurs": 1,
|
|
"maxOccurs": 1,
|
|
"schema": {
|
|
"type": "string",
|
|
"format": "none",
|
|
"default": "1"
|
|
}
|
|
},
|
|
"copyColumns": {
|
|
"id": "copyColumns",
|
|
"title": "copyColumns",
|
|
"description": "Comma-separated list of column names (if hasHeaders is true) or indexes (1-based) that should be copied from the input to output, for example a column representing the document identifier. 0 means do not copy any columns to the output.()",
|
|
"minOccurs": 1,
|
|
"maxOccurs": 1,
|
|
"schema": {
|
|
"type": "string",
|
|
"format": "none",
|
|
"default": "0"
|
|
}
|
|
},
|
|
"output1": {
|
|
"id": "output1",
|
|
"title": "output1",
|
|
"description": "Output column definition (3 pre-configured options available, select as many as you wish, or if you prefer you can also add custom output column definitions below)()",
|
|
"minOccurs": 1,
|
|
"maxOccurs": 1,
|
|
"schema": {
|
|
"type": "string",
|
|
"enum": [
|
|
"none",
|
|
"ChemicalNE",
|
|
"ChemicalNE SMILES",
|
|
"ChemicalNE Std_InChI"
|
|
],
|
|
"default": "none"
|
|
}
|
|
},
|
|
"output2": {
|
|
"id": "output2",
|
|
"title": "output2",
|
|
"description": "Output column definition()",
|
|
"minOccurs": 1,
|
|
"maxOccurs": 1,
|
|
"schema": {
|
|
"type": "string",
|
|
"enum": [
|
|
"none",
|
|
"ChemicalNE",
|
|
"ChemicalNE SMILES",
|
|
"ChemicalNE Std_InChI"
|
|
],
|
|
"default": "none"
|
|
}
|
|
},
|
|
"output3": {
|
|
"id": "output3",
|
|
"title": "output3",
|
|
"description": "Output column definition()",
|
|
"minOccurs": 1,
|
|
"maxOccurs": 1,
|
|
"schema": {
|
|
"type": "string",
|
|
"enum": [
|
|
"none",
|
|
"ChemicalNE",
|
|
"ChemicalNE SMILES",
|
|
"ChemicalNE Std_InChI"
|
|
],
|
|
"default": "none"
|
|
}
|
|
},
|
|
"moreOutput": {
|
|
"id": "moreOutput",
|
|
"title": "moreOutput",
|
|
"description": "More output column definitions, using the output specification language. Enter none if you do not require any extra outputs [a sequence of values separated by #](https://cloud.gate.ac.uk/info/help/sobigdata/#output-spec)",
|
|
"minOccurs": 1,
|
|
"maxOccurs": 1,
|
|
"schema": {
|
|
"type": "string",
|
|
"format": "none",
|
|
"default": "none"
|
|
}
|
|
}
|
|
},
|
|
"outputs": {
|
|
"result": {
|
|
"id": "result",
|
|
"title": "result",
|
|
"description": "Result CSV file",
|
|
"minOccurs": 1,
|
|
"maxOccurs": 1,
|
|
"metadata": [
|
|
{
|
|
"title": "result.csv",
|
|
"role": "file",
|
|
"href": "/ccp_data/output/result.csv"
|
|
}
|
|
],
|
|
"schema": {
|
|
"type": "string",
|
|
"contentEncoding": "binary",
|
|
"contentMediaType": "text/csv"
|
|
}
|
|
}
|
|
},
|
|
"additionalParameters": {
|
|
"parameters": [
|
|
{
|
|
"name": "deploy-script",
|
|
"value": [
|
|
"./download.sh {{inputFile}}"
|
|
]
|
|
},
|
|
{
|
|
"name": "execute-script",
|
|
"value": [
|
|
"python build/oscar4/standard_service.py /ccp_data/inputFile.csv {{columnSeparator}} {{hasHeaders}} '{{textColumn}}' '{{copyColumns}}' '{{output1}}' '{{output2}}' '{{output3}}' '{{moreOutput}}'",
|
|
"cp -f result.csv /ccp_data/"
|
|
]
|
|
},
|
|
{
|
|
"name": "undeploy-script",
|
|
"value": []
|
|
}
|
|
]
|
|
},
|
|
"links": [
|
|
{
|
|
"rel": "compatibleWith",
|
|
"title": "D4Science development Infrastructure",
|
|
"href": "infrastructures/d4science-dev-swarm"
|
|
},
|
|
{
|
|
"rel": "compatibleWith",
|
|
"title": "D4Science production Infrastructure",
|
|
"href": "infrastructures/d4science-prod-swarm"
|
|
}
|
|
],
|
|
"keywords": [
|
|
"gatecloud"
|
|
],
|
|
"id": "b9a094f7-e3fe-4b05-9b8a-b0816ea3aa1f"
|
|
}
|