ccp.docs/source/developermanual/ccp_methods/Gate Cloud Twitie Named Ent...

{
    "title": "Gate Cloud Twitie Named Entity Recognizer For Tweets",
    "description": "Named entity recognition service for Twitter data.  Identifies person, location, organization etc. and also performs normalization of abbreviations and common shorthands (such as brb, gr8, 2day, etc.) - for full details see https://cloud.gate.ac.uk/shopfront/displayItem/twitie-named-entity-recognizer-for-tweets",
    "version": "1.0.0",
    "jobControlOptions": "async-execute",
    "metadata": [
        {
            "title": "Marco Lettere",
            "role": "author",
            "href": "https://accounts.dev.d4science.org/auth/admin/realms/d4science/users/88c76e47-5881-4716-a2bf-02d3b4073574"
        },
        {
            "role": "category",
            "title": "Text_Analytics"
        }
    ],
    "inputs": {
        "ccpimage": {
            "id": "ccpimage",
            "title": "Runtime",
            "description": "The image of the runtime to use for method execution. This depends on the infrastructure specific protocol for interacting with registries.",
            "minOccurs": 1,
            "maxOccurs": 1,
            "schema": {
                "type": "string",
                "format": "url",
                "contentMediaType": "text/plain",
                "default": "hub.dev.d4science.org/ccp-runtimes/gatecloud-base:latest",
                "readOnly": true
            }
        },
        "inputFile": {
            "id": "inputFile",
            "title": "inputFile",
            "description": "Input CSV file()",
            "minOccurs": 1,
            "maxOccurs": 1,
            "schema": {
                "type": "string",
                "format": "remotefile",
                "default": "",
                "contentMediaType": "text/csv"
            }
        },
        "columnSeparator": {
            "id": "columnSeparator",
            "title": "columnSeparator",
            "description": "Separator between columns - comma for a normal CSV file but tab is also supported()",
            "minOccurs": 1,
            "maxOccurs": 1,
            "schema": {
                "type": "string",
                "enum": [
                    "comma",
                    "tab"
                ],
                "default": "comma"
            }
        },
        "hasHeaders": {
            "id": "hasHeaders",
            "title": "hasHeaders",
            "description": "Does the CSV have a header row?()",
            "minOccurs": 1,
            "maxOccurs": 1,
            "schema": {
                "type": "string",
                "format": "boolean",
                "default": "false"
            }
        },
        "textColumn": {
            "id": "textColumn",
            "title": "textColumn",
            "description": "Column name (if hasHeaders) or index (1-based) that contains the text to process()",
            "minOccurs": 1,
            "maxOccurs": 1,
            "schema": {
                "type": "string",
                "format": "none",
                "default": "1"
            }
        },
        "copyColumns": {
            "id": "copyColumns",
            "title": "copyColumns",
            "description": "Comma-separated list of column names (if hasHeaders is true) or indexes (1-based) that should be copied from the input to output, for example a column representing the document identifier. 0 means do not copy any columns to the output.()",
            "minOccurs": 1,
            "maxOccurs": 1,
            "schema": {
                "type": "string",
                "format": "none",
                "default": "0"
            }
        },
        "output1": {
            "id": "output1",
            "title": "output1",
            "description": "Output column definition (11 pre-configured options available, select as many as you wish, or if you prefer you can also add custom output column definitions below)()",
            "minOccurs": 1,
            "maxOccurs": 1,
            "schema": {
                "type": "string",
                "enum": [
                    "none",
                    "Person",
                    "Location",
                    "Organization",
                    "Date",
                    "URL",
                    "Hashtag tokenized",
                    "UserID",
                    "Tweet lang",
                    "Token string (category)",
                    "UserID user",
                    "Emoticon normalized"
                ],
                "default": "none"
            }
        },
        "output2": {
            "id": "output2",
            "title": "output2",
            "description": "Output column definition()",
            "minOccurs": 1,
            "maxOccurs": 1,
            "schema": {
                "type": "string",
                "enum": [
                    "none",
                    "Person",
                    "Location",
                    "Organization",
                    "Date",
                    "URL",
                    "Hashtag tokenized",
                    "UserID",
                    "Tweet lang",
                    "Token string (category)",
                    "UserID user",
                    "Emoticon normalized"
                ],
                "default": "none"
            }
        },
        "output3": {
            "id": "output3",
            "title": "output3",
            "description": "Output column definition()",
            "minOccurs": 1,
            "maxOccurs": 1,
            "schema": {
                "type": "string",
                "enum": [
                    "none",
                    "Person",
                    "Location",
                    "Organization",
                    "Date",
                    "URL",
                    "Hashtag tokenized",
                    "UserID",
                    "Tweet lang",
                    "Token string (category)",
                    "UserID user",
                    "Emoticon normalized"
                ],
                "default": "none"
            }
        },
        "output4": {
            "id": "output4",
            "title": "output4",
            "description": "Output column definition()",
            "minOccurs": 1,
            "maxOccurs": 1,
            "schema": {
                "type": "string",
                "enum": [
                    "none",
                    "Person",
                    "Location",
                    "Organization",
                    "Date",
                    "URL",
                    "Hashtag tokenized",
                    "UserID",
                    "Tweet lang",
                    "Token string (category)",
                    "UserID user",
                    "Emoticon normalized"
                ],
                "default": "none"
            }
        },
        "output5": {
            "id": "output5",
            "title": "output5",
            "description": "Output column definition()",
            "minOccurs": 1,
            "maxOccurs": 1,
            "schema": {
                "type": "string",
                "enum": [
                    "none",
                    "Person",
                    "Location",
                    "Organization",
                    "Date",
                    "URL",
                    "Hashtag tokenized",
                    "UserID",
                    "Tweet lang",
                    "Token string (category)",
                    "UserID user",
                    "Emoticon normalized"
                ],
                "default": "none"
            }
        },
        "output6": {
            "id": "output6",
            "title": "output6",
            "description": "Output column definition()",
            "minOccurs": 1,
            "maxOccurs": 1,
            "schema": {
                "type": "string",
                "enum": [
                    "none",
                    "Person",
                    "Location",
                    "Organization",
                    "Date",
                    "URL",
                    "Hashtag tokenized",
                    "UserID",
                    "Tweet lang",
                    "Token string (category)",
                    "UserID user",
                    "Emoticon normalized"
                ],
                "default": "none"
            }
        },
        "output7": {
            "id": "output7",
            "title": "output7",
            "description": "Output column definition()",
            "minOccurs": 1,
            "maxOccurs": 1,
            "schema": {
                "type": "string",
                "enum": [
                    "none",
                    "Person",
                    "Location",
                    "Organization",
                    "Date",
                    "URL",
                    "Hashtag tokenized",
                    "UserID",
                    "Tweet lang",
                    "Token string (category)",
                    "UserID user",
                    "Emoticon normalized"
                ],
                "default": "none"
            }
        },
        "output8": {
            "id": "output8",
            "title": "output8",
            "description": "Output column definition()",
            "minOccurs": 1,
            "maxOccurs": 1,
            "schema": {
                "type": "string",
                "enum": [
                    "none",
                    "Person",
                    "Location",
                    "Organization",
                    "Date",
                    "URL",
                    "Hashtag tokenized",
                    "UserID",
                    "Tweet lang",
                    "Token string (category)",
                    "UserID user",
                    "Emoticon normalized"
                ],
                "default": "none"
            }
        },
        "moreOutput": {
            "id": "moreOutput",
            "title": "moreOutput",
            "description": "More output column definitions, using the output specification language. Enter none if you do not require any extra outputs [a sequence of values separated by #](https://cloud.gate.ac.uk/info/help/sobigdata/#output-spec)",
            "minOccurs": 1,
            "maxOccurs": 1,
            "schema": {
                "type": "string",
                "format": "none",
                "default": "none"
            }
        }
    },
    "outputs": {
        "result": {
            "id": "result",
            "title": "result",
            "description": "Result CSV file",
            "minOccurs": 1,
            "maxOccurs": 1,
            "metadata": [
                {
                    "title": "result.csv",
                    "role": "file",
                    "href": "/ccp_data/output/result.csv"
                }
            ],
            "schema": {
                "type": "string",
                "contentEncoding": "binary",
                "contentMediaType": "text/csv"
            }
        }
    },
    "additionalParameters": {
        "parameters": [
            {
                "name": "deploy-script",
                "value": [
                    "./download.sh {{inputFile}}"
                ]
            },
            {
                "name": "execute-script",
                "value": [
                    "python build/twitie-named-entity-recognizer-for-tweets/standard_service.py /ccp_data/inputFile.csv {{columnSeparator}} {{hasHeaders}} '{{textColumn}}' '{{copyColumns}}' '{{output1}}' '{{output2}}' '{{output3}}' '{{output4}}' '{{output5}}' '{{output6}}' '{{output7}}' '{{output8}}' '{{moreOutput}}'",
                    "cp -f result.csv /ccp_data/"
                ]
            },
            {
                "name": "undeploy-script",
                "value": []
            }
        ]
    },
    "links": [
        {
            "rel": "compatibleWith",
            "title": "D4Science development Infrastructure",
            "href": "infrastructures/d4science-dev-swarm"
        }
    ],
    "keywords": [
        "gatecloud"
    ],
    "id": "003b05cb-d546-4348-9345-09eeb06b440d"
}