{ "title": "Gate Cloud Brexit Tweet Analysis", "description": "A pipeline designed to detect political topics, hashtags, URLs, user mentions, and hashtag-based voting intentions, expressed in tweets about the UK referendum on membership of the EU - for full details see https://cloud.gate.ac.uk/shopfront/displayItem/sobigdata-brexit", "version": "1.0.0", "jobControlOptions": "async-execute", "metadata": [ { "title": "Marco Lettere", "role": "author", "href": "https://accounts.d4science.org/auth/admin/realms/d4science/users/09138708-9a19-4724-93d1-8c721d591da2" }, { "title": "Alfredo Oliviero", "role": "author", "href": "https://accounts.d4science.org/auth/admin/realms/d4science/users/b20300b9-d2d7-4ef8-b164-917f661f7ee0" }, { "role": "category", "title": "Text_Analytics" } ], "inputs": { "ccpimage": { "id": "ccpimage", "title": "Runtime", "description": "The image of the runtime to use for method execution. This depends on the infrastructure-specific protocol for interacting with registries.", "minOccurs": 1, "maxOccurs": 1, "schema": { "type": "string", "format": "url", "contentMediaType": "text/plain", "default": "hub.dev.d4science.org/ccp-runtimes/gatecloud-base:latest", "readOnly": true } }, "inputFile": { "id": "inputFile", "title": "inputFile", "description": "Input CSV file", "minOccurs": 1, "maxOccurs": 1, "schema": { "type": "string", "format": "remotefile", "default": "", "contentMediaType": "text/csv" } }, "columnSeparator": { "id": "columnSeparator", "title": "columnSeparator", "description": "Separator between columns - comma for a normal CSV file, but tab is also supported", "minOccurs": 1, "maxOccurs": 1, "schema": { "type": "string", "enum": [ "comma", "tab" ], "default": "comma" } }, "hasHeaders": { "id": "hasHeaders", "title": "hasHeaders", "description": "Does the CSV have a header row?", "minOccurs": 1, "maxOccurs": 1, "schema": { "type": "string", "format": "boolean", "default": "false" } }, "textColumn": { "id": "textColumn", "title":
"textColumn", "description": "Column name (if hasHeaders is true) or index (1-based) that contains the text to process", "minOccurs": 1, "maxOccurs": 1, "schema": { "type": "string", "format": "none", "default": "1" } }, "copyColumns": { "id": "copyColumns", "title": "copyColumns", "description": "Comma-separated list of column names (if hasHeaders is true) or indexes (1-based) that should be copied from the input to the output, for example a column representing the document identifier. 0 means do not copy any columns to the output.", "minOccurs": 1, "maxOccurs": 1, "schema": { "type": "string", "format": "none", "default": "0" } }, "output1": { "id": "output1", "title": "output1", "description": "Output column definition (4 pre-configured options available, select as many as you wish, or if you prefer you can also add custom output column definitions below)", "minOccurs": 1, "maxOccurs": 1, "schema": { "type": "string", "enum": [ "none", "Topic", "Topic theme", "UserID", "Hashtag" ], "default": "none" } }, "output2": { "id": "output2", "title": "output2", "description": "Output column definition", "minOccurs": 1, "maxOccurs": 1, "schema": { "type": "string", "enum": [ "none", "Topic", "Topic theme", "UserID", "Hashtag" ], "default": "none" } }, "output3": { "id": "output3", "title": "output3", "description": "Output column definition", "minOccurs": 1, "maxOccurs": 1, "schema": { "type": "string", "enum": [ "none", "Topic", "Topic theme", "UserID", "Hashtag" ], "default": "none" } },
"moreOutput": { "id": "moreOutput", "title": "moreOutput", "description": "More output column definitions, using the output specification language. Enter none if you do not require any extra outputs. Format: [a sequence of values separated by #](https://cloud.gate.ac.uk/info/help/sobigdata/#output-spec)", "minOccurs": 1, "maxOccurs": 1, "schema": { "type": "string", "format": "none", "default": "none" } } }, "outputs": { "result": { "id": "result", "title": "result", "description": "Result CSV file", "minOccurs": 1, "maxOccurs": 1, "metadata": [ { "title": "result.csv", "role": "file", "href": "/ccp_data/output/result.csv" } ], "schema": { "type": "string", "contentEncoding": "binary", "contentMediaType": "text/csv" } } }, "additionalParameters": { "parameters": [ { "name": "deploy-script", "value": [ "./download.sh {{inputFile}}" ] }, { "name": "execute-script", "value": [ "python build/sobigdata-brexit/standard_service.py /ccp_data/inputFile.csv {{columnSeparator}} {{hasHeaders}} '{{textColumn}}' '{{copyColumns}}' '{{output1}}' '{{output2}}' '{{output3}}' '{{moreOutput}}'", "cp -f result.csv /ccp_data/" ] }, { "name": "undeploy-script", "value": [] } ] }, "links": [ { "rel": "compatibleWith", "title": "D4Science development Infrastructure", "href": "infrastructures/d4science-dev-swarm" }, { "rel": "compatibleWith", "title": "D4Science production Infrastructure", "href": "infrastructures/d4science-prod-swarm" } ], "keywords": [ "gatecloud" ], "id": "1a45d3ba-02f2-4562-b59e-e9a805dccf64" }