227 lines
7.6 KiB
JSON
227 lines
7.6 KiB
JSON
{
|
|
"title": "Gate Cloud Brexit Tweet Analysis",
|
|
"description": "A pipeline designed to detect political topics, hashtags, URLs, user mention, and hashtag-based voting intentions, expressed in tweets about the UK referendum on membership of the EU - for full details see https://cloud.gate.ac.uk/shopfront/displayItem/sobigdata-brexit",
|
|
"version": "1.0.0",
|
|
"jobControlOptions": "async-execute",
|
|
"metadata": [
|
|
{
|
|
"title": "Marco Lettere",
|
|
"role": "author",
|
|
"href": "https://accounts.d4science.org/auth/admin/realms/d4science/users/09138708-9a19-4724-93d1-8c721d591da2"
|
|
},
|
|
{
|
|
"title": "Alfredo Oliviero",
|
|
"role": "author",
|
|
"href": "https://accounts.d4science.org/auth/admin/realms/d4science/users/b20300b9-d2d7-4ef8-b164-917f661f7ee0"
|
|
},
|
|
{
|
|
"role": "category",
|
|
"title": "Text_Analytics"
|
|
}
|
|
],
|
|
"inputs": {
|
|
"ccpimage": {
|
|
"id": "ccpimage",
|
|
"title": "Runtime",
|
|
"description": "The image of the runtime to use for method execution. This depends on the infrastructure specific protocol for interacting with registries.",
|
|
"minOccurs": 1,
|
|
"maxOccurs": 1,
|
|
"schema": {
|
|
"type": "string",
|
|
"format": "url",
|
|
"contentMediaType": "text/plain",
|
|
"default": "hub.dev.d4science.org/ccp-runtimes/gatecloud-base:latest",
|
|
"readOnly": true
|
|
}
|
|
},
|
|
"inputFile": {
|
|
"id": "inputFile",
|
|
"title": "inputFile",
|
|
"description": "Input CSV file()",
|
|
"minOccurs": 1,
|
|
"maxOccurs": 1,
|
|
"schema": {
|
|
"type": "string",
|
|
"format": "remotefile",
|
|
"default": "",
|
|
"contentMediaType": "text/csv"
|
|
}
|
|
},
|
|
"columnSeparator": {
|
|
"id": "columnSeparator",
|
|
"title": "columnSeparator",
|
|
"description": "Separator between columns - comma for a normal CSV file but tab is also supported()",
|
|
"minOccurs": 1,
|
|
"maxOccurs": 1,
|
|
"schema": {
|
|
"type": "string",
|
|
"enum": [
|
|
"comma",
|
|
"tab"
|
|
],
|
|
"default": "comma"
|
|
}
|
|
},
|
|
"hasHeaders": {
|
|
"id": "hasHeaders",
|
|
"title": "hasHeaders",
|
|
"description": "Does the CSV have a header row?()",
|
|
"minOccurs": 1,
|
|
"maxOccurs": 1,
|
|
"schema": {
|
|
"type": "string",
|
|
"format": "boolean",
|
|
"default": "false"
|
|
}
|
|
},
|
|
"textColumn": {
|
|
"id": "textColumn",
|
|
"title": "textColumn",
|
|
"description": "Column name (if hasHeaders) or index (1-based) that contains the text to process()",
|
|
"minOccurs": 1,
|
|
"maxOccurs": 1,
|
|
"schema": {
|
|
"type": "string",
|
|
"format": "none",
|
|
"default": "1"
|
|
}
|
|
},
|
|
"copyColumns": {
|
|
"id": "copyColumns",
|
|
"title": "copyColumns",
|
|
"description": "Comma-separated list of column names (if hasHeaders is true) or indexes (1-based) that should be copied from the input to output, for example a column representing the document identifier. 0 means do not copy any columns to the output.()",
|
|
"minOccurs": 1,
|
|
"maxOccurs": 1,
|
|
"schema": {
|
|
"type": "string",
|
|
"format": "none",
|
|
"default": "0"
|
|
}
|
|
},
|
|
"output1": {
|
|
"id": "output1",
|
|
"title": "output1",
|
|
"description": "Output column definition (4 pre-configured options available, select as many as you wish, or if you prefer you can also add custom output column definitions below)()",
|
|
"minOccurs": 1,
|
|
"maxOccurs": 1,
|
|
"schema": {
|
|
"type": "string",
|
|
"enum": [
|
|
"none",
|
|
"Topic",
|
|
"Topic theme",
|
|
"UserID",
|
|
"Hashtag"
|
|
],
|
|
"default": "none"
|
|
}
|
|
},
|
|
"output2": {
|
|
"id": "output2",
|
|
"title": "output2",
|
|
"description": "Output column definition()",
|
|
"minOccurs": 1,
|
|
"maxOccurs": 1,
|
|
"schema": {
|
|
"type": "string",
|
|
"enum": [
|
|
"none",
|
|
"Topic",
|
|
"Topic theme",
|
|
"UserID",
|
|
"Hashtag"
|
|
],
|
|
"default": "none"
|
|
}
|
|
},
|
|
"output3": {
|
|
"id": "output3",
|
|
"title": "output3",
|
|
"description": "Output column definition()",
|
|
"minOccurs": 1,
|
|
"maxOccurs": 1,
|
|
"schema": {
|
|
"type": "string",
|
|
"enum": [
|
|
"none",
|
|
"Topic",
|
|
"Topic theme",
|
|
"UserID",
|
|
"Hashtag"
|
|
],
|
|
"default": "none"
|
|
}
|
|
},
|
|
"moreOutput": {
|
|
"id": "moreOutput",
|
|
"title": "moreOutput",
|
|
"description": "More output column definitions, using the output specification language. Enter none if you do not require any extra outputs [a sequence of values separated by #](https://cloud.gate.ac.uk/info/help/sobigdata/#output-spec)",
|
|
"minOccurs": 1,
|
|
"maxOccurs": 1,
|
|
"schema": {
|
|
"type": "string",
|
|
"format": "none",
|
|
"default": "none"
|
|
}
|
|
}
|
|
},
|
|
"outputs": {
|
|
"result": {
|
|
"id": "result",
|
|
"title": "result",
|
|
"description": "Result CSV file",
|
|
"minOccurs": 1,
|
|
"maxOccurs": 1,
|
|
"metadata": [
|
|
{
|
|
"title": "result.csv",
|
|
"role": "file",
|
|
"href": "/ccp_data/output/result.csv"
|
|
}
|
|
],
|
|
"schema": {
|
|
"type": "string",
|
|
"contentEncoding": "binary",
|
|
"contentMediaType": "text/csv"
|
|
}
|
|
}
|
|
},
|
|
"additionalParameters": {
|
|
"parameters": [
|
|
{
|
|
"name": "deploy-script",
|
|
"value": [
|
|
"./download.sh {{inputFile}}"
|
|
]
|
|
},
|
|
{
|
|
"name": "execute-script",
|
|
"value": [
|
|
"python build/sobigdata-brexit/standard_service.py /ccp_data/inputFile.csv {{columnSeparator}} {{hasHeaders}} '{{textColumn}}' '{{copyColumns}}' '{{output1}}' '{{output2}}' '{{output3}}' '{{moreOutput}}'",
|
|
"cp -f result.csv /ccp_data/"
|
|
]
|
|
},
|
|
{
|
|
"name": "undeploy-script",
|
|
"value": []
|
|
}
|
|
]
|
|
},
|
|
"links": [
|
|
{
|
|
"rel": "compatibleWith",
|
|
"title": "D4Science development Infrastructure",
|
|
"href": "infrastructures/d4science-dev-swarm"
|
|
},
|
|
{
|
|
"rel": "compatibleWith",
|
|
"title": "D4Science production Infrastructure",
|
|
"href": "infrastructures/d4science-prod-swarm"
|
|
}
|
|
],
|
|
"keywords": [
|
|
"gatecloud"
|
|
],
|
|
"id": "1a45d3ba-02f2-4562-b59e-e9a805dccf64"
|
|
}
|