168 lines
6.3 KiB
JSON
168 lines
6.3 KiB
JSON
{
|
|
"title": "Gate Cloud Stance Classification Multilingual",
|
|
"description": "Service that processes threads of short texts such as tweets or forum postings and attempts to determine the stance (support, deny, question or comment) of each reply towards the post it is replying to. Input is a CSV with at least three columns: the text of each post, an identifier for that post, and, in the case of replies, the identifier of the post that is being replied to. Output is a CSV with the stance classification for each reply. This method uses a multilingual BERT-based model and is able to classify threads where the reply and target posts are in the same or different languages - for full details see https://cloud.gate.ac.uk/shopfront/displayItem/stance-classification-multilingual",
|
|
"version": "1.0.0",
|
|
"jobControlOptions": "async-execute",
|
|
"metadata": [
|
|
{
|
|
"title": "Marco Lettere",
|
|
"role": "author",
|
|
"href": "https://accounts.d4science.org/auth/admin/realms/d4science/users/09138708-9a19-4724-93d1-8c721d591da2"
|
|
},
|
|
{
|
|
"role": "category",
|
|
"title": "Text_Analytics"
|
|
}
|
|
],
|
|
"inputs": {
|
|
"ccpimage": {
|
|
"id": "ccpimage",
|
|
"title": "Runtime",
|
|
"description": "The image of the runtime to use for method execution. This depends on the infrastructure specific protocol for interacting with registries.",
|
|
"minOccurs": 1,
|
|
"maxOccurs": 1,
|
|
"schema": {
|
|
"type": "string",
|
|
"format": "url",
|
|
"contentMediaType": "text/plain",
|
|
"default": "hub.dev.d4science.org/ccp-runtimes/gatecloud-base:latest",
|
|
"readOnly": true
|
|
}
|
|
},
|
|
"inputFile": {
|
|
"id": "inputFile",
|
|
"title": "inputFile",
|
|
"description": "Input CSV file - see the documentation for full details of the expected file format (https://cloud.gate.ac.uk/info/help/sobigdata/stance.html).",
|
|
"minOccurs": 1,
|
|
"maxOccurs": 1,
|
|
"schema": {
|
|
"type": "string",
|
|
"format": "remotefile",
|
|
"default": "",
|
|
"contentMediaType": "text/csv"
|
|
}
|
|
},
|
|
"columnSeparator": {
|
|
"id": "columnSeparator",
|
|
"title": "columnSeparator",
|
|
"description": "Separator between columns - comma for a normal CSV file but tab is also supported",
|
|
"minOccurs": 1,
|
|
"maxOccurs": 1,
|
|
"schema": {
|
|
"type": "string",
|
|
"enum": [
|
|
"comma",
|
|
"tab"
|
|
],
|
|
"default": "comma"
|
|
}
|
|
},
|
|
"hasHeaders": {
|
|
"id": "hasHeaders",
|
|
"title": "hasHeaders",
|
|
"description": "Does the CSV have a header row?",
|
|
"minOccurs": 1,
|
|
"maxOccurs": 1,
|
|
"schema": {
|
|
"type": "string",
|
|
"format": "boolean",
|
|
"default": "true"
|
|
}
|
|
},
|
|
"textColumn": {
|
|
"id": "textColumn",
|
|
"title": "textColumn",
|
|
"description": "Column name (if hasHeaders) or index (1-based) that contains the text of each document",
|
|
"minOccurs": 1,
|
|
"maxOccurs": 1,
|
|
"schema": {
|
|
"type": "string",
|
|
"format": "none",
|
|
"default": "text"
|
|
}
|
|
},
|
|
"idColumn": {
|
|
"id": "idColumn",
|
|
"title": "idColumn",
|
|
"description": "Column name (if hasHeaders) or index (1-based) that contains the identifier of this document (required for all rows)",
|
|
"minOccurs": 1,
|
|
"maxOccurs": 1,
|
|
"schema": {
|
|
"type": "string",
|
|
"format": "none",
|
|
"default": "id"
|
|
}
|
|
},
|
|
"replyToColumn": {
|
|
"id": "replyToColumn",
|
|
"title": "replyToColumn",
|
|
"description": "Column name (if hasHeaders) or index (1-based) that contains the identifier of the document to which this is a reply (may be empty if this row represents an original document rather than a reply)",
|
|
"minOccurs": 1,
|
|
"maxOccurs": 1,
|
|
"schema": {
|
|
"type": "string",
|
|
"format": "none",
|
|
"default": "in_reply_to"
|
|
}
|
|
}
|
|
},
|
|
"outputs": {
|
|
"outputCsv": {
|
|
"id": "outputCsv",
|
|
"title": "outputCsv",
|
|
"description": "Output file with one row per reply giving the stance of this reply towards its target row",
|
|
"minOccurs": 1,
|
|
"maxOccurs": 1,
|
|
"metadata": [
|
|
{
|
|
"title": "result.csv",
|
|
"role": "file",
|
|
"href": "/ccp_data/output/result.csv"
|
|
}
|
|
],
|
|
"schema": {
|
|
"type": "string",
|
|
"contentEncoding": "binary",
|
|
"contentMediaType": "text/csv"
|
|
}
|
|
}
|
|
},
|
|
"additionalParameters": {
|
|
"parameters": [
|
|
{
|
|
"name": "deploy-script",
|
|
"value": [
|
|
"./download.sh {{inputFile}}"
|
|
]
|
|
},
|
|
{
|
|
"name": "execute-script",
|
|
"value": [
|
|
"python build/stance-classification/stance_classification_service.py /ccp_data/inputFile.csv {{columnSeparator}} {{hasHeaders}} '{{textColumn}}' '{{idColumn}}' '{{replyToColumn}}'",
|
|
"cp -f result.csv /ccp_data/"
|
|
]
|
|
},
|
|
{
|
|
"name": "undeploy-script",
|
|
"value": []
|
|
}
|
|
]
|
|
},
|
|
"links": [
|
|
{
|
|
"rel": "compatibleWith",
|
|
"title": "D4Science development Infrastructure",
|
|
"href": "infrastructures/d4science-dev-swarm"
|
|
},
|
|
{
|
|
"rel": "compatibleWith",
|
|
"title": "D4Science production Infrastructure",
|
|
"href": "infrastructures/d4science-prod-swarm"
|
|
}
|
|
],
|
|
"keywords": [
|
|
"gatecloud"
|
|
],
|
|
"id": "e816c96a-c385-4109-9cf7-75cd4f401bdf"
|
|
}
|