ccp.docs/source/methods_ccp/Ariadne Dutch Dendrochronol...

219 lines
7.7 KiB
JSON

{
"title": "Ariadne Dutch Dendrochronology Entity Recognizer",
"description": "Identifies Dutch terms and phrases for analysing archaeological text. The method extracts named entities for archaeological elements, wood material, samples, and dates; all except dates are linked to concept labels of the Getty Art & Architecture Thesaurus (AAT). This method was supplied by the Ariadne infrastructure, which integrates archaeological research data across Europe. For full details see https://cloud.gate.ac.uk/shopfront/displayItem/dendrochronology-entities-recognizer-nl",
"version": "1.0.0",
"jobControlOptions": "async-execute",
"metadata": [
{
"title": "Marco Lettere",
"role": "author",
"href": "https://accounts.d4science.org/auth/admin/realms/d4science/users/09138708-9a19-4724-93d1-8c721d591da2"
},
{
"role": "category",
"title": "Archaeological_Text_Processing"
}
],
"inputs": {
"ccpimage": {
"id": "ccpimage",
"title": "Runtime",
"description": "The image of the runtime to use for method execution. This depends on the infrastructure specific protocol for interacting with registries.",
"minOccurs": 1,
"maxOccurs": 1,
"schema": {
"type": "string",
"format": "url",
"contentMediaType": "text/plain",
"default": "hub.dev.d4science.org/ccp-runtimes/gatecloud-base:latest",
"readOnly": true
}
},
"inputFile": {
"id": "inputFile",
"title": "inputFile",
"description": "Input CSV file",
"minOccurs": 1,
"maxOccurs": 1,
"schema": {
"type": "string",
"format": "remotefile",
"default": "",
"contentMediaType": "text/csv"
}
},
"columnSeparator": {
"id": "columnSeparator",
"title": "columnSeparator",
"description": "Separator between columns: comma for a normal CSV file; tab is also supported",
"minOccurs": 1,
"maxOccurs": 1,
"schema": {
"type": "string",
"enum": [
"comma",
"tab"
],
"default": "comma"
}
},
"hasHeaders": {
"id": "hasHeaders",
"title": "hasHeaders",
"description": "Does the CSV have a header row?",
"minOccurs": 1,
"maxOccurs": 1,
"schema": {
"type": "string",
"format": "boolean",
"default": "false"
}
},
"textColumn": {
"id": "textColumn",
"title": "textColumn",
"description": "Column name (if hasHeaders is true) or index (1-based) of the column that contains the text to process",
"minOccurs": 1,
"maxOccurs": 1,
"schema": {
"type": "string",
"format": "none",
"default": "1"
}
},
"copyColumns": {
"id": "copyColumns",
"title": "copyColumns",
"description": "Comma-separated list of column names (if hasHeaders is true) or indexes (1-based) that should be copied from the input to output, for example a column representing the document identifier. 0 means do not copy any columns to the output.",
"minOccurs": 1,
"maxOccurs": 1,
"schema": {
"type": "string",
"format": "none",
"default": "0"
}
},
"output1": {
"id": "output1",
"title": "output1",
"description": "Output column definition (3 pre-configured options available, select as many as you wish, or if you prefer you can also add custom output column definitions below)",
"minOccurs": 1,
"maxOccurs": 1,
"schema": {
"type": "string",
"enum": [
"none",
"archElements Identifier",
"woodMaterials Identifier",
"Date"
],
"default": "none"
}
},
"output2": {
"id": "output2",
"title": "output2",
"description": "Output column definition",
"minOccurs": 1,
"maxOccurs": 1,
"schema": {
"type": "string",
"enum": [
"none",
"archElements Identifier",
"woodMaterials Identifier",
"Date"
],
"default": "none"
}
},
"output3": {
"id": "output3",
"title": "output3",
"description": "Output column definition",
"minOccurs": 1,
"maxOccurs": 1,
"schema": {
"type": "string",
"enum": [
"none",
"archElements Identifier",
"woodMaterials Identifier",
"Date"
],
"default": "none"
}
},
"moreOutput": {
"id": "moreOutput",
"title": "moreOutput",
"description": "More output column definitions, using the output specification language. Enter none if you do not require any extra outputs [a sequence of values separated by #](https://cloud.gate.ac.uk/info/help/sobigdata/#output-spec)",
"minOccurs": 1,
"maxOccurs": 1,
"schema": {
"type": "string",
"format": "none",
"default": "none"
}
}
},
"outputs": {
"result": {
"id": "result",
"title": "result",
"description": "Result CSV file",
"minOccurs": 1,
"maxOccurs": 1,
"metadata": [
{
"title": "result.csv",
"role": "file",
"href": "/ccp_data/output/result.csv"
}
],
"schema": {
"type": "string",
"contentEncoding": "binary",
"contentMediaType": "text/csv"
}
}
},
"additionalParameters": {
"parameters": [
{
"name": "deploy-script",
"value": [
"./download.sh {{inputFile}}"
]
},
{
"name": "execute-script",
"value": [
"python build/dendrochronology-entities-recognizer-nl/standard_service.py /ccp_data/inputFile.csv {{columnSeparator}} {{hasHeaders}} '{{textColumn}}' '{{copyColumns}}' '{{output1}}' '{{output2}}' '{{output3}}' '{{moreOutput}}'",
"cp -f result.csv /ccp_data/"
]
},
{
"name": "undeploy-script",
"value": []
}
]
},
"links": [
{
"rel": "compatibleWith",
"title": "D4Science development Infrastructure",
"href": "infrastructures/d4science-dev-swarm"
},
{
"rel": "compatibleWith",
"title": "D4Science production Infrastructure",
"href": "infrastructures/d4science-prod-swarm"
}
],
"keywords": [
"gatecloud"
],
"id": "da7c84d9-3748-4ff5-8831-6d6520d6ff55"
}