{
"title": "Ariadne English Archaeology Named Entity Recognizer",
"description": "Identifies terms and phrases in English for analysing archaeological text. The method delivers named entities of archaeological context, physical object, material, time appellation and structure, linked to concept labels of the National Cultural Heritage Thesauri (UK). This method was supplied by the Ariadne Infrastructure that integrates archaeological research data across Europe - for full details see https://cloud.gate.ac.uk/shopfront/displayItem/archaeology-ner-en",
"version": "1.0.0",
"jobControlOptions": "async-execute",
"metadata": [
{
"title": "Marco Lettere",
"role": "author",
"href": "https://accounts.d4science.org/auth/admin/realms/d4science/users/09138708-9a19-4724-93d1-8c721d591da2"
},
{
"role": "category",
"title": "Archaeological_Text_Processing"
}
],
"inputs": {
"ccpimage": {
"id": "ccpimage",
"title": "Runtime",
"description": "The image of the runtime to use for method execution. This depends on the infrastructure specific protocol for interacting with registries.",
"minOccurs": 1,
"maxOccurs": 1,
"schema": {
"type": "string",
"format": "url",
"contentMediaType": "text/plain",
"default": "hub.dev.d4science.org/ccp-runtimes/gatecloud-base:latest",
"readOnly": true
}
},
"inputFile": {
"id": "inputFile",
"title": "inputFile",
"description": "Input CSV file",
"minOccurs": 1,
"maxOccurs": 1,
"schema": {
"type": "string",
"format": "remotefile",
"default": "",
"contentMediaType": "text/csv"
}
},
"columnSeparator": {
"id": "columnSeparator",
"title": "columnSeparator",
"description": "Separator between columns - comma for a normal CSV file but tab is also supported",
"minOccurs": 1,
"maxOccurs": 1,
"schema": {
"type": "string",
"enum": [
"comma",
"tab"
],
"default": "comma"
}
},
"hasHeaders": {
"id": "hasHeaders",
"title": "hasHeaders",
"description": "Does the CSV have a header row?",
"minOccurs": 1,
"maxOccurs": 1,
"schema": {
"type": "string",
"format": "boolean",
"default": "false"
}
},
"textColumn": {
"id": "textColumn",
"title": "textColumn",
"description": "Column name (if hasHeaders) or index (1-based) that contains the text to process",
"minOccurs": 1,
"maxOccurs": 1,
"schema": {
"type": "string",
"format": "none",
"default": "1"
}
},
"copyColumns": {
"id": "copyColumns",
"title": "copyColumns",
"description": "Comma-separated list of column names (if hasHeaders is true) or indexes (1-based) that should be copied from the input to output, for example a column representing the document identifier. 0 means do not copy any columns to the output.",
"minOccurs": 1,
"maxOccurs": 1,
"schema": {
"type": "string",
"format": "none",
"default": "0"
}
},
"output1": {
"id": "output1",
"title": "output1",
"description": "Output column definition (6 pre-configured options available, select as many as you wish, or if you prefer you can also add custom output column definitions below)",
"minOccurs": 1,
"maxOccurs": 1,
"schema": {
"type": "string",
"enum": [
"none",
"PhysicalObject Identifier",
"Structure Identifier",
"Material Identifier",
"ArchaeologicalContext Identifier",
"TimeAppellation Identifier",
"Activity Identifier"
],
"default": "none"
}
},
"output2": {
"id": "output2",
"title": "output2",
"description": "Output column definition",
"minOccurs": 1,
"maxOccurs": 1,
"schema": {
"type": "string",
"enum": [
"none",
"PhysicalObject Identifier",
"Structure Identifier",
"Material Identifier",
"ArchaeologicalContext Identifier",
"TimeAppellation Identifier",
"Activity Identifier"
],
"default": "none"
}
},
"output3": {
"id": "output3",
"title": "output3",
"description": "Output column definition",
"minOccurs": 1,
"maxOccurs": 1,
"schema": {
"type": "string",
"enum": [
"none",
"PhysicalObject Identifier",
"Structure Identifier",
"Material Identifier",
"ArchaeologicalContext Identifier",
"TimeAppellation Identifier",
"Activity Identifier"
],
"default": "none"
}
},
"output4": {
"id": "output4",
"title": "output4",
"description": "Output column definition",
"minOccurs": 1,
"maxOccurs": 1,
"schema": {
"type": "string",
"enum": [
"none",
"PhysicalObject Identifier",
"Structure Identifier",
"Material Identifier",
"ArchaeologicalContext Identifier",
"TimeAppellation Identifier",
"Activity Identifier"
],
"default": "none"
}
},
"output5": {
"id": "output5",
"title": "output5",
"description": "Output column definition",
"minOccurs": 1,
"maxOccurs": 1,
"schema": {
"type": "string",
"enum": [
"none",
"PhysicalObject Identifier",
"Structure Identifier",
"Material Identifier",
"ArchaeologicalContext Identifier",
"TimeAppellation Identifier",
"Activity Identifier"
],
"default": "none"
}
},
"output6": {
"id": "output6",
"title": "output6",
"description": "Output column definition",
"minOccurs": 1,
"maxOccurs": 1,
"schema": {
"type": "string",
"enum": [
"none",
"PhysicalObject Identifier",
"Structure Identifier",
"Material Identifier",
"ArchaeologicalContext Identifier",
"TimeAppellation Identifier",
"Activity Identifier"
],
"default": "none"
}
},
"moreOutput": {
"id": "moreOutput",
"title": "moreOutput",
"description": "More output column definitions, using the output specification language. Enter none if you do not require any extra outputs [a sequence of values separated by #](https://cloud.gate.ac.uk/info/help/sobigdata/#output-spec)",
"minOccurs": 1,
"maxOccurs": 1,
"schema": {
"type": "string",
"format": "none",
"default": "none"
}
}
},
"outputs": {
"result": {
"id": "result",
"title": "result",
"description": "Result CSV file",
"minOccurs": 1,
"maxOccurs": 1,
"metadata": [
{
"title": "result.csv",
"role": "file",
"href": "/ccp_data/output/result.csv"
}
],
"schema": {
"type": "string",
"contentEncoding": "binary",
"contentMediaType": "text/csv"
}
}
},
"additionalParameters": {
"parameters": [
{
"name": "deploy-script",
"value": [
"./download.sh {{inputFile}}"
]
},
{
"name": "execute-script",
"value": [
"python build/archaeology-ner-en/standard_service.py /ccp_data/inputFile.csv {{columnSeparator}} {{hasHeaders}} '{{textColumn}}' '{{copyColumns}}' '{{output1}}' '{{output2}}' '{{output3}}' '{{output4}}' '{{output5}}' '{{output6}}' '{{moreOutput}}'",
"cp -f result.csv /ccp_data/"
]
},
{
"name": "undeploy-script",
"value": []
}
]
},
"links": [
{
"rel": "compatibleWith",
"title": "D4Science development Infrastructure",
"href": "infrastructures/d4science-dev-swarm"
},
{
"rel": "compatibleWith",
"title": "D4Science production Infrastructure",
"href": "infrastructures/d4science-prod-swarm"
}
],
"keywords": [
"gatecloud"
],
"id": "9c9cd5fc-3a92-4185-8f0c-9f10ecba6ab6"
}