ccp.docs/source/developermanual/ccp_methods/Kaldi Italian Automatic Spe...

122 lines
4.6 KiB
JSON

{
"title": "Kaldi Italian Automatic Speech Recognizer",
"description": "A Large Vocabulary Automatic Speech Recognizer for Italian, developed with the KALDI toolkit (Povey et al., 2011) based on the VoxForge and Apasci acoustic corpora and the Paisa' text collection for language modelling. It was developed based on a several initiatives for Italian ASR building with traditional architectures (e.g. Cosi, 2015; Varga, 2017, Coro et al., 2021). The preferred audio input format is PCM 16bit 16kHz (or 8kHz) mono. The output is a text file containing the trascription. Audio example tinyurl.com/y3nrpx47",
"version": "1.0.0",
"jobControlOptions": "async-execute",
"metadata": [
{
"role": "category",
"title": "Automatic Speech Recognition"
},
{
"title": "Marco Lettere",
"role": "author",
"href": "https://accounts.dev.d4science.org/auth/admin/realms/d4science/users/88c76e47-5881-4716-a2bf-02d3b4073574"
}
],
"inputs": {
"ccpimage": {
"id": "ccpimage",
"title": "Runtime",
"description": "The image of the runtime to use for method execution. This depends on the infrastructure specific protocol for interacting with registries.",
"minOccurs": 1,
"maxOccurs": 1,
"schema": {
"type": "string",
"format": "url",
"contentMediaType": "text/plain",
"default": "gianpaolocoro/kaldi_asr_voxforge_apasci_ita:v1.0",
"readOnly": true
}
},
"inputAudioFile": {
"id": "inputAudioFile",
"title": "inputAudioFile",
"description": "A new input field",
"minOccurs": 1,
"maxOccurs": 1,
"schema": {
"type": "string",
"format": "remotefile",
"contentMediaType": "text/plain",
"default": ""
}
}
},
"outputs": {
"Transcription": {
"id": "Transcription",
"title": "Audio transcription",
"description": "Audio transcription",
"minOccurs": 1,
"maxOccurs": 1,
"metadata": [
{
"role": "file",
"title": "transcription.txt",
"href": "transcription.txt"
}
],
"schema": {
"type": "string",
"contentMediaType": "text/plain"
}
}
},
"additionalParameters": {
"parameters": [
{
"name": "deploy-script",
"value": [
"wget {{inputAudioFile}} -O /opt/kaldi/egs/custom_asr/test.wav"
]
},
{
"name": "execute-script",
"value": [
"cd /opt/kaldi/egs/custom_asr/&& ./recognizeNN1.sh test.wav > transcription3.txt&& cat transcription3.txt |grep 'test '>/ccp_data/transcription4.txt",
"tt=$(cat /ccp_data/transcription4.txt)",
"string=${tt##*test hyp}",
"echo $string",
"echo $string>/ccp_data/transcription.txt",
"rm /ccp_data/transcription4.txt"
]
},
{
"name": "undeploy-script",
"value": []
},
{
"name": "lifecycle",
"value": [
{
"time": "2024-06-12T14:15:09.98Z",
"type": "imported",
"user": "https://accounts.dev.d4science.org/auth/admin/realms/d4science/users/88c76e47-5881-4716-a2bf-02d3b4073574",
"context": "%2Fgcube%2Fdevsec%2FCCP"
},
{
"time": "2024-06-12T14:16:12.708Z",
"type": "updated",
"user": "https://accounts.dev.d4science.org/auth/admin/realms/d4science/users/88c76e47-5881-4716-a2bf-02d3b4073574",
"context": "%2Fgcube%2Fdevsec%2FCCP"
}
]
}
]
},
"links": [
{
"rel": "compatibleWith",
"title": "D4Science development Infrastructure",
"href": "infrastructures/d4science-dev-swarm"
}
],
"keywords": [
"ASR",
"Speech",
"Automatic Speech Recognition"
],
"id": "185c9d26-91cb-4b10-8600-90c0482c07a1"
}