dnet-hadoop/dhp-workflows/dhp-bmuse/src/main/resources/eu/dnetlib/dhp/bmuse/bioschema/generate_dataset.json

44 lines
1.2 KiB
JSON

[
{
"paramName": "n",
"paramLongName": "nameNode",
"paramDescription": "the Name Node URI",
"paramRequired": true
},
{
"paramName": "w",
"paramLongName": "workingPath",
"paramDescription": "the working path",
"paramRequired": true
},
{
"paramName": "r",
"paramLongName": "rdfOutput",
"paramDescription": "the RDF output path",
"paramRequired": true
},
{
"paramName": "u",
"paramLongName": "sitemapUrl",
"paramDescription": "the sitemap url",
"paramRequired": true
},
{
"paramName": "k",
"paramLongName": "sitemapURLKey",
"paramDescription": "the sitemap file contains a list of XML entries; sitemapURLKey is the name of the tag in each entry whose value is the URL",
"paramRequired": true
},
{
"paramName": "d",
"paramLongName": "dynamic",
"paramDescription": "boolean that determines whether the scraper uses Selenium (dynamic) or jsoup (static) to scrape the information",
"paramRequired": false
},
{
"paramName": "m",
"paramLongName": "maxScrapedPages",
"paramDescription": "max number of pages that will be scraped, default: no limit",
"paramRequired": false
}
]