17 lines
785 B
JSON
17 lines
785 B
JSON
{
  "type": "record",
  "namespace": "eu.openaire.urls_controller",
  "name": "eu.openaire.urls_controller.Payload",
  "fields": [
    {"name": "id", "type": "string"},
    {"name": "original_url", "type": "string"},
    {
      "name": "actual_url",
      "type": "string",
      "doc": "This should NOT be null, since only the \"found\" pdf-publications are processed in parquet."
    },
    {"name": "date", "type": {"type": "long", "logicalType": "timestamp-millis"}},
    {"name": "mimetype", "type": "string"},
    {"name": "size", "type": ["null", "string"]},
    {"name": "hash", "type": "string"},
    {
      "name": "location",
      "type": "string",
      "doc": "This is not null; a check is added before processing any record."
    },
    {"name": "provenance", "type": "string"}
  ]
}