- Fix the "assignment" table always being created in the testDatabase.

- Code polishing.
This commit is contained in:
Lampros Smyrnaios 2022-12-07 14:58:38 +02:00
parent 3c5f4c6464
commit 95c38c4a24
3 changed files with 4 additions and 4 deletions

View File

@ -9,5 +9,5 @@ To install and run the application:
- Provide the **S3 Object Store** related configurations, inside the *src/main/resources/application.properties* file.<br>
- Execute the ```installAndRun.sh``` script which builds and runs the app.<br>
If you want to just run the app, then run the script with the argument "1": ```./installAndRun.sh 1```.<br>
If you want to build and run the app on a docker container, then run the script with the argument "0" followed by the argument "1": ```./installAndRun.sh 0 1```.<br>
If you want to build and run the app on a **Docker Container**, then run the script with the argument "0" followed by the argument "1": ```./installAndRun.sh 0 1```.<br>
<br>

View File

@ -80,8 +80,8 @@ public class ImpalaConnector {
// Drop the "current_assignment" table. It is a temporary table which is created on-demand during execution.
jdbcTemplate.execute("DROP TABLE IF EXISTS " + databaseName + ".current_assignment PURGE");
jdbcTemplate.execute("CREATE TABLE IF NOT EXISTS " + testDatabaseName + ".assignment (id string, original_url string, workerid string, `date` bigint) stored as parquet");
jdbcTemplate.execute("COMPUTE STATS " + testDatabaseName + ".assignment");
jdbcTemplate.execute("CREATE TABLE IF NOT EXISTS " + databaseName + ".assignment (id string, original_url string, workerid string, `date` bigint) stored as parquet");
jdbcTemplate.execute("COMPUTE STATS " + databaseName + ".assignment");
jdbcTemplate.execute("CREATE TABLE IF NOT EXISTS " + databaseName + ".attempt (id string, original_url string, `date` bigint, status string, error_class string, error_message string) stored as parquet");
jdbcTemplate.execute("COMPUTE STATS " + databaseName + ".attempt");

View File

@ -5,7 +5,7 @@
"fields": [
{"name": "id", "type": "string"},
{"name": "original_url", "type": "string"},
{"name": "actual_url", "type": "string"}, // This should not be null, since only the "found" pdf-publications are processed in parquet.
{"name": "actual_url", "type": "string"}, // This should NOT be null, since only the "found" pdf-publications are processed in parquet.
{"name": "date", "type" : {"type": "long", "logicalType": "timestamp-millis"}},
{"name": "mimetype", "type": "string"},
{"name": "size", "type": ["null","string"]},