- Change the parquet compression from "Snappy" to "Gzip", as an unhandleable exception occurs when the app runs inside a Docker container with "Snappy" compression enabled.
- Code polishing.
This commit is contained in:
parent c8baf5a5fc
commit 0209d24068
@@ -48,7 +48,7 @@ if [[ justInstall -eq 0 ]]; then
 	gradle clean build
 
 	if [[ shouldRunInDocker -eq 1 ]]; then
-		echo "Give the username for the Docker Hub:"
+		echo -e "\nGive the username for the Docker Hub:"
 		read -r username
 		echo -e "\nBuilding docker image..\n"
 		sudo docker --version || handle_error "Docker was not found!" 3
@@ -8,7 +8,6 @@ import eu.openaire.urls_controller.payloads.requests.WorkerReport;
 import eu.openaire.urls_controller.payloads.responces.AssignmentsResponse;
 import eu.openaire.urls_controller.util.FileUtils;
 import eu.openaire.urls_controller.util.ParquetFileUtils;
-import org.apache.commons.io.FileDeleteStrategy;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import org.springframework.beans.factory.annotation.Autowired;
@@ -20,7 +19,6 @@ import org.springframework.web.bind.annotation.*;
 
 import javax.servlet.http.HttpServletRequest;
 import java.io.File;
-import java.io.IOException;
 import java.nio.file.Files;
 import java.nio.file.Paths;
 import java.sql.Connection;
@@ -299,16 +297,12 @@ public class UrlController {
 			logger.error(errorMsg, e);
 			return ResponseEntity.status(HttpStatus.INTERNAL_SERVER_ERROR).body(errorMsg);
 		} finally {
-			try {
-				logger.debug("Deleting directory: " + currentParquetPath);
-				FileDeleteStrategy.FORCE.delete(new File(currentParquetPath));
-			} catch (IOException e) {
-				logger.error("", e);
-			}
+			logger.debug("Deleting directory: " + currentParquetPath);
+			fileUtils.deleteDirectory(new File(currentParquetPath));
 		}
 
 		logger.debug("Going to merge the parquet files for the tables which were altered.");
-		// When the uploaded parquet files are "loaded" into the tables, ther are actually moved into the directory which contains the data of the table.
+		// When the uploaded parquet files are "loaded" into the tables, they are actually moved into the directory which contains the data of the table.
 
 		String mergeErrorMsg;
 
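The hunk above swaps commons-io's FileDeleteStrategy.FORCE.delete() (which throws a checked IOException and forced the surrounding try/catch) for the project's own fileUtils.deleteDirectory(), whose implementation is not shown in this diff. A minimal sketch of what such a helper might look like, using only the JDK (the class shape and boolean return are assumptions):

import java.io.File;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.Comparator;
import java.util.stream.Stream;

public class FileUtils {

	// Delete a directory tree bottom-up: deepest paths first, so every
	// directory is already empty by the time we try to delete it.
	public boolean deleteDirectory(File directory) {
		try (Stream<Path> walk = Files.walk(directory.toPath())) {
			walk.sorted(Comparator.reverseOrder())
				.forEach(path -> path.toFile().delete());
			return true;
		} catch (IOException e) {
			return false;	// report failure instead of propagating a checked exception
		}
	}
}

Returning a boolean instead of throwing lets the caller's finally block stay free of its own try/catch, which matches the simplification made in the controller above.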
@@ -314,7 +314,7 @@ public class ParquetFileUtils {
 		}
 
 		try (ParquetWriter<GenericRecord> writer = AvroParquetWriter.<GenericRecord>builder(outputFile).withSchema(schema)
-				.withCompressionCodec(CompressionCodecName.SNAPPY).build())
+				.withCompressionCodec(CompressionCodecName.GZIP).build())
 		{
 			//logger.debug("Going to write to \"" + fullFilePath + "\" the record list: " + recordList);	// DEBUG!
 			for ( GenericRecord record : recordList ) {
@@ -322,7 +322,7 @@ public class ParquetFileUtils {
 				writer.write(record);
 			}
 		} catch (Throwable e) {	// The simple "Exception" may not be thrown here, but an "Error" may be thrown. "Throwable" catches EVERYTHING!
-			logger.error("Problem when creating the \"ParquetWriter\" object or when writing a record with it!", e);
+			logger.error("Problem when creating the \"ParquetWriter\" object or when writing the records with it!", e);
 			// At some point, I got an "NoSuchMethodError", because of a problem in the AvroSchema file: (java.lang.NoSuchMethodError: org.apache.avro.Schema.getLogicalType()Lorg/apache/avro/LogicalType;).
 			// The error was with the schema: {"name": "date", "type" : ["null", {"type" : "long", "logicalType" : "timestamp-millis"}]},
 			return false;
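For context, below is a minimal, self-contained sketch of the writer pattern these two hunks change, assuming the Hadoop-path-based OutputFile builder (everything outside the diffed lines is illustrative, not the project's actual code). The likely motivation for the codec switch: Parquet's GZIP codec uses the JDK's built-in zlib support, while the Snappy codec loads a native library at runtime, a common failure point in slim Docker images and plausibly the unhandleable exception the commit message refers to.

import java.util.List;

import org.apache.avro.Schema;
import org.apache.avro.generic.GenericRecord;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.parquet.avro.AvroParquetWriter;
import org.apache.parquet.hadoop.ParquetWriter;
import org.apache.parquet.hadoop.metadata.CompressionCodecName;
import org.apache.parquet.hadoop.util.HadoopOutputFile;

public class ParquetWriteSketch {

	// Write a list of Avro records to a Parquet file, compressed with GZIP.
	public static boolean writeRecords(String fullFilePath, Schema schema, List<GenericRecord> recordList) {
		try (ParquetWriter<GenericRecord> writer = AvroParquetWriter.<GenericRecord>builder(
					HadoopOutputFile.fromPath(new Path(fullFilePath), new Configuration()))
				.withSchema(schema)
				.withCompressionCodec(CompressionCodecName.GZIP)	// GZIP is pure-Java; SNAPPY needs a native library.
				.build())
		{
			for ( GenericRecord record : recordList )
				writer.write(record);
			return true;
		} catch (Throwable e) {	// as the original comment notes, an "Error" (not just an "Exception") can surface here
			return false;
		}
	}
}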
@@ -18,7 +18,7 @@ public class S3ObjectStore {
 
 	private static final Logger logger = LoggerFactory.getLogger(S3ObjectStore.class);
 
-	private String s3Protocol = "s3://";
+	private final String s3Protocol = "s3://";
 	@Value("${services.pdfaggregation.controller.s3.endpoint}")
 	private String endpoint = null;	// This is useful to be "public", to test file-locations.
 	@Value("${services.pdfaggregation.controller.s3.accessKey}")
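Making s3Protocol final is safe because it is a hard-coded constant that Spring never touches; the neighbouring @Value fields must stay non-final, since Spring assigns them reflectively after construction and field injection cannot write to final fields. A minimal sketch of the distinction (class name hypothetical):

import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Component;

@Component
public class S3ConfigSketch {

	private final String s3Protocol = "s3://";	// plain constant: never injected, so it can (and should) be final

	@Value("${services.pdfaggregation.controller.s3.endpoint}")
	private String endpoint;	// set by Spring via reflection after construction: must not be final
}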