- Fix the "baseFilesLocation" being null (there was no serious problem, but multiple directories were spawned in the project's directory).

- Make sure the given "baseFilesLocation" ends with a file-separator, before using it.
- Optimize the process of unzipping files.
This commit is contained in:
Lampros Smyrnaios 2022-12-20 18:38:11 +02:00
parent e11afe5ab2
commit 4528d1f9be
2 changed files with 16 additions and 8 deletions

View File

@ -19,8 +19,9 @@ public class FileUnZipper {
// Iterate over the files in zip and unzip them.
ZipEntry zipEntry = zis.getNextEntry();
while ( zipEntry != null ) {
Path targetPath = zipSlipProtect(zipEntry, target);
if ( zipEntry.getName().endsWith(File.separator) ) // If we have a directory.
String zipEntryName = zipEntry.getName();
Path targetPath = zipSlipProtect(zipEntryName, target);
if ( zipEntryName.endsWith(File.separator) ) // If we have a directory.
Files.createDirectories(targetPath);
else {
// Some zip-files store only the file-paths and not separate directories. We need to create parent directories, e.g. data/folder/file.txt
@ -37,13 +38,14 @@ public class FileUnZipper {
}
}
// Protect from a Zip Slip attack: https://snyk.io/research/zip-slip-vulnerability
public Path zipSlipProtect(ZipEntry zipEntry, Path targetDir) throws IOException {
Path targetDirResolved = targetDir.resolve(zipEntry.getName());
public Path zipSlipProtect(String zipEntryName, Path targetDir) throws IOException {
Path targetDirResolved = targetDir.resolve(zipEntryName);
// Make sure normalized file still has targetDir as its prefix, else throw an exception.
Path normalizePath = targetDirResolved.normalize();
if ( !normalizePath.startsWith(targetDir) ) {
throw new IOException("Bad zip entry: " + zipEntry.getName());
throw new IOException("Bad zip entry: " + zipEntryName);
}
return normalizePath;
}

View File

@ -50,6 +50,15 @@ public class FileUtils {
public enum UploadFullTextsResponse {successful, unsuccessful, databaseError}
public String baseFilesLocation;
/**
 * Creates the instance and stores the given base files location in the {@code baseFilesLocation} field.
 * If the given value does not already end with {@link File#separator}, the separator is appended first.
 * A {@code null} value is not handled here: the {@code endsWith} call would throw a NullPointerException.
 *
 * @param baseFilesLocation the base location for the files; it carries the {@code @Value} placeholder
 *                          "${services.pdfaggregation.controller.baseFilesLocation}"
 *                          (presumably resolved by Spring from the application's configuration; TODO confirm).
 */
public FileUtils (@Value("${services.pdfaggregation.controller.baseFilesLocation}") String baseFilesLocation) {
// Make sure the location ends with a file-separator before it is stored.
if ( !baseFilesLocation.endsWith(File.separator) )
baseFilesLocation += File.separator;
this.baseFilesLocation = baseFilesLocation;
}
/**
* In each insertion, a new parquet-file is created, so we end up with millions of files. Parquet is great for fast-select, so we have to stick with it and merge those files.
@ -89,9 +98,6 @@ public class FileUtils {
}
@Value("${services.pdfaggregation.controller.baseFilesLocation}")
public static String baseFilesLocation;
public static DecimalFormat df = new DecimalFormat("0.00");
// The following regex might be useful in a future scenario. It extracts the "plain-filename", the "file-ID" and the "file-extension".