forked from lsmyrnaios/UrlsController
- Fix the "baseFilesLocation" being null (there was no serious problem, but multiple directories were spawned in the project's directory).
- Make sure the given "baseFilesLocation" ends with a file-separator, before using it.
- Optimize the process of unzipping-files.
This commit is contained in:
parent
e11afe5ab2
commit
4528d1f9be
|
@ -19,8 +19,9 @@ public class FileUnZipper {
|
||||||
// Iterate over the files in zip and unzip them.
|
// Iterate over the files in zip and unzip them.
|
||||||
ZipEntry zipEntry = zis.getNextEntry();
|
ZipEntry zipEntry = zis.getNextEntry();
|
||||||
while ( zipEntry != null ) {
|
while ( zipEntry != null ) {
|
||||||
Path targetPath = zipSlipProtect(zipEntry, target);
|
String zipEntryName = zipEntry.getName();
|
||||||
if ( zipEntry.getName().endsWith(File.separator) ) // If we have a directory.
|
Path targetPath = zipSlipProtect(zipEntryName, target);
|
||||||
|
if ( zipEntryName.endsWith(File.separator) ) // If we have a directory.
|
||||||
Files.createDirectories(targetPath);
|
Files.createDirectories(targetPath);
|
||||||
else {
|
else {
|
||||||
// Some zip-files store only the file-paths and not separate directories. We need to create parent directories, e.g. data/folder/file.txt
|
// Some zip-files store only the file-paths and not separate directories. We need to create parent directories, e.g. data/folder/file.txt
|
||||||
|
@ -37,13 +38,14 @@ public class FileUnZipper {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
// Protect from a Zip Slip attack: https://snyk.io/research/zip-slip-vulnerability
|
// Protect from a Zip Slip attack: https://snyk.io/research/zip-slip-vulnerability
|
||||||
public Path zipSlipProtect(ZipEntry zipEntry, Path targetDir) throws IOException {
|
public Path zipSlipProtect(String zipEntryName, Path targetDir) throws IOException {
|
||||||
Path targetDirResolved = targetDir.resolve(zipEntry.getName());
|
Path targetDirResolved = targetDir.resolve(zipEntryName);
|
||||||
// Make sure normalized file still has targetDir as its prefix, else throw an exception.
|
// Make sure normalized file still has targetDir as its prefix, else throw an exception.
|
||||||
Path normalizePath = targetDirResolved.normalize();
|
Path normalizePath = targetDirResolved.normalize();
|
||||||
if ( !normalizePath.startsWith(targetDir) ) {
|
if ( !normalizePath.startsWith(targetDir) ) {
|
||||||
throw new IOException("Bad zip entry: " + zipEntry.getName());
|
throw new IOException("Bad zip entry: " + zipEntryName);
|
||||||
}
|
}
|
||||||
return normalizePath;
|
return normalizePath;
|
||||||
}
|
}
|
||||||
|
|
|
@ -50,6 +50,15 @@ public class FileUtils {
|
||||||
|
|
||||||
public enum UploadFullTextsResponse {successful, unsuccessful, databaseError}
|
public enum UploadFullTextsResponse {successful, unsuccessful, databaseError}
|
||||||
|
|
||||||
|
public String baseFilesLocation;
|
||||||
|
|
||||||
|
public FileUtils (@Value("${services.pdfaggregation.controller.baseFilesLocation}") String baseFilesLocation) {
|
||||||
|
if ( !baseFilesLocation.endsWith(File.separator) )
|
||||||
|
baseFilesLocation += File.separator;
|
||||||
|
|
||||||
|
this.baseFilesLocation = baseFilesLocation;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* In each insertion, a new parquet-file is created, so we end up with millions of files. Parquet is great for fast-select, so we have to stick with it and merge those files.
|
* In each insertion, a new parquet-file is created, so we end up with millions of files. Parquet is great for fast-select, so we have to stick with it and merge those files.
|
||||||
|
@ -89,9 +98,6 @@ public class FileUtils {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@Value("${services.pdfaggregation.controller.baseFilesLocation}")
|
|
||||||
public static String baseFilesLocation;
|
|
||||||
|
|
||||||
public static DecimalFormat df = new DecimalFormat("0.00");
|
public static DecimalFormat df = new DecimalFormat("0.00");
|
||||||
|
|
||||||
// The following regex might be useful in a future scenario. It extracts the "plain-filename", the "file-ID" and the "file-extension".
|
// The following regex might be useful in a future scenario. It extracts the "plain-filename", the "file-ID" and the "file-extension".
|
||||||
|
|
Loading…
Reference in New Issue