forked from lsmyrnaios/UrlsController
- Fix the "baseFilesLocation" being null (there was no serious problem, but multiple directories were spawned in the project's directory).
- Make sure the given "baseFilesLocation" ends with a file-separator, before using it.
- Optimize the process of unzipping-files.
This commit is contained in:
parent
e11afe5ab2
commit
4528d1f9be
|
@ -19,8 +19,9 @@ public class FileUnZipper {
|
|||
// Iterate over the files in zip and unzip them.
|
||||
ZipEntry zipEntry = zis.getNextEntry();
|
||||
while ( zipEntry != null ) {
|
||||
Path targetPath = zipSlipProtect(zipEntry, target);
|
||||
if ( zipEntry.getName().endsWith(File.separator) ) // If we have a directory.
|
||||
String zipEntryName = zipEntry.getName();
|
||||
Path targetPath = zipSlipProtect(zipEntryName, target);
|
||||
if ( zipEntryName.endsWith(File.separator) ) // If we have a directory.
|
||||
Files.createDirectories(targetPath);
|
||||
else {
|
||||
// Some zip-files store only the file-paths and not separate directories. We need to create parent directories, e.g. data/folder/file.txt
|
||||
|
@ -37,13 +38,14 @@ public class FileUnZipper {
|
|||
}
|
||||
}
|
||||
|
||||
|
||||
// Protect from a Zip Slip attack: https://snyk.io/research/zip-slip-vulnerability
|
||||
public Path zipSlipProtect(ZipEntry zipEntry, Path targetDir) throws IOException {
|
||||
Path targetDirResolved = targetDir.resolve(zipEntry.getName());
|
||||
public Path zipSlipProtect(String zipEntryName, Path targetDir) throws IOException {
|
||||
Path targetDirResolved = targetDir.resolve(zipEntryName);
|
||||
// Make sure normalized file still has targetDir as its prefix, else throw an exception.
|
||||
Path normalizePath = targetDirResolved.normalize();
|
||||
if ( !normalizePath.startsWith(targetDir) ) {
|
||||
throw new IOException("Bad zip entry: " + zipEntry.getName());
|
||||
throw new IOException("Bad zip entry: " + zipEntryName);
|
||||
}
|
||||
return normalizePath;
|
||||
}
|
||||
|
|
|
@ -50,6 +50,15 @@ public class FileUtils {
|
|||
|
||||
public enum UploadFullTextsResponse {successful, unsuccessful, databaseError}
|
||||
|
||||
public String baseFilesLocation;
|
||||
|
||||
public FileUtils (@Value("${services.pdfaggregation.controller.baseFilesLocation}") String baseFilesLocation) {
|
||||
if ( !baseFilesLocation.endsWith(File.separator) )
|
||||
baseFilesLocation += File.separator;
|
||||
|
||||
this.baseFilesLocation = baseFilesLocation;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* In each insertion, a new parquet-file is created, so we end up with millions of files. Parquet is great for fast-select, so we have to stick with it and merge those files.
|
||||
|
@ -89,9 +98,6 @@ public class FileUtils {
|
|||
}
|
||||
|
||||
|
||||
@Value("${services.pdfaggregation.controller.baseFilesLocation}")
|
||||
public static String baseFilesLocation;
|
||||
|
||||
public static DecimalFormat df = new DecimalFormat("0.00");
|
||||
|
||||
// The following regex might be useful in a future scenario. It extracts the "plain-filename", the "file-ID" and the "file-extension".
|
||||
|
|
Loading…
Reference in New Issue