From 4528d1f9be6927d76dabb154e859e17924a18ef2 Mon Sep 17 00:00:00 2001 From: LSmyrnaios Date: Tue, 20 Dec 2022 18:38:11 +0200 Subject: [PATCH] - Fix the "baseFilesLocation" being null (there was no serious problem, but multiple directories were spawned in the project's directory). - Make sure the given "baseFilesLocation" ends with a file-separator, before using it. - Optimize the process of unzipping-files. --- .../openaire/urls_controller/util/FileUnZipper.java | 12 +++++++----- .../eu/openaire/urls_controller/util/FileUtils.java | 12 +++++++++--- 2 files changed, 16 insertions(+), 8 deletions(-) diff --git a/src/main/java/eu/openaire/urls_controller/util/FileUnZipper.java b/src/main/java/eu/openaire/urls_controller/util/FileUnZipper.java index 40d22dc..a179eb6 100644 --- a/src/main/java/eu/openaire/urls_controller/util/FileUnZipper.java +++ b/src/main/java/eu/openaire/urls_controller/util/FileUnZipper.java @@ -19,8 +19,9 @@ public class FileUnZipper { // Iterate over the files in zip and unzip them. ZipEntry zipEntry = zis.getNextEntry(); while ( zipEntry != null ) { - Path targetPath = zipSlipProtect(zipEntry, target); - if ( zipEntry.getName().endsWith(File.separator) ) // If we have a directory. + String zipEntryName = zipEntry.getName(); + Path targetPath = zipSlipProtect(zipEntryName, target); + if ( zipEntryName.endsWith(File.separator) ) // If we have a directory. Files.createDirectories(targetPath); else { // Some zip-files store only the file-paths and not separate directories. We need to create parent directories, e.g data/folder/file.txt @@ -37,13 +38,14 @@ public class FileUnZipper { } } + // Protect from a Zip Slip attack: https://snyk.io/research/zip-slip-vulnerability - public Path zipSlipProtect(ZipEntry zipEntry, Path targetDir) throws IOException { - Path targetDirResolved = targetDir.resolve(zipEntry.getName()); + public Path zipSlipProtect(String zipEntryName, Path targetDir) throws IOException { + Path targetDirResolved = targetDir.resolve(zipEntryName); // Make sure normalized file still has targetDir as its prefix, else throw an exception. Path normalizePath = targetDirResolved.normalize(); if ( !normalizePath.startsWith(targetDir) ) { - throw new IOException("Bad zip entry: " + zipEntry.getName()); + throw new IOException("Bad zip entry: " + zipEntryName); } return normalizePath; } diff --git a/src/main/java/eu/openaire/urls_controller/util/FileUtils.java b/src/main/java/eu/openaire/urls_controller/util/FileUtils.java index 3d40dc3..415968e 100644 --- a/src/main/java/eu/openaire/urls_controller/util/FileUtils.java +++ b/src/main/java/eu/openaire/urls_controller/util/FileUtils.java @@ -50,6 +50,15 @@ public class FileUtils { public enum UploadFullTextsResponse {successful, unsuccessful, databaseError} + public String baseFilesLocation; + + public FileUtils (@Value("${services.pdfaggregation.controller.baseFilesLocation}") String baseFilesLocation) { + if ( !baseFilesLocation.endsWith(File.separator) ) + baseFilesLocation += File.separator; + + this.baseFilesLocation = baseFilesLocation; + } + /** * In each insertion, a new parquet-file is created, so we end up with millions of files. Parquet is great for fast-select, so have to stick with it and merge those files.. @@ -89,9 +98,6 @@ public class FileUtils { } - @Value("${services.pdfaggregation.controller.baseFilesLocation}") - public static String baseFilesLocation; - public static DecimalFormat df = new DecimalFormat("0.00"); // The following regex might be usefull in a future scenario. It extracts the "plain-filename" and "file-ID" and the "file-extension".