- Fix the "baseFilesLocation" being null (there was no serious problem, but multiple directories were spawned in the project's directory).

- Make sure the given "baseFilesLocation" ends with a file-separator, before using it.
- Optimize the process of unzipping-files.
2022-12-20 18:38:11 +02:00 · 2022-12-20 18:38:11 +02:00 · 4528d1f9be
parent e11afe5ab2
commit 4528d1f9be
2 changed files with 16 additions and 8 deletions
--- a/src/main/java/eu/openaire/urls_controller/util/FileUnZipper.java
+++ b/src/main/java/eu/openaire/urls_controller/util/FileUnZipper.java
@@ -19,8 +19,9 @@ public class FileUnZipper {
            // Iterate over the files in zip and unzip them.
            ZipEntry zipEntry = zis.getNextEntry();
            while ( zipEntry != null ) {
-                Path targetPath = zipSlipProtect(zipEntry, target);
+                String zipEntryName = zipEntry.getName();
-                if ( zipEntry.getName().endsWith(File.separator) )  // If we have a directory.
+                Path targetPath = zipSlipProtect(zipEntryName, target);
+                if ( zipEntryName.endsWith(File.separator) )  // If we have a directory.
                    Files.createDirectories(targetPath);
                else {
                    // Some zip-files store only the file-paths and not separate directories. We need to create parent directories, e.g data/folder/file.txt
@@ -37,13 +38,14 @@ public class FileUnZipper {
        }
    }
    // Protect from a Zip Slip attack:  https://snyk.io/research/zip-slip-vulnerability
-    public Path zipSlipProtect(ZipEntry zipEntry, Path targetDir) throws IOException {
+    public Path zipSlipProtect(String zipEntryName, Path targetDir) throws IOException {
-        Path targetDirResolved = targetDir.resolve(zipEntry.getName());
+        Path targetDirResolved = targetDir.resolve(zipEntryName);
        // Make sure normalized file still has targetDir as its prefix, else throw an exception.
        Path normalizePath = targetDirResolved.normalize();
        if ( !normalizePath.startsWith(targetDir) ) {
-            throw new IOException("Bad zip entry: " + zipEntry.getName());
+            throw new IOException("Bad zip entry: " + zipEntryName);
        }
        return normalizePath;
    }
--- a/src/main/java/eu/openaire/urls_controller/util/FileUtils.java
+++ b/src/main/java/eu/openaire/urls_controller/util/FileUtils.java
@@ -50,6 +50,15 @@ public class FileUtils {
    public enum UploadFullTextsResponse {successful, unsuccessful, databaseError}
+    public String baseFilesLocation;
+    public FileUtils (@Value("${services.pdfaggregation.controller.baseFilesLocation}") String baseFilesLocation) {
+        if ( !baseFilesLocation.endsWith(File.separator) )
+            baseFilesLocation += File.separator;
+        this.baseFilesLocation = baseFilesLocation;
+    }
    /**
     * In each insertion, a new parquet-file is created, so we end up with millions of files. Parquet is great for fast-select, so have to stick with it and merge those files..
@@ -89,9 +98,6 @@ public class FileUtils {
    }
-    @Value("${services.pdfaggregation.controller.baseFilesLocation}")
-    public static String baseFilesLocation;
    public static DecimalFormat df = new DecimalFormat("0.00");
    // The following regex might be usefull in a future scenario. It extracts the "plain-filename" and "file-ID" and the "file-extension".