package org.gcube.data.publishing.ckan2zenodo.model;

import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.net.HttpURLConnection;
import java.net.URL;
import java.net.URLConnection;
import java.nio.file.Files;
import java.nio.file.StandardCopyOption;
import java.security.DigestInputStream;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import lombok.*;
import lombok.extern.slf4j.Slf4j;

/**
 * Lazily downloaded local copy of a {@link CkanResource}.
 *
 * <p>The payload is fetched on the first call to {@link #getFile()} or {@link #getMD5()};
 * the file name advertised by the server (Content-Disposition header) is resolved on demand
 * and used by {@link #getToUseFileName()} to complete resource names lacking an extension.
 */
@RequiredArgsConstructor
@Slf4j
public class DownloadedFile {

    /** Extracts the quoted value of {@code filename="..."} from a Content-Disposition header. */
    private static final Pattern FILENAME_IN_DEPOSITION_REGEXP = Pattern.compile("(?<=filename\\=\\\").*(?=\\\")");

    /** Maximum number of connection attempts before giving up. */
    private static final int MAX_ATTEMPTS = 5;

    /** Base pause between attempts; the actual pause is this value times the attempt number. */
    private static final long RETRY_BACKOFF_MS = 500;

    @NonNull
    @Getter
    private CkanResource source;

    /** Local temporary copy; {@code null} until a download has completed successfully. */
    private File f = null;

    /** Lower-case hexadecimal MD5 of the downloaded bytes; {@code null} until downloaded. */
    private String MD5 = null;

    /** File name advertised by the server; {@code ""} when none is available, {@code null} when not yet resolved. */
    private String remoteFileName = null;

    @Override
    public String toString() {
        return "DownloadedFile{" +
                "source=" + source +
                ", f=" + f +
                ", MD5='" + MD5 + '\'' +
                ", remoteFileName='" + remoteFileName + '\'' +
                '}';
    }

    /**
     * Returns the file name to use for this resource.
     *
     * <p>If the resource name already contains an extension it is returned as is; otherwise
     * the extension of the server-advertised file name (if any) is appended to it.
     *
     * @return the resource name, possibly completed with the remote file extension
     * @throws Exception if the resource URL is malformed
     */
    public String getToUseFileName() throws Exception {
        String name = source.getName();
        if (getExtension(name) != null) {
            // The resource name already carries an extension.
            return name;
        }
        if (remoteFileName == null) {
            initRemoteFileName();
        }
        String evaluatedExtension = getExtension(remoteFileName);
        return evaluatedExtension != null ? name + evaluatedExtension : name;
    }

    /**
     * Returns the local copy of the resource, downloading it on first use.
     *
     * @return the temporary file holding the downloaded content
     * @throws Exception if the download fails
     */
    public File getFile() throws Exception {
        if (f == null) {
            download();
        }
        return f;
    }

    /**
     * Returns the MD5 checksum of the downloaded content, downloading it on first use.
     *
     * @return the checksum as a lower-case hexadecimal string
     * @throws Exception if the download fails
     */
    public String getMD5() throws Exception {
        if (MD5 == null) {
            download();
        }
        return MD5;
    }

    /**
     * Extracts the extension (including the leading dot) from a file name.
     *
     * @param filename the file name to inspect
     * @return the substring starting at the last {@code '.'}, or {@code null} if there is none
     */
    static final String getExtension(String filename) {
        int lastIndexOf = filename.lastIndexOf(".");
        if (lastIndexOf == -1) {
            return null; // no extension
        }
        return filename.substring(lastIndexOf);
    }

    /**
     * Actually downloads the file, initialising the local file, its MD5 and the remote file name.
     *
     * <p>The connection is attempted up to {@link #MAX_ATTEMPTS} times. The local file and the
     * checksum are published only once the whole payload has been received, so a failed
     * download never leaves this object pointing at a partial file.
     *
     * @throws Exception if the stream cannot be opened or the transfer fails
     */
    private void download() throws Exception {
        log.info("Downloading {} from {}",source.getName(),source.getUrl());
        URL url = new URL(source.getUrl());
        MessageDigest md = MessageDigest.getInstance("MD5");
        File target = File.createTempFile("zenodo_", ".tmp");
        try {
            InputStream is = null;
            String contentDisposition = null;
            int attempt = 0;
            Exception lastException = null;
            while (is == null && attempt < MAX_ATTEMPTS) {
                attempt++;
                try {
                    URLConnection con = url.openConnection();
                    is = con.getInputStream();
                    // Read the header from the connection serving the body: no second request needed.
                    contentDisposition = con.getHeaderField("Content-Disposition");
                } catch (Exception e) {
                    lastException = e;
                    if (attempt >= MAX_ATTEMPTS || !backOff(attempt)) {
                        break;
                    }
                }
            }
            if (is == null) {
                throw new Exception("Unable to download "+source.getUrl(),lastException);
            }
            if (remoteFileName == null) {
                remoteFileName = parseFilename(contentDisposition);
            }

            // Copy THROUGH the digest stream so the checksum covers exactly the bytes written.
            try (DigestInputStream dis = new DigestInputStream(is, md)) {
                long size = Files.copy(dis, target.toPath(), StandardCopyOption.REPLACE_EXISTING);
                MD5 = toHex(md.digest());
                f = target;
                log.info("Received {} bytes for {} ",size,source.getName());
            }
        } catch (Exception e) {
            // Do not leave partial temporary files behind.
            if (target.exists() && !target.delete()) {
                log.warn("Unable to delete temporary file {}", target);
            }
            throw e;
        }
    }

    /**
     * Queries the resource URL and initialises {@code remoteFileName}.
     *
     * <p>I/O failures are retried up to {@link #MAX_ATTEMPTS} times; if the name still cannot
     * be determined it is set to the empty string and a warning is logged.
     *
     * @throws Exception if the resource URL is malformed
     */
    private void initRemoteFileName() throws Exception {
        int attempt = 0;
        Exception lastException = null;
        URL url = new URL(source.getUrl());
        while (remoteFileName == null && attempt < MAX_ATTEMPTS) {
            attempt++;
            try {
                remoteFileName = getFilenameFromURL(url);
            } catch (Exception e) {
                lastException = e;
                if (attempt >= MAX_ATTEMPTS || !backOff(attempt)) {
                    break;
                }
            }
        }
        if (remoteFileName == null) {
            remoteFileName = ""; // Unable to evaluate from the remote server
            log.warn("Unable to get remote file name from {} [resource Name {}]",source.getUrl(),source.getName(),lastException);
        }
    }

    /**
     * Issues an HTTP GET against the URL and reads the file name from its Content-Disposition header.
     *
     * @param url the resource location
     * @return the advertised file name, or {@code ""} if the URL is not HTTP(S) or no name is advertised
     * @throws IOException if the connection fails or the server answers with an error status
     */
    private static final String getFilenameFromURL(URL url) throws IOException {
        URLConnection raw = url.openConnection();
        if (!(raw instanceof HttpURLConnection)) {
            return ""; // Non-HTTP protocols carry no Content-Disposition header
        }
        HttpURLConnection con = (HttpURLConnection) raw;
        try {
            con.setRequestMethod("GET");
            // getHeaderField() hides connection errors by returning null; getResponseCode() surfaces them.
            int status = con.getResponseCode();
            if (status >= HttpURLConnection.HTTP_BAD_REQUEST) {
                throw new IOException("HTTP status " + status + " received from " + url);
            }
            return parseFilename(con.getHeaderField("Content-Disposition"));
        } finally {
            con.disconnect();
        }
    }

    /**
     * Extracts the quoted file name from a Content-Disposition header value.
     *
     * @param contentDisposition the header value, possibly {@code null}
     * @return the file name, or {@code ""} if the header is missing or carries no quoted file name
     */
    private static String parseFilename(String contentDisposition) {
        if (contentDisposition == null) {
            return "";
        }
        Matcher m = FILENAME_IN_DEPOSITION_REGEXP.matcher(contentDisposition);
        return m.find() ? m.group(0) : "";
    }

    /**
     * Sleeps before the next attempt, with a pause growing linearly with the attempt number.
     *
     * @param attempt the number of the attempt that just failed (1-based)
     * @return {@code false} if the thread was interrupted (interrupt flag restored), {@code true} otherwise
     */
    private static boolean backOff(int attempt) {
        try {
            Thread.sleep(RETRY_BACKOFF_MS * attempt);
            return true;
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
            return false;
        }
    }

    /**
     * Encodes bytes as a lower-case hexadecimal string.
     *
     * @param bytes the bytes to encode
     * @return two hexadecimal digits per input byte
     */
    private static String toHex(byte[] bytes) {
        StringBuilder sb = new StringBuilder(bytes.length * 2);
        for (byte b : bytes) {
            sb.append(String.format("%02x", b));
        }
        return sb.toString();
    }
}