ckan2zenodo-library/src/main/java/org/gcube/data/publishing/ckan2zenodo/commons/Net.java

84 lines
2.2 KiB
Java

package org.gcube.data.publishing.ckan2zenodo.commons;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.net.HttpURLConnection;
import java.net.URL;
import java.net.URLConnection;
import java.nio.file.Files;
import java.nio.file.StandardCopyOption;
import java.security.DigestInputStream;
import java.security.MessageDigest;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.gcube.data.publishing.ckan2zenodo.model.CkanResource;
import org.gcube.data.publishing.ckan2zenodo.model.DownloadedFile;
import lombok.extern.slf4j.Slf4j;
/**
 * Network helpers for fetching the payload of a CKAN resource into a local temporary file.
 */
@Slf4j
public class Net {

    /**
     * Captures the double-quoted value of the {@code filename} parameter of a
     * {@code Content-Disposition} header. The value is matched up to the first closing quote so
     * that further quoted parameters in the same header are not swallowed.
     */
    private static final Pattern FILENAME_IN_DEPOSITION_REGEXP = Pattern.compile("(?<=filename=\")[^\"]*(?=\")");

    /** Maximum number of times the remote stream is opened before giving up. */
    private static final int MAX_ATTEMPTS = 5;

    /** Base back-off in milliseconds; the actual pause is this value times the attempt number. */
    private static final long RETRY_DELAY_MS = 500L;

    /**
     * Downloads the content behind {@code toDownload.getUrl()} into a temporary file
     * ({@code zenodo_*.tmp}) while computing its MD5 digest.
     *
     * <p>Opening the remote stream is retried up to {@value #MAX_ATTEMPTS} times with a linearly
     * growing pause between attempts. The remote file name is read from the
     * {@code Content-Disposition} header of the same response that delivers the body; when it is
     * not available an empty string is used.
     *
     * @param toDownload the resource whose URL is downloaded
     * @return a {@link DownloadedFile} built from the resource, the temporary file, the MD5 digest
     *         rendered as lowercase hexadecimal, and the remote file name (possibly empty)
     * @throws Exception if the URL is malformed, the stream cannot be opened after all attempts
     *         (or the thread is interrupted while waiting), or copying the content fails; the
     *         temporary file is removed in these cases
     */
    public static DownloadedFile download(CkanResource toDownload) throws Exception {
        String urlString = toDownload.getUrl();
        log.debug("Downloading " + urlString);
        URL url = new URL(urlString);
        MessageDigest md = MessageDigest.getInstance("MD5");

        // Download locally into temp
        File temp = File.createTempFile("zenodo_", ".tmp");
        try {
            // Multiple tries
            URLConnection connection = null;
            InputStream is = null;
            Exception lastException = null;
            for (int attempt = 1; is == null && attempt <= MAX_ATTEMPTS; attempt++) {
                try {
                    URLConnection candidate = url.openConnection();
                    is = candidate.getInputStream();
                    connection = candidate;
                } catch (IOException | RuntimeException e) {
                    lastException = e;
                    log.debug("Attempt {} to open {} failed", attempt, urlString, e);
                    if (attempt < MAX_ATTEMPTS) {
                        try {
                            Thread.sleep(RETRY_DELAY_MS * attempt);
                        } catch (InterruptedException interrupted) {
                            // Preserve the interrupt for callers and stop retrying.
                            Thread.currentThread().interrupt();
                            break;
                        }
                    }
                }
            }
            if (is == null) {
                throw new Exception("Unable to download " + urlString, lastException);
            }

            String remoteFileName;
            // The digest stream must be the one that is read, otherwise the digest stays empty.
            // Closing it also closes the underlying remote stream.
            try (DigestInputStream dis = new DigestInputStream(is, md)) {
                remoteFileName = parseFilename(connection.getHeaderField("Content-Disposition"));
                long size = Files.copy(dis, temp.toPath(), StandardCopyOption.REPLACE_EXISTING);
                log.debug("Downloaded {} bytes from {} into {}", size, urlString, temp);
            }

            // NOTE(review): the checksum is passed as lowercase hex; confirm this is the format
            // DownloadedFile consumers expect.
            return new DownloadedFile(toDownload, temp, toHex(md.digest()), remoteFileName);
        } catch (Exception e) {
            // Do not leave a partial or empty temporary file behind on failure.
            if (!temp.delete()) {
                log.debug("Unable to delete temporary file {}", temp);
            }
            throw e;
        }
    }

    /**
     * Extracts the quoted {@code filename} value from a {@code Content-Disposition} header.
     *
     * @param contentDisposition the raw header value, may be {@code null}
     * @return the file name, or an empty string when the header is missing or carries no quoted
     *         {@code filename} parameter
     */
    private static String parseFilename(String contentDisposition) {
        if (contentDisposition == null) {
            return "";
        }
        Matcher m = FILENAME_IN_DEPOSITION_REGEXP.matcher(contentDisposition);
        return m.find() ? m.group(0) : "";
    }

    /**
     * Renders bytes as a lowercase hexadecimal string, two characters per byte.
     *
     * @param bytes the bytes to render
     * @return the hexadecimal representation
     */
    private static String toHex(byte[] bytes) {
        StringBuilder hex = new StringBuilder(bytes.length * 2);
        for (byte b : bytes) {
            hex.append(Character.forDigit((b >> 4) & 0xF, 16));
            hex.append(Character.forDigit(b & 0xF, 16));
        }
        return hex.toString();
    }
}