84 lines
2.2 KiB
Java
84 lines
2.2 KiB
Java
package org.gcube.data.publishing.ckan2zenodo.commons;
|
|
|
|
import java.io.File;
|
|
import java.io.IOException;
|
|
import java.io.InputStream;
|
|
import java.net.HttpURLConnection;
|
|
import java.net.URL;
|
|
import java.nio.file.Files;
|
|
import java.nio.file.StandardCopyOption;
|
|
import java.security.DigestInputStream;
|
|
import java.security.MessageDigest;
|
|
import java.util.regex.Matcher;
|
|
import java.util.regex.Pattern;
|
|
|
|
import org.gcube.data.publishing.ckan2zenodo.model.CkanResource;
|
|
import org.gcube.data.publishing.ckan2zenodo.model.DownloadedFile;
|
|
|
|
import lombok.extern.slf4j.Slf4j;
|
|
|
|
@Slf4j
|
|
public class Net {
|
|
|
|
private static final Pattern FILENAME_IN_DEPOSITION_REGEXP = Pattern.compile("(?<=filename\\=\\\").*(?=\\\")");
|
|
|
|
|
|
public static DownloadedFile download(CkanResource toDownload) throws Exception {
|
|
String urlString=toDownload.getUrl();
|
|
log.debug("Downloading "+urlString);
|
|
//Download locally into temp
|
|
URL url=new URL(urlString);
|
|
File temp=File.createTempFile("zenodo_", ".tmp");
|
|
MessageDigest md = MessageDigest.getInstance("MD5");
|
|
|
|
// Multiple tries
|
|
InputStream is=null;
|
|
int attempt=0;
|
|
Exception lastException=null;
|
|
|
|
String remoteFileName=null;
|
|
|
|
while(is==null&&attempt<5) {
|
|
try {
|
|
attempt++;
|
|
is=url.openStream();
|
|
|
|
if(remoteFileName == null)
|
|
remoteFileName = getFilenameFromURL(url);
|
|
|
|
}catch(Exception e) {
|
|
lastException=e;
|
|
try{
|
|
Thread.sleep(500*attempt);
|
|
}catch(InterruptedException e1) {}
|
|
}
|
|
}
|
|
if(is==null) throw new Exception("Unable to download "+urlString,lastException);
|
|
if(remoteFileName == null)
|
|
remoteFileName = ""; // Unable to evaluate from HEAD
|
|
|
|
|
|
DigestInputStream dis = new DigestInputStream(is, md);
|
|
|
|
|
|
// Download
|
|
long size=Files.copy(is, temp.toPath(),StandardCopyOption.REPLACE_EXISTING);
|
|
|
|
return new DownloadedFile(toDownload,temp,dis.getMessageDigest().toString(),remoteFileName);
|
|
|
|
|
|
}
|
|
|
|
|
|
private static final String getFilenameFromURL(URL url) throws IOException {
|
|
HttpURLConnection con = (HttpURLConnection) url.openConnection();
|
|
con.setRequestMethod("GET");
|
|
String contentDisp= con.getHeaderField("Content-Disposition");
|
|
|
|
Matcher m = FILENAME_IN_DEPOSITION_REGEXP.matcher(contentDisp);
|
|
m.find();
|
|
return m.group(0);
|
|
}
|
|
|
|
}
|