168 lines
3.9 KiB
Java
168 lines
3.9 KiB
Java
package org.gcube.data.publishing.ckan2zenodo.model;
|
|
|
|
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.net.HttpURLConnection;
import java.net.URL;
import java.net.URLConnection;
import java.nio.file.Files;
import java.nio.file.StandardCopyOption;
import java.security.DigestInputStream;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import lombok.*;
import lombok.extern.slf4j.Slf4j;
|
|
|
|
|
|
@RequiredArgsConstructor
|
|
@Slf4j
|
|
public class DownloadedFile {
|
|
|
|
private static final Pattern FILENAME_IN_DEPOSITION_REGEXP = Pattern.compile("(?<=filename\\=\\\").*(?=\\\")");
|
|
|
|
@NonNull
|
|
@Getter
|
|
private CkanResource source;
|
|
//private File f;
|
|
|
|
private File f=null;
|
|
private String MD5=null;
|
|
private String remoteFileName=null;
|
|
|
|
@Override
|
|
public String toString() {
|
|
return "DownloadedFile{" +
|
|
"source=" + source +
|
|
", f=" + f +
|
|
", MD5='" + MD5 + '\'' +
|
|
", remoteFileName='" + remoteFileName + '\'' +
|
|
'}';
|
|
}
|
|
|
|
public String getToUseFileName() throws Exception {
|
|
if (getExtension(source.getName())!=null){
|
|
// source contains extension
|
|
return source.getName();
|
|
}else {
|
|
if(remoteFileName == null){
|
|
initRemoteFileName();
|
|
}
|
|
String evaluatedExtension=getExtension(remoteFileName);
|
|
if(evaluatedExtension!=null)
|
|
return source.getName()+evaluatedExtension;
|
|
else return source.getName(); // No extension
|
|
}
|
|
}
|
|
|
|
|
|
|
|
public File getFile() throws Exception {
|
|
if(f==null){
|
|
download();
|
|
}
|
|
return f;
|
|
}
|
|
|
|
public String getMD5() throws Exception {
|
|
if(MD5==null){
|
|
download();
|
|
}
|
|
return MD5;
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
static final String getExtension(String filename){
|
|
int lastIndexOf = filename.lastIndexOf(".");
|
|
if (lastIndexOf == -1) {
|
|
return null; // no extension
|
|
}
|
|
return filename.substring(lastIndexOf);
|
|
}
|
|
|
|
|
|
/**
|
|
* Actually downlaods the file INIT File, MD5 and remoteFilename.
|
|
*/
|
|
private void download() throws Exception {
|
|
log.info("Downloading {} from {}",source.getName(),source.getUrl());
|
|
URL url=new URL(source.getUrl());
|
|
f=File.createTempFile("zenodo_", ".tmp");
|
|
MessageDigest md = MessageDigest.getInstance("MD5");
|
|
|
|
// Multiple tries
|
|
InputStream is=null;
|
|
int attempt=0;
|
|
Exception lastException=null;
|
|
|
|
|
|
while(is==null&&attempt<5) {
|
|
try {
|
|
attempt++;
|
|
is=url.openStream();
|
|
|
|
if(remoteFileName == null)
|
|
remoteFileName = getFilenameFromURL(url);
|
|
|
|
}catch(Exception e) {
|
|
lastException=e;
|
|
try{
|
|
Thread.sleep(500*attempt);
|
|
}catch(InterruptedException e1) {}
|
|
}
|
|
}
|
|
if(is==null) throw new Exception("Unable to download "+source.getUrl(),lastException);
|
|
|
|
if(remoteFileName == null)
|
|
remoteFileName = ""; // Unable to evaluate from HEAD
|
|
|
|
|
|
DigestInputStream dis = new DigestInputStream(is, md);
|
|
MD5 = dis.getMessageDigest().toString();
|
|
|
|
// Download
|
|
long size= Files.copy(is, f.toPath(), StandardCopyOption.REPLACE_EXISTING);
|
|
log.info("Received {} bytes for {} ",size,source.getName());
|
|
}
|
|
|
|
/**
|
|
* Performs HTTP HEAD and INIT remoteFileName
|
|
*/
|
|
private void initRemoteFileName() throws Exception {
|
|
int attempt =0;
|
|
Exception lastException=null;
|
|
URL url=new URL(source.getUrl());
|
|
while(remoteFileName==null&&attempt<5) {
|
|
try {
|
|
attempt++;
|
|
remoteFileName = getFilenameFromURL(url);
|
|
}catch(Exception e) {
|
|
lastException=e;
|
|
try{
|
|
Thread.sleep(500*attempt);
|
|
}catch(InterruptedException e1) {}
|
|
}
|
|
}
|
|
if(remoteFileName == null) {
|
|
remoteFileName = ""; // Unable to evaluate from HEAD
|
|
log.warn("Unable to get remote file name from {} [resource Name {}]",source.getUrl(),source.getName(),lastException);
|
|
}
|
|
}
|
|
|
|
|
|
private static final String getFilenameFromURL(URL url) throws IOException {
|
|
HttpURLConnection con = (HttpURLConnection) url.openConnection();
|
|
con.setRequestMethod("GET");
|
|
String contentDisp= con.getHeaderField("Content-Disposition");
|
|
|
|
Matcher m = FILENAME_IN_DEPOSITION_REGEXP.matcher(contentDisp);
|
|
m.find();
|
|
return m.group(0);
|
|
}
|
|
}
|