ckan2zenodo-library/src/main/java/org/gcube/data/publishing/ckan2zenodo/model/DownloadedFile.java

168 lines
3.9 KiB
Java

package org.gcube.data.publishing.ckan2zenodo.model;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.net.HttpURLConnection;
import java.net.URL;
import java.nio.file.Files;
import java.nio.file.StandardCopyOption;
import java.security.DigestInputStream;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import lombok.*;
import lombok.extern.slf4j.Slf4j;
/**
 * Lazily downloaded copy of a {@link CkanResource}.
 *
 * <p>The remote content is fetched at most once, on the first call to {@link #getFile()} or
 * {@link #getMD5()}; the local temporary file and its MD5 checksum are then cached together.
 * The file name advertised by the server (Content-Disposition header) is also cached and is
 * used by {@link #getToUseFileName()} to complete resource names that lack an extension.
 *
 * <p>Instances keep mutable, unsynchronized state.
 */
@RequiredArgsConstructor
@Slf4j
public class DownloadedFile {

    /**
     * Matches the quoted value of the {@code filename} parameter in a Content-Disposition header.
     * The value is limited to non-quote characters so that further quoted parameters following
     * {@code filename} are not swallowed into the match.
     */
    private static final Pattern FILENAME_IN_DEPOSITION_REGEXP =
            Pattern.compile("(?<=filename=\")[^\"]*(?=\")");

    /** Maximum number of attempts for each remote operation. */
    private static final int MAX_ATTEMPTS = 5;

    /** Base delay between attempts; the actual pause is this value times the attempt number. */
    private static final long RETRY_BASE_DELAY_MS = 500L;

    @NonNull
    @Getter
    private CkanResource source;

    /** Local copy of the resource; {@code null} until a download has fully succeeded. */
    private File f = null;

    /** Lower-case hexadecimal MD5 of the downloaded bytes; {@code null} until downloaded. */
    private String MD5 = null;

    /**
     * File name declared by the server; {@code null} when not yet looked up, empty string when
     * the lookup was performed but no name could be determined.
     */
    private String remoteFileName = null;

    @Override
    public String toString() {
        return "DownloadedFile{" +
                "source=" + source +
                ", f=" + f +
                ", MD5='" + MD5 + '\'' +
                ", remoteFileName='" + remoteFileName + '\'' +
                '}';
    }

    /**
     * Returns the file name to use for this resource.
     *
     * <p>If the resource name already contains an extension it is returned unchanged. Otherwise
     * the extension of the server-declared file name (if any) is appended to the resource name.
     *
     * @return the resource name, possibly completed with an extension
     * @throws Exception if the resource URL is malformed
     */
    public String getToUseFileName() throws Exception {
        if (getExtension(source.getName()) != null) {
            // The resource name already carries an extension.
            return source.getName();
        }
        if (remoteFileName == null) {
            initRemoteFileName();
        }
        String evaluatedExtension = getExtension(remoteFileName);
        if (evaluatedExtension != null) {
            return source.getName() + evaluatedExtension;
        }
        return source.getName(); // No extension available anywhere
    }

    /**
     * Returns the local copy of the resource, downloading it on first use.
     *
     * @return the temporary file holding the downloaded content
     * @throws Exception if the download fails
     */
    public File getFile() throws Exception {
        if (f == null) {
            download();
        }
        return f;
    }

    /**
     * Returns the MD5 checksum of the downloaded content, downloading it on first use.
     *
     * @return the MD5 digest as a lower-case hexadecimal string
     * @throws Exception if the download fails
     */
    public String getMD5() throws Exception {
        if (MD5 == null) {
            download();
        }
        return MD5;
    }

    /**
     * Returns the extension of {@code filename}, i.e. the substring starting at the last dot
     * (dot included).
     *
     * @param filename the name to inspect; must not be {@code null}
     * @return the extension including the leading dot, or {@code null} if there is no dot
     */
    static String getExtension(String filename) {
        int lastIndexOf = filename.lastIndexOf(".");
        if (lastIndexOf == -1) {
            return null; // no extension
        }
        return filename.substring(lastIndexOf);
    }

    /**
     * Downloads the resource into a temporary file, initializing the file, its MD5 and (when
     * still unknown) the remote file name.
     *
     * <p>The cached file and checksum are assigned only after the whole content has been
     * received, so a failed download never leaves a partial file behind as the cached result.
     *
     * @throws Exception if the URL is malformed, the content cannot be fetched or stored
     */
    private void download() throws Exception {
        log.info("Downloading {} from {}", source.getName(), source.getUrl());
        URL url = new URL(source.getUrl());
        MessageDigest md = MessageDigest.getInstance("MD5");
        File target = File.createTempFile("zenodo_", ".tmp");
        boolean completed = false;
        try {
            long size;
            // Read through the DigestInputStream so that the digest actually sees the bytes;
            // closing it also closes the underlying connection stream.
            try (DigestInputStream dis = new DigestInputStream(openWithRetries(url), md)) {
                size = Files.copy(dis, target.toPath(), StandardCopyOption.REPLACE_EXISTING);
            }
            if (remoteFileName == null) {
                remoteFileName = ""; // Server did not declare a file name
            }
            MD5 = toHex(md.digest());
            f = target;
            completed = true;
            log.info("Received {} bytes for {} ", size, source.getName());
        } finally {
            if (!completed && !target.delete()) {
                log.warn("Unable to delete incomplete temporary file {}", target);
            }
        }
    }

    /**
     * Opens the content stream of {@code url}, retrying on I/O failures with an increasing pause.
     * When the remote file name is still unknown it is read from the Content-Disposition header
     * of the same connection, so no additional request is needed.
     *
     * @param url the location to read from
     * @return an open stream on the remote content; the caller must close it
     * @throws Exception if every attempt fails or the thread is interrupted while waiting
     */
    private InputStream openWithRetries(URL url) throws Exception {
        IOException lastFailure = null;
        for (int attempt = 1; attempt <= MAX_ATTEMPTS; attempt++) {
            try {
                java.net.URLConnection connection = url.openConnection();
                InputStream content = connection.getInputStream();
                if (remoteFileName == null) {
                    remoteFileName = parseFilename(connection.getHeaderField("Content-Disposition"));
                }
                return content;
            } catch (IOException e) {
                lastFailure = e;
                if (attempt < MAX_ATTEMPTS && !pauseBeforeRetry(attempt)) {
                    break; // interrupted: stop retrying, interrupt flag already restored
                }
            }
        }
        throw new Exception("Unable to download " + source.getUrl(), lastFailure);
    }

    /**
     * Asks the server for the declared file name and initializes {@code remoteFileName}.
     *
     * <p>This is best effort: I/O failures are retried, and when no name can be determined the
     * remote file name is set to the empty string so that the lookup is not repeated.
     *
     * @throws Exception if the resource URL is malformed
     */
    private void initRemoteFileName() throws Exception {
        URL url = new URL(source.getUrl());
        Exception lastException = null;
        for (int attempt = 1; attempt <= MAX_ATTEMPTS && remoteFileName == null; attempt++) {
            try {
                remoteFileName = getFilenameFromURL(url);
                if (remoteFileName == null) {
                    // The server answered but declared no file name: retrying cannot help.
                    break;
                }
            } catch (IOException e) {
                lastException = e;
                if (attempt < MAX_ATTEMPTS && !pauseBeforeRetry(attempt)) {
                    break; // interrupted: stop retrying, interrupt flag already restored
                }
            }
        }
        if (remoteFileName == null) {
            remoteFileName = ""; // Unable to evaluate from the response headers
            if (lastException != null) {
                log.warn("Unable to get remote file name from {} [resource Name {}]",
                        source.getUrl(), source.getName(), lastException);
            } else {
                log.debug("No file name declared by {} [resource Name {}]",
                        source.getUrl(), source.getName());
            }
        }
    }

    /**
     * Issues an HTTP GET to {@code url} and reads the file name from the Content-Disposition
     * response header. The response body is not consumed and the connection is released.
     *
     * @param url the location to query
     * @return the declared file name, or {@code null} if the URL is not HTTP(S) or the response
     *         carries no quoted {@code filename} parameter
     * @throws IOException if the server cannot be reached
     */
    private static String getFilenameFromURL(URL url) throws IOException {
        java.net.URLConnection connection = url.openConnection();
        if (!(connection instanceof HttpURLConnection)) {
            return null; // Only HTTP(S) responses carry a Content-Disposition header
        }
        HttpURLConnection con = (HttpURLConnection) connection;
        try {
            con.setRequestMethod("GET");
            // Forces the exchange so that connection problems surface as IOException
            // (getHeaderField alone would just return null).
            con.getResponseCode();
            return parseFilename(con.getHeaderField("Content-Disposition"));
        } finally {
            con.disconnect();
        }
    }

    /**
     * Extracts the quoted {@code filename} value from a Content-Disposition header value.
     *
     * @param contentDisposition the header value, possibly {@code null}
     * @return the file name, or {@code null} if the header is absent or has no quoted filename
     */
    private static String parseFilename(String contentDisposition) {
        if (contentDisposition == null) {
            return null;
        }
        Matcher m = FILENAME_IN_DEPOSITION_REGEXP.matcher(contentDisposition);
        return m.find() ? m.group(0) : null;
    }

    /**
     * Sleeps before the next attempt, for a time proportional to the attempt number.
     *
     * @param attempt the 1-based number of the attempt that just failed
     * @return {@code true} if the pause completed, {@code false} if the thread was interrupted
     *         (in which case the interrupt flag is restored)
     */
    private static boolean pauseBeforeRetry(int attempt) {
        try {
            Thread.sleep(RETRY_BASE_DELAY_MS * attempt);
            return true;
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
            return false;
        }
    }

    /** Encodes {@code bytes} as a lower-case hexadecimal string, two digits per byte. */
    private static String toHex(byte[] bytes) {
        StringBuilder hex = new StringBuilder(bytes.length * 2);
        for (byte b : bytes) {
            hex.append(Character.forDigit((b >> 4) & 0xF, 16));
            hex.append(Character.forDigit(b & 0xF, 16));
        }
        return hex.toString();
    }
}