Moved net logic to Downloaded file
This commit is contained in:
parent
7ce2c222e3
commit
90466b296c
|
@ -1,10 +1,9 @@
|
|||
package org.gcube.data.publishing.ckan2zenodo.clients;
|
||||
|
||||
import java.nio.file.Files;
|
||||
import java.util.Collection;
|
||||
import java.util.*;
|
||||
import java.util.concurrent.Callable;
|
||||
|
||||
import org.gcube.data.publishing.ckan2zenodo.commons.Net;
|
||||
import org.gcube.data.publishing.ckan2zenodo.model.CkanResource;
|
||||
import org.gcube.data.publishing.ckan2zenodo.model.DownloadedFile;
|
||||
import org.gcube.data.publishing.ckan2zenodo.model.zenodo.FileDeposition;
|
||||
|
@ -40,64 +39,55 @@ public class UploadFilesCall implements Callable<ZenodoDeposition>{
|
|||
|
||||
log.debug("Removing not referenced files..");
|
||||
|
||||
HashMap<String,DownloadedFile> resourceMap=new HashMap<>();
|
||||
for(CkanResource r:toUpload) {
|
||||
DownloadedFile d=new DownloadedFile(r);
|
||||
resourceMap.put(d.getToUseFileName(),d);
|
||||
}
|
||||
Set<String> alreadyExistingFiles=new HashSet<>();
|
||||
|
||||
for(FileDeposition f:dep.getFiles()) {
|
||||
CkanResource found=null;
|
||||
for(CkanResource r:toUpload)
|
||||
if(r.getName().equals(f.getFilename())) {
|
||||
found=r;
|
||||
break;
|
||||
}
|
||||
if(found==null) // File not present in current toUpload set
|
||||
if(resourceMap.containsKey(f.getFilename())){
|
||||
alreadyExistingFiles.add(f.getFilename());
|
||||
try{
|
||||
// check for update
|
||||
DownloadedFile downloaded = resourceMap.get(f.getFilename());
|
||||
if(!downloaded.getMD5().equals(f.getChecksum())) {
|
||||
log.debug("MD5 differ, going to update : "+downloaded+" - "+f);
|
||||
z.deleteFile(dep, f);
|
||||
z.uploadFile(dep, found.getName(), downloaded.getFile());
|
||||
}
|
||||
}catch (Throwable t){
|
||||
log.warn("Unable to update "+f,t);
|
||||
}
|
||||
}else {
|
||||
try{
|
||||
// remove File not present in current toUpload set
|
||||
log.debug("Remote file "+f+" is not in requested set. Deleting it..");
|
||||
z.deleteFile(dep, f);
|
||||
}catch(Throwable t) {
|
||||
log.warn("Unable to delete "+f,t);
|
||||
}
|
||||
else {
|
||||
// File present, checking for update
|
||||
DownloadedFile downloaded=null;
|
||||
try {
|
||||
log.debug("Found already existing remote file "+f);
|
||||
downloaded=Net.download(found);
|
||||
if(!downloaded.getMD5().equals(f.getChecksum())) {
|
||||
log.debug("MD5 differ, going to update : "+downloaded+" - "+f);
|
||||
z.deleteFile(dep, f);
|
||||
z.uploadFile(dep, found.getName(), downloaded.getF());
|
||||
}
|
||||
}catch(Throwable t) {
|
||||
log.warn("Unable to update "+f,t);
|
||||
}finally {
|
||||
if(downloaded!=null) Files.deleteIfExists(downloaded.getF().toPath());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
log.debug("Going to push additional resources for "+deposition.getTitle()+" ID : "+deposition.getId());
|
||||
|
||||
for(CkanResource r:toUpload) {
|
||||
DownloadedFile downloaded=null;
|
||||
try {
|
||||
boolean found=false;
|
||||
|
||||
for(FileDeposition f:dep.getFiles())
|
||||
if(f.getFilename().equals(r.getName())) {
|
||||
found=true;
|
||||
break;
|
||||
}
|
||||
if(!found) {
|
||||
downloaded=Net.download(r);
|
||||
z.uploadFile(dep, r.getName(),downloaded.getF());
|
||||
}
|
||||
for(Map.Entry<String,DownloadedFile> e : resourceMap.entrySet()){
|
||||
DownloadedFile downloadedFile=e.getValue();
|
||||
if(!alreadyExistingFiles.contains(e.getKey()))
|
||||
try{
|
||||
// Upload new file
|
||||
z.uploadFile(dep,downloadedFile.getToUseFileName(),downloadedFile.getFile());
|
||||
}catch(Throwable t) {
|
||||
log.warn("Unable to upload "+r.getName()+".",t);
|
||||
log.warn("Unable to upload "+downloadedFile.getSource().getName(),t);
|
||||
}
|
||||
}
|
||||
|
||||
return z.readDeposition(dep.getId());
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
}
|
||||
|
|
|
@ -1,83 +0,0 @@
|
|||
package org.gcube.data.publishing.ckan2zenodo.commons;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.net.HttpURLConnection;
|
||||
import java.net.URL;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.StandardCopyOption;
|
||||
import java.security.DigestInputStream;
|
||||
import java.security.MessageDigest;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import org.gcube.data.publishing.ckan2zenodo.model.CkanResource;
|
||||
import org.gcube.data.publishing.ckan2zenodo.model.DownloadedFile;
|
||||
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
|
||||
@Slf4j
|
||||
public class Net {
|
||||
|
||||
private static final Pattern FILENAME_IN_DEPOSITION_REGEXP = Pattern.compile("(?<=filename\\=\\\").*(?=\\\")");
|
||||
|
||||
|
||||
public static DownloadedFile download(CkanResource toDownload) throws Exception {
|
||||
String urlString=toDownload.getUrl();
|
||||
log.debug("Downloading "+urlString);
|
||||
//Download locally into temp
|
||||
URL url=new URL(urlString);
|
||||
File temp=File.createTempFile("zenodo_", ".tmp");
|
||||
MessageDigest md = MessageDigest.getInstance("MD5");
|
||||
|
||||
// Multiple tries
|
||||
InputStream is=null;
|
||||
int attempt=0;
|
||||
Exception lastException=null;
|
||||
|
||||
String remoteFileName=null;
|
||||
|
||||
while(is==null&&attempt<5) {
|
||||
try {
|
||||
attempt++;
|
||||
is=url.openStream();
|
||||
|
||||
if(remoteFileName == null)
|
||||
remoteFileName = getFilenameFromURL(url);
|
||||
|
||||
}catch(Exception e) {
|
||||
lastException=e;
|
||||
try{
|
||||
Thread.sleep(500*attempt);
|
||||
}catch(InterruptedException e1) {}
|
||||
}
|
||||
}
|
||||
if(is==null) throw new Exception("Unable to download "+urlString,lastException);
|
||||
if(remoteFileName == null)
|
||||
remoteFileName = ""; // Unable to evaluate from HEAD
|
||||
|
||||
|
||||
DigestInputStream dis = new DigestInputStream(is, md);
|
||||
|
||||
|
||||
// Download
|
||||
long size=Files.copy(is, temp.toPath(),StandardCopyOption.REPLACE_EXISTING);
|
||||
|
||||
return new DownloadedFile(toDownload,temp,dis.getMessageDigest().toString(),remoteFileName);
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
private static final String getFilenameFromURL(URL url) throws IOException {
|
||||
HttpURLConnection con = (HttpURLConnection) url.openConnection();
|
||||
con.setRequestMethod("GET");
|
||||
String contentDisp= con.getHeaderField("Content-Disposition");
|
||||
|
||||
Matcher m = FILENAME_IN_DEPOSITION_REGEXP.matcher(contentDisp);
|
||||
m.find();
|
||||
return m.group(0);
|
||||
}
|
||||
|
||||
}
|
|
@ -1,29 +1,55 @@
|
|||
package org.gcube.data.publishing.ckan2zenodo.model;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.net.HttpURLConnection;
|
||||
import java.net.URL;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.StandardCopyOption;
|
||||
import java.security.DigestInputStream;
|
||||
import java.security.MessageDigest;
|
||||
import java.security.NoSuchAlgorithmException;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Getter;
|
||||
import lombok.Setter;
|
||||
import lombok.ToString;
|
||||
import lombok.*;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
|
||||
@Getter
|
||||
@Setter
|
||||
@AllArgsConstructor
|
||||
@ToString
|
||||
|
||||
@RequiredArgsConstructor
|
||||
@Slf4j
|
||||
public class DownloadedFile {
|
||||
|
||||
private static final Pattern FILENAME_IN_DEPOSITION_REGEXP = Pattern.compile("(?<=filename\\=\\\").*(?=\\\")");
|
||||
|
||||
@NonNull
|
||||
@Getter
|
||||
private CkanResource source;
|
||||
private File f;
|
||||
private String MD5;
|
||||
private String remoteFileName;
|
||||
//private File f;
|
||||
|
||||
private File f=null;
|
||||
private String MD5=null;
|
||||
private String remoteFileName=null;
|
||||
|
||||
public String getToUseFileName(){
|
||||
@Override
|
||||
public String toString() {
|
||||
return "DownloadedFile{" +
|
||||
"source=" + source +
|
||||
", f=" + f +
|
||||
", MD5='" + MD5 + '\'' +
|
||||
", remoteFileName='" + remoteFileName + '\'' +
|
||||
'}';
|
||||
}
|
||||
|
||||
public String getToUseFileName() throws Exception {
|
||||
if (getExtension(source.getName())!=null){
|
||||
// source contains extension
|
||||
return source.getName();
|
||||
}else {
|
||||
if(remoteFileName == null){
|
||||
initRemoteFileName();
|
||||
}
|
||||
String evaluatedExtension=getExtension(remoteFileName);
|
||||
if(evaluatedExtension!=null)
|
||||
return source.getName()+evaluatedExtension;
|
||||
|
@ -31,6 +57,26 @@ public class DownloadedFile {
|
|||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
public File getFile() throws Exception {
|
||||
if(f==null){
|
||||
download();
|
||||
}
|
||||
return f;
|
||||
}
|
||||
|
||||
public String getMD5() throws Exception {
|
||||
if(MD5==null){
|
||||
download();
|
||||
}
|
||||
return MD5;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
static final String getExtension(String filename){
|
||||
int lastIndexOf = filename.lastIndexOf(".");
|
||||
if (lastIndexOf == -1) {
|
||||
|
@ -38,4 +84,84 @@ public class DownloadedFile {
|
|||
}
|
||||
return filename.substring(lastIndexOf);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Actually downlaods the file INIT File, MD5 and remoteFilename.
|
||||
*/
|
||||
private void download() throws Exception {
|
||||
log.info("Downloading {} from {}",source.getName(),source.getUrl());
|
||||
URL url=new URL(source.getUrl());
|
||||
f=File.createTempFile("zenodo_", ".tmp");
|
||||
MessageDigest md = MessageDigest.getInstance("MD5");
|
||||
|
||||
// Multiple tries
|
||||
InputStream is=null;
|
||||
int attempt=0;
|
||||
Exception lastException=null;
|
||||
|
||||
|
||||
while(is==null&&attempt<5) {
|
||||
try {
|
||||
attempt++;
|
||||
is=url.openStream();
|
||||
|
||||
if(remoteFileName == null)
|
||||
remoteFileName = getFilenameFromURL(url);
|
||||
|
||||
}catch(Exception e) {
|
||||
lastException=e;
|
||||
try{
|
||||
Thread.sleep(500*attempt);
|
||||
}catch(InterruptedException e1) {}
|
||||
}
|
||||
}
|
||||
if(is==null) throw new Exception("Unable to download "+source.getUrl(),lastException);
|
||||
|
||||
if(remoteFileName == null)
|
||||
remoteFileName = ""; // Unable to evaluate from HEAD
|
||||
|
||||
|
||||
DigestInputStream dis = new DigestInputStream(is, md);
|
||||
MD5 = dis.getMessageDigest().toString();
|
||||
|
||||
// Download
|
||||
long size= Files.copy(is, f.toPath(), StandardCopyOption.REPLACE_EXISTING);
|
||||
log.info("Received {} bytes for {} ",size,source.getName());
|
||||
}
|
||||
|
||||
/**
|
||||
* Performs HTTP HEAD and INIT remoteFileName
|
||||
*/
|
||||
private void initRemoteFileName() throws Exception {
|
||||
int attempt =0;
|
||||
Exception lastException=null;
|
||||
URL url=new URL(source.getUrl());
|
||||
while(remoteFileName==null&&attempt<5) {
|
||||
try {
|
||||
attempt++;
|
||||
remoteFileName = getFilenameFromURL(url);
|
||||
}catch(Exception e) {
|
||||
lastException=e;
|
||||
try{
|
||||
Thread.sleep(500*attempt);
|
||||
}catch(InterruptedException e1) {}
|
||||
}
|
||||
}
|
||||
if(remoteFileName == null) {
|
||||
remoteFileName = ""; // Unable to evaluate from HEAD
|
||||
log.warn("Unable to get remote file name from {} [resource Name {}]",source.getUrl(),source.getName(),lastException);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
private static final String getFilenameFromURL(URL url) throws IOException {
|
||||
HttpURLConnection con = (HttpURLConnection) url.openConnection();
|
||||
con.setRequestMethod("GET");
|
||||
String contentDisp= con.getHeaderField("Content-Disposition");
|
||||
|
||||
Matcher m = FILENAME_IN_DEPOSITION_REGEXP.matcher(contentDisp);
|
||||
m.find();
|
||||
return m.group(0);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -10,7 +10,6 @@ import org.gcube.common.resources.gcore.GenericResource;
|
|||
import org.gcube.common.resources.gcore.Resources;
|
||||
import org.gcube.data.publishing.ckan2zenodo.clients.Zenodo;
|
||||
import org.gcube.data.publishing.ckan2zenodo.commons.IS;
|
||||
import org.gcube.data.publishing.ckan2zenodo.commons.Net;
|
||||
import org.gcube.data.publishing.ckan2zenodo.model.CkanItemDescriptor;
|
||||
import org.gcube.data.publishing.ckan2zenodo.model.CkanRelatedIdentifier;
|
||||
import org.gcube.data.publishing.ckan2zenodo.model.CkanResource;
|
||||
|
@ -88,8 +87,8 @@ public class ZenodoTests {
|
|||
CkanItemDescriptor desc=new CkanItemDescriptor(json);
|
||||
|
||||
for(CkanResource cRes:tran.filterResources(desc)) {
|
||||
DownloadedFile f=Net.download(cRes);
|
||||
FileDeposition file=z.uploadFile(dep, cRes.getName(), f.getF());
|
||||
DownloadedFile f=new DownloadedFile(cRes);
|
||||
FileDeposition file=z.uploadFile(dep, cRes.getName(), f.getFile());
|
||||
System.out.println("Published "+file);
|
||||
}
|
||||
|
||||
|
@ -116,9 +115,9 @@ public class ZenodoTests {
|
|||
CkanItemDescriptor desc=new CkanItemDescriptor(json);
|
||||
|
||||
for(CkanResource cRes:tran.filterResources(desc)) {
|
||||
DownloadedFile f=Net.download(cRes);
|
||||
DownloadedFile f=new DownloadedFile(cRes);
|
||||
|
||||
FileDeposition file=z.uploadFile(dep, cRes.getName(), f.getF());
|
||||
FileDeposition file=z.uploadFile(dep, cRes.getName(), f.getFile());
|
||||
System.out.println("Published "+file);
|
||||
}
|
||||
dep=z.publish(dep);
|
||||
|
|
|
@ -1,6 +1,5 @@
|
|||
package org.gcube.data.publishing.ckan2zenodo.model;
|
||||
|
||||
import org.gcube.data.publishing.ckan2zenodo.commons.Net;
|
||||
import org.junit.Test;
|
||||
|
||||
import static junit.framework.TestCase.assertTrue;
|
||||
|
@ -17,28 +16,28 @@ public class NetTests {
|
|||
|
||||
// PDF URL
|
||||
res.setUrl("https://data-pre.d4science.net/RgA7");
|
||||
check(Net.download(res),"Deliverable.pdf",true);
|
||||
check(new DownloadedFile(res),"Deliverable.pdf",true);
|
||||
|
||||
|
||||
// Do not use HEAD if extension in resource name
|
||||
res.setName("Deliverable.rtf");
|
||||
check(Net.download(res),res.getName(),true);
|
||||
check(new DownloadedFile(res),res.getName(),true);
|
||||
|
||||
//Check invalid urls i.e. folder url == UNABLE TO GET FILENAME FROM HEAD
|
||||
res.setUrl("http://data-pre.d4science.org/workspace-explorer-app?folderId=UjV1MTJ4K2lvQU5MRE1MT2NCOEVGWDkvMG5SL2dwY3A0QmpWZmdRVEFxR3Njd2cwcUxUQ3BBZzZxa1FhN3JQTQ");
|
||||
// Still should use resource name
|
||||
check(Net.download(res),res.getName(),true);
|
||||
check(new DownloadedFile(res),res.getName(),true);
|
||||
|
||||
//Check invalid urls i.e. folder url == UNABLE TO GET FILENAME FROM HEAD
|
||||
res.setName("Deliverable");
|
||||
//Shouldn't have extension
|
||||
check(Net.download(res),res.getName(),false);
|
||||
check(new DownloadedFile(res),res.getName(),false);
|
||||
}
|
||||
|
||||
|
||||
private static final void check(DownloadedFile f, String expectedFilename, boolean expectExtension){
|
||||
private static final void check(DownloadedFile f, String expectedFilename, boolean expectExtension) throws Exception {
|
||||
System.out.println(f);
|
||||
System.out.println("Resulting filename is : "+f.getToUseFileName());
|
||||
assertTrue(f.getRemoteFileName()!=null);
|
||||
assertTrue(f.getToUseFileName()!=null);
|
||||
if(expectExtension)
|
||||
assertTrue(DownloadedFile.getExtension(f.getToUseFileName())!=null);
|
||||
|
|
Loading…
Reference in New Issue