MD5 Management

This commit is contained in:
Fabio Sinibaldi 2020-01-31 14:56:51 +01:00
parent 1fd5ff53e9
commit a17af1d1a4
8 changed files with 173 additions and 109 deletions

View File

@ -70,17 +70,6 @@ public interface Ckan2Zenodo {
*/ */
public Future<ZenodoDeposition> uploadFiles(Set<CkanResource> toUpload,ZenodoDeposition deposition) throws ZenodoException, ConfigurationException; public Future<ZenodoDeposition> uploadFiles(Set<CkanResource> toUpload,ZenodoDeposition deposition) throws ZenodoException, ConfigurationException;
/**
* Uploads @param toUpload resources associating them to given @param deposition
*
* @param toUpload
* @param deposition
* @param policy
* @return
* @throws ZenodoException
* @throws ConfigurationException
*/
public Future<ZenodoDeposition> uploadFiles(Set<CkanResource> toUpload,ZenodoDeposition deposition,UploadPolicy policy) throws ZenodoException, ConfigurationException;
/** /**
* Publishes @param dep, setting/updateing DOI reference into @param toUpdate * Publishes @param dep, setting/updateing DOI reference into @param toUpdate

View File

@ -51,7 +51,7 @@ public class Ckan2ZenodoImpl implements Ckan2Zenodo{
CkanRelatedIdentifier doi=desc.getZenodoDoi(); CkanRelatedIdentifier doi=desc.getZenodoDoi();
if(doi!=null) { if(doi!=null) {
Zenodo z=getZenodo(); Zenodo z=getZenodo();
toUpdate=z.readDeposition(doi.getZenodoId()); toUpdate=z.readDeposition(doi.getZenodoId());
} }
Translator tr=new TransformerManager().getByProfile(desc.getProfile()); Translator tr=new TransformerManager().getByProfile(desc.getProfile());
return tr.transform(desc, toUpdate); return tr.transform(desc, toUpdate);
@ -84,22 +84,16 @@ public class Ckan2ZenodoImpl implements Ckan2Zenodo{
@Override @Override
public Future<ZenodoDeposition> uploadFiles(Set<CkanResource> toUpload, ZenodoDeposition deposition) throws ZenodoException, ConfigurationException { public Future<ZenodoDeposition> uploadFiles(Set<CkanResource> toUpload, ZenodoDeposition deposition) throws ZenodoException, ConfigurationException {
return uploadFiles(toUpload, deposition, UploadPolicy.DELETE_ALL);
}
@Override
public Future<ZenodoDeposition> uploadFiles(Set<CkanResource> toUpload, ZenodoDeposition deposition,
UploadPolicy policy) throws ZenodoException, ConfigurationException {
final Zenodo z=getZenodo(); final Zenodo z=getZenodo();
if(deposition.getSubmitted()) UploadFilesCall call=new UploadFilesCall(toUpload,deposition,z);
deposition=z.newVersion(deposition.getId());
UploadFilesCall call=new UploadFilesCall(policy,toUpload,deposition,z);
return FileUploaderManager.submitForDeposition(call); return FileUploaderManager.submitForDeposition(call);
} }
@Override @Override
public ZenodoDeposition publish(ZenodoDeposition dep, CkanItemDescriptor toUpdate) throws ZenodoException, ConfigurationException, InvalidItemException, MalformedURLException { public ZenodoDeposition publish(ZenodoDeposition dep, CkanItemDescriptor toUpdate) throws ZenodoException, ConfigurationException, InvalidItemException, MalformedURLException {
Zenodo z=getZenodo(); Zenodo z=getZenodo();

View File

@ -1,26 +1,30 @@
package org.gcube.data.publishing.ckan2zenodo.clients; package org.gcube.data.publishing.ckan2zenodo.clients;
import java.nio.file.Files;
import java.util.Collection; import java.util.Collection;
import java.util.concurrent.Callable; import java.util.concurrent.Callable;
import org.gcube.data.publishing.ckan2zenodo.commons.Net;
import org.gcube.data.publishing.ckan2zenodo.model.CkanResource; import org.gcube.data.publishing.ckan2zenodo.model.CkanResource;
import org.gcube.data.publishing.ckan2zenodo.model.UploadPolicy; import org.gcube.data.publishing.ckan2zenodo.model.DownloadedFile;
import org.gcube.data.publishing.ckan2zenodo.model.zenodo.FileDeposition; import org.gcube.data.publishing.ckan2zenodo.model.zenodo.FileDeposition;
import org.gcube.data.publishing.ckan2zenodo.model.zenodo.ZenodoDeposition; import org.gcube.data.publishing.ckan2zenodo.model.zenodo.ZenodoDeposition;
import lombok.extern.slf4j.Slf4j;
@Slf4j
public class UploadFilesCall implements Callable<ZenodoDeposition>{ public class UploadFilesCall implements Callable<ZenodoDeposition>{
private UploadPolicy policy;
private Collection<CkanResource> toUpload; private Collection<CkanResource> toUpload;
private ZenodoDeposition deposition; private ZenodoDeposition deposition;
private Zenodo z; private Zenodo z;
public UploadFilesCall(UploadPolicy policy, Collection<CkanResource> toUpload, ZenodoDeposition deposition, public UploadFilesCall(Collection<CkanResource> toUpload, ZenodoDeposition deposition,
Zenodo z) { Zenodo z) {
super(); super();
this.policy = policy;
this.toUpload = toUpload; this.toUpload = toUpload;
this.deposition = deposition; this.deposition = deposition;
this.z = z; this.z = z;
@ -31,51 +35,69 @@ public class UploadFilesCall implements Callable<ZenodoDeposition>{
@Override @Override
public ZenodoDeposition call() throws Exception { public ZenodoDeposition call() throws Exception {
ZenodoDeposition dep=deposition; ZenodoDeposition dep=deposition;
if(policy.equals(UploadPolicy.DELETE_ALL)) {
for(FileDeposition f:dep.getFiles()) {
try{
z.deleteFile(dep.getId(), f);
}catch(Throwable t) {
throw new Exception("Unable to delete "+f,t);
}
}
}
log.debug("Starting file transfer for deposition "+deposition.getTitle()+" id : "+deposition.getId());
for(CkanResource r:toUpload) { log.debug("Removing not referenced files..");
try {
switch(policy) { for(FileDeposition f:dep.getFiles()) {
case SKIP_EXISTING : { CkanResource found=null;
boolean found=false; for(CkanResource r:toUpload)
for(FileDeposition f:dep.getFiles()) { if(r.getName().equals(f.getFilename())) {
if(f.getFilename().equals(r.getName())) { found=r;
found=true;
break;
}
}
if(!found) z.uploadFile(dep, r.getName(), r.getUrl());
break; break;
} }
case DELETE_EXISTING : { if(found==null) // File not present in current toUpload set
for(FileDeposition f:dep.getFiles()) { try{
if(f.getFilename().equals(r.getName())) { log.debug("Remote file "+f+" is not in requested set. Deleting it..");
z.deleteFile(dep.getId(), f); z.deleteFile(dep, f);
break; }catch(Throwable t) {
} throw new Exception("Unable to delete "+f,t);
}
// continue with default
} }
default : { else {
z.uploadFile(dep, r.getName(), r.getUrl()); // File present, checking for update
DownloadedFile downloaded=null;
try {
log.debug("Found already existing remote file "+f);
downloaded=Net.download(found);
if(!downloaded.getMD5().equals(f.getChecksum())) {
log.debug("MD5 differ, going to update : "+downloaded+" - "+f);
z.deleteFile(dep, f);
z.uploadFile(dep, found.getName(), downloaded.getF());
}
}catch(Throwable t) {
throw new Exception("Unable to update "+f,t);
}finally {
if(downloaded!=null) Files.deleteIfExists(downloaded.getF().toPath());
} }
}
}
}
log.debug("Going to push additional resources for "+deposition.getTitle()+" ID : "+deposition.getId());
for(CkanResource r:toUpload) {
DownloadedFile downloaded=null;
try {
boolean found=false;
for(FileDeposition f:dep.getFiles())
if(f.getFilename().equals(r.getName())) {
found=true;
break;
}
if(!found) {
downloaded=Net.download(r);
z.uploadFile(dep, r.getName(),downloaded.getF());
}
}catch(Throwable t) { }catch(Throwable t) {
throw new Exception("Unable to upload "+r.getName()+".",t); throw new Exception("Unable to upload "+r.getName()+".",t);
} }
} }
return z.readDeposition(dep.getId()); return z.readDeposition(dep.getId());
} }
} }

View File

@ -131,49 +131,23 @@ public class Zenodo {
return updateMetadata(dep.getId(), dep.getMetadata()); return updateMetadata(dep.getId(), dep.getMetadata());
} }
public FileDeposition uploadFile(ZenodoDeposition dep, String toUploadName,String urlString) throws ZenodoException { public FileDeposition uploadFile(ZenodoDeposition deposition, String toUploadName,File toUpload) throws ZenodoException {
final ZenodoDeposition dep=(deposition.getSubmitted())?newVersion(deposition.getId()):deposition;
Callable<Response> call=new Callable<Response>() { Callable<Response> call=new Callable<Response>() {
@Override @Override
public Response call() throws Exception { public Response call() throws Exception {
File temp=null;
try { try {
log.debug("Downloading "+urlString);
//Download locally into temp
URL url=new URL(urlString);
temp=File.createTempFile("zenodo_", ".tmp");
// Multiple tries
InputStream is=null;
int attempt=0;
Exception lastException=null;
while(is==null&&attempt<5) {
try {
attempt++;
is=url.openStream();
}catch(Exception e) {
lastException=e;
try{
Thread.sleep(500*attempt);
}catch(InterruptedException e1) {}
}
}
if(is==null) throw new Exception("Unable to download "+urlString,lastException);
// Download
long size=Files.copy(is, temp.toPath(),StandardCopyOption.REPLACE_EXISTING);
//upload //upload
FormDataMultiPart multi=new FormDataMultiPart(); FormDataMultiPart multi=new FormDataMultiPart();
FileDataBodyPart fileDataBodyPart = new FileDataBodyPart("file", FileDataBodyPart fileDataBodyPart = new FileDataBodyPart("file",
temp,MediaType.APPLICATION_OCTET_STREAM_TYPE); toUpload,MediaType.APPLICATION_OCTET_STREAM_TYPE);
multi.field("name", toUploadName); multi.field("name", toUploadName);
multi.bodyPart(fileDataBodyPart); multi.bodyPart(fileDataBodyPart);
log.debug("Starting transfer of "+toUploadName+" ("+urlString+") into "+dep.getId()); log.debug("Starting transfer of "+toUploadName+" into "+dep.getId());
Response toReturn=getWebClient().target(credentials.getBaseUrl()). Response toReturn=getWebClient().target(credentials.getBaseUrl()).
path(DEPOSITION_BASE_URL).path(dep.getId()+"").path("files"). path(DEPOSITION_BASE_URL).path(dep.getId()+"").path("files").
queryParam(ACCESS_TOKEN, credentials.getKey()).request(CONTENT_TYPE) queryParam(ACCESS_TOKEN, credentials.getKey()).request(CONTENT_TYPE)
@ -183,17 +157,13 @@ public class Zenodo {
return toReturn; return toReturn;
}catch(Throwable e) { }catch(Throwable e) {
throw new ZenodoException("Unable to transfer file "+toUploadName+" url : "+urlString,e); throw new ZenodoException("Unable to transfer file "+toUploadName,e);
}finally {
//finally delete temp
if(temp!=null) Files.deleteIfExists(temp.toPath());
} }
} }
}; };
log.debug("Submitting request to upload "+urlString+" to Manager"); log.debug("Submitting request to upload "+toUploadName+" to Manager");
Future<Response> resp=FileUploaderManager.submitForResponse(call); Future<Response> resp=FileUploaderManager.submitForResponse(call);
try { try {
@ -209,10 +179,13 @@ public class Zenodo {
} }
public void deleteFile(Integer depositionId,FileDeposition toDelete) throws ZenodoException { public void deleteFile(ZenodoDeposition dep,FileDeposition toDelete) throws ZenodoException {
if(dep.getSubmitted())
dep=newVersion(dep.getId());
Response resp = getWebClient().target(credentials.getBaseUrl()). Response resp = getWebClient().target(credentials.getBaseUrl()).
path(DEPOSITION_BASE_URL). path(DEPOSITION_BASE_URL).
path(depositionId+""). path(dep.getId()+"").
path("files"). path("files").
path(toDelete.getId()). path(toDelete.getId()).
queryParam(ACCESS_TOKEN, credentials.getKey()).request(CONTENT_TYPE) queryParam(ACCESS_TOKEN, credentials.getKey()).request(CONTENT_TYPE)

View File

@ -0,0 +1,56 @@
package org.gcube.data.publishing.ckan2zenodo.commons;
import java.io.File;
import java.io.InputStream;
import java.net.URL;
import java.nio.file.Files;
import java.nio.file.StandardCopyOption;
import java.security.DigestInputStream;
import java.security.MessageDigest;
import org.gcube.data.publishing.ckan2zenodo.model.CkanResource;
import org.gcube.data.publishing.ckan2zenodo.model.DownloadedFile;
import lombok.extern.slf4j.Slf4j;
/**
 * Network helpers used to fetch CKAN resources locally before they are pushed to Zenodo.
 */
@Slf4j
public class Net {

    /** Maximum number of attempts made to open the remote stream. */
    private static final int MAX_ATTEMPTS = 5;

    /** Base pause between attempts, multiplied by the attempt number (linear back-off). */
    private static final int RETRY_DELAY_MS = 500;

    /**
     * Downloads the content referenced by {@code toDownload.getUrl()} into a temporary file
     * and computes the MD5 digest of the bytes actually written.
     *
     * <p>The caller owns the returned temporary file and is responsible for deleting it.
     * If the transfer fails, the temporary file is removed before the exception propagates.
     *
     * @param toDownload the resource whose URL has to be fetched
     * @return the downloaded file together with its MD5 digest as a lowercase hexadecimal string
     *         (NOTE(review): this is the form assumed for Zenodo file checksums - confirm)
     * @throws Exception if the URL is malformed, the stream cannot be opened after
     *         {@value #MAX_ATTEMPTS} attempts, the thread is interrupted while waiting to retry,
     *         or the copy to disk fails
     */
    public static DownloadedFile download(CkanResource toDownload) throws Exception {
        String urlString = toDownload.getUrl();
        log.debug("Downloading " + urlString);

        URL url = new URL(urlString);
        MessageDigest md = MessageDigest.getInstance("MD5");
        InputStream is = openWithRetry(url, urlString);

        File temp = null;
        boolean completed = false;
        // The digest is only updated by bytes read THROUGH the DigestInputStream,
        // so the copy must consume 'dis' and not the wrapped raw stream.
        try (DigestInputStream dis = new DigestInputStream(is, md)) {
            // Download locally into temp
            temp = File.createTempFile("zenodo_", ".tmp");
            long size = Files.copy(dis, temp.toPath(), StandardCopyOption.REPLACE_EXISTING);
            String md5 = toHex(md.digest());
            log.debug("Downloaded " + size + " bytes from " + urlString + ", MD5 : " + md5);
            completed = true;
            return new DownloadedFile(toDownload, temp, md5);
        } finally {
            // Do not leave partial downloads behind when something went wrong.
            if (!completed && temp != null) {
                Files.deleteIfExists(temp.toPath());
            }
        }
    }

    /** Opens the stream for {@code url}, retrying with a linearly growing pause between attempts. */
    private static InputStream openWithRetry(URL url, String urlString) throws Exception {
        Exception lastException = null;
        for (int attempt = 1; attempt <= MAX_ATTEMPTS; attempt++) {
            try {
                return url.openStream();
            } catch (Exception e) {
                lastException = e;
                try {
                    Thread.sleep(RETRY_DELAY_MS * attempt);
                } catch (InterruptedException interrupted) {
                    // Preserve the interruption for the caller and stop retrying.
                    Thread.currentThread().interrupt();
                    throw new Exception("Interrupted while downloading " + urlString, interrupted);
                }
            }
        }
        throw new Exception("Unable to download " + urlString, lastException);
    }

    /** Renders {@code bytes} as a lowercase hexadecimal string, two characters per byte. */
    private static String toHex(byte[] bytes) {
        StringBuilder hex = new StringBuilder(bytes.length * 2);
        for (byte b : bytes) {
            hex.append(Character.forDigit((b >> 4) & 0xF, 16));
            hex.append(Character.forDigit(b & 0xF, 16));
        }
        return hex.toString();
    }
}

View File

@ -0,0 +1,20 @@
package org.gcube.data.publishing.ckan2zenodo.model;
import java.io.File;
import lombok.AllArgsConstructor;
import lombok.Getter;
import lombok.Setter;
import lombok.ToString;
/**
 * Holder describing the outcome of downloading a {@link CkanResource} to the local file system.
 *
 * <p>Accessors, the all-arguments constructor and {@code toString()} are generated by Lombok.
 * The constructor takes its arguments in field declaration order:
 * {@code (source, f, MD5)}.
 */
@Getter
@Setter
@AllArgsConstructor
@ToString
public class DownloadedFile {
/** The CKAN resource this file was downloaded from. */
private CkanResource source;
/** The local file holding the downloaded content. */
private File f;
/** MD5 digest string of the downloaded content, as supplied by whoever built this instance. */
private String MD5;
}

View File

@ -8,8 +8,10 @@ import java.util.Date;
import com.fasterxml.jackson.annotation.JsonFormat; import com.fasterxml.jackson.annotation.JsonFormat;
import lombok.Data; import lombok.Data;
import lombok.extern.slf4j.Slf4j;
@Data @Data
@Slf4j
public class ZenodoDeposition { public class ZenodoDeposition {
@JsonFormat(pattern = Commons.ISO_DATE_PATTERN) @JsonFormat(pattern = Commons.ISO_DATE_PATTERN)
@ -29,9 +31,10 @@ public class ZenodoDeposition {
private String title; private String title;
public URL getDOIUrl() throws MalformedURLException { public URL getDOIUrl() throws MalformedURLException {
try { try {
return new URL(links.getConceptdoi()); return new URL(links.getConceptdoi());
}catch(Throwable t) { }catch(Throwable t) {
log.warn("Returned concept link is broken, forming it from doi..");
return new URL("https://doi.org/"+doi); return new URL("https://doi.org/"+doi);
} }
} }

View File

@ -9,9 +9,11 @@ import org.gcube.common.resources.gcore.Resources;
import org.gcube.data.publishing.ckan2zenodo.Translator; import org.gcube.data.publishing.ckan2zenodo.Translator;
import org.gcube.data.publishing.ckan2zenodo.clients.Zenodo; import org.gcube.data.publishing.ckan2zenodo.clients.Zenodo;
import org.gcube.data.publishing.ckan2zenodo.commons.IS; import org.gcube.data.publishing.ckan2zenodo.commons.IS;
import org.gcube.data.publishing.ckan2zenodo.commons.Net;
import org.gcube.data.publishing.ckan2zenodo.model.CkanItemDescriptor; import org.gcube.data.publishing.ckan2zenodo.model.CkanItemDescriptor;
import org.gcube.data.publishing.ckan2zenodo.model.CkanRelatedIdentifier; import org.gcube.data.publishing.ckan2zenodo.model.CkanRelatedIdentifier;
import org.gcube.data.publishing.ckan2zenodo.model.CkanResource; import org.gcube.data.publishing.ckan2zenodo.model.CkanResource;
import org.gcube.data.publishing.ckan2zenodo.model.DownloadedFile;
import org.gcube.data.publishing.ckan2zenodo.model.ZenodoCredentials; import org.gcube.data.publishing.ckan2zenodo.model.ZenodoCredentials;
import org.gcube.data.publishing.ckan2zenodo.model.faults.ConfigurationException; import org.gcube.data.publishing.ckan2zenodo.model.faults.ConfigurationException;
import org.gcube.data.publishing.ckan2zenodo.model.faults.ZenodoException; import org.gcube.data.publishing.ckan2zenodo.model.faults.ZenodoException;
@ -47,7 +49,7 @@ public class ZenodoTests {
Zenodo z=new Zenodo(credentials); Zenodo z=new Zenodo(credentials);
System.out.println(z.readDeposition(426312)); System.out.println(z.readDeposition(472904));
} }
@ -84,7 +86,8 @@ public class ZenodoTests {
CkanItemDescriptor desc=new CkanItemDescriptor(json); CkanItemDescriptor desc=new CkanItemDescriptor(json);
for(CkanResource cRes:tran.filterResources(desc)) { for(CkanResource cRes:tran.filterResources(desc)) {
FileDeposition file=z.uploadFile(dep, cRes.getName(), cRes.getUrl()); DownloadedFile f=Net.download(cRes);
FileDeposition file=z.uploadFile(dep, cRes.getName(), f.getF());
System.out.println("Published "+file); System.out.println("Published "+file);
} }
@ -111,10 +114,14 @@ public class ZenodoTests {
CkanItemDescriptor desc=new CkanItemDescriptor(json); CkanItemDescriptor desc=new CkanItemDescriptor(json);
for(CkanResource cRes:tran.filterResources(desc)) { for(CkanResource cRes:tran.filterResources(desc)) {
FileDeposition file=z.uploadFile(dep, cRes.getName(), cRes.getUrl()); DownloadedFile f=Net.download(cRes);
FileDeposition file=z.uploadFile(dep, cRes.getName(), f.getF());
System.out.println("Published "+file); System.out.println("Published "+file);
} }
dep=z.publish(dep); dep=z.publish(dep);
Assert.assertTrue("Invalid deposition state after publishing ",dep.getState().equals("done"));
Assert.assertTrue("Invalid submitted state after publishing", dep.getSubmitted());
desc.setZenodoDoi(CkanRelatedIdentifier.getZenodo(dep.getDOIUrl())); desc.setZenodoDoi(CkanRelatedIdentifier.getZenodo(dep.getDOIUrl()));