Merge pull request '22889' (!4) from 22889 into master

Reviewed-on: #4
This commit is contained in:
Fabio Sinibaldi 2023-03-28 12:35:24 +02:00
commit 6c2588eba0
18 changed files with 262 additions and 148 deletions

View File

@ -1,6 +1,11 @@
This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
# Changelog for org.gcube.data.publishing.ckan2zenodo-library
## [v1.0.3] 2023-03-28
- Extensions evaluated from URL [#22889](https://support.d4science.org/issues/22889)
## [v1.0.2] 2021-07-30
- Introduced environment check [#19990](https://support.d4science.org/issues/19990)

View File

@ -8,7 +8,7 @@
</parent>
<groupId>org.gcube.data.publishing</groupId>
<artifactId>ckan2zenodo-library</artifactId>
<version>1.0.2</version>
<version>1.0.3</version>
<name>CKAN 2 Zenodo Library</name>
<description>Library to publish d4science CKAN items into Zenodo</description>
@ -31,7 +31,7 @@
<dependency>
<groupId>org.gcube.distribution</groupId>
<artifactId>gcube-bom</artifactId>
<version>2.0.1</version>
<version>2.1.0</version>
<type>pom</type>
<scope>import</scope>
</dependency>
@ -59,6 +59,7 @@
<version>1.14.8</version>
</dependency>
<dependency>
<groupId>org.glassfish.jersey.media</groupId>
<artifactId>jersey-media-json-jackson</artifactId>

View File

@ -35,8 +35,7 @@ public class TransformerManager {
else return new Translator(m);
}
}
throw new ConfigurationException("No specific mapping for the catalogue item has been configured. "
+ "By continuing with the upload some metadata might not be upload to Zenodo.");
return new Translator();
}

View File

@ -1,10 +1,9 @@
package org.gcube.data.publishing.ckan2zenodo.clients;
import java.nio.file.Files;
import java.util.Collection;
import java.util.*;
import java.util.concurrent.Callable;
import org.gcube.data.publishing.ckan2zenodo.commons.Net;
import org.gcube.data.publishing.ckan2zenodo.model.CkanResource;
import org.gcube.data.publishing.ckan2zenodo.model.DownloadedFile;
import org.gcube.data.publishing.ckan2zenodo.model.zenodo.FileDeposition;
@ -40,64 +39,55 @@ public class UploadFilesCall implements Callable<ZenodoDeposition>{
log.debug("Removing not referenced files..");
HashMap<String,DownloadedFile> resourceMap=new HashMap<>();
for(CkanResource r:toUpload) {
DownloadedFile d=new DownloadedFile(r);
resourceMap.put(d.getToUseFileName(),d);
}
Set<String> alreadyExistingFiles=new HashSet<>();
for(FileDeposition f:dep.getFiles()) {
CkanResource found=null;
for(CkanResource r:toUpload)
if(r.getName().equals(f.getFilename())) {
found=r;
break;
}
if(found==null) // File not present in current toUpload set
if(resourceMap.containsKey(f.getFilename())){
alreadyExistingFiles.add(f.getFilename());
try{
// check for update
DownloadedFile downloaded = resourceMap.get(f.getFilename());
if(!downloaded.getMD5().equals(f.getChecksum())) {
log.debug("MD5 differ, going to update : "+downloaded+" - "+f);
z.deleteFile(dep, f);
z.uploadFile(dep, found.getName(), downloaded.getFile());
}
}catch (Throwable t){
log.warn("Unable to update "+f,t);
}
}else {
try{
// remove File not present in current toUpload set
log.debug("Remote file "+f+" is not in requested set. Deleting it..");
z.deleteFile(dep, f);
}catch(Throwable t) {
log.warn("Unable to delete "+f,t);
}
else {
// File present, checking for update
DownloadedFile downloaded=null;
try {
log.debug("Found already existing remote file "+f);
downloaded=Net.download(found);
if(!downloaded.getMD5().equals(f.getChecksum())) {
log.debug("MD5 differ, going to update : "+downloaded+" - "+f);
z.deleteFile(dep, f);
z.uploadFile(dep, found.getName(), downloaded.getF());
}
}catch(Throwable t) {
log.warn("Unable to update "+f,t);
}finally {
if(downloaded!=null) Files.deleteIfExists(downloaded.getF().toPath());
}
}
}
log.debug("Going to push additional resources for "+deposition.getTitle()+" ID : "+deposition.getId());
for(CkanResource r:toUpload) {
DownloadedFile downloaded=null;
try {
boolean found=false;
for(FileDeposition f:dep.getFiles())
if(f.getFilename().equals(r.getName())) {
found=true;
break;
}
if(!found) {
downloaded=Net.download(r);
z.uploadFile(dep, r.getName(),downloaded.getF());
}
for(Map.Entry<String,DownloadedFile> e : resourceMap.entrySet()){
DownloadedFile downloadedFile=e.getValue();
if(!alreadyExistingFiles.contains(e.getKey()))
try{
// Upload new file
z.uploadFile(dep,downloadedFile.getToUseFileName(),downloadedFile.getFile());
}catch(Throwable t) {
log.warn("Unable to upload "+r.getName()+".",t);
log.warn("Unable to upload "+downloadedFile.getSource().getName(),t);
}
}
return z.readDeposition(dep.getId());
}
}

View File

@ -129,8 +129,9 @@ public class Zenodo {
}
public FileDeposition uploadFile(ZenodoDeposition deposition, String toUploadName,File toUpload) throws ZenodoException {
final ZenodoDeposition dep=(deposition.getSubmitted())?newVersion(deposition.getId()):deposition;
log.info("Pushing File {} to Deposition {}",toUploadName,dep);
Callable<Response> call=new Callable<Response>() {
@Override

View File

@ -1,56 +0,0 @@
package org.gcube.data.publishing.ckan2zenodo.commons;
import java.io.File;
import java.io.InputStream;
import java.net.URL;
import java.nio.file.Files;
import java.nio.file.StandardCopyOption;
import java.security.DigestInputStream;
import java.security.MessageDigest;
import org.gcube.data.publishing.ckan2zenodo.model.CkanResource;
import org.gcube.data.publishing.ckan2zenodo.model.DownloadedFile;
import lombok.extern.slf4j.Slf4j;
/**
 * Network helper that fetches the payload of a {@link CkanResource} into a local
 * temporary file and computes its MD5 checksum while downloading.
 */
@Slf4j
public class Net {

    /** Maximum number of attempts made to open the remote stream. */
    private static final int MAX_ATTEMPTS=5;

    /**
     * Downloads the content referenced by {@code toDownload.getUrl()} into a temporary file.
     * <p>
     * Opening the stream is retried up to {@value #MAX_ATTEMPTS} times with a linearly growing
     * pause between attempts. The MD5 is computed over the bytes actually written to disk and is
     * returned as a lowercase hexadecimal string.
     *
     * @param toDownload the resource whose URL has to be fetched
     * @return the downloaded file together with its source resource and MD5 checksum
     * @throws Exception if the URL is malformed, the stream cannot be opened after all attempts,
     *                   or the copy to the temporary file fails
     */
    public static DownloadedFile download(CkanResource toDownload) throws Exception {
        String urlString=toDownload.getUrl();
        log.debug("Downloading "+urlString);
        URL url=new URL(urlString);
        MessageDigest md = MessageDigest.getInstance("MD5");

        // Multiple tries
        InputStream is=null;
        int attempt=0;
        Exception lastException=null;
        while(is==null&&attempt<MAX_ATTEMPTS) {
            try {
                attempt++;
                is=url.openStream();
            }catch(Exception e) {
                lastException=e;
                if(attempt<MAX_ATTEMPTS) {
                    try{
                        Thread.sleep(500*attempt);
                    }catch(InterruptedException e1) {
                        // Preserve the interruption for the caller and stop retrying
                        Thread.currentThread().interrupt();
                        break;
                    }
                }
            }
        }
        if(is==null) throw new Exception("Unable to download "+urlString,lastException);

        File temp=null;
        // The digest is only updated by bytes read THROUGH the DigestInputStream,
        // so the copy must consume 'dis' and not the underlying stream.
        try(DigestInputStream dis = new DigestInputStream(is, md)) {
            temp=File.createTempFile("zenodo_", ".tmp");
            Files.copy(dis, temp.toPath(),StandardCopyOption.REPLACE_EXISTING);
            return new DownloadedFile(toDownload,temp,toHex(md.digest()));
        }catch(Exception e) {
            // Do not leave partial downloads behind
            if(temp!=null&&!temp.delete()) log.warn("Unable to remove temporary file {}",temp);
            throw e;
        }
    }

    /** Encodes the given bytes as a lowercase hexadecimal string. */
    private static String toHex(byte[] bytes) {
        StringBuilder hex=new StringBuilder(bytes.length*2);
        for(byte b:bytes) {
            hex.append(Character.forDigit((b>>4)&0xF,16));
            hex.append(Character.forDigit(b&0xF,16));
        }
        return hex.toString();
    }
}

View File

@ -1,20 +1,167 @@
package org.gcube.data.publishing.ckan2zenodo.model;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.net.HttpURLConnection;
import java.net.URL;
import java.nio.file.Files;
import java.nio.file.StandardCopyOption;
import java.security.DigestInputStream;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import lombok.AllArgsConstructor;
import lombok.Getter;
import lombok.Setter;
import lombok.ToString;
import lombok.*;
import lombok.extern.slf4j.Slf4j;
@Getter
@Setter
@AllArgsConstructor
@ToString
@RequiredArgsConstructor
@Slf4j
public class DownloadedFile {
private static final Pattern FILENAME_IN_DEPOSITION_REGEXP = Pattern.compile("(?<=filename\\=\\\").*(?=\\\")");
@NonNull
@Getter
private CkanResource source;
private File f;
private String MD5;
//private File f;
private File f=null;
private String MD5=null;
private String remoteFileName=null;
/**
 * Renders the current state of this object (source, local file, MD5 and remote file name).
 * Fields are printed as they are: no download or remote lookup is triggered.
 */
@Override
public String toString() {
    StringBuilder text = new StringBuilder("DownloadedFile{");
    text.append("source=").append(source);
    text.append(", f=").append(f);
    text.append(", MD5='").append(MD5).append('\'');
    text.append(", remoteFileName='").append(remoteFileName).append('\'');
    return text.append('}').toString();
}
/**
 * Evaluates the file name to use for this resource.
 * <p>
 * The resource name is used as is when it already contains an extension. Otherwise the
 * extension of the remote file name (resolved on first need) is appended, if there is one.
 *
 * @return the resource name, possibly completed with the extension of the remote file name
 * @throws Exception if the remote file name has to be resolved and that operation fails
 */
public String getToUseFileName() throws Exception {
    final String resourceName = source.getName();

    // The resource name already declares an extension: nothing to evaluate
    if (getExtension(resourceName) != null) {
        return resourceName;
    }

    if (remoteFileName == null) {
        initRemoteFileName();
    }

    final String remoteExtension = getExtension(remoteFileName);
    // No extension available remotely either: fall back to the bare resource name
    return remoteExtension == null ? resourceName : resourceName + remoteExtension;
}
/**
 * Returns the local copy of the resource, downloading it first when it is not yet available.
 *
 * @return the value of {@code f} after the (possible) download
 * @throws Exception if the download fails
 */
public File getFile() throws Exception {
    if (f != null) {
        return f;
    }
    download();
    return f;
}
/**
 * Returns the MD5 of the resource, downloading it first when the checksum is not yet available.
 *
 * @return the value of {@code MD5} after the (possible) download
 * @throws Exception if the download fails
 */
public String getMD5() throws Exception {
    if (MD5 != null) {
        return MD5;
    }
    download();
    return MD5;
}
/**
 * Extracts the extension of a file name, i.e. the part starting at its last dot.
 *
 * @param filename the file name to inspect
 * @return the extension including the leading dot (e.g. {@code ".pdf"}), or {@code null}
 *         when the name contains no dot at all
 */
static final String getExtension(String filename){
    final int dotPosition = filename.lastIndexOf('.');
    return dotPosition < 0 ? null : filename.substring(dotPosition);
}
/**
 * Actually downloads the file, initializing {@code f}, {@code MD5} and, when still unknown,
 * {@code remoteFileName}.
 * <p>
 * Opening the stream is retried up to 5 times with a linearly growing pause. The MD5 is
 * computed over the bytes actually written to disk and stored as a lowercase hexadecimal
 * string. {@code f} and {@code MD5} are assigned only once the whole download succeeded, so a
 * failed attempt never leaves a partial file behind for {@code getFile()} to return.
 * <p>
 * NOTE(review): the hexadecimal form is assumed to match the format returned by
 * {@code FileDeposition.getChecksum()} - confirm against the Zenodo API in use.
 *
 * @throws Exception if the URL is malformed, the stream cannot be opened after all attempts,
 *                   or the copy to the temporary file fails
 */
private void download() throws Exception {
    log.info("Downloading {} from {}",source.getName(),source.getUrl());
    URL url=new URL(source.getUrl());
    MessageDigest md = MessageDigest.getInstance("MD5");

    // Multiple tries
    InputStream is=null;
    int attempt=0;
    Exception lastException=null;
    while(is==null&&attempt<5) {
        try {
            attempt++;
            is=url.openStream();
        }catch(Exception e) {
            lastException=e;
            if(attempt<5) {
                try{
                    Thread.sleep(500*attempt);
                }catch(InterruptedException e1) {
                    // Preserve the interruption for the caller and stop retrying
                    Thread.currentThread().interrupt();
                    break;
                }
            }
        }
    }
    if(is==null) throw new Exception("Unable to download "+source.getUrl(),lastException);

    File temp=null;
    // The digest is only updated by bytes read THROUGH the DigestInputStream,
    // so the copy must consume 'dis' and not the underlying stream.
    try(DigestInputStream dis = new DigestInputStream(is, md)) {
        if(remoteFileName == null) {
            // Best effort: a failure here must not prevent the download itself
            try {
                remoteFileName = getFilenameFromURL(url);
            }catch(Exception e) {
                log.debug("Unable to evaluate remote file name from {}",source.getUrl(),e);
            }
            if(remoteFileName == null)
                remoteFileName = ""; // Unable to evaluate remote file name
        }

        // Download
        temp=File.createTempFile("zenodo_", ".tmp");
        long size= Files.copy(dis, temp.toPath(), StandardCopyOption.REPLACE_EXISTING);

        // Hex-encode the digest of the received bytes
        byte[] digest=md.digest();
        StringBuilder hex=new StringBuilder(digest.length*2);
        for(byte b:digest) {
            hex.append(Character.forDigit((b>>4)&0xF,16));
            hex.append(Character.forDigit(b&0xF,16));
        }

        log.info("Received {} bytes for {} ",size,source.getName());
        f=temp;
        MD5=hex.toString();
    }catch(Exception e) {
        // Do not leave partial downloads behind
        if(temp!=null&&!temp.delete()) log.warn("Unable to remove temporary file {}",temp);
        throw e;
    }
}
/**
 * Initializes {@code remoteFileName} by querying the resource URL through
 * {@code getFilenameFromURL}.
 * <p>
 * The lookup is attempted up to 5 times with a linearly growing pause between failures.
 * When no name can be obtained, {@code remoteFileName} is set to the empty string so that
 * the lookup is not repeated, and a warning is logged.
 *
 * @throws Exception if the resource URL is malformed
 */
private void initRemoteFileName() throws Exception {
    int attempt =0;
    Exception lastException=null;
    URL url=new URL(source.getUrl());
    while(remoteFileName==null&&attempt<5) {
        try {
            attempt++;
            remoteFileName = getFilenameFromURL(url);
        }catch(Exception e) {
            lastException=e;
            // No point in waiting after the last attempt
            if(attempt<5) {
                try{
                    Thread.sleep(500*attempt);
                }catch(InterruptedException e1) {
                    // Preserve the interruption for the caller and stop retrying
                    Thread.currentThread().interrupt();
                    break;
                }
            }
        }
    }
    if(remoteFileName == null) {
        remoteFileName = ""; // Unable to evaluate remote file name
        log.warn("Unable to get remote file name from {} [resource Name {}]",source.getUrl(),source.getName(),lastException);
    }
}
/**
 * Reads the file name advertised by the server in the {@code Content-Disposition} response
 * header of the given URL.
 * <p>
 * Only the quoted form {@code filename="..."} is recognized.
 *
 * @param url the (HTTP) URL to query
 * @return the advertised file name, or the empty string when the header is missing or does
 *         not contain a quoted {@code filename}
 * @throws IOException if the connection cannot be established
 */
private static final String getFilenameFromURL(URL url) throws IOException {
    HttpURLConnection con = (HttpURLConnection) url.openConnection();
    try {
        con.setRequestMethod("GET");
        String contentDisp= con.getHeaderField("Content-Disposition");
        // The header is optional: its absence is not an error
        if(contentDisp==null) return "";
        Matcher m = FILENAME_IN_DEPOSITION_REGEXP.matcher(contentDisp);
        // group(0) is only legal after a successful find()
        return m.find()?m.group(0):"";
    }finally {
        // Only the headers are needed: release the connection
        con.disconnect();
    }
}
}

View File

@ -1,7 +1,5 @@
package org.gcube.tests;
package org.gcube.data.publishing.ckan2zenodo;
import org.gcube.data.publishing.ckan2zenodo.Ckan2Zenodo;
import org.gcube.data.publishing.ckan2zenodo.Ckan2ZenodoImpl;
import org.gcube.data.publishing.ckan2zenodo.model.faults.*;
import org.gcube.data.publishing.ckan2zenodo.model.report.EnvironmentReport;
import org.junit.Assume;

View File

@ -1,4 +1,4 @@
package org.gcube.tests;
package org.gcube.data.publishing.ckan2zenodo;
import java.net.MalformedURLException;

View File

@ -1,4 +1,4 @@
package org.gcube.tests;
package org.gcube.data.publishing.ckan2zenodo;
import org.gcube.data.publishing.ckan2zenodo.clients.Zenodo;
import org.gcube.data.publishing.ckan2zenodo.model.ZenodoCredentials;

View File

@ -1,4 +1,4 @@
package org.gcube.tests;
package org.gcube.data.publishing.ckan2zenodo;
import java.net.MalformedURLException;
import java.util.ArrayList;
@ -7,8 +7,6 @@ import java.util.List;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.Future;
import org.gcube.data.publishing.ckan2zenodo.Ckan2Zenodo;
import org.gcube.data.publishing.ckan2zenodo.Ckan2ZenodoImpl;
import org.gcube.data.publishing.ckan2zenodo.clients.GCat;
import org.gcube.data.publishing.ckan2zenodo.model.CkanItemDescriptor;
import org.gcube.data.publishing.ckan2zenodo.model.CkanResource;

View File

@ -1,26 +1,19 @@
package org.gcube.tests;
package org.gcube.data.publishing.ckan2zenodo;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import org.gcube.data.publishing.ckan2zenodo.Fixer;
import org.gcube.data.publishing.ckan2zenodo.commons.Parsing;
import org.gcube.data.publishing.ckan2zenodo.model.CkanItemDescriptor;
import org.gcube.data.publishing.ckan2zenodo.model.CkanRelatedIdentifier;
import org.gcube.data.publishing.ckan2zenodo.model.faults.ConfigurationException;
import org.gcube.data.publishing.ckan2zenodo.model.faults.InvalidItemException;
import org.gcube.data.publishing.ckan2zenodo.model.zenodo.DepositionMetadata;
import org.gcube.data.publishing.ckan2zenodo.model.zenodo.RelatedIdentifier;
import org.gcube.data.publishing.ckan2zenodo.model.zenodo.ZenodoDeposition;
import org.junit.Assert;
import org.junit.BeforeClass;
import org.junit.Test;
import com.fasterxml.jackson.annotation.JsonInclude.Include;
import com.fasterxml.jackson.core.JsonParseException;
import com.fasterxml.jackson.databind.JsonMappingException;
import com.fasterxml.jackson.databind.ObjectMapper;

View File

@ -1,14 +1,10 @@
package org.gcube.tests;
package org.gcube.data.publishing.ckan2zenodo;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import org.gcube.common.resources.gcore.GenericResource;
import org.gcube.common.resources.gcore.Resources;
import org.gcube.data.publishing.ckan2zenodo.Translator;
import org.gcube.data.publishing.ckan2zenodo.commons.IS;
import org.gcube.data.publishing.ckan2zenodo.model.faults.ConfigurationException;
import org.gcube.data.publishing.ckan2zenodo.model.parsing.Mappings;
import org.junit.Test;

View File

@ -1,4 +1,4 @@
package org.gcube.tests;
package org.gcube.data.publishing.ckan2zenodo;
import java.io.IOException;
import java.io.PrintStream;
@ -7,8 +7,6 @@ import java.nio.file.Paths;
import java.util.HashMap;
import java.util.Map;
import org.gcube.data.publishing.ckan2zenodo.Fixer;
import org.gcube.data.publishing.ckan2zenodo.Translator;
import org.gcube.data.publishing.ckan2zenodo.model.CkanItemDescriptor;
import org.gcube.data.publishing.ckan2zenodo.model.CkanResource;
import org.gcube.data.publishing.ckan2zenodo.model.ZenodoCredentials;

View File

@ -1,4 +1,4 @@
package org.gcube.tests;
package org.gcube.data.publishing.ckan2zenodo;
import java.util.Properties;

View File

@ -1,4 +1,4 @@
package org.gcube.tests;
package org.gcube.data.publishing.ckan2zenodo;
import static org.junit.Assert.assertTrue;
@ -9,7 +9,6 @@ import java.util.Map.Entry;
import org.gcube.common.resources.gcore.GenericResource;
import org.gcube.common.resources.gcore.Resources;
import org.gcube.data.publishing.ckan2zenodo.Translator;
import org.gcube.data.publishing.ckan2zenodo.commons.IS;
import org.gcube.data.publishing.ckan2zenodo.model.CkanItemDescriptor;
import org.gcube.data.publishing.ckan2zenodo.model.CkanResource;
@ -17,7 +16,6 @@ import org.gcube.data.publishing.ckan2zenodo.model.faults.ConfigurationException
import org.gcube.data.publishing.ckan2zenodo.model.parsing.Filter;
import org.gcube.data.publishing.ckan2zenodo.model.parsing.Mapping;
import org.gcube.data.publishing.ckan2zenodo.model.parsing.Mappings;
import org.gcube.data.publishing.ckan2zenodo.model.parsing.Regexp;
import org.junit.BeforeClass;
import org.junit.Test;

View File

@ -1,4 +1,4 @@
package org.gcube.tests;
package org.gcube.data.publishing.ckan2zenodo;
import java.io.IOException;
import java.util.Arrays;
@ -8,10 +8,8 @@ import java.util.Map.Entry;
import org.gcube.common.resources.gcore.GenericResource;
import org.gcube.common.resources.gcore.Resources;
import org.gcube.data.publishing.ckan2zenodo.Translator;
import org.gcube.data.publishing.ckan2zenodo.clients.Zenodo;
import org.gcube.data.publishing.ckan2zenodo.commons.IS;
import org.gcube.data.publishing.ckan2zenodo.commons.Net;
import org.gcube.data.publishing.ckan2zenodo.model.CkanItemDescriptor;
import org.gcube.data.publishing.ckan2zenodo.model.CkanRelatedIdentifier;
import org.gcube.data.publishing.ckan2zenodo.model.CkanResource;
@ -89,8 +87,8 @@ public class ZenodoTests {
CkanItemDescriptor desc=new CkanItemDescriptor(json);
for(CkanResource cRes:tran.filterResources(desc)) {
DownloadedFile f=Net.download(cRes);
FileDeposition file=z.uploadFile(dep, cRes.getName(), f.getF());
DownloadedFile f=new DownloadedFile(cRes);
FileDeposition file=z.uploadFile(dep, cRes.getName(), f.getFile());
System.out.println("Published "+file);
}
@ -117,9 +115,9 @@ public class ZenodoTests {
CkanItemDescriptor desc=new CkanItemDescriptor(json);
for(CkanResource cRes:tran.filterResources(desc)) {
DownloadedFile f=Net.download(cRes);
DownloadedFile f=new DownloadedFile(cRes);
FileDeposition file=z.uploadFile(dep, cRes.getName(), f.getF());
FileDeposition file=z.uploadFile(dep, cRes.getName(), f.getFile());
System.out.println("Published "+file);
}
dep=z.publish(dep);

View File

@ -0,0 +1,48 @@
package org.gcube.data.publishing.ckan2zenodo.model;
import org.junit.Test;
import static junit.framework.TestCase.assertTrue;
/**
 * Verifies the file name that {@link DownloadedFile} evaluates for a resource, with and
 * without an extension in the resource name, against both a file URL and a folder URL.
 */
public class NetTests {

    @Test
    public void testfileNames() throws Exception {
        CkanResource resource=new CkanResource();
        resource.setName("Deliverable");
        resource.setDescription("My description");
        resource.setId("resource_id");

        // URL of a PDF: the extension is expected to be taken from the remote file
        resource.setUrl("https://data-pre.d4science.net/RgA7");
        check(new DownloadedFile(resource),"Deliverable.pdf",true);

        // Resource name already has an extension: it is expected to be kept as is
        resource.setName("Deliverable.rtf");
        check(new DownloadedFile(resource),resource.getName(),true);

        // Folder URL, i.e. no remote file name can be evaluated
        resource.setUrl("http://data-pre.d4science.org/workspace-explorer-app?folderId=UjV1MTJ4K2lvQU5MRE1MT2NCOEVGWDkvMG5SL2dwY3A0QmpWZmdRVEFxR3Njd2cwcUxUQ3BBZzZxa1FhN3JQTQ");
        // The resource name is still expected to be used
        check(new DownloadedFile(resource),resource.getName(),true);

        // Folder URL and no extension in the resource name
        resource.setName("Deliverable");
        // No extension is expected at all
        check(new DownloadedFile(resource),resource.getName(),false);
    }

    /**
     * Asserts that the file name evaluated for {@code f} is not null, equals
     * {@code expectedFilename} and has (or lacks) an extension as stated by
     * {@code expectExtension}.
     */
    private static final void check(DownloadedFile f, String expectedFilename, boolean expectExtension) throws Exception {
        System.out.println(f);
        final String evaluatedName=f.getToUseFileName();
        System.out.println("Resulting filename is : "+evaluatedName);
        assertTrue(evaluatedName!=null);

        final boolean hasExtension=DownloadedFile.getExtension(evaluatedName)!=null;
        assertTrue(hasExtension==expectExtension);

        assertTrue(evaluatedName.equals(expectedFilename));
    }
}