22889 #4

Merged
fabio.sinibaldi merged 8 commits from 22889 into master 2023-03-28 12:35:25 +02:00
15 changed files with 124 additions and 35 deletions
Showing only changes of commit d4799492a3 - Show all commits

View File

@ -1,6 +1,11 @@
This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
# Changelog for org.gcube.data.publishing.ckan2zenodo-library
## [v1.0.3-SNAPSHOT] 2022-03-01
- Extensions from URL [#22889](https://support.d4science.org/issues/22889)
## [v1.0.2] 2021-07-30
- Introduced environment check [#19990](https://support.d4science.org/issues/19990)

View File

@ -8,7 +8,7 @@
</parent>
<groupId>org.gcube.data.publishing</groupId>
<artifactId>ckan2zenodo-library</artifactId>
<version>1.0.2</version>
<version>1.0.3-SNAPSHOT</version>
<name>CKAN 2 Zenodo Library</name>
<description>Library to publish d4science CKAN items into Zenodo</description>
@ -59,6 +59,13 @@
<version>1.14.8</version>
</dependency>
<!-- https://mvnrepository.com/artifact/commons-io/commons-io -->
<dependency>
<groupId>commons-io</groupId>
<artifactId>commons-io</artifactId>
<version>1.4.0</version>
</dependency>
<dependency>
<groupId>org.glassfish.jersey.media</groupId>
<artifactId>jersey-media-json-jackson</artifactId>

View File

@ -1,12 +1,16 @@
package org.gcube.data.publishing.ckan2zenodo.commons;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.net.HttpURLConnection;
import java.net.URL;
import java.nio.file.Files;
import java.nio.file.StandardCopyOption;
import java.security.DigestInputStream;
import java.security.MessageDigest;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.gcube.data.publishing.ckan2zenodo.model.CkanResource;
import org.gcube.data.publishing.ckan2zenodo.model.DownloadedFile;
@ -16,6 +20,9 @@ import lombok.extern.slf4j.Slf4j;
@Slf4j
public class Net {
private static final Pattern FILENAME_IN_DEPOSITION_REGEXP = Pattern.compile("(?<=filename\\=\\\").*(?=\\\")");
public static DownloadedFile download(CkanResource toDownload) throws Exception {
String urlString=toDownload.getUrl();
log.debug("Downloading "+urlString);
@ -28,10 +35,17 @@ public class Net {
InputStream is=null;
int attempt=0;
Exception lastException=null;
String remoteFileName=null;
while(is==null&&attempt<5) {
try {
attempt++;
is=url.openStream();
if(remoteFileName == null)
remoteFileName = getFilenameFromURL(url);
}catch(Exception e) {
lastException=e;
try{
@ -40,17 +54,30 @@ public class Net {
}
}
if(is==null) throw new Exception("Unable to download "+urlString,lastException);
if(remoteFileName == null)
remoteFileName = ""; // Unable to evaluate from HEAD
DigestInputStream dis = new DigestInputStream(is, md);
// Download
long size=Files.copy(is, temp.toPath(),StandardCopyOption.REPLACE_EXISTING);
return new DownloadedFile(toDownload,temp,dis.getMessageDigest().toString());
return new DownloadedFile(toDownload,temp,dis.getMessageDigest().toString(),remoteFileName);
}
/**
 * Looks up the file name advertised by the server in the
 * {@code Content-Disposition} response header of the given URL.
 *
 * @param url the location to query
 * @return the text matched by {@code FILENAME_IN_DEPOSITION_REGEXP} inside the header,
 *         or {@code null} when the URL is not served through an HTTP connection,
 *         the header is missing, or the header carries no quoted filename
 * @throws IOException if the connection cannot be opened or configured
 */
private static final String getFilenameFromURL(URL url) throws IOException {
    // Non-HTTP schemes (e.g. file:) have no Content-Disposition header to inspect.
    java.net.URLConnection rawConnection = url.openConnection();
    if (!(rawConnection instanceof HttpURLConnection)) {
        return null;
    }
    HttpURLConnection con = (HttpURLConnection) rawConnection;
    try {
        con.setRequestMethod("GET");
        // getHeaderField implicitly connects and returns null when the header is absent.
        String contentDisp = con.getHeaderField("Content-Disposition");
        if (contentDisp == null) {
            return null;
        }
        Matcher m = FILENAME_IN_DEPOSITION_REGEXP.matcher(contentDisp);
        // group(0) is only legal after a successful find().
        return m.find() ? m.group(0) : null;
    } finally {
        // Only the headers are needed: release the connection instead of leaking it.
        con.disconnect();
    }
}
}

View File

@ -16,5 +16,26 @@ public class DownloadedFile {
private CkanResource source;
private File f;
private String MD5;
private String remoteFileName;
/**
 * Resolves the file name to use for this downloaded resource.
 *
 * @return the source resource name as-is when it already has an extension;
 *         otherwise the source name followed by the extension of the remote file name,
 *         or the plain source name when the remote file name has no extension either
 */
public String getToUseFileName(){
    final String resourceName = source.getName();
    // The name declared on the resource wins whenever it already carries an extension.
    if (getExtension(resourceName) != null) {
        return resourceName;
    }
    // Otherwise borrow the extension (if any) from the remote file name.
    final String remoteExtension = getExtension(remoteFileName);
    return remoteExtension == null ? resourceName : resourceName + remoteExtension;
}
/**
 * Extracts the extension of a file name, i.e. everything from the last
 * {@code '.'} (included) to the end of the string.
 *
 * @param filename the file name to inspect, may be {@code null}
 * @return the extension including its leading dot (e.g. {@code ".pdf"}),
 *         or {@code null} when {@code filename} is {@code null} or contains no dot
 */
static final String getExtension(String filename){
    // A missing name simply has no extension; avoid a NullPointerException.
    if (filename == null) {
        return null;
    }
    int lastIndexOf = filename.lastIndexOf(".");
    if (lastIndexOf == -1) {
        return null; // no extension
    }
    return filename.substring(lastIndexOf);
}
}

View File

@ -1,7 +1,5 @@
package org.gcube.tests;
package org.gcube.data.publishing.ckan2zenodo;
import org.gcube.data.publishing.ckan2zenodo.Ckan2Zenodo;
import org.gcube.data.publishing.ckan2zenodo.Ckan2ZenodoImpl;
import org.gcube.data.publishing.ckan2zenodo.model.faults.*;
import org.gcube.data.publishing.ckan2zenodo.model.report.EnvironmentReport;
import org.junit.Assume;

View File

@ -1,4 +1,4 @@
package org.gcube.tests;
package org.gcube.data.publishing.ckan2zenodo;
import java.net.MalformedURLException;

View File

@ -1,4 +1,4 @@
package org.gcube.tests;
package org.gcube.data.publishing.ckan2zenodo;
import org.gcube.data.publishing.ckan2zenodo.clients.Zenodo;
import org.gcube.data.publishing.ckan2zenodo.model.ZenodoCredentials;

View File

@ -1,4 +1,4 @@
package org.gcube.tests;
package org.gcube.data.publishing.ckan2zenodo;
import java.net.MalformedURLException;
import java.util.ArrayList;
@ -7,8 +7,6 @@ import java.util.List;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.Future;
import org.gcube.data.publishing.ckan2zenodo.Ckan2Zenodo;
import org.gcube.data.publishing.ckan2zenodo.Ckan2ZenodoImpl;
import org.gcube.data.publishing.ckan2zenodo.clients.GCat;
import org.gcube.data.publishing.ckan2zenodo.model.CkanItemDescriptor;
import org.gcube.data.publishing.ckan2zenodo.model.CkanResource;

View File

@ -1,26 +1,19 @@
package org.gcube.tests;
package org.gcube.data.publishing.ckan2zenodo;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import org.gcube.data.publishing.ckan2zenodo.Fixer;
import org.gcube.data.publishing.ckan2zenodo.commons.Parsing;
import org.gcube.data.publishing.ckan2zenodo.model.CkanItemDescriptor;
import org.gcube.data.publishing.ckan2zenodo.model.CkanRelatedIdentifier;
import org.gcube.data.publishing.ckan2zenodo.model.faults.ConfigurationException;
import org.gcube.data.publishing.ckan2zenodo.model.faults.InvalidItemException;
import org.gcube.data.publishing.ckan2zenodo.model.zenodo.DepositionMetadata;
import org.gcube.data.publishing.ckan2zenodo.model.zenodo.RelatedIdentifier;
import org.gcube.data.publishing.ckan2zenodo.model.zenodo.ZenodoDeposition;
import org.junit.Assert;
import org.junit.BeforeClass;
import org.junit.Test;
import com.fasterxml.jackson.annotation.JsonInclude.Include;
import com.fasterxml.jackson.core.JsonParseException;
import com.fasterxml.jackson.databind.JsonMappingException;
import com.fasterxml.jackson.databind.ObjectMapper;

View File

@ -1,14 +1,10 @@
package org.gcube.tests;
package org.gcube.data.publishing.ckan2zenodo;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import org.gcube.common.resources.gcore.GenericResource;
import org.gcube.common.resources.gcore.Resources;
import org.gcube.data.publishing.ckan2zenodo.Translator;
import org.gcube.data.publishing.ckan2zenodo.commons.IS;
import org.gcube.data.publishing.ckan2zenodo.model.faults.ConfigurationException;
import org.gcube.data.publishing.ckan2zenodo.model.parsing.Mappings;
import org.junit.Test;

View File

@ -1,4 +1,4 @@
package org.gcube.tests;
package org.gcube.data.publishing.ckan2zenodo;
import java.io.IOException;
import java.io.PrintStream;
@ -7,8 +7,6 @@ import java.nio.file.Paths;
import java.util.HashMap;
import java.util.Map;
import org.gcube.data.publishing.ckan2zenodo.Fixer;
import org.gcube.data.publishing.ckan2zenodo.Translator;
import org.gcube.data.publishing.ckan2zenodo.model.CkanItemDescriptor;
import org.gcube.data.publishing.ckan2zenodo.model.CkanResource;
import org.gcube.data.publishing.ckan2zenodo.model.ZenodoCredentials;

View File

@ -1,4 +1,4 @@
package org.gcube.tests;
package org.gcube.data.publishing.ckan2zenodo;
import java.util.Properties;

View File

@ -1,4 +1,4 @@
package org.gcube.tests;
package org.gcube.data.publishing.ckan2zenodo;
import static org.junit.Assert.assertTrue;
@ -9,7 +9,6 @@ import java.util.Map.Entry;
import org.gcube.common.resources.gcore.GenericResource;
import org.gcube.common.resources.gcore.Resources;
import org.gcube.data.publishing.ckan2zenodo.Translator;
import org.gcube.data.publishing.ckan2zenodo.commons.IS;
import org.gcube.data.publishing.ckan2zenodo.model.CkanItemDescriptor;
import org.gcube.data.publishing.ckan2zenodo.model.CkanResource;
@ -17,7 +16,6 @@ import org.gcube.data.publishing.ckan2zenodo.model.faults.ConfigurationException
import org.gcube.data.publishing.ckan2zenodo.model.parsing.Filter;
import org.gcube.data.publishing.ckan2zenodo.model.parsing.Mapping;
import org.gcube.data.publishing.ckan2zenodo.model.parsing.Mappings;
import org.gcube.data.publishing.ckan2zenodo.model.parsing.Regexp;
import org.junit.BeforeClass;
import org.junit.Test;

View File

@ -1,4 +1,4 @@
package org.gcube.tests;
package org.gcube.data.publishing.ckan2zenodo;
import java.io.IOException;
import java.util.Arrays;
@ -8,7 +8,6 @@ import java.util.Map.Entry;
import org.gcube.common.resources.gcore.GenericResource;
import org.gcube.common.resources.gcore.Resources;
import org.gcube.data.publishing.ckan2zenodo.Translator;
import org.gcube.data.publishing.ckan2zenodo.clients.Zenodo;
import org.gcube.data.publishing.ckan2zenodo.commons.IS;
import org.gcube.data.publishing.ckan2zenodo.commons.Net;

View File

@ -0,0 +1,49 @@
package org.gcube.data.publishing.ckan2zenodo.model;
import org.gcube.data.publishing.ckan2zenodo.commons.Net;
import org.junit.Test;
import static junit.framework.TestCase.assertTrue;
/**
 * Checks the file name resolved for resources downloaded through {@link Net#download}.
 * The test performs real downloads against the URLs below.
 */
public class NetTests {

    @Test
    public void testfileNames() throws Exception {
        CkanResource resource = new CkanResource();
        resource.setName("Deliverable");
        resource.setDescription("My description");
        resource.setId("resource_id");

        // Resource name without extension: the resolved name is expected to gain ".pdf".
        resource.setUrl("https://data-pre.d4science.net/RgA7");
        check(Net.download(resource), "Deliverable.pdf", true);

        // Resource name already has an extension: it is expected to be used unchanged.
        resource.setName("Deliverable.rtf");
        check(Net.download(resource), resource.getName(), true);

        // Folder URL: the resource name (with its own extension) is still expected.
        resource.setUrl("http://data-pre.d4science.org/workspace-explorer-app?folderId=UjV1MTJ4K2lvQU5MRE1MT2NCOEVGWDkvMG5SL2dwY3A0QmpWZmdRVEFxR3Njd2cwcUxUQ3BBZzZxa1FhN3JQTQ");
        check(Net.download(resource), resource.getName(), true);

        // Folder URL and extension-less resource name: no extension is expected at all.
        resource.setName("Deliverable");
        check(Net.download(resource), resource.getName(), false);
    }

    /**
     * Verifies the names exposed by a downloaded file.
     *
     * @param f the downloaded file under test
     * @param expectedFilename the exact value expected from {@code getToUseFileName()}
     * @param expectExtension whether the resolved name must carry an extension
     */
    private static final void check(DownloadedFile f, String expectedFilename, boolean expectExtension){
        System.out.println(f);
        final String resolvedName = f.getToUseFileName();
        System.out.println("Resulting filename is : " + resolvedName);

        assertTrue(f.getRemoteFileName() != null);
        assertTrue(resolvedName != null);

        // Presence of an extension must match the caller's expectation.
        final boolean hasExtension = DownloadedFile.getExtension(resolvedName) != null;
        assertTrue(hasExtension == expectExtension);

        assertTrue(resolvedName.equals(expectedFilename));
    }
}