OAI to CKAN
This commit is contained in:
parent
7d1a61cb6e
commit
fb14ae68c7
|
@ -84,7 +84,6 @@
|
|||
<dependency>
|
||||
<groupId>org.codehaus.jackson</groupId>
|
||||
<artifactId>jackson-mapper-asl</artifactId>
|
||||
<version>1.9.13</version>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
|
||||
|
|
|
@ -93,7 +93,7 @@ public class GCatModel implements CatalogueFormatData {
|
|||
|
||||
}
|
||||
|
||||
|
||||
@Setter
|
||||
private String profile=profileXML;
|
||||
|
||||
private CkanItem item=null;
|
||||
|
|
|
@ -0,0 +1,39 @@
|
|||
package org.gcube.data.publishing.gCatFeeder.utils;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.net.URL;
|
||||
import java.nio.charset.Charset;
|
||||
import java.nio.file.Paths;
|
||||
|
||||
/**
 * Small static helpers for locating classpath resources, reading files and
 * deriving a bare name from a path.
 */
public class Files {

	/**
	 * Resolves a classpath resource to a {@link File}.
	 *
	 * @param fileName resource name, looked up through this class' class loader
	 * @return a {@link File} pointing at the resource
	 * @throws IllegalArgumentException if the class loader cannot find the resource
	 */
	public static File getFileFromResources(String fileName) {
		ClassLoader classLoader = Files.class.getClassLoader();
		URL resource = classLoader.getResource(fileName);
		if (resource == null) {
			throw new IllegalArgumentException("file is not found! (" + fileName + ")");
		}
		if ("file".equals(resource.getProtocol())) {
			try {
				// Going through the URI decodes percent-escapes (e.g. %20 for a space),
				// which URL.getFile() leaves in place.
				return new File(resource.toURI());
			} catch (java.net.URISyntaxException | IllegalArgumentException e) {
				// Not convertible: fall back to the raw URL path below.
			}
		}
		return new File(resource.getFile());
	}

	/**
	 * Reads the whole file at {@code path} and decodes it with {@code encoding}.
	 *
	 * @param path     file system path of the file to read
	 * @param encoding charset used to decode the bytes
	 * @return the file content as a string
	 * @throws IOException if the file cannot be read
	 */
	public static String readFileAsString(String path, Charset encoding)
			throws IOException
	{
		byte[] encoded = java.nio.file.Files.readAllBytes(Paths.get(path));
		return new String(encoded, encoding);
	}

	/**
	 * Returns the last path component without its extension, i.e. the text
	 * between the last {@link File#separator} and the last '.' that follows it.
	 * When there is no such '.', everything after the last separator is returned.
	 *
	 * @param path path to extract the name from
	 * @return the bare name (may be empty, e.g. for {@code ".hidden"})
	 */
	public static String getName(String path) {
		// lastIndexOf yields -1 when there is no separator, so start becomes 0.
		int start = path.lastIndexOf(File.separator) + 1;
		int dot = path.lastIndexOf('.');
		// A '.' located in a directory component must not be taken as the
		// extension marker: it would make the end index smaller than the start.
		int end = (dot >= start) ? dot : path.length();
		return path.substring(start, end);
	}
}
|
|
@ -0,0 +1,116 @@
|
|||
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
|
||||
<modelVersion>4.0.0</modelVersion>
|
||||
<parent>
|
||||
<groupId>org.gcube.data-publishing.gCat-Feeder</groupId>
|
||||
<artifactId>gCat-Feeder-Suite</artifactId>
|
||||
<version>1.0.1</version>
|
||||
</parent>
|
||||
<artifactId>oai-harvester</artifactId>
|
||||
<name>oai-harvester</name>
|
||||
|
||||
<properties>
|
||||
<gitBaseUrl>https://code-repo.d4science.org/gCubeSystem</gitBaseUrl>
|
||||
</properties>
|
||||
|
||||
<scm>
|
||||
<connection>scm:git:${gitBaseUrl}/gFeed</connection>
|
||||
<developerConnection>scm:git:${gitBaseUrl}/gFeed</developerConnection>
|
||||
<url>${gitBaseUrl}/gFeed</url>
|
||||
</scm>
|
||||
|
||||
<dependencyManagement>
|
||||
<dependencies>
|
||||
<dependency>
|
||||
<groupId>org.gcube.distribution</groupId>
|
||||
<artifactId>gcube-bom</artifactId>
|
||||
<version>1.4.0</version>
|
||||
<type>pom</type>
|
||||
<scope>import</scope>
|
||||
</dependency>
|
||||
|
||||
</dependencies>
|
||||
</dependencyManagement>
|
||||
|
||||
<dependencies>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.gcube.common</groupId>
|
||||
<artifactId>common-authorization</artifactId>
|
||||
<scope>provided</scope>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.gcube.core</groupId>
|
||||
<artifactId>common-scope</artifactId>
|
||||
<scope>provided</scope>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.gcube.data-publishing.gCat-Feeder</groupId>
|
||||
<artifactId>collectors-plugin-framework</artifactId>
|
||||
<version>[1.0.0,2.0.0-SNAPSHOT)</version>
|
||||
<scope>provided</scope>
|
||||
</dependency>
|
||||
|
||||
|
||||
<dependency>
|
||||
<groupId>com.fasterxml.jackson.core</groupId>
|
||||
<artifactId>jackson-databind</artifactId>
|
||||
<scope>provided</scope>
|
||||
</dependency>
|
||||
|
||||
|
||||
|
||||
<dependency>
|
||||
<groupId>org.slf4j</groupId>
|
||||
<artifactId>slf4j-api</artifactId>
|
||||
<scope>provided</scope>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.gcube.data-publishing</groupId>
|
||||
<artifactId>gcat-client</artifactId>
|
||||
<version>[1.2.0,2.0.0-SNAPSHOT)</version>
|
||||
</dependency>
|
||||
|
||||
|
||||
<!-- TEST -->
|
||||
<dependency>
|
||||
<groupId>org.gcube.data-publishing.gCat-Feeder</groupId>
|
||||
<artifactId>test-commons</artifactId>
|
||||
<version>[1.0.0,2.0.0-SNAPSHOT)</version>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
|
||||
|
||||
<dependency>
|
||||
<groupId>org.codehaus.jackson</groupId>
|
||||
<artifactId>jackson-mapper-asl</artifactId>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
|
||||
|
||||
</dependencies>
|
||||
|
||||
<build>
|
||||
<plugins>
|
||||
|
||||
<plugin>
|
||||
<groupId>org.apache.maven.plugins</groupId>
|
||||
<artifactId>maven-assembly-plugin</artifactId>
|
||||
<executions>
|
||||
<execution>
|
||||
<id>make-uberjar</id>
|
||||
<phase>package</phase>
|
||||
</execution>
|
||||
<execution>
|
||||
<id>make-servicearchive</id>
|
||||
<phase>package</phase>
|
||||
</execution>
|
||||
</executions>
|
||||
</plugin>
|
||||
|
||||
</plugins>
|
||||
</build>
|
||||
|
||||
</project>
|
|
@ -0,0 +1,10 @@
|
|||
package org.gcube.data.publishing.gFeed.collectors.oai;
|
||||
|
||||
/**
 * String identifiers shared by the classes of the OAI collector plugin.
 */
public class Constants {

	/** Identifier handed to the plugin descriptor. */
	public static final String PLUGIN_ID = "OAI_COLLECTOR";

	/** Catalogue type key for which the plugin provides a retriever and a transformer. */
	public static final String GCAT_TYPE = "GCAT";

}
|
|
@ -0,0 +1,24 @@
|
|||
package org.gcube.data.publishing.gFeed.collectors.oai;
|
||||
|
||||
import org.gcube.data.publishing.gCatFeeder.model.CatalogueInstanceDescriptor;
|
||||
import org.gcube.data.publishing.gCatfeeder.collectors.CatalogueRetriever;
|
||||
|
||||
public class GCATRetriever implements CatalogueRetriever {
|
||||
|
||||
private static GCATRetriever instance=null;
|
||||
|
||||
static synchronized GCATRetriever get() {
|
||||
if(instance==null) instance =new GCATRetriever();
|
||||
return instance;
|
||||
}
|
||||
|
||||
|
||||
|
||||
@Override
|
||||
public CatalogueInstanceDescriptor getInstance() {
|
||||
// throw new RuntimeException("Implement this");
|
||||
// GCoreEndpoint ep=ISUtils.queryForGCoreEndpoint("NO", "NO");
|
||||
return new CatalogueInstanceDescriptor();
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,126 @@
|
|||
package org.gcube.data.publishing.gFeed.collectors.oai;
|
||||
|
||||
import java.io.StringReader;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
|
||||
import javax.ws.rs.client.Client;
|
||||
import javax.ws.rs.client.ClientBuilder;
|
||||
import javax.ws.rs.client.WebTarget;
|
||||
import javax.ws.rs.core.Response;
|
||||
import javax.xml.bind.JAXB;
|
||||
import javax.xml.bind.JAXBContext;
|
||||
import javax.xml.bind.JAXBElement;
|
||||
import javax.xml.bind.JAXBException;
|
||||
import javax.xml.bind.Unmarshaller;
|
||||
import javax.xml.transform.stream.StreamSource;
|
||||
|
||||
import org.gcube.data.publishing.gFeed.collectors.oai.model.DCRecordMetadata;
|
||||
import org.gcube.data.publishing.gFeed.collectors.oai.model.MetadataHolder;
|
||||
import org.gcube.data.publishing.gFeed.collectors.oai.model.OAIInteractionException;
|
||||
import org.gcube.data.publishing.gFeed.collectors.oai.model.OAIMetadata;
|
||||
import org.gcube.data.publishing.gFeed.collectors.oai.model.OAIRecord;
|
||||
import org.gcube.data.publishing.gFeed.collectors.oai.model.OAI_PMH;
|
||||
import org.gcube.data.publishing.gFeed.collectors.oai.model.OAI_PMH.Token;
|
||||
import org.glassfish.jersey.client.ClientProperties;
|
||||
|
||||
import lombok.NonNull;
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
|
||||
@Slf4j
|
||||
@RequiredArgsConstructor
|
||||
public class OAIClient {
|
||||
|
||||
private static JAXBContext jaxbContext=null;
|
||||
|
||||
|
||||
private static synchronized JAXBContext getContext() throws JAXBException {
|
||||
if(jaxbContext==null)
|
||||
jaxbContext = JAXBContext.newInstance(OAIRecord.class,
|
||||
MetadataHolder.class,
|
||||
OAIMetadata.class,
|
||||
DCRecordMetadata.class,
|
||||
OAI_PMH.class);
|
||||
return jaxbContext;
|
||||
}
|
||||
|
||||
|
||||
|
||||
public static final String DC_METADATA_PREFIX="oai_dc";
|
||||
|
||||
@NonNull
|
||||
private String baseUrl;
|
||||
|
||||
|
||||
Client client;
|
||||
|
||||
private synchronized Client getWebClient() {
|
||||
if(client==null) {
|
||||
client = ClientBuilder.newClient()
|
||||
.property(ClientProperties.SUPPRESS_HTTP_COMPLIANCE_VALIDATION, true);
|
||||
}
|
||||
return client;
|
||||
}
|
||||
|
||||
|
||||
|
||||
public Collection<OAIRecord> getAll(String metadataPrefix) throws JAXBException, OAIInteractionException{
|
||||
ArrayList<OAIRecord> toReturn=new ArrayList<OAIRecord>();
|
||||
|
||||
String resumptionToken=null;
|
||||
|
||||
// call & iterate
|
||||
boolean isComplete=false;
|
||||
while(!isComplete) {
|
||||
|
||||
WebTarget target=getWebClient().target(baseUrl).
|
||||
queryParam("verb","ListRecords");
|
||||
|
||||
|
||||
|
||||
if(resumptionToken==null)
|
||||
target=target.queryParam("metadataPrefix",metadataPrefix);
|
||||
else
|
||||
target=target.queryParam("resumptionToken", resumptionToken);
|
||||
|
||||
|
||||
Response resp=target.request("application/xml").get();
|
||||
|
||||
OAI_PMH msg=check(resp);
|
||||
|
||||
if(msg.isError()) throw new OAIInteractionException(msg.getError().getCode()+ " : "+msg.getError().getMessage());
|
||||
|
||||
toReturn.addAll(msg.getResponseRecords().getRecords());
|
||||
|
||||
Token t=msg.getResponseRecords().getResumptionToken();
|
||||
log.debug("Obtained token : "+t);
|
||||
if(t!=null && t.getId()!=null && !t.getId().isEmpty()) {
|
||||
resumptionToken=t.getId();
|
||||
}else isComplete=true; //no token = completion
|
||||
}
|
||||
return toReturn;
|
||||
}
|
||||
|
||||
|
||||
|
||||
private static OAI_PMH check(Response resp) throws JAXBException {
|
||||
if(resp.getStatus()<200||resp.getStatus()>=300) {
|
||||
// exception
|
||||
throw new RuntimeException("Implement fault");
|
||||
}else {
|
||||
|
||||
|
||||
String respString=resp.readEntity(String.class);
|
||||
Unmarshaller jaxbUnmarshaller = getContext().createUnmarshaller();
|
||||
OAI_PMH obj=(OAI_PMH) jaxbUnmarshaller.unmarshal(new StringReader(respString));
|
||||
|
||||
return obj;
|
||||
|
||||
// OAI_PMH response = (OAI_PMH) jaxbUnmarshaller.unmarshal(
|
||||
// new StreamSource(new StringReader(respString)));
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
}
|
|
@ -0,0 +1,42 @@
|
|||
package org.gcube.data.publishing.gFeed.collectors.oai;
|
||||
|
||||
import java.util.HashSet;
|
||||
import java.util.Set;
|
||||
|
||||
import org.gcube.common.resources.gcore.ServiceEndpoint;
|
||||
import org.gcube.data.publishing.gCatFeeder.utils.ISUtils;
|
||||
import org.gcube.data.publishing.gCatfeeder.collectors.DataCollector;
|
||||
import org.gcube.data.publishing.gCatfeeder.collectors.model.faults.CollectorFault;
|
||||
import org.gcube.data.publishing.gFeed.collectors.oai.model.OAIRecord;
|
||||
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
|
||||
@Slf4j
|
||||
public class OAICollector implements DataCollector<OAIRecord> {
|
||||
|
||||
@Override
|
||||
public Set<OAIRecord> collect() throws CollectorFault {
|
||||
try {
|
||||
HashSet<OAIRecord> toReturn=new HashSet<OAIRecord>();
|
||||
String oaiCategory="";
|
||||
String oaiPlatform="";
|
||||
for(ServiceEndpoint epr:ISUtils.queryForServiceEndpoints(oaiCategory, oaiPlatform)) {
|
||||
log.info("Found OAI Repo in resource "+epr.id()+" NAME : "+epr.profile().name());
|
||||
|
||||
String baseUrl=epr.profile().accessPoints().asCollection().iterator().next().address();
|
||||
log.debug("Address is "+baseUrl);
|
||||
OAIClient client = new OAIClient(baseUrl);
|
||||
|
||||
toReturn.addAll(client.getAll(OAIClient.DC_METADATA_PREFIX));
|
||||
}
|
||||
|
||||
return toReturn;
|
||||
}catch(Throwable t) {
|
||||
throw new CollectorFault(t);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
}
|
|
@ -0,0 +1,76 @@
|
|||
package org.gcube.data.publishing.gFeed.collectors.oai;
|
||||
|
||||
import java.util.Collections;
|
||||
import java.util.Set;
|
||||
|
||||
import org.gcube.data.publishing.gCatFeeder.model.CatalogueFormatData;
|
||||
import org.gcube.data.publishing.gCatFeeder.model.ControllerConfiguration;
|
||||
import org.gcube.data.publishing.gCatFeeder.model.EnvironmentConfiguration;
|
||||
import org.gcube.data.publishing.gCatfeeder.collectors.CatalogueRetriever;
|
||||
import org.gcube.data.publishing.gCatfeeder.collectors.CollectorPlugin;
|
||||
import org.gcube.data.publishing.gCatfeeder.collectors.DataCollector;
|
||||
import org.gcube.data.publishing.gCatfeeder.collectors.DataTransformer;
|
||||
import org.gcube.data.publishing.gCatfeeder.collectors.model.PluginDescriptor;
|
||||
import org.gcube.data.publishing.gCatfeeder.collectors.model.faults.CatalogueNotSupportedException;
|
||||
import org.gcube.data.publishing.gFeed.collectors.oai.model.OAIRecord;
|
||||
import org.gcube.data.publishing.gFeed.collectors.oai.model.ckan.GCatTransformer;
|
||||
|
||||
public class OAIHarvester implements CollectorPlugin<OAIRecord>{
|
||||
|
||||
@Override
|
||||
public PluginDescriptor getDescriptor() {
|
||||
return new PluginDescriptor(Constants.PLUGIN_ID);
|
||||
}
|
||||
|
||||
@Override
|
||||
public CatalogueRetriever getRetrieverByCatalogueType(String catalogueType) throws CatalogueNotSupportedException {
|
||||
switch(catalogueType) {
|
||||
case Constants.GCAT_TYPE : return GCATRetriever.get();
|
||||
default : throw new CatalogueNotSupportedException("No support for "+catalogueType);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public Set<String> getSupportedCatalogueTypes() {
|
||||
return Collections.singleton(Constants.GCAT_TYPE);
|
||||
}
|
||||
|
||||
@Override
|
||||
public DataTransformer<? extends CatalogueFormatData, OAIRecord> getTransformerByCatalogueType(String catalogueType)
|
||||
throws CatalogueNotSupportedException {
|
||||
switch(catalogueType) {
|
||||
case Constants.GCAT_TYPE : return new GCatTransformer();
|
||||
default : throw new CatalogueNotSupportedException("No support for "+catalogueType);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public DataCollector<OAIRecord> getCollector() {
|
||||
return new OAICollector();
|
||||
}
|
||||
|
||||
@Override
|
||||
public ControllerConfiguration getPublisherControllerConfiguration(String catalogueType)
|
||||
throws CatalogueNotSupportedException {
|
||||
return new ControllerConfiguration();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void init() throws Exception {
|
||||
// TODO Auto-generated method stub
|
||||
|
||||
}
|
||||
|
||||
@Override
|
||||
public void initInScope() throws Exception {
|
||||
// TODO Auto-generated method stub
|
||||
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setEnvironmentConfiguration(EnvironmentConfiguration env) {
|
||||
// TODO Auto-generated method stub
|
||||
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,59 @@
|
|||
package org.gcube.data.publishing.gFeed.collectors.oai.model;
|
||||
|
||||
import javax.xml.bind.annotation.XmlAccessType;
|
||||
import javax.xml.bind.annotation.XmlAccessorType;
|
||||
import javax.xml.bind.annotation.XmlElement;
|
||||
import javax.xml.bind.annotation.XmlRootElement;
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Getter;
|
||||
import lombok.NoArgsConstructor;
|
||||
import lombok.Setter;
|
||||
import lombok.ToString;
|
||||
|
||||
@NoArgsConstructor
|
||||
@AllArgsConstructor
|
||||
@ToString
|
||||
@Getter
|
||||
@Setter
|
||||
@XmlAccessorType(XmlAccessType.FIELD)
|
||||
@XmlRootElement(name = "dc", namespace="http://www.openarchives.org/OAI/2.0/oai_dc/")
|
||||
public class DCRecordMetadata extends OAIMetadata{
|
||||
|
||||
public static final String NS="http://purl.org/dc/elements/1.1/";
|
||||
|
||||
@XmlElement(namespace=NS)
|
||||
private String contributor;
|
||||
@XmlElement(namespace=NS)
|
||||
private String coverage;
|
||||
@XmlElement(namespace=NS)
|
||||
private String creator;
|
||||
@XmlElement(namespace=NS)
|
||||
private String date;
|
||||
@XmlElement(namespace=NS)
|
||||
private String description;
|
||||
@XmlElement(namespace=NS)
|
||||
private String format;
|
||||
@XmlElement(namespace=NS)
|
||||
private String identifier;
|
||||
@XmlElement(namespace=NS)
|
||||
private String language;
|
||||
@XmlElement(namespace=NS)
|
||||
private String publisher;
|
||||
|
||||
@XmlElement(namespace=NS)
|
||||
private String relation;
|
||||
@XmlElement(namespace=NS)
|
||||
private String rights;
|
||||
@XmlElement(namespace=NS)
|
||||
private String source;
|
||||
@XmlElement(namespace=NS)
|
||||
private String subject;
|
||||
|
||||
@XmlElement(namespace=NS)
|
||||
private String title;
|
||||
@XmlElement(namespace=NS)
|
||||
private String type;
|
||||
|
||||
|
||||
}
|
|
@ -0,0 +1,29 @@
|
|||
package org.gcube.data.publishing.gFeed.collectors.oai.model;
|
||||
|
||||
import javax.xml.bind.annotation.XmlAccessType;
|
||||
import javax.xml.bind.annotation.XmlAccessorType;
|
||||
import javax.xml.bind.annotation.XmlElement;
|
||||
import javax.xml.bind.annotation.XmlElementRef;
|
||||
import javax.xml.bind.annotation.XmlElements;
|
||||
import javax.xml.bind.annotation.XmlRootElement;
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Getter;
|
||||
import lombok.NoArgsConstructor;
|
||||
import lombok.Setter;
|
||||
import lombok.ToString;
|
||||
|
||||
@NoArgsConstructor
|
||||
@AllArgsConstructor
|
||||
@ToString
|
||||
@Getter
|
||||
@Setter
|
||||
@XmlAccessorType(XmlAccessType.FIELD)
|
||||
@XmlRootElement(name = "metadata", namespace="http://www.openarchives.org/OAI/2.0/")
|
||||
public class MetadataHolder{
|
||||
@XmlElementRef
|
||||
// @XmlElements({
|
||||
// @XmlElement(name="dc",namespace="http://www.openarchives.org/OAI/2.0/oai_dc/", type=DCRecordMetadata.class)
|
||||
// })
|
||||
public OAIMetadata metadata;
|
||||
}
|
|
@ -0,0 +1,7 @@
|
|||
package org.gcube.data.publishing.gFeed.collectors.oai.model;
|
||||
|
||||
/**
 * XML namespace URIs used by the OAI model classes.
 */
public class Namespaces {

	/** Namespace of the OAI-PMH protocol elements. */
	public static final String OAI_PMH_NS = "http://www.openarchives.org/OAI/2.0/";

}
|
|
@ -0,0 +1,30 @@
|
|||
package org.gcube.data.publishing.gFeed.collectors.oai.model;
|
||||
|
||||
/**
 * Checked exception signalling a failed interaction with an OAI endpoint.
 * Every constructor simply delegates to the matching {@link Exception} constructor.
 */
public class OAIInteractionException extends Exception {

	/** Creates an exception without message or cause. */
	public OAIInteractionException() {
		super();
	}

	/**
	 * @param message description of the failure
	 */
	public OAIInteractionException(String message) {
		super(message);
	}

	/**
	 * @param cause underlying failure
	 */
	public OAIInteractionException(Throwable cause) {
		super(cause);
	}

	/**
	 * @param message description of the failure
	 * @param cause   underlying failure
	 */
	public OAIInteractionException(String message, Throwable cause) {
		super(message, cause);
	}

	/**
	 * @param message            description of the failure
	 * @param cause              underlying failure
	 * @param enableSuppression  whether suppression is enabled
	 * @param writableStackTrace whether the stack trace is writable
	 */
	public OAIInteractionException(String message, Throwable cause, boolean enableSuppression,
			boolean writableStackTrace) {
		super(message, cause, enableSuppression, writableStackTrace);
	}

}
|
|
@ -0,0 +1,8 @@
|
|||
package org.gcube.data.publishing.gFeed.collectors.oai.model;
|
||||
|
||||
import javax.xml.bind.annotation.XmlSeeAlso;
|
||||
|
||||
/**
 * Common base type of the metadata payloads carried by an OAI record.
 * It declares no members of its own.
 */
public abstract class OAIMetadata {
}
|
|
@ -0,0 +1,49 @@
|
|||
package org.gcube.data.publishing.gFeed.collectors.oai.model;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
import javax.xml.bind.annotation.XmlAccessType;
|
||||
import javax.xml.bind.annotation.XmlAccessorType;
|
||||
import javax.xml.bind.annotation.XmlElement;
|
||||
import javax.xml.bind.annotation.XmlRootElement;
|
||||
|
||||
import org.gcube.data.publishing.gCatfeeder.collectors.model.CustomData;
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Getter;
|
||||
import lombok.NoArgsConstructor;
|
||||
import lombok.Setter;
|
||||
import lombok.ToString;
|
||||
|
||||
@NoArgsConstructor
|
||||
@AllArgsConstructor
|
||||
@ToString
|
||||
@XmlRootElement(name = "record", namespace=Namespaces.OAI_PMH_NS)
|
||||
@XmlAccessorType(XmlAccessType.FIELD)
|
||||
@Getter
|
||||
@Setter
|
||||
public class OAIRecord implements CustomData{
|
||||
|
||||
|
||||
@NoArgsConstructor
|
||||
@AllArgsConstructor
|
||||
@ToString
|
||||
@Getter
|
||||
@Setter
|
||||
@XmlAccessorType(XmlAccessType.FIELD)
|
||||
@XmlRootElement(namespace=Namespaces.OAI_PMH_NS)
|
||||
public static class Header{
|
||||
@XmlElement(namespace=Namespaces.OAI_PMH_NS)
|
||||
private String identifier;
|
||||
@XmlElement(name = "datestamp", namespace=Namespaces.OAI_PMH_NS)
|
||||
private String dateStamp;
|
||||
@XmlElement(namespace=Namespaces.OAI_PMH_NS)
|
||||
private List<String> setSpec;
|
||||
}
|
||||
|
||||
|
||||
@XmlElement(namespace=Namespaces.OAI_PMH_NS)
|
||||
private Header header;
|
||||
@XmlElement(name = "metadata", namespace=Namespaces.OAI_PMH_NS)
|
||||
public MetadataHolder metadata;
|
||||
}
|
|
@ -0,0 +1,103 @@
|
|||
package org.gcube.data.publishing.gFeed.collectors.oai.model;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
import javax.xml.bind.annotation.XmlAccessType;
|
||||
import javax.xml.bind.annotation.XmlAccessorType;
|
||||
import javax.xml.bind.annotation.XmlAttribute;
|
||||
import javax.xml.bind.annotation.XmlElement;
|
||||
import javax.xml.bind.annotation.XmlElementWrapper;
|
||||
import javax.xml.bind.annotation.XmlRootElement;
|
||||
import javax.xml.bind.annotation.XmlValue;
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Getter;
|
||||
import lombok.NoArgsConstructor;
|
||||
import lombok.Setter;
|
||||
import lombok.ToString;
|
||||
|
||||
@NoArgsConstructor
|
||||
@AllArgsConstructor
|
||||
@ToString
|
||||
@XmlRootElement(name="OAI-PMH", namespace=Namespaces.OAI_PMH_NS)
|
||||
//@XmlRootElement(name="OAI-PMH")
|
||||
@XmlAccessorType(XmlAccessType.FIELD)
|
||||
@Getter
|
||||
@Setter
|
||||
public class OAI_PMH {
|
||||
|
||||
@NoArgsConstructor
|
||||
@AllArgsConstructor
|
||||
@ToString
|
||||
@Getter
|
||||
@Setter
|
||||
@XmlAccessorType(XmlAccessType.FIELD)
|
||||
public static class Request{
|
||||
@XmlAttribute
|
||||
private String metadataPrefix;
|
||||
@XmlAttribute
|
||||
private String verb;
|
||||
@XmlValue
|
||||
private String path;
|
||||
|
||||
}
|
||||
|
||||
@NoArgsConstructor
|
||||
@AllArgsConstructor
|
||||
@ToString
|
||||
@Getter
|
||||
@Setter
|
||||
@XmlAccessorType(XmlAccessType.FIELD)
|
||||
public static class Token{
|
||||
@XmlAttribute
|
||||
private int cursor;
|
||||
@XmlValue
|
||||
private String id;
|
||||
}
|
||||
|
||||
|
||||
@NoArgsConstructor
|
||||
@AllArgsConstructor
|
||||
@ToString
|
||||
@Getter
|
||||
@Setter
|
||||
@XmlAccessorType(XmlAccessType.FIELD)
|
||||
public static class ListRecords{
|
||||
@XmlElement(name = "record",namespace=Namespaces.OAI_PMH_NS)
|
||||
private List<OAIRecord> records;
|
||||
@XmlElement(name = "resumptionToken",namespace=Namespaces.OAI_PMH_NS)
|
||||
private Token resumptionToken;
|
||||
}
|
||||
|
||||
|
||||
@NoArgsConstructor
|
||||
@AllArgsConstructor
|
||||
@ToString
|
||||
@Getter
|
||||
@Setter
|
||||
@XmlAccessorType(XmlAccessType.FIELD)
|
||||
public static class Error{
|
||||
@XmlAttribute
|
||||
private String code;
|
||||
@XmlValue
|
||||
private String message;
|
||||
}
|
||||
|
||||
|
||||
@XmlElement(namespace=Namespaces.OAI_PMH_NS)
|
||||
private String responseDate;
|
||||
|
||||
@XmlElement(namespace=Namespaces.OAI_PMH_NS)
|
||||
private Request request;
|
||||
|
||||
@XmlElement(namespace=Namespaces.OAI_PMH_NS)
|
||||
private Error error;
|
||||
|
||||
@XmlElement(name="ListRecords", namespace=Namespaces.OAI_PMH_NS)
|
||||
private ListRecords responseRecords;
|
||||
|
||||
|
||||
public boolean isError() {
|
||||
return error!=null;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,56 @@
|
|||
package org.gcube.data.publishing.gFeed.collectors.oai.model.ckan;
|
||||
|
||||
import java.util.ArrayList;
|
||||
|
||||
import com.fasterxml.jackson.annotation.JsonProperty;
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Getter;
|
||||
import lombok.NoArgsConstructor;
|
||||
import lombok.NonNull;
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.Setter;
|
||||
import lombok.ToString;
|
||||
|
||||
|
||||
@Getter
|
||||
@Setter
|
||||
@NoArgsConstructor
|
||||
public class CkanItem {
|
||||
|
||||
@Getter
|
||||
@Setter
|
||||
@NoArgsConstructor
|
||||
@AllArgsConstructor
|
||||
@ToString
|
||||
public static class CKanExtraField {
|
||||
|
||||
private String key;
|
||||
private String value;
|
||||
}
|
||||
|
||||
@Getter
|
||||
@Setter
|
||||
@NoArgsConstructor
|
||||
@RequiredArgsConstructor
|
||||
@ToString
|
||||
public static class Tag{
|
||||
@NonNull
|
||||
private String name;
|
||||
}
|
||||
|
||||
|
||||
|
||||
private String name;
|
||||
private String title;
|
||||
private String version;
|
||||
@JsonProperty("private")
|
||||
private Boolean privateFlag;
|
||||
private String license_id;
|
||||
private String author;
|
||||
private String maintainer;
|
||||
private String notes;
|
||||
private ArrayList<Tag> tags=new ArrayList<Tag>();
|
||||
|
||||
private ArrayList<CKanExtraField> extras=new ArrayList<>();
|
||||
}
|
|
@ -0,0 +1,58 @@
|
|||
package org.gcube.data.publishing.gFeed.collectors.oai.model.ckan;
|
||||
|
||||
import java.io.ByteArrayOutputStream;
|
||||
import java.util.ArrayList;
|
||||
|
||||
import org.gcube.data.publishing.gCatFeeder.model.CatalogueFormatData;
|
||||
import org.gcube.data.publishing.gCatFeeder.model.InternalConversionException;
|
||||
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Getter;
|
||||
import lombok.NoArgsConstructor;
|
||||
import lombok.Setter;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
|
||||
@Slf4j
|
||||
@NoArgsConstructor
|
||||
@AllArgsConstructor
|
||||
public class GCatModel implements CatalogueFormatData {
|
||||
|
||||
|
||||
@Getter
|
||||
@Setter
|
||||
@NoArgsConstructor
|
||||
@AllArgsConstructor
|
||||
public static class CkanResource {
|
||||
|
||||
private String name;
|
||||
private String url;
|
||||
private String format;
|
||||
private String description;
|
||||
|
||||
}
|
||||
|
||||
|
||||
private static ObjectMapper mapper=new ObjectMapper();
|
||||
|
||||
@Setter
|
||||
private String profile=null;
|
||||
|
||||
@Setter
|
||||
private CkanItem item;
|
||||
|
||||
@Setter
|
||||
private ArrayList<CkanResource> resources=new ArrayList<>();
|
||||
|
||||
@Override
|
||||
public String toCatalogueFormat() throws InternalConversionException {
|
||||
try{
|
||||
ByteArrayOutputStream baos=new ByteArrayOutputStream();
|
||||
mapper.writeValue(baos, this);
|
||||
return baos.toString();
|
||||
}catch(Throwable t) {
|
||||
throw new InternalConversionException("Unable to convert",t);
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,139 @@
|
|||
package org.gcube.data.publishing.gFeed.collectors.oai.model.ckan;
|
||||
|
||||
import java.net.URL;
|
||||
import java.nio.charset.Charset;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.HashSet;
|
||||
import java.util.Set;
|
||||
|
||||
import org.gcube.data.publishing.gCatFeeder.utils.Files;
|
||||
import org.gcube.data.publishing.gCatfeeder.collectors.DataTransformer;
|
||||
import org.gcube.data.publishing.gFeed.collectors.oai.model.DCRecordMetadata;
|
||||
import org.gcube.data.publishing.gFeed.collectors.oai.model.OAIMetadata;
|
||||
import org.gcube.data.publishing.gFeed.collectors.oai.model.OAIRecord;
|
||||
import org.gcube.data.publishing.gFeed.collectors.oai.model.ckan.CkanItem.CKanExtraField;
|
||||
import org.gcube.data.publishing.gFeed.collectors.oai.model.ckan.GCatModel.CkanResource;
|
||||
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
|
||||
@Slf4j
|
||||
public class GCatTransformer implements DataTransformer<GCatModel,OAIRecord>{
|
||||
|
||||
|
||||
|
||||
@Override
|
||||
public Set<GCatModel> transform(Collection<OAIRecord> collectedData) {
|
||||
boolean useProfile=true;
|
||||
|
||||
HashSet<GCatModel> toReturn=new HashSet<>();
|
||||
for(OAIRecord record:collectedData) {
|
||||
GCatModel toPublish=translate(record,useProfile);
|
||||
if(useProfile) {
|
||||
useProfile=false;
|
||||
}
|
||||
toReturn.add(toPublish);
|
||||
}
|
||||
return toReturn;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* (Common) Title
|
||||
* (Common) Description
|
||||
* (Common) Tags: free list of keywords
|
||||
* (Common) License
|
||||
* (Common) Visibility: either public or private
|
||||
* (Common) Version
|
||||
* (Common) Author: the creator of metadata. Only one occurrence is supported;
|
||||
* (Common) Maintainer:
|
||||
* (Method specific) Creator: the author of the method (with email and ORCID). Repeatable field;
|
||||
* (Method specific) Creation date: when the method has been released;
|
||||
* (Method specific) Input: Repeatable field;
|
||||
* (Method specific) Output: Repeatable field;
|
||||
* (Method specific) RelatedPaper: a reference to an associated paper;
|
||||
* (Method specific) Restrictions On Use: an optional text
|
||||
* (Method specific) Attribution requirements: the text to use to acknowledge method usage;
|
||||
*/
|
||||
private static GCatModel translate(OAIRecord toTranslate,Boolean useProfile) {
|
||||
GCatModel toReturn = new GCatModel();
|
||||
|
||||
CkanItem item=new CkanItem();
|
||||
|
||||
item.setName(toTranslate.getHeader().getIdentifier());
|
||||
OAIMetadata meta=toTranslate.getMetadata().getMetadata();
|
||||
if(meta instanceof DCRecordMetadata) {
|
||||
String profileID="Harvested Object";
|
||||
|
||||
DCRecordMetadata dcMeta=(DCRecordMetadata) meta;
|
||||
|
||||
item.setTitle(dcMeta.getTitle());
|
||||
item.setNotes(dcMeta.getDescription());
|
||||
item.setAuthor(dcMeta.getPublisher());
|
||||
item.setMaintainer(dcMeta.getPublisher());
|
||||
item.setVersion("n.a.");
|
||||
item.setPrivateFlag(false);
|
||||
item.setLicense_id("CC-BY-NC-SA-4.0");
|
||||
|
||||
|
||||
item.getExtras().add(new CKanExtraField("system:type", profileID));
|
||||
|
||||
if(dcMeta.getContributor()!=null)
|
||||
item.getExtras().add(new CKanExtraField(profileID+":contributor", dcMeta.getContributor()));
|
||||
if(dcMeta.getCoverage()!=null)
|
||||
item.getExtras().add(new CKanExtraField(profileID+":coverage", dcMeta.getCoverage()));
|
||||
if(dcMeta.getCreator()!=null)
|
||||
item.getExtras().add(new CKanExtraField(profileID+":creator", dcMeta.getCreator()));
|
||||
if(dcMeta.getDate()!=null)
|
||||
item.getExtras().add(new CKanExtraField(profileID+":date", dcMeta.getDate()));
|
||||
if(dcMeta.getDescription()!=null)
|
||||
item.getExtras().add(new CKanExtraField(profileID+":description", dcMeta.getDescription()));
|
||||
if(dcMeta.getFormat()!=null)
|
||||
item.getExtras().add(new CKanExtraField(profileID+":format", dcMeta.getFormat()));
|
||||
if(dcMeta.getIdentifier()!=null)
|
||||
item.getExtras().add(new CKanExtraField(profileID+":identifier", dcMeta.getIdentifier()));
|
||||
if(dcMeta.getLanguage()!=null)
|
||||
item.getExtras().add(new CKanExtraField(profileID+":language", dcMeta.getLanguage()));
|
||||
if(dcMeta.getPublisher()!=null)
|
||||
item.getExtras().add(new CKanExtraField(profileID+":publisher", dcMeta.getPublisher()));
|
||||
if(dcMeta.getRelation()!=null)
|
||||
item.getExtras().add(new CKanExtraField(profileID+":relation", dcMeta.getRelation()));
|
||||
if(dcMeta.getRights()!=null)
|
||||
item.getExtras().add(new CKanExtraField(profileID+":rights", dcMeta.getRights()));
|
||||
if(dcMeta.getSource()!=null)
|
||||
item.getExtras().add(new CKanExtraField(profileID+":source", dcMeta.getSource()));
|
||||
if(dcMeta.getSubject()!=null)
|
||||
item.getExtras().add(new CKanExtraField(profileID+":subject", dcMeta.getSubject()));
|
||||
if(dcMeta.getTitle()!=null)
|
||||
item.getExtras().add(new CKanExtraField(profileID+":title", dcMeta.getTitle()));
|
||||
if(dcMeta.getType()!=null)
|
||||
item.getExtras().add(new CKanExtraField(profileID+":type", dcMeta.getType()));
|
||||
|
||||
|
||||
try {
|
||||
URL url=new URL(dcMeta.getIdentifier());
|
||||
CkanResource res=new CkanResource("Record", url+"", url.getProtocol(), "Original record");
|
||||
ArrayList<CkanResource> list=new ArrayList<>();
|
||||
toReturn.setResources(list);
|
||||
}catch(Throwable t) {
|
||||
log.debug("Unable to set identifier "+dcMeta.getIdentifier()+"as resource ",t);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
toReturn.setItem(item);
|
||||
|
||||
try {
|
||||
if(useProfile)
|
||||
toReturn.setProfile(Files.readFileAsString(
|
||||
Files.getFileFromResources("HarvesterObject.xml").getAbsolutePath(),
|
||||
Charset.defaultCharset())); //"Harvested Object”
|
||||
}catch(Throwable t) {
|
||||
log.error("Unable to set profile ",t);
|
||||
|
||||
}
|
||||
return toReturn;
|
||||
}
|
||||
|
||||
|
||||
}
|
|
@ -0,0 +1 @@
|
|||
org.gcube.data.publishing.gFeed.collectors.oai.OAIHarvester.java
|
|
@ -0,0 +1,99 @@
|
|||
<!--
|
||||
/elements/1.1
|
||||
|
||||
contributor, coverage, creator, date, description, format,
|
||||
identifier, language, publisher, relation, rights, source, subject,
|
||||
title, type -->
|
||||
|
||||
<metadataformat type="Harvested Object">
|
||||
<metadatafield categoryref="Harvested Object">
|
||||
<fieldName>contributor</fieldName>
|
||||
<maxOccurs>*</maxOccurs>
|
||||
<mandatory>false</mandatory>
|
||||
<dataType>String</dataType>
|
||||
</metadatafield>
|
||||
<metadatafield categoryref="Harvested Object">
|
||||
<fieldName>coverage</fieldName>
|
||||
<maxOccurs>*</maxOccurs>
|
||||
<mandatory>false</mandatory>
|
||||
<dataType>String</dataType>
|
||||
</metadatafield>
|
||||
<metadatafield categoryref="Harvested Object">
|
||||
<fieldName>creator</fieldName>
|
||||
<maxOccurs>*</maxOccurs>
|
||||
<mandatory>false</mandatory>
|
||||
<dataType>String</dataType>
|
||||
</metadatafield>
|
||||
<metadatafield categoryref="Harvested Object">
|
||||
<fieldName>date</fieldName>
|
||||
<maxOccurs>*</maxOccurs>
|
||||
<mandatory>false</mandatory>
|
||||
<dataType>String</dataType>
|
||||
</metadatafield>
|
||||
<metadatafield categoryref="Harvested Object">
|
||||
<fieldName>description</fieldName>
|
||||
<maxOccurs>*</maxOccurs>
|
||||
<mandatory>false</mandatory>
|
||||
<dataType>String</dataType>
|
||||
</metadatafield>
|
||||
<metadatafield categoryref="Harvested Object">
|
||||
<fieldName>format</fieldName>
|
||||
<maxOccurs>*</maxOccurs>
|
||||
<mandatory>false</mandatory>
|
||||
<dataType>String</dataType>
|
||||
</metadatafield>
|
||||
<metadatafield categoryref="Harvested Object">
|
||||
<fieldName>identifier</fieldName>
|
||||
<maxOccurs>*</maxOccurs>
|
||||
<mandatory>false</mandatory>
|
||||
<dataType>String</dataType>
|
||||
</metadatafield>
|
||||
<metadatafield categoryref="Harvested Object">
|
||||
<fieldName>language</fieldName>
|
||||
<maxOccurs>*</maxOccurs>
|
||||
<mandatory>false</mandatory>
|
||||
<dataType>String</dataType>
|
||||
</metadatafield>
|
||||
<metadatafield categoryref="Harvested Object">
|
||||
<fieldName>publisher</fieldName>
|
||||
<maxOccurs>*</maxOccurs>
|
||||
<mandatory>false</mandatory>
|
||||
<dataType>String</dataType>
|
||||
</metadatafield>
|
||||
<metadatafield categoryref="Harvested Object">
|
||||
<fieldName>relation</fieldName>
|
||||
<maxOccurs>*</maxOccurs>
|
||||
<mandatory>false</mandatory>
|
||||
<dataType>String</dataType>
|
||||
</metadatafield>
|
||||
<metadatafield categoryref="Harvested Object">
|
||||
<fieldName>rights</fieldName>
|
||||
<maxOccurs>*</maxOccurs>
|
||||
<mandatory>false</mandatory>
|
||||
<dataType>String</dataType>
|
||||
</metadatafield>
|
||||
<metadatafield categoryref="Harvested Object">
|
||||
<fieldName>source</fieldName>
|
||||
<maxOccurs>*</maxOccurs>
|
||||
<mandatory>false</mandatory>
|
||||
<dataType>String</dataType>
|
||||
</metadatafield>
|
||||
<metadatafield categoryref="Harvested Object">
|
||||
<fieldName>subject</fieldName>
|
||||
<maxOccurs>*</maxOccurs>
|
||||
<mandatory>false</mandatory>
|
||||
<dataType>String</dataType>
|
||||
</metadatafield>
|
||||
<metadatafield categoryref="Harvested Object">
|
||||
<fieldName>title</fieldName>
|
||||
<maxOccurs>*</maxOccurs>
|
||||
<mandatory>false</mandatory>
|
||||
<dataType>String</dataType>
|
||||
</metadatafield>
|
||||
<metadatafield categoryref="Harvested Object">
|
||||
<fieldName>type</fieldName>
|
||||
<maxOccurs>*</maxOccurs>
|
||||
<mandatory>false</mandatory>
|
||||
<dataType>String</dataType>
|
||||
</metadatafield>
|
||||
</metadataformat>
|
|
@ -0,0 +1,51 @@
|
|||
package org.gcube.application.gfeed.oai;
|
||||
|
||||
import java.io.ByteArrayOutputStream;
|
||||
import java.net.MalformedURLException;
|
||||
|
||||
import org.gcube.data.publishing.gCatFeeder.model.InternalConversionException;
|
||||
import org.gcube.data.publishing.gCatFeeder.tests.TokenSetter;
|
||||
import org.gcube.data.publishing.gFeed.collectors.oai.model.DCRecordMetadata;
|
||||
import org.gcube.gcat.client.Item;
|
||||
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
|
||||
|
||||
public class CKANTest {
|
||||
|
||||
|
||||
public static void main (String args[]) {
|
||||
TokenSetter.set("/pred4s/preprod/preVRE");
|
||||
|
||||
}
|
||||
|
||||
|
||||
private static ObjectMapper mapper=new ObjectMapper();
|
||||
|
||||
|
||||
public static void updateItem(String name, String itemContent) throws MalformedURLException {
|
||||
new Item().update(name, itemContent);
|
||||
}
|
||||
|
||||
public static void createItem(String itemContent) throws MalformedURLException {
|
||||
new Item().create(itemContent);
|
||||
}
|
||||
|
||||
|
||||
public static void getItem(String name) throws MalformedURLException {
|
||||
new Item().read(name);
|
||||
}
|
||||
|
||||
public String toCatalogueFormat() throws InternalConversionException {
|
||||
try{
|
||||
ByteArrayOutputStream baos=new ByteArrayOutputStream();
|
||||
mapper.writeValue(baos, this);
|
||||
return baos.toString();
|
||||
}catch(Throwable t) {
|
||||
throw new InternalConversionException("Unable to convert",t);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
}
|
|
@ -0,0 +1,23 @@
|
|||
package org.gcube.application.gfeed.oai;
|
||||
|
||||
import java.util.Collection;
|
||||
|
||||
import javax.xml.bind.JAXBException;
|
||||
|
||||
import org.gcube.data.publishing.gFeed.collectors.oai.OAIClient;
|
||||
import org.gcube.data.publishing.gFeed.collectors.oai.model.OAIInteractionException;
|
||||
import org.gcube.data.publishing.gFeed.collectors.oai.model.OAIRecord;
|
||||
|
||||
public class OAIClientTests {
|
||||
|
||||
|
||||
public static void main (String[] args) throws JAXBException, OAIInteractionException {
|
||||
String baseUrl="https://data.inrae.fr/oai";
|
||||
OAIClient client=new OAIClient(baseUrl);
|
||||
Collection <OAIRecord> records=client.getAll(OAIClient.DC_METADATA_PREFIX);
|
||||
|
||||
System.out.println("Records size = "+records.size());
|
||||
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,89 @@
|
|||
package org.gcube.application.gfeed.oai;
|
||||
|
||||
import static org.junit.Assert.assertTrue;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.net.URL;
|
||||
import java.nio.charset.Charset;
|
||||
import java.nio.file.Paths;
|
||||
|
||||
import javax.xml.bind.JAXBContext;
|
||||
import javax.xml.bind.JAXBException;
|
||||
import javax.xml.bind.Unmarshaller;
|
||||
|
||||
import org.gcube.data.publishing.gFeed.collectors.oai.model.DCRecordMetadata;
|
||||
import org.gcube.data.publishing.gFeed.collectors.oai.model.MetadataHolder;
|
||||
import org.gcube.data.publishing.gFeed.collectors.oai.model.OAIMetadata;
|
||||
import org.gcube.data.publishing.gFeed.collectors.oai.model.OAIRecord;
|
||||
import org.gcube.data.publishing.gFeed.collectors.oai.model.OAI_PMH;
|
||||
import org.junit.Assert;
|
||||
import org.junit.BeforeClass;
|
||||
import org.junit.Test;
|
||||
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
|
||||
@Slf4j
|
||||
public class ParsingTests {
|
||||
|
||||
static JAXBContext jaxbContext;
|
||||
|
||||
@BeforeClass
|
||||
public static void init() throws JAXBException {
|
||||
|
||||
jaxbContext = JAXBContext.newInstance(OAIRecord.class,
|
||||
MetadataHolder.class,
|
||||
OAIMetadata.class,
|
||||
DCRecordMetadata.class,
|
||||
OAI_PMH.class);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void parseDC() throws JAXBException {
|
||||
File toRead=getFileFromResources("resp_dc.xml");
|
||||
|
||||
Unmarshaller jaxbUnmarshaller = jaxbContext.createUnmarshaller();
|
||||
|
||||
OAI_PMH response = (OAI_PMH) jaxbUnmarshaller.unmarshal(toRead);
|
||||
|
||||
Assert.assertNotNull(response);
|
||||
Assert.assertNotNull(response.getResponseRecords().getRecords());
|
||||
|
||||
assertTrue(response.getResponseRecords().getRecords().size()>0);
|
||||
|
||||
for(OAIRecord record : response.getResponseRecords().getRecords()) {
|
||||
log.debug("Record is "+record);
|
||||
}
|
||||
|
||||
// System.out.println(response);
|
||||
|
||||
|
||||
}
|
||||
|
||||
@Test
|
||||
public void marshal() throws JAXBException {
|
||||
// jaxbContext.createMarshaller().marshal(new OAI_PMH("dummy"),System.out);
|
||||
}
|
||||
|
||||
|
||||
public static File getFileFromResources(String fileName) {
|
||||
|
||||
ClassLoader classLoader =ParsingTests.class.getClassLoader();
|
||||
|
||||
URL resource = classLoader.getResource(fileName);
|
||||
if (resource == null) {
|
||||
throw new IllegalArgumentException("file is not found!");
|
||||
} else {
|
||||
return new File(resource.getFile());
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
public static String readFileAsString(String path, Charset encoding)
|
||||
throws IOException
|
||||
{
|
||||
byte[] encoded = java.nio.file.Files.readAllBytes(Paths.get(path));
|
||||
return new String(encoded, encoding);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,46 @@
|
|||
package org.gcube.application.gfeed.oai;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Collection;
|
||||
import java.util.Set;
|
||||
|
||||
import org.codehaus.jackson.JsonGenerationException;
|
||||
import org.codehaus.jackson.map.JsonMappingException;
|
||||
import org.codehaus.jackson.map.ObjectMapper;
|
||||
import org.gcube.data.publishing.gCatFeeder.model.CatalogueFormatData;
|
||||
import org.gcube.data.publishing.gCatFeeder.model.InternalConversionException;
|
||||
import org.gcube.data.publishing.gCatFeeder.tests.BaseCollectorTest;
|
||||
import org.gcube.data.publishing.gCatfeeder.collectors.CollectorPlugin;
|
||||
import org.gcube.data.publishing.gCatfeeder.collectors.DataCollector;
|
||||
import org.gcube.data.publishing.gCatfeeder.collectors.DataTransformer;
|
||||
import org.gcube.data.publishing.gCatfeeder.collectors.model.faults.CatalogueNotSupportedException;
|
||||
import org.gcube.data.publishing.gCatfeeder.collectors.model.faults.CollectorFault;
|
||||
import org.gcube.data.publishing.gFeed.collectors.oai.OAIHarvester;
|
||||
import org.gcube.data.publishing.gFeed.collectors.oai.model.OAIRecord;
|
||||
import org.junit.Assume;
|
||||
import org.junit.Test;
|
||||
|
||||
public class TranslationTest extends BaseCollectorTest{
|
||||
|
||||
@Test
|
||||
public void testTranslation() throws CollectorFault, CatalogueNotSupportedException, JsonGenerationException, JsonMappingException, IOException, InternalConversionException {
|
||||
Assume.assumeTrue(isTestInfrastructureEnabled());
|
||||
|
||||
System.out.println("Entering Infrastructure enabled tests..");
|
||||
ObjectMapper mapper = new ObjectMapper();
|
||||
|
||||
CollectorPlugin plugin=new OAIHarvester();
|
||||
plugin.setEnvironmentConfiguration(getEnvironmentConfiguration());
|
||||
DataCollector collector=plugin.getCollector();
|
||||
Collection collected=collector.collect();
|
||||
System.out.println("Found "+collected.size()+" elements");
|
||||
for(Object obj:collected)
|
||||
System.out.println(mapper.writeValueAsString(obj)+"\n");
|
||||
|
||||
for(String destinationcatalogue : (Set<String>)plugin.getSupportedCatalogueTypes()) {
|
||||
DataTransformer<? extends CatalogueFormatData, OAIRecord> transformer=plugin.getTransformerByCatalogueType(destinationcatalogue);
|
||||
for(Object data:transformer.transform(collected))
|
||||
System.out.println(((CatalogueFormatData)data).toCatalogueFormat());
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,25 @@
|
|||
<record>
|
||||
<header>
|
||||
<identifier>doi:10.15454/000PKT</identifier>
|
||||
<datestamp>2019-10-28T10:32:34Z</datestamp>
|
||||
<setSpec>ALL</setSpec>
|
||||
<setSpec>UMR_AMAP</setSpec>
|
||||
</header>
|
||||
<metadata>
|
||||
<oai_dc:dc xmlns:dc="http://purl.org/dc/elements/1.1/"
|
||||
xmlns:oai_dc="http://www.openarchives.org/OAI/2.0/oai_dc/"
|
||||
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||
xsi:schemaLocation="http://www.openarchives.org/OAI/2.0/oai_dc/ http://www.openarchives.org/OAI/2.0/oai_dc.xsd">
|
||||
<dc:title>INRA:Beet:AKER_8354</dc:title>
|
||||
<dc:identifier>https://doi.org/10.15454/000PKT</dc:identifier>
|
||||
<dc:creator>GnpIS</dc:creator>
|
||||
<dc:publisher>Portail Data Inra</dc:publisher>
|
||||
<dc:description>Abstract:AKER_8354 is a Beet accession from GnpIS.</dc:description>
|
||||
<dc:subject>Genetic Resource</dc:subject>
|
||||
<dc:language>English</dc:language>
|
||||
<dc:date>2017-05-08</dc:date>
|
||||
<dc:contributor>Rinnova</dc:contributor>
|
||||
<dc:type>Physical Object</dc:type>
|
||||
</oai_dc:dc>
|
||||
</metadata>
|
||||
</record>
|
|
@ -0,0 +1,5 @@
|
|||
<OAI-PMH xmlns="http://www.openarchives.org/OAI/2.0/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.openarchives.org/OAI/2.0/ http://www.openarchives.org/OAI/2.0/OAI-PMH.xsd">
|
||||
<responseDate>2020-05-07T14:32:09Z</responseDate>
|
||||
<request metadataPrefix="oai_dc" resumptionToken="MToxMDB8Mjp8Mzp8NDp8NTpvYWlfZGM=" verb="ListRecords">https://data.inrae.fr/oai</request>
|
||||
<error code="badArgument">ResumptionToken cannot be sent together with from, until, metadataPrefix or set parameters</error>
|
||||
</OAI-PMH>
|
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue