OAI to CKAN

This commit is contained in:
FabioISTI 2020-05-07 17:43:22 +02:00
parent 7d1a61cb6e
commit fb14ae68c7
29 changed files with 3520 additions and 2 deletions

View File

@ -84,7 +84,6 @@
<dependency>
<groupId>org.codehaus.jackson</groupId>
<artifactId>jackson-mapper-asl</artifactId>
<version>1.9.13</version>
<scope>test</scope>
</dependency>

View File

@ -93,7 +93,7 @@ public class GCatModel implements CatalogueFormatData {
}
@Setter
private String profile=profileXML;
private CkanItem item=null;

View File

@ -0,0 +1,39 @@
package org.gcube.data.publishing.gCatFeeder.utils;
import java.io.File;
import java.io.IOException;
import java.net.URL;
import java.nio.charset.Charset;
import java.nio.file.Paths;
/**
 * Small static helpers for locating classpath resources and reading files.
 */
public class Files {

    /**
     * Resolves a classpath resource to a {@link File}.
     *
     * NOTE(review): {@link URL#getFile()} is not URL-decoded and a resource packaged
     * inside a JAR is not a real file, so the returned {@link File} is only usable when
     * the resource lives in a plain directory whose path needs no escaping.
     *
     * @param fileName resource name, looked up through this class' class loader
     * @return a {@link File} built from the resource URL's file part
     * @throws IllegalArgumentException if the resource cannot be found
     */
    public static File getFileFromResources(String fileName) {
        ClassLoader classLoader = Files.class.getClassLoader();
        URL resource = classLoader.getResource(fileName);
        if (resource == null) {
            throw new IllegalArgumentException("file is not found!");
        }
        return new File(resource.getFile());
    }

    /**
     * Reads a whole file into a string.
     *
     * @param path     file system path of the file to read
     * @param encoding charset used to decode the file's bytes
     * @return the decoded file content
     * @throws IOException if the file cannot be read
     */
    public static String readFileAsString(String path, Charset encoding) throws IOException {
        byte[] encoded = java.nio.file.Files.readAllBytes(Paths.get(path));
        return new String(encoded, encoding);
    }

    /**
     * Extracts the base name of a path: the text after the last {@link File#separator}
     * and before the last dot of that final segment.
     *
     * A dot that belongs to a directory component (e.g. {@code a.b/c}) is ignored;
     * treating it as the extension marker would put the end index before the start
     * index and make {@code substring} throw.
     *
     * @param path path to inspect
     * @return the last path segment stripped of its extension (if any)
     */
    public static String getName(String path) {
        // lastIndexOf yields -1 when there is no separator, so start falls back to 0.
        int start = path.lastIndexOf(File.separator) + 1;
        int dot = path.lastIndexOf('.');
        // Only a dot inside the final segment delimits an extension.
        int end = dot >= start ? dot : path.length();
        return path.substring(start, end);
    }
}

116
oai-harvester/pom.xml Normal file
View File

@ -0,0 +1,116 @@
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<!-- Build descriptor of the oai-harvester module; group id and version are inherited from the parent below. -->
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>org.gcube.data-publishing.gCat-Feeder</groupId>
<artifactId>gCat-Feeder-Suite</artifactId>
<version>1.0.1</version>
</parent>
<artifactId>oai-harvester</artifactId>
<name>oai-harvester</name>
<properties>
<!-- Base URL reused by the scm section. -->
<gitBaseUrl>https://code-repo.d4science.org/gCubeSystem</gitBaseUrl>
</properties>
<scm>
<connection>scm:git:${gitBaseUrl}/gFeed</connection>
<developerConnection>scm:git:${gitBaseUrl}/gFeed</developerConnection>
<url>${gitBaseUrl}/gFeed</url>
</scm>
<dependencyManagement>
<dependencies>
<!-- Imports the gcube-bom. Dependencies declared below without a <version> presumably
     get their version from this BOM or from the parent POM (neither is visible here). -->
<dependency>
<groupId>org.gcube.distribution</groupId>
<artifactId>gcube-bom</artifactId>
<version>1.4.0</version>
<type>pom</type>
<scope>import</scope>
</dependency>
</dependencies>
</dependencyManagement>
<dependencies>
<!-- Scope "provided": needed to compile, expected to be supplied by the runtime environment. -->
<dependency>
<groupId>org.gcube.common</groupId>
<artifactId>common-authorization</artifactId>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.gcube.core</groupId>
<artifactId>common-scope</artifactId>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.gcube.data-publishing.gCat-Feeder</groupId>
<artifactId>collectors-plugin-framework</artifactId>
<version>[1.0.0,2.0.0-SNAPSHOT)</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-databind</artifactId>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId>
<scope>provided</scope>
</dependency>
<!-- No scope declared, so this one uses Maven's default (compile) scope. -->
<dependency>
<groupId>org.gcube.data-publishing</groupId>
<artifactId>gcat-client</artifactId>
<version>[1.2.0,2.0.0-SNAPSHOT)</version>
</dependency>
<!-- TEST -->
<dependency>
<groupId>org.gcube.data-publishing.gCat-Feeder</groupId>
<artifactId>test-commons</artifactId>
<version>[1.0.0,2.0.0-SNAPSHOT)</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.codehaus.jackson</groupId>
<artifactId>jackson-mapper-asl</artifactId>
<scope>test</scope>
</dependency>
</dependencies>
<build>
<plugins>
<!-- Only binds the two executions to the package phase; goals and descriptors are not
     declared here and are presumably inherited from the parent POM (TODO confirm). -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-assembly-plugin</artifactId>
<executions>
<execution>
<id>make-uberjar</id>
<phase>package</phase>
</execution>
<execution>
<id>make-servicearchive</id>
<phase>package</phase>
</execution>
</executions>
</plugin>
</plugins>
</build>
</project>

View File

@ -0,0 +1,10 @@
package org.gcube.data.publishing.gFeed.collectors.oai;
/**
 * String identifiers shared by the OAI collector classes.
 */
public class Constants {

    /** Identifier the plugin reports in its descriptor. */
    public static final String PLUGIN_ID = "OAI_COLLECTOR";

    /** Name of the catalogue type handled by the plugin. */
    public static final String GCAT_TYPE = "GCAT";
}

View File

@ -0,0 +1,24 @@
package org.gcube.data.publishing.gFeed.collectors.oai;
import org.gcube.data.publishing.gCatFeeder.model.CatalogueInstanceDescriptor;
import org.gcube.data.publishing.gCatfeeder.collectors.CatalogueRetriever;
/**
 * {@link CatalogueRetriever} for the gCat catalogue, handed out as a lazily created
 * shared instance.
 */
public class GCATRetriever implements CatalogueRetriever {

    /** Shared instance; stays {@code null} until {@link #get()} is first called. */
    private static GCATRetriever instance = null;

    /**
     * Returns the shared retriever, creating it on first use. The method is
     * synchronized, so concurrent first calls still produce a single instance.
     *
     * @return the shared {@link GCATRetriever}
     */
    static synchronized GCATRetriever get() {
        if (instance != null) {
            return instance;
        }
        instance = new GCATRetriever();
        return instance;
    }

    /**
     * @return a new {@link CatalogueInstanceDescriptor} built with its no-arg constructor
     */
    @Override
    public CatalogueInstanceDescriptor getInstance() {
        // No endpoint lookup is performed here: the descriptor is returned as constructed.
        CatalogueInstanceDescriptor descriptor = new CatalogueInstanceDescriptor();
        return descriptor;
    }
}

View File

@ -0,0 +1,126 @@
package org.gcube.data.publishing.gFeed.collectors.oai;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.Collection;
import javax.ws.rs.client.Client;
import javax.ws.rs.client.ClientBuilder;
import javax.ws.rs.client.WebTarget;
import javax.ws.rs.core.Response;
import javax.xml.bind.JAXB;
import javax.xml.bind.JAXBContext;
import javax.xml.bind.JAXBElement;
import javax.xml.bind.JAXBException;
import javax.xml.bind.Unmarshaller;
import javax.xml.transform.stream.StreamSource;
import org.gcube.data.publishing.gFeed.collectors.oai.model.DCRecordMetadata;
import org.gcube.data.publishing.gFeed.collectors.oai.model.MetadataHolder;
import org.gcube.data.publishing.gFeed.collectors.oai.model.OAIInteractionException;
import org.gcube.data.publishing.gFeed.collectors.oai.model.OAIMetadata;
import org.gcube.data.publishing.gFeed.collectors.oai.model.OAIRecord;
import org.gcube.data.publishing.gFeed.collectors.oai.model.OAI_PMH;
import org.gcube.data.publishing.gFeed.collectors.oai.model.OAI_PMH.Token;
import org.glassfish.jersey.client.ClientProperties;
import lombok.NonNull;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
@Slf4j
@RequiredArgsConstructor
public class OAIClient {
private static JAXBContext jaxbContext=null;
private static synchronized JAXBContext getContext() throws JAXBException {
if(jaxbContext==null)
jaxbContext = JAXBContext.newInstance(OAIRecord.class,
MetadataHolder.class,
OAIMetadata.class,
DCRecordMetadata.class,
OAI_PMH.class);
return jaxbContext;
}
public static final String DC_METADATA_PREFIX="oai_dc";
@NonNull
private String baseUrl;
Client client;
private synchronized Client getWebClient() {
if(client==null) {
client = ClientBuilder.newClient()
.property(ClientProperties.SUPPRESS_HTTP_COMPLIANCE_VALIDATION, true);
}
return client;
}
public Collection<OAIRecord> getAll(String metadataPrefix) throws JAXBException, OAIInteractionException{
ArrayList<OAIRecord> toReturn=new ArrayList<OAIRecord>();
String resumptionToken=null;
// call & iterate
boolean isComplete=false;
while(!isComplete) {
WebTarget target=getWebClient().target(baseUrl).
queryParam("verb","ListRecords");
if(resumptionToken==null)
target=target.queryParam("metadataPrefix",metadataPrefix);
else
target=target.queryParam("resumptionToken", resumptionToken);
Response resp=target.request("application/xml").get();
OAI_PMH msg=check(resp);
if(msg.isError()) throw new OAIInteractionException(msg.getError().getCode()+ " : "+msg.getError().getMessage());
toReturn.addAll(msg.getResponseRecords().getRecords());
Token t=msg.getResponseRecords().getResumptionToken();
log.debug("Obtained token : "+t);
if(t!=null && t.getId()!=null && !t.getId().isEmpty()) {
resumptionToken=t.getId();
}else isComplete=true; //no token = completion
}
return toReturn;
}
private static OAI_PMH check(Response resp) throws JAXBException {
if(resp.getStatus()<200||resp.getStatus()>=300) {
// exception
throw new RuntimeException("Implement fault");
}else {
String respString=resp.readEntity(String.class);
Unmarshaller jaxbUnmarshaller = getContext().createUnmarshaller();
OAI_PMH obj=(OAI_PMH) jaxbUnmarshaller.unmarshal(new StringReader(respString));
return obj;
// OAI_PMH response = (OAI_PMH) jaxbUnmarshaller.unmarshal(
// new StreamSource(new StringReader(respString)));
}
}
}

View File

@ -0,0 +1,42 @@
package org.gcube.data.publishing.gFeed.collectors.oai;
import java.util.HashSet;
import java.util.Set;
import org.gcube.common.resources.gcore.ServiceEndpoint;
import org.gcube.data.publishing.gCatFeeder.utils.ISUtils;
import org.gcube.data.publishing.gCatfeeder.collectors.DataCollector;
import org.gcube.data.publishing.gCatfeeder.collectors.model.faults.CollectorFault;
import org.gcube.data.publishing.gFeed.collectors.oai.model.OAIRecord;
import lombok.extern.slf4j.Slf4j;
/**
 * {@link DataCollector} that harvests Dublin Core records from every OAI repository
 * returned by the service-endpoint query.
 */
@Slf4j
public class OAICollector implements DataCollector<OAIRecord> {

    /**
     * Queries the service endpoints, then harvests all records from the first access
     * point of each one.
     *
     * @return the union of the records harvested from all endpoints
     * @throws CollectorFault wrapping anything thrown while querying or harvesting
     */
    @Override
    public Set<OAIRecord> collect() throws CollectorFault {
        try {
            // Both query criteria are empty strings in this version.
            String oaiCategory = "";
            String oaiPlatform = "";

            HashSet<OAIRecord> harvested = new HashSet<OAIRecord>();
            for (ServiceEndpoint endpoint : ISUtils.queryForServiceEndpoints(oaiCategory, oaiPlatform)) {
                log.info("Found OAI Repo in resource "+endpoint.id()+" NAME : "+endpoint.profile().name());

                // Only the first declared access point is used.
                String address = endpoint.profile().accessPoints().asCollection().iterator().next().address();
                log.debug("Address is "+address);

                harvested.addAll(new OAIClient(address).getAll(OAIClient.DC_METADATA_PREFIX));
            }
            return harvested;
        } catch (Throwable failure) {
            throw new CollectorFault(failure);
        }
    }
}

View File

@ -0,0 +1,76 @@
package org.gcube.data.publishing.gFeed.collectors.oai;
import java.util.Collections;
import java.util.Set;
import org.gcube.data.publishing.gCatFeeder.model.CatalogueFormatData;
import org.gcube.data.publishing.gCatFeeder.model.ControllerConfiguration;
import org.gcube.data.publishing.gCatFeeder.model.EnvironmentConfiguration;
import org.gcube.data.publishing.gCatfeeder.collectors.CatalogueRetriever;
import org.gcube.data.publishing.gCatfeeder.collectors.CollectorPlugin;
import org.gcube.data.publishing.gCatfeeder.collectors.DataCollector;
import org.gcube.data.publishing.gCatfeeder.collectors.DataTransformer;
import org.gcube.data.publishing.gCatfeeder.collectors.model.PluginDescriptor;
import org.gcube.data.publishing.gCatfeeder.collectors.model.faults.CatalogueNotSupportedException;
import org.gcube.data.publishing.gFeed.collectors.oai.model.OAIRecord;
import org.gcube.data.publishing.gFeed.collectors.oai.model.ckan.GCatTransformer;
/**
 * Collector plugin entry point: wires the OAI collector, the gCat retriever and the
 * gCat transformer together for the single supported catalogue type.
 */
public class OAIHarvester implements CollectorPlugin<OAIRecord>{

    /** @return a descriptor carrying {@link Constants#PLUGIN_ID} */
    @Override
    public PluginDescriptor getDescriptor() {
        return new PluginDescriptor(Constants.PLUGIN_ID);
    }

    /** @return the single supported catalogue type, {@link Constants#GCAT_TYPE} */
    @Override
    public Set<String> getSupportedCatalogueTypes() {
        return Collections.singleton(Constants.GCAT_TYPE);
    }

    /**
     * @param catalogueType requested catalogue type
     * @return the shared {@link GCATRetriever} for {@link Constants#GCAT_TYPE}
     * @throws CatalogueNotSupportedException for any other (non-null) type
     */
    @Override
    public CatalogueRetriever getRetrieverByCatalogueType(String catalogueType) throws CatalogueNotSupportedException {
        // catalogueType is dereferenced directly, so a null argument fails with a
        // NullPointerException rather than a CatalogueNotSupportedException.
        if (catalogueType.equals(Constants.GCAT_TYPE)) {
            return GCATRetriever.get();
        }
        throw new CatalogueNotSupportedException("No support for "+catalogueType);
    }

    /**
     * @param catalogueType requested catalogue type
     * @return a new {@link GCatTransformer} for {@link Constants#GCAT_TYPE}
     * @throws CatalogueNotSupportedException for any other (non-null) type
     */
    @Override
    public DataTransformer<? extends CatalogueFormatData, OAIRecord> getTransformerByCatalogueType(String catalogueType)
            throws CatalogueNotSupportedException {
        // Same null behaviour as getRetrieverByCatalogueType: null raises NullPointerException.
        if (catalogueType.equals(Constants.GCAT_TYPE)) {
            return new GCatTransformer();
        }
        throw new CatalogueNotSupportedException("No support for "+catalogueType);
    }

    /** @return a new {@link OAICollector} on every call */
    @Override
    public DataCollector<OAIRecord> getCollector() {
        return new OAICollector();
    }

    /** @return a default-constructed configuration, whatever the catalogue type */
    @Override
    public ControllerConfiguration getPublisherControllerConfiguration(String catalogueType)
            throws CatalogueNotSupportedException {
        return new ControllerConfiguration();
    }

    /** Nothing to initialise. */
    @Override
    public void init() throws Exception {
        // intentionally empty
    }

    /** Nothing to initialise per scope. */
    @Override
    public void initInScope() throws Exception {
        // intentionally empty
    }

    /** The environment configuration is not used by this plugin. */
    @Override
    public void setEnvironmentConfiguration(EnvironmentConfiguration env) {
        // intentionally empty
    }
}

View File

@ -0,0 +1,59 @@
package org.gcube.data.publishing.gFeed.collectors.oai.model;
import javax.xml.bind.annotation.XmlAccessType;
import javax.xml.bind.annotation.XmlAccessorType;
import javax.xml.bind.annotation.XmlElement;
import javax.xml.bind.annotation.XmlRootElement;
import lombok.AllArgsConstructor;
import lombok.Getter;
import lombok.NoArgsConstructor;
import lombok.Setter;
import lombok.ToString;
/**
 * JAXB binding for the {@code dc} root element of the oai_dc namespace.
 * Every field below is read from / written to a child element of the same name in the
 * Dublin Core elements namespace ({@link #NS}). Accessors, toString and both
 * constructors are generated by Lombok; the all-args constructor takes the fields in
 * declaration order.
 *
 * NOTE(review): each element is bound to a single String. Dublin Core elements can
 * presumably occur more than once in a record; confirm how repeated elements should be
 * handled by this binding.
 */
@NoArgsConstructor
@AllArgsConstructor
@ToString
@Getter
@Setter
@XmlAccessorType(XmlAccessType.FIELD)
@XmlRootElement(name = "dc", namespace="http://www.openarchives.org/OAI/2.0/oai_dc/")
public class DCRecordMetadata extends OAIMetadata{
// XML namespace applied to every element-bound field of this class.
public static final String NS="http://purl.org/dc/elements/1.1/";
@XmlElement(namespace=NS)
private String contributor;
@XmlElement(namespace=NS)
private String coverage;
@XmlElement(namespace=NS)
private String creator;
@XmlElement(namespace=NS)
private String date;
@XmlElement(namespace=NS)
private String description;
@XmlElement(namespace=NS)
private String format;
@XmlElement(namespace=NS)
private String identifier;
@XmlElement(namespace=NS)
private String language;
@XmlElement(namespace=NS)
private String publisher;
@XmlElement(namespace=NS)
private String relation;
@XmlElement(namespace=NS)
private String rights;
@XmlElement(namespace=NS)
private String source;
@XmlElement(namespace=NS)
private String subject;
@XmlElement(namespace=NS)
private String title;
@XmlElement(namespace=NS)
private String type;
}

View File

@ -0,0 +1,29 @@
package org.gcube.data.publishing.gFeed.collectors.oai.model;
import javax.xml.bind.annotation.XmlAccessType;
import javax.xml.bind.annotation.XmlAccessorType;
import javax.xml.bind.annotation.XmlElement;
import javax.xml.bind.annotation.XmlElementRef;
import javax.xml.bind.annotation.XmlElements;
import javax.xml.bind.annotation.XmlRootElement;
import lombok.AllArgsConstructor;
import lombok.Getter;
import lombok.NoArgsConstructor;
import lombok.Setter;
import lombok.ToString;
/**
 * JAXB binding for the {@code metadata} element of the OAI-PMH namespace: a wrapper
 * around the format-specific metadata payload of a record.
 */
@NoArgsConstructor
@AllArgsConstructor
@ToString
@Getter
@Setter
@XmlAccessorType(XmlAccessType.FIELD)
@XmlRootElement(name = "metadata", namespace="http://www.openarchives.org/OAI/2.0/")
public class MetadataHolder{
// Bound by element reference, so the concrete OAIMetadata subtype is chosen from the
// root element of the nested content rather than from a fixed element name here.
@XmlElementRef
// @XmlElements({
// @XmlElement(name="dc",namespace="http://www.openarchives.org/OAI/2.0/oai_dc/", type=DCRecordMetadata.class)
// })
// Public field that additionally gets Lombok-generated accessors.
public OAIMetadata metadata;
}

View File

@ -0,0 +1,7 @@
package org.gcube.data.publishing.gFeed.collectors.oai.model;
/**
 * XML namespace URIs referenced by the OAI model classes.
 */
public class Namespaces {

    /** Namespace URI of the OAI-PMH 2.0 protocol elements. */
    public static final String OAI_PMH_NS = "http://www.openarchives.org/OAI/2.0/";
}

View File

@ -0,0 +1,30 @@
package org.gcube.data.publishing.gFeed.collectors.oai.model;
/**
 * Checked exception for failures while talking to an OAI repository. It mirrors the
 * public constructors of {@link Exception} and adds no state of its own.
 */
public class OAIInteractionException extends Exception {

    /**
     * @param message            detail message
     * @param cause              underlying cause
     * @param enableSuppression  whether suppression is enabled
     * @param writableStackTrace whether the stack trace is writable
     */
    public OAIInteractionException(String message, Throwable cause, boolean enableSuppression,
            boolean writableStackTrace) {
        super(message, cause, enableSuppression, writableStackTrace);
    }

    /**
     * @param message detail message
     * @param cause   underlying cause
     */
    public OAIInteractionException(String message, Throwable cause) {
        super(message, cause);
    }

    /** @param cause underlying cause */
    public OAIInteractionException(Throwable cause) {
        super(cause);
    }

    /** @param message detail message */
    public OAIInteractionException(String message) {
        super(message);
    }

    /** Creates an exception without message or cause. */
    public OAIInteractionException() {
        super();
    }
}

View File

@ -0,0 +1,8 @@
package org.gcube.data.publishing.gFeed.collectors.oai.model;
import javax.xml.bind.annotation.XmlSeeAlso;
//@XmlSeeAlso({DCRecordMetadata.class})
/**
 * Common base type of the metadata payloads carried by an OAI record.
 * It declares no members; concrete metadata formats extend it.
 */
public abstract class OAIMetadata{
}

View File

@ -0,0 +1,49 @@
package org.gcube.data.publishing.gFeed.collectors.oai.model;
import java.util.List;
import javax.xml.bind.annotation.XmlAccessType;
import javax.xml.bind.annotation.XmlAccessorType;
import javax.xml.bind.annotation.XmlElement;
import javax.xml.bind.annotation.XmlRootElement;
import org.gcube.data.publishing.gCatfeeder.collectors.model.CustomData;
import lombok.AllArgsConstructor;
import lombok.Getter;
import lombok.NoArgsConstructor;
import lombok.Setter;
import lombok.ToString;
/**
 * JAXB binding for a {@code record} element of the OAI-PMH namespace: a header plus
 * the wrapped metadata payload. Accessors, toString and constructors are generated by
 * Lombok; the all-args constructor takes the fields in declaration order.
 */
@NoArgsConstructor
@AllArgsConstructor
@ToString
@XmlRootElement(name = "record", namespace=Namespaces.OAI_PMH_NS)
@XmlAccessorType(XmlAccessType.FIELD)
@Getter
@Setter
public class OAIRecord implements CustomData{
/**
 * Binding for the record's {@code header} element.
 */
@NoArgsConstructor
@AllArgsConstructor
@ToString
@Getter
@Setter
@XmlAccessorType(XmlAccessType.FIELD)
@XmlRootElement(namespace=Namespaces.OAI_PMH_NS)
public static class Header{
@XmlElement(namespace=Namespaces.OAI_PMH_NS)
private String identifier;
// The XML element is named "datestamp" (all lower case), unlike the Java field.
@XmlElement(name = "datestamp", namespace=Namespaces.OAI_PMH_NS)
private String dateStamp;
// Bound to a list, so repeated setSpec elements are all collected.
@XmlElement(namespace=Namespaces.OAI_PMH_NS)
private List<String> setSpec;
}
@XmlElement(namespace=Namespaces.OAI_PMH_NS)
private Header header;
// Public field that additionally gets Lombok-generated accessors.
@XmlElement(name = "metadata", namespace=Namespaces.OAI_PMH_NS)
public MetadataHolder metadata;
}

View File

@ -0,0 +1,103 @@
package org.gcube.data.publishing.gFeed.collectors.oai.model;
import java.util.List;
import javax.xml.bind.annotation.XmlAccessType;
import javax.xml.bind.annotation.XmlAccessorType;
import javax.xml.bind.annotation.XmlAttribute;
import javax.xml.bind.annotation.XmlElement;
import javax.xml.bind.annotation.XmlElementWrapper;
import javax.xml.bind.annotation.XmlRootElement;
import javax.xml.bind.annotation.XmlValue;
import lombok.AllArgsConstructor;
import lombok.Getter;
import lombok.NoArgsConstructor;
import lombok.Setter;
import lombok.ToString;
/**
 * JAXB binding for the {@code OAI-PMH} root element of a response: response date, the
 * echoed request, and either an error or the list of records. Accessors, toString and
 * constructors of this class and its nested classes are generated by Lombok.
 */
@NoArgsConstructor
@AllArgsConstructor
@ToString
@XmlRootElement(name="OAI-PMH", namespace=Namespaces.OAI_PMH_NS)
//@XmlRootElement(name="OAI-PMH")
@XmlAccessorType(XmlAccessType.FIELD)
@Getter
@Setter
public class OAI_PMH {
/**
 * Binding for the {@code request} element: two attributes plus the element text.
 */
@NoArgsConstructor
@AllArgsConstructor
@ToString
@Getter
@Setter
@XmlAccessorType(XmlAccessType.FIELD)
public static class Request{
@XmlAttribute
private String metadataPrefix;
@XmlAttribute
private String verb;
// Text content of the element.
@XmlValue
private String path;
}
/**
 * Binding for the {@code resumptionToken} element.
 */
@NoArgsConstructor
@AllArgsConstructor
@ToString
@Getter
@Setter
@XmlAccessorType(XmlAccessType.FIELD)
public static class Token{
// Primitive int: keeps its default value 0 when nothing is assigned to it.
@XmlAttribute
private int cursor;
// Text content of the element, i.e. the token value itself.
@XmlValue
private String id;
}
/**
 * Binding for the {@code ListRecords} element: the records of one page and the token
 * for the next one.
 */
@NoArgsConstructor
@AllArgsConstructor
@ToString
@Getter
@Setter
@XmlAccessorType(XmlAccessType.FIELD)
public static class ListRecords{
@XmlElement(name = "record",namespace=Namespaces.OAI_PMH_NS)
private List<OAIRecord> records;
@XmlElement(name = "resumptionToken",namespace=Namespaces.OAI_PMH_NS)
private Token resumptionToken;
}
/**
 * Binding for the {@code error} element: a code attribute plus the element text.
 */
@NoArgsConstructor
@AllArgsConstructor
@ToString
@Getter
@Setter
@XmlAccessorType(XmlAccessType.FIELD)
public static class Error{
@XmlAttribute
private String code;
// Text content of the element.
@XmlValue
private String message;
}
@XmlElement(namespace=Namespaces.OAI_PMH_NS)
private String responseDate;
@XmlElement(namespace=Namespaces.OAI_PMH_NS)
private Request request;
@XmlElement(namespace=Namespaces.OAI_PMH_NS)
private Error error;
// The Java field is named responseRecords; the XML element is "ListRecords".
@XmlElement(name="ListRecords", namespace=Namespaces.OAI_PMH_NS)
private ListRecords responseRecords;
/**
 * @return true when the error field is set
 */
public boolean isError() {
return error!=null;
}
}

View File

@ -0,0 +1,56 @@
package org.gcube.data.publishing.gFeed.collectors.oai.model.ckan;
import java.util.ArrayList;
import com.fasterxml.jackson.annotation.JsonProperty;
import lombok.AllArgsConstructor;
import lombok.Getter;
import lombok.NoArgsConstructor;
import lombok.NonNull;
import lombok.RequiredArgsConstructor;
import lombok.Setter;
import lombok.ToString;
/**
 * Plain data holder for a CKAN item: a set of string fields, a list of tags and a list
 * of key/value extras. Accessors and the no-arg constructor are generated by Lombok.
 */
@Getter
@Setter
@NoArgsConstructor
public class CkanItem {
/**
 * Key/value pair stored in the item's extras list.
 */
@Getter
@Setter
@NoArgsConstructor
@AllArgsConstructor
@ToString
public static class CKanExtraField {
private String key;
private String value;
}
/**
 * Tag with a single name. Lombok generates both a no-arg constructor and a
 * constructor taking the @NonNull name.
 */
@Getter
@Setter
@NoArgsConstructor
@RequiredArgsConstructor
@ToString
public static class Tag{
@NonNull
private String name;
}
private String name;
private String title;
private String version;
// Mapped to the JSON property "private", which cannot be used as a Java field name.
@JsonProperty("private")
private Boolean privateFlag;
private String license_id;
private String author;
private String maintainer;
private String notes;
// Both lists start out empty (never null) on a freshly constructed item.
private ArrayList<Tag> tags=new ArrayList<Tag>();
private ArrayList<CKanExtraField> extras=new ArrayList<>();
}

View File

@ -0,0 +1,58 @@
package org.gcube.data.publishing.gFeed.collectors.oai.model.ckan;
import java.io.ByteArrayOutputStream;
import java.util.ArrayList;
import org.gcube.data.publishing.gCatFeeder.model.CatalogueFormatData;
import org.gcube.data.publishing.gCatFeeder.model.InternalConversionException;
import com.fasterxml.jackson.databind.ObjectMapper;
import lombok.AllArgsConstructor;
import lombok.Getter;
import lombok.NoArgsConstructor;
import lombok.Setter;
import lombok.extern.slf4j.Slf4j;
/**
 * Catalogue payload produced by the OAI harvester: an optional profile definition, the
 * CKAN item and its resources, serialised to JSON by {@link #toCatalogueFormat()}.
 */
@Slf4j
@NoArgsConstructor
@AllArgsConstructor
public class GCatModel implements CatalogueFormatData {

    /**
     * Resource attached to the item. The all-args constructor takes
     * name, url, format and description, in that order.
     */
    @Getter
    @Setter
    @NoArgsConstructor
    @AllArgsConstructor
    public static class CkanResource {
        private String name;
        private String url;
        private String format;
        private String description;
    }

    /** Shared JSON mapper. */
    private static ObjectMapper mapper = new ObjectMapper();

    // The three fields below need getters: Jackson's default visibility serialises a
    // private field only through a public getter, so with setters alone this object
    // exposes no property at all and serialisation fails.

    /** Profile definition; {@code null} unless explicitly set. */
    @Getter
    @Setter
    private String profile = null;

    /** The CKAN item to publish. */
    @Getter
    @Setter
    private CkanItem item;

    /** Resources of the item; empty by default. */
    @Getter
    @Setter
    private ArrayList<CkanResource> resources = new ArrayList<>();

    /**
     * Serialises this object (profile, item, resources) to a JSON string.
     *
     * NOTE(review): the property names follow the field names; confirm they match what
     * the consuming catalogue controller expects.
     *
     * @return the JSON representation of this model
     * @throws InternalConversionException if serialisation fails for any reason
     */
    @Override
    public String toCatalogueFormat() throws InternalConversionException {
        try {
            // writeValueAsString avoids decoding Jackson's UTF-8 bytes with the platform
            // default charset, which is what ByteArrayOutputStream.toString() would do.
            return mapper.writeValueAsString(this);
        } catch (Throwable t) {
            throw new InternalConversionException("Unable to convert", t);
        }
    }
}

View File

@ -0,0 +1,139 @@
package org.gcube.data.publishing.gFeed.collectors.oai.model.ckan;
import java.net.URL;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashSet;
import java.util.Set;
import org.gcube.data.publishing.gCatFeeder.utils.Files;
import org.gcube.data.publishing.gCatfeeder.collectors.DataTransformer;
import org.gcube.data.publishing.gFeed.collectors.oai.model.DCRecordMetadata;
import org.gcube.data.publishing.gFeed.collectors.oai.model.OAIMetadata;
import org.gcube.data.publishing.gFeed.collectors.oai.model.OAIRecord;
import org.gcube.data.publishing.gFeed.collectors.oai.model.ckan.CkanItem.CKanExtraField;
import org.gcube.data.publishing.gFeed.collectors.oai.model.ckan.GCatModel.CkanResource;
import lombok.extern.slf4j.Slf4j;
@Slf4j
public class GCatTransformer implements DataTransformer<GCatModel,OAIRecord>{
@Override
public Set<GCatModel> transform(Collection<OAIRecord> collectedData) {
boolean useProfile=true;
HashSet<GCatModel> toReturn=new HashSet<>();
for(OAIRecord record:collectedData) {
GCatModel toPublish=translate(record,useProfile);
if(useProfile) {
useProfile=false;
}
toReturn.add(toPublish);
}
return toReturn;
}
/**
* (Common) Title
* (Common) Description
* (Common) Tags: free list of keywords
* (Common) License
* (Common) Visibility: either public or private
* (Common) Version
* (Common) Author: the creator of metadata. Only one occurrence is supported;
* (Common) Maintainer:
* (Method specific) Creator: the author of the method (with email and ORCID). Repeatable field;
* (Method specific) Creation date: when the method has been released;
* (Method specific) Input: Repeatable field;
* (Method specific) Output: Repeatable field;
* (Method specific) RelatedPaper: a reference to an associated paper;
* (Method specific) Restrictions On Use: an optional text
* (Method specific) Attribution requirements: the text to use to acknowledge method usage;
*/
private static GCatModel translate(OAIRecord toTranslate,Boolean useProfile) {
GCatModel toReturn = new GCatModel();
CkanItem item=new CkanItem();
item.setName(toTranslate.getHeader().getIdentifier());
OAIMetadata meta=toTranslate.getMetadata().getMetadata();
if(meta instanceof DCRecordMetadata) {
String profileID="Harvested Object";
DCRecordMetadata dcMeta=(DCRecordMetadata) meta;
item.setTitle(dcMeta.getTitle());
item.setNotes(dcMeta.getDescription());
item.setAuthor(dcMeta.getPublisher());
item.setMaintainer(dcMeta.getPublisher());
item.setVersion("n.a.");
item.setPrivateFlag(false);
item.setLicense_id("CC-BY-NC-SA-4.0");
item.getExtras().add(new CKanExtraField("system:type", profileID));
if(dcMeta.getContributor()!=null)
item.getExtras().add(new CKanExtraField(profileID+":contributor", dcMeta.getContributor()));
if(dcMeta.getCoverage()!=null)
item.getExtras().add(new CKanExtraField(profileID+":coverage", dcMeta.getCoverage()));
if(dcMeta.getCreator()!=null)
item.getExtras().add(new CKanExtraField(profileID+":creator", dcMeta.getCreator()));
if(dcMeta.getDate()!=null)
item.getExtras().add(new CKanExtraField(profileID+":date", dcMeta.getDate()));
if(dcMeta.getDescription()!=null)
item.getExtras().add(new CKanExtraField(profileID+":description", dcMeta.getDescription()));
if(dcMeta.getFormat()!=null)
item.getExtras().add(new CKanExtraField(profileID+":format", dcMeta.getFormat()));
if(dcMeta.getIdentifier()!=null)
item.getExtras().add(new CKanExtraField(profileID+":identifier", dcMeta.getIdentifier()));
if(dcMeta.getLanguage()!=null)
item.getExtras().add(new CKanExtraField(profileID+":language", dcMeta.getLanguage()));
if(dcMeta.getPublisher()!=null)
item.getExtras().add(new CKanExtraField(profileID+":publisher", dcMeta.getPublisher()));
if(dcMeta.getRelation()!=null)
item.getExtras().add(new CKanExtraField(profileID+":relation", dcMeta.getRelation()));
if(dcMeta.getRights()!=null)
item.getExtras().add(new CKanExtraField(profileID+":rights", dcMeta.getRights()));
if(dcMeta.getSource()!=null)
item.getExtras().add(new CKanExtraField(profileID+":source", dcMeta.getSource()));
if(dcMeta.getSubject()!=null)
item.getExtras().add(new CKanExtraField(profileID+":subject", dcMeta.getSubject()));
if(dcMeta.getTitle()!=null)
item.getExtras().add(new CKanExtraField(profileID+":title", dcMeta.getTitle()));
if(dcMeta.getType()!=null)
item.getExtras().add(new CKanExtraField(profileID+":type", dcMeta.getType()));
try {
URL url=new URL(dcMeta.getIdentifier());
CkanResource res=new CkanResource("Record", url+"", url.getProtocol(), "Original record");
ArrayList<CkanResource> list=new ArrayList<>();
toReturn.setResources(list);
}catch(Throwable t) {
log.debug("Unable to set identifier "+dcMeta.getIdentifier()+"as resource ",t);
}
}
toReturn.setItem(item);
try {
if(useProfile)
toReturn.setProfile(Files.readFileAsString(
Files.getFileFromResources("HarvesterObject.xml").getAbsolutePath(),
Charset.defaultCharset())); //"Harvested Object”
}catch(Throwable t) {
log.error("Unable to set profile ",t);
}
return toReturn;
}
}

View File

@ -0,0 +1 @@
org.gcube.data.publishing.gFeed.collectors.oai.OAIHarvester

View File

@ -0,0 +1,99 @@
<!--
/elements/1.1
contributor, coverage, creator, date, description, format,
identifier, language, publisher, relation, rights, source, subject,
title, type -->
<metadataformat type="Harvested Object">
<metadatafield categoryref="Harvested Object">
<fieldName>contributor</fieldName>
<maxOccurs>*</maxOccurs>
<mandatory>false</mandatory>
<dataType>String</dataType>
</metadatafield>
<metadatafield categoryref="Harvested Object">
<fieldName>coverage</fieldName>
<maxOccurs>*</maxOccurs>
<mandatory>false</mandatory>
<dataType>String</dataType>
</metadatafield>
<metadatafield categoryref="Harvested Object">
<fieldName>creator</fieldName>
<maxOccurs>*</maxOccurs>
<mandatory>false</mandatory>
<dataType>String</dataType>
</metadatafield>
<metadatafield categoryref="Harvested Object">
<fieldName>date</fieldName>
<maxOccurs>*</maxOccurs>
<mandatory>false</mandatory>
<dataType>String</dataType>
</metadatafield>
<metadatafield categoryref="Harvested Object">
<fieldName>description</fieldName>
<maxOccurs>*</maxOccurs>
<mandatory>false</mandatory>
<dataType>String</dataType>
</metadatafield>
<metadatafield categoryref="Harvested Object">
<fieldName>format</fieldName>
<maxOccurs>*</maxOccurs>
<mandatory>false</mandatory>
<dataType>String</dataType>
</metadatafield>
<metadatafield categoryref="Harvested Object">
<fieldName>identifier</fieldName>
<maxOccurs>*</maxOccurs>
<mandatory>false</mandatory>
<dataType>String</dataType>
</metadatafield>
<metadatafield categoryref="Harvested Object">
<fieldName>language</fieldName>
<maxOccurs>*</maxOccurs>
<mandatory>false</mandatory>
<dataType>String</dataType>
</metadatafield>
<metadatafield categoryref="Harvested Object">
<fieldName>publisher</fieldName>
<maxOccurs>*</maxOccurs>
<mandatory>false</mandatory>
<dataType>String</dataType>
</metadatafield>
<metadatafield categoryref="Harvested Object">
<fieldName>relation</fieldName>
<maxOccurs>*</maxOccurs>
<mandatory>false</mandatory>
<dataType>String</dataType>
</metadatafield>
<metadatafield categoryref="Harvested Object">
<fieldName>rights</fieldName>
<maxOccurs>*</maxOccurs>
<mandatory>false</mandatory>
<dataType>String</dataType>
</metadatafield>
<metadatafield categoryref="Harvested Object">
<fieldName>source</fieldName>
<maxOccurs>*</maxOccurs>
<mandatory>false</mandatory>
<dataType>String</dataType>
</metadatafield>
<metadatafield categoryref="Harvested Object">
<fieldName>subject</fieldName>
<maxOccurs>*</maxOccurs>
<mandatory>false</mandatory>
<dataType>String</dataType>
</metadatafield>
<metadatafield categoryref="Harvested Object">
<fieldName>title</fieldName>
<maxOccurs>*</maxOccurs>
<mandatory>false</mandatory>
<dataType>String</dataType>
</metadatafield>
<metadatafield categoryref="Harvested Object">
<fieldName>type</fieldName>
<maxOccurs>*</maxOccurs>
<mandatory>false</mandatory>
<dataType>String</dataType>
</metadatafield>
</metadataformat>

View File

@ -0,0 +1,51 @@
package org.gcube.application.gfeed.oai;
import java.io.ByteArrayOutputStream;
import java.net.MalformedURLException;
import org.gcube.data.publishing.gCatFeeder.model.InternalConversionException;
import org.gcube.data.publishing.gCatFeeder.tests.TokenSetter;
import org.gcube.data.publishing.gFeed.collectors.oai.model.DCRecordMetadata;
import org.gcube.gcat.client.Item;
import com.fasterxml.jackson.databind.ObjectMapper;
/**
 * Manual scratch class for exercising the gCat client; it contains no automated test.
 */
public class CKANTest {

    /** JSON mapper used by {@link #toCatalogueFormat()}. */
    private static ObjectMapper mapper = new ObjectMapper();

    /** Only sets the token for the hard-coded scope; nothing else is invoked. */
    public static void main (String args[]) {
        TokenSetter.set("/pred4s/preprod/preVRE");
    }

    /** Creates an item from the given content; the client's result is discarded. */
    public static void createItem(String itemContent) throws MalformedURLException {
        Item catalogueItem = new Item();
        catalogueItem.create(itemContent);
    }

    /** Reads the item with the given name; the client's result is discarded. */
    public static void getItem(String name) throws MalformedURLException {
        Item catalogueItem = new Item();
        catalogueItem.read(name);
    }

    /** Updates the named item with the given content; the client's result is discarded. */
    public static void updateItem(String name, String itemContent) throws MalformedURLException {
        Item catalogueItem = new Item();
        catalogueItem.update(name, itemContent);
    }

    /**
     * Serialises this object with the shared mapper.
     *
     * NOTE(review): this writes the CKANTest instance itself, which declares no
     * instance fields; it looks like a leftover, confirm whether it is still needed.
     *
     * @throws InternalConversionException wrapping anything thrown during serialisation
     */
    public String toCatalogueFormat() throws InternalConversionException {
        try {
            ByteArrayOutputStream buffer = new ByteArrayOutputStream();
            mapper.writeValue(buffer, this);
            String json = buffer.toString();
            return json;
        } catch (Throwable failure) {
            throw new InternalConversionException("Unable to convert",failure);
        }
    }
}

View File

@ -0,0 +1,23 @@
package org.gcube.application.gfeed.oai;
import java.util.Collection;
import javax.xml.bind.JAXBException;
import org.gcube.data.publishing.gFeed.collectors.oai.OAIClient;
import org.gcube.data.publishing.gFeed.collectors.oai.model.OAIInteractionException;
import org.gcube.data.publishing.gFeed.collectors.oai.model.OAIRecord;
/**
 * Manual runner: harvests all Dublin Core records from a hard-coded repository and
 * prints how many were received. Requires network access.
 */
public class OAIClientTests {

    /**
     * @param args unused
     * @throws JAXBException           if a response cannot be unmarshalled
     * @throws OAIInteractionException if the client reports an interaction failure
     */
    public static void main (String[] args) throws JAXBException, OAIInteractionException {
        OAIClient client = new OAIClient("https://data.inrae.fr/oai");
        Collection<OAIRecord> harvested = client.getAll(OAIClient.DC_METADATA_PREFIX);
        int count = harvested.size();
        System.out.println("Records size = "+count);
    }
}

View File

@ -0,0 +1,89 @@
package org.gcube.application.gfeed.oai;
import static org.junit.Assert.assertTrue;
import java.io.File;
import java.io.IOException;
import java.net.URL;
import java.nio.charset.Charset;
import java.nio.file.Paths;
import javax.xml.bind.JAXBContext;
import javax.xml.bind.JAXBException;
import javax.xml.bind.Unmarshaller;
import org.gcube.data.publishing.gFeed.collectors.oai.model.DCRecordMetadata;
import org.gcube.data.publishing.gFeed.collectors.oai.model.MetadataHolder;
import org.gcube.data.publishing.gFeed.collectors.oai.model.OAIMetadata;
import org.gcube.data.publishing.gFeed.collectors.oai.model.OAIRecord;
import org.gcube.data.publishing.gFeed.collectors.oai.model.OAI_PMH;
import org.junit.Assert;
import org.junit.BeforeClass;
import org.junit.Test;
import lombok.extern.slf4j.Slf4j;
/**
 * Unmarshalling tests for OAI-PMH responses stored as classpath resources,
 * plus small helpers for locating and reading those resources.
 */
@Slf4j
public class ParsingTests {

    /** JAXB context used by the tests; initialised once in {@link #init()}. */
    static JAXBContext jaxbContext;

    /**
     * Builds the JAXB context over the OAI model classes before any test runs.
     *
     * @throws JAXBException if the context cannot be created
     */
    @BeforeClass
    public static void init() throws JAXBException {
        jaxbContext = JAXBContext.newInstance(OAIRecord.class,
                MetadataHolder.class,
                OAIMetadata.class,
                DCRecordMetadata.class,
                OAI_PMH.class);
    }

    /**
     * Unmarshals {@code resp_dc.xml} and checks that it yields at least one record.
     *
     * @throws JAXBException if unmarshalling fails
     */
    @Test
    public void parseDC() throws JAXBException {
        File toRead = getFileFromResources("resp_dc.xml");
        Unmarshaller jaxbUnmarshaller = jaxbContext.createUnmarshaller();
        OAI_PMH response = (OAI_PMH) jaxbUnmarshaller.unmarshal(toRead);
        Assert.assertNotNull(response);
        // Fail with an assertion rather than a NullPointerException when the
        // response carries no record container at all.
        Assert.assertNotNull(response.getResponseRecords());
        Assert.assertNotNull(response.getResponseRecords().getRecords());
        assertTrue(response.getResponseRecords().getRecords().size() > 0);
        for (OAIRecord record : response.getResponseRecords().getRecords()) {
            log.debug("Record is {}", record);
        }
    }

    /**
     * Placeholder: the marshalling call is currently disabled, so this test
     * performs no checks.
     *
     * @throws JAXBException declared for the disabled marshalling call
     */
    @Test
    public void marshal() throws JAXBException {
        // jaxbContext.createMarshaller().marshal(new OAI_PMH("dummy"),System.out);
    }

    /**
     * Resolves a classpath resource to a {@link File}.
     *
     * <p>The resource URL is converted through its URI so that percent-encoded
     * characters in the path (for instance spaces in a directory name) are
     * decoded; {@code URL.getFile()} would leave them encoded and point to a
     * non-existent file.
     *
     * @param fileName resource name, resolved with this class's class loader
     * @return the file backing the resource
     * @throws IllegalArgumentException if the resource does not exist or its
     *         URL cannot be mapped to a local file
     */
    public static File getFileFromResources(String fileName) {
        ClassLoader classLoader = ParsingTests.class.getClassLoader();
        URL resource = classLoader.getResource(fileName);
        if (resource == null) {
            throw new IllegalArgumentException("file is not found! (" + fileName + ")");
        }
        try {
            return new File(resource.toURI());
        } catch (java.net.URISyntaxException e) {
            throw new IllegalArgumentException("Invalid resource location for " + fileName, e);
        }
    }

    /**
     * Reads a whole file into a String.
     *
     * @param path     file system path of the file to read
     * @param encoding charset used to decode the file's bytes
     * @return the decoded file content
     * @throws IOException if the file cannot be read
     */
    public static String readFileAsString(String path, Charset encoding)
            throws IOException {
        byte[] encoded = java.nio.file.Files.readAllBytes(Paths.get(path));
        return new String(encoded, encoding);
    }
}

View File

@ -0,0 +1,46 @@
package org.gcube.application.gfeed.oai;
import java.io.IOException;
import java.util.Collection;
import java.util.Set;
import org.codehaus.jackson.JsonGenerationException;
import org.codehaus.jackson.map.JsonMappingException;
import org.codehaus.jackson.map.ObjectMapper;
import org.gcube.data.publishing.gCatFeeder.model.CatalogueFormatData;
import org.gcube.data.publishing.gCatFeeder.model.InternalConversionException;
import org.gcube.data.publishing.gCatFeeder.tests.BaseCollectorTest;
import org.gcube.data.publishing.gCatfeeder.collectors.CollectorPlugin;
import org.gcube.data.publishing.gCatfeeder.collectors.DataCollector;
import org.gcube.data.publishing.gCatfeeder.collectors.DataTransformer;
import org.gcube.data.publishing.gCatfeeder.collectors.model.faults.CatalogueNotSupportedException;
import org.gcube.data.publishing.gCatfeeder.collectors.model.faults.CollectorFault;
import org.gcube.data.publishing.gFeed.collectors.oai.OAIHarvester;
import org.gcube.data.publishing.gFeed.collectors.oai.model.OAIRecord;
import org.junit.Assume;
import org.junit.Test;
/**
 * Infrastructure-dependent test: collects records with the OAI harvester
 * plugin, prints them, then prints each record transformed for every
 * catalogue type the plugin supports.
 */
public class TranslationTest extends BaseCollectorTest {

    /**
     * Runs collection and translation end to end; skipped through a JUnit
     * assumption when the test infrastructure is not enabled.
     */
    @Test
    public void testTranslation() throws CollectorFault, CatalogueNotSupportedException, JsonGenerationException, JsonMappingException, IOException, InternalConversionException {
        Assume.assumeTrue(isTestInfrastructureEnabled());
        System.out.println("Entering Infrastructure enabled tests..");

        ObjectMapper jsonMapper = new ObjectMapper();

        CollectorPlugin harvester = new OAIHarvester();
        harvester.setEnvironmentConfiguration(getEnvironmentConfiguration());

        DataCollector dataCollector = harvester.getCollector();
        Collection harvestedItems = dataCollector.collect();
        System.out.println("Found " + harvestedItems.size() + " elements");

        // Dump every collected element as serialized by the mapper.
        for (Object harvestedItem : harvestedItems) {
            String serialized = jsonMapper.writeValueAsString(harvestedItem);
            System.out.println(serialized + "\n");
        }

        // Translate the whole collection once per supported catalogue type.
        Set<String> catalogueTypes = (Set<String>) harvester.getSupportedCatalogueTypes();
        for (String catalogueType : catalogueTypes) {
            DataTransformer<? extends CatalogueFormatData, OAIRecord> catalogueTransformer = harvester.getTransformerByCatalogueType(catalogueType);
            for (Object translated : catalogueTransformer.transform(harvestedItems)) {
                CatalogueFormatData formatted = (CatalogueFormatData) translated;
                System.out.println(formatted.toCatalogueFormat());
            }
        }
    }
}

View File

@ -0,0 +1,25 @@
<record>
<header>
<identifier>doi:10.15454/000PKT</identifier>
<datestamp>2019-10-28T10:32:34Z</datestamp>
<setSpec>ALL</setSpec>
<setSpec>UMR_AMAP</setSpec>
</header>
<metadata>
<oai_dc:dc xmlns:dc="http://purl.org/dc/elements/1.1/"
xmlns:oai_dc="http://www.openarchives.org/OAI/2.0/oai_dc/"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://www.openarchives.org/OAI/2.0/oai_dc/ http://www.openarchives.org/OAI/2.0/oai_dc.xsd">
<dc:title>INRA:Beet:AKER_8354</dc:title>
<dc:identifier>https://doi.org/10.15454/000PKT</dc:identifier>
<dc:creator>GnpIS</dc:creator>
<dc:publisher>Portail Data Inra</dc:publisher>
<dc:description>Abstract:AKER_8354 is a Beet accession from GnpIS.</dc:description>
<dc:subject>Genetic Resource</dc:subject>
<dc:language>English</dc:language>
<dc:date>2017-05-08</dc:date>
<dc:contributor>Rinnova</dc:contributor>
<dc:type>Physical Object</dc:type>
</oai_dc:dc>
</metadata>
</record>

View File

@ -0,0 +1,5 @@
<OAI-PMH xmlns="http://www.openarchives.org/OAI/2.0/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.openarchives.org/OAI/2.0/ http://www.openarchives.org/OAI/2.0/OAI-PMH.xsd">
<responseDate>2020-05-07T14:32:09Z</responseDate>
<request metadataPrefix="oai_dc" resumptionToken="MToxMDB8Mjp8Mzp8NDp8NTpvYWlfZGM=" verb="ListRecords">https://data.inrae.fr/oai</request>
<error code="badArgument">ResumptionToken cannot be sent together with from, until, metadataPrefix or set parameters</error>
</OAI-PMH>

File diff suppressed because it is too large Load Diff

View File

@ -33,6 +33,7 @@
<module>catalogue-plugin-framework</module>
<module>gCat-Controller</module>
<module>test-commons</module>
<module>oai-harvester</module>
</modules>
<dependencyManagement>