From cb0acf93d8844428fae24db3d61a8b9948e9f8cf Mon Sep 17 00:00:00 2001
From: Antonis Lempesis
Date: Tue, 14 Jul 2020 11:55:45 +0000
Subject: [PATCH] first implementation of the new oai-pmh client

---
 pom.xml                                       |   7 +-
 .../repo/manager/oaipmh/OAIPMHClient.java     | 181 ++++++++++++++++++
 .../repo/manager/oaipmh/OAIPMHException.java  |  19 +++
 3 files changed, 205 insertions(+), 2 deletions(-)
 create mode 100644 src/main/java/eu/dnetlib/repo/manager/oaipmh/OAIPMHClient.java
 create mode 100644 src/main/java/eu/dnetlib/repo/manager/oaipmh/OAIPMHException.java

diff --git a/pom.xml b/pom.xml
index a7dccd0..ec2f399 100644
--- a/pom.xml
+++ b/pom.xml
@@ -300,5 +300,8 @@
             <version>2.26.0</version>
             <scope>test</scope>
         </dependency>
-
-
+        <dependency>
+            <groupId>org.apache.httpcomponents</groupId>
+            <artifactId>httpclient</artifactId>
+            <version>4.5.3</version>
+        </dependency>
diff --git a/src/main/java/eu/dnetlib/repo/manager/oaipmh/OAIPMHClient.java b/src/main/java/eu/dnetlib/repo/manager/oaipmh/OAIPMHClient.java
new file mode 100644
index 0000000..a6f4c58
--- /dev/null
+++ b/src/main/java/eu/dnetlib/repo/manager/oaipmh/OAIPMHClient.java
@@ -0,0 +1,181 @@
+package eu.dnetlib.repo.manager.oaipmh;
+
+import org.apache.http.HttpRequest;
+import org.apache.http.HttpResponse;
+import org.apache.http.client.HttpClient;
+import org.apache.http.client.methods.CloseableHttpResponse;
+import org.apache.http.client.methods.HttpGet;
+import org.apache.http.client.methods.HttpUriRequest;
+import org.apache.http.client.methods.RequestBuilder;
+import org.apache.http.impl.client.CloseableHttpClient;
+import org.apache.http.impl.client.HttpClientBuilder;
+import org.w3c.dom.Document;
+import org.w3c.dom.Node;
+import org.w3c.dom.NodeList;
+import org.xml.sax.SAXException;
+
+import javax.xml.XMLConstants;
+import javax.xml.parsers.DocumentBuilder;
+import javax.xml.parsers.DocumentBuilderFactory;
+import javax.xml.parsers.ParserConfigurationException;
+import javax.xml.transform.stream.StreamSource;
+import javax.xml.validation.Schema;
+import javax.xml.validation.SchemaFactory;
+import javax.xml.validation.Validator;
+import javax.xml.xpath.XPath;
+import javax.xml.xpath.XPathConstants;
+import javax.xml.xpath.XPathExpressionException;
+import javax.xml.xpath.XPathFactory;
+import java.io.IOException;
+import java.io.InputStream;
+import java.net.URL;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+
+/**
+ * Minimal OAI-PMH client: checks that a base URL answers OAI-PMH requests and
+ * lists the sets a repository exposes.
+ */
+public class OAIPMHClient {
+
+    private static final Logger LOGGER = Logger.getLogger(OAIPMHClient.class.getName());
+
+    /** Location of the XML schema that OAI-PMH responses are validated against. */
+    private static final String OAI_PMH_SCHEMA_URL = "http://www.openarchives.org/OAI/2.0/OAI-PMH.xsd";
+
+    /**
+     * Sends an {@code Identify} request to the given base URL and checks the answer.
+     *
+     * If the {@code Identify} response does not validate against the OAI-PMH schema,
+     * the URL is still accepted when {@link #getSets(String)} succeeds for it.
+     *
+     * @param url the OAI-PMH base URL to verify
+     * @throws OAIPMHException if the request fails, the server does not answer with
+     *                         HTTP 200, or the {@code ListSets} fallback fails
+     */
+    public void verifyURL(String url) throws OAIPMHException {
+        HttpUriRequest request = RequestBuilder.get().setUri(url)
+                .addParameter("verb", "Identify").build();
+
+        boolean valid;
+        // Client and response are closed on exit so connections are not leaked.
+        try (CloseableHttpClient client = HttpClientBuilder.create().build();
+             CloseableHttpResponse response = client.execute(request)) {
+            if (response.getStatusLine().getStatusCode() != 200) {
+                throw new OAIPMHException("Error getting response from server: '"
+                        + response.getStatusLine().getReasonPhrase() + "'");
+            }
+
+            valid = validateResponse(response.getEntity().getContent());
+        } catch (IOException e) {
+            throw new OAIPMHException("Error verifying URL ", e);
+        }
+
+        if (!valid) {
+            // Lenient fallback: an invalid Identify response is tolerated as long as
+            // ListSets works; any failure there propagates to the caller.
+            getSets(url);
+        }
+    }
+
+    /**
+     * Retrieves all sets of the repository, following resumption tokens.
+     *
+     * @param url the OAI-PMH base URL of the repository
+     * @return a map from {@code setSpec} to {@code setName}; empty if no set is listed
+     * @throws OAIPMHException if a request fails, the server does not answer with
+     *                         HTTP 200, or the response is invalid or cannot be parsed
+     */
+    public Map<String, String> getSets(String url) throws OAIPMHException {
+        Map<String, String> result = new HashMap<>();
+
+        try (CloseableHttpClient client = HttpClientBuilder.create().build()) {
+            // Probe request: check status and schema validity before paging.
+            try (CloseableHttpResponse response = client.execute(listSetsRequest(url, null))) {
+                if (response.getStatusLine().getStatusCode() != 200) {
+                    throw new OAIPMHException("Error getting response from server. Error: '"
+                            + response.getStatusLine().getReasonPhrase() + "'");
+                }
+
+                if (!validateResponse(response.getEntity().getContent())) {
+                    throw new OAIPMHException("Response is not valid according to OAI-PMH schema");
+                }
+            }
+
+            DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
+            // The XML comes from a remote server: refuse DOCTYPEs to rule out XXE.
+            dbf.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true);
+            DocumentBuilder db = dbf.newDocumentBuilder();
+            XPath xpath = XPathFactory.newInstance().newXPath();
+
+            String token = null;
+            do {
+                Document xml;
+                try (CloseableHttpResponse page = client.execute(listSetsRequest(url, token))) {
+                    xml = db.parse(page.getEntity().getContent());
+                }
+
+                NodeList sets = (NodeList) xpath.evaluate("//*[local-name()='setSpec']", xml, XPathConstants.NODESET);
+
+                for (int i = 0; i < sets.getLength(); i++) {
+                    Node setSpec = sets.item(i);
+                    // getNextSibling() may be a whitespace text node (or null), so the
+                    // name is looked up as the first following setName element instead.
+                    String setName = xpath.evaluate("following-sibling::*[local-name()='setName'][1]", setSpec);
+
+                    result.put(setSpec.getTextContent(), setName);
+                }
+
+                // A response holding the complete list may have no resumptionToken element.
+                Node tokenNode = (Node) xpath.evaluate("//*[local-name()='resumptionToken']", xml, XPathConstants.NODE);
+                token = tokenNode == null ? null : tokenNode.getTextContent();
+            } while (token != null && !token.isEmpty());
+        } catch (IOException | ParserConfigurationException | SAXException | XPathExpressionException e) {
+            throw new OAIPMHException("Error getting sets", e);
+        }
+
+        return result;
+    }
+
+    /** Builds a {@code ListSets} GET request, adding the resumption token when one is given. */
+    private static HttpUriRequest listSetsRequest(String url, String resumptionToken) {
+        RequestBuilder builder = RequestBuilder.get().setUri(url).addParameter("verb", "ListSets");
+
+        if (resumptionToken != null) {
+            builder.addParameter("resumptionToken", resumptionToken);
+        }
+
+        return builder.build();
+    }
+
+    /**
+     * Validates a response body against the OAI-PMH schema.
+     *
+     * @param response the response body to validate
+     * @return {@code true} if the body is valid, {@code false} if validation fails
+     *         or the schema or the body cannot be read
+     */
+    private boolean validateResponse(InputStream response) {
+        try {
+            SchemaFactory factory = SchemaFactory.newInstance(XMLConstants.W3C_XML_SCHEMA_NS_URI);
+            // Loading by URL leaves opening and closing the schema stream to the parser.
+            Schema schema = factory.newSchema(new URL(OAI_PMH_SCHEMA_URL));
+            Validator validator = schema.newValidator();
+            validator.validate(new StreamSource(response));
+            return true;
+        } catch (IOException | SAXException ex) {
+            LOGGER.log(Level.WARNING, "OAI-PMH response failed schema validation", ex);
+            return false;
+        }
+    }
+
+    /** Manual smoke test: prints the names of the sets of a sample repository. */
+    public static void main(String[] args) throws OAIPMHException {
+        Map<String, String> sets = new OAIPMHClient().getSets("http://digital.csic.es/dspace-oai/request");
+
+        // Iterate the values directly; looking entries up with get(entry) always yields null.
+        sets.values().forEach(System.out::println);
+    }
+}
diff --git a/src/main/java/eu/dnetlib/repo/manager/oaipmh/OAIPMHException.java b/src/main/java/eu/dnetlib/repo/manager/oaipmh/OAIPMHException.java
new file mode 100644
index 0000000..b6a82ae
--- /dev/null
+++ b/src/main/java/eu/dnetlib/repo/manager/oaipmh/OAIPMHException.java
@@ -0,0 +1,19 @@
+package eu.dnetlib.repo.manager.oaipmh;
+
+/** Signals a failed OAI-PMH request or an invalid OAI-PMH response. */
+public class OAIPMHException extends Exception {
+    public OAIPMHException() {
+    }
+
+    public OAIPMHException(String message) {
+        super(message);
+    }
+
+    public OAIPMHException(String message, Throwable cause) {
+        super(message, cause);
+    }
+
+    public OAIPMHException(Throwable cause) {
+        super(cause);
+    }
+}