first implementation of the new oai-pmh client

This commit is contained in:
Antonis Lempesis 2020-07-14 11:55:45 +00:00
parent 5b730d0870
commit cb0acf93d8
3 changed files with 157 additions and 2 deletions

View File

@ -300,8 +300,11 @@
<version>2.26.0</version> <version>2.26.0</version>
<scope>test</scope> <scope>test</scope>
</dependency> </dependency>
<dependency>
<groupId>org.apache.httpcomponents</groupId>
<artifactId>httpclient</artifactId>
<version>4.5.3</version>
</dependency>
</dependencies> </dependencies>
<build> <build>

View File

@ -0,0 +1,134 @@
package eu.dnetlib.repo.manager.oaipmh;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
import java.util.HashMap;
import java.util.Map;
import javax.xml.XMLConstants;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.stream.StreamSource;
import javax.xml.validation.Schema;
import javax.xml.validation.SchemaFactory;
import javax.xml.validation.Validator;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathExpressionException;
import javax.xml.xpath.XPathFactory;
import org.apache.http.HttpEntity;
import org.apache.http.HttpRequest;
import org.apache.http.HttpResponse;
import org.apache.http.client.HttpClient;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.methods.HttpUriRequest;
import org.apache.http.client.methods.RequestBuilder;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClientBuilder;
import org.apache.http.util.EntityUtils;
import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;
public class OAIPMHClient {
public void verifyURL(String url) throws OAIPMHException {
try {
HttpClient client = HttpClientBuilder.create().build();
HttpUriRequest request = RequestBuilder.get().setUri(url).
addParameter("verb", "Identify").build();
HttpResponse response = client.execute(request);
if (response.getStatusLine().getStatusCode() == 200) {
if (!validateResponse(response.getEntity().getContent())) {
try {
getSets(url);
} catch (OAIPMHException e) {
throw e;
}
//throw new OAIPMHException("Response is not valid according to OAI-PMH schema");
}
} else {
throw new OAIPMHException("Error getting response from server: '" + response.getStatusLine().getReasonPhrase());
}
} catch (IOException e) {
throw new OAIPMHException("Error verifying URL ", e);
}
}
public Map<String, String> getSets(String url) throws OAIPMHException {
Map<String, String> result = new HashMap<>();
try {
HttpClient client = HttpClientBuilder.create().build();
HttpResponse response = client.execute(new HttpGet(url + "?verb=ListSets"));
if (response.getStatusLine().getStatusCode() != 200) {
throw new OAIPMHException("Error getting response from server. Error: '" + response.getStatusLine().getReasonPhrase());
}
if (!validateResponse(response.getEntity().getContent())) {
throw new OAIPMHException("Response is not valid according to OAI-PMH schema");
}
DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
DocumentBuilder db = dbf.newDocumentBuilder();
XPathFactory xpf = XPathFactory.newInstance();
XPath xpath = xpf.newXPath();
String token = null;
do {
RequestBuilder builder = RequestBuilder.get().setUri(url).addParameter("verb", "ListSets");
if (token != null)
builder.addParameter("resumptionToken", token);
HttpUriRequest request = builder.build();
Document xml = db.parse(client.execute(request).getEntity().getContent());
NodeList sets = (NodeList) xpath.evaluate("//*[local-name()='setSpec']", xml, XPathConstants.NODESET);
for (int i = 0; i < sets.getLength(); i++) {
String setSpec = sets.item(i).getTextContent();
String setName = sets.item(i).getNextSibling().getTextContent();
System.out.println(setSpec);
result.put(setSpec, setName);
}
token = ((Node) xpath.evaluate("//*[local-name()='resumptionToken']", xml, XPathConstants.NODE)).getTextContent();
} while (token != null && !token.isEmpty());
} catch (IOException | ParserConfigurationException | SAXException | XPathExpressionException e) {
throw new OAIPMHException("Error getting sets", e);
}
return result;
}
private boolean validateResponse(InputStream response) {
try {
SchemaFactory factory =
SchemaFactory.newInstance(XMLConstants.W3C_XML_SCHEMA_NS_URI);
Schema schema = factory.newSchema(new StreamSource(new URL("http://www.openarchives.org/OAI/2.0/OAI-PMH.xsd").openStream()));
Validator validator = schema.newValidator();
validator.validate(new StreamSource(response));
return true;
} catch(Exception ex) {
ex.printStackTrace();
return false;
}
}
public static void main(String[] args) throws OAIPMHException {
// new OAIPMHClient().verifyURL("http://digital.csic.es/dspace-oai/request");
Map<String, String> sets = new OAIPMHClient().getSets("http://digital.csic.es/dspace-oai/request");
sets.entrySet().forEach(key -> {
System.out.println(sets.get(key));
});
}
}

View File

@ -0,0 +1,18 @@
package eu.dnetlib.repo.manager.oaipmh;
/**
 * Checked exception signalling that an OAI-PMH operation failed.
 */
public class OAIPMHException extends Exception {

    /** Creates an exception with neither a detail message nor a cause. */
    public OAIPMHException() {
        super();
    }

    /**
     * Creates an exception that wraps an underlying failure.
     *
     * @param cause the underlying failure
     */
    public OAIPMHException(Throwable cause) {
        super(cause);
    }

    /**
     * Creates an exception with a detail message.
     *
     * @param message description of the failure
     */
    public OAIPMHException(String message) {
        super(message);
    }

    /**
     * Creates an exception with a detail message and the underlying failure.
     *
     * @param message description of the failure
     * @param cause the underlying failure
     */
    public OAIPMHException(String message, Throwable cause) {
        super(message, cause);
    }
}