package eu.dnetlib.data.collector.plugins.datasets; import java.io.IOException; import java.io.InputStream; import java.util.Iterator; import org.apache.commons.io.IOUtils; import org.apache.commons.lang3.StringEscapeUtils; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.http.client.methods.CloseableHttpResponse; import org.apache.http.client.methods.HttpPost; import org.apache.http.entity.StringEntity; import org.apache.http.impl.client.CloseableHttpClient; import org.apache.http.impl.client.HttpClients; import com.google.gson.Gson; import com.google.gson.GsonBuilder; /** * The Class JournalIterator. */ public class DatasetsIterator implements Iterable, Iterator { /** The logger. */ private static final Log log = LogFactory.getLog(DatasetsIterator.class); /** The base url template. */ private static String BASE_URL_TEMPLATE = "http://ws.pangaea.de/es/pangaea/panmd/_search?_source=xml&size=%d&from=%d"; /** The journal id. */ private String journalId = ""; /** The journal name. */ private String journalName = ""; /** The journal issn. */ private String journalISSN = ""; /** The openaire datasource. */ private String openaireDatasource = ""; /** The total. */ private long total; /** The from. */ private int from; /** The current iterator. */ private int currentIterator; /** The current response. */ private ElasticSearchResponse currentResponse; /** The request. */ private RequestField request; /** The default size. */ private static int DEFAULT_SIZE = 10; private String projectCordaId; private static String RECORD_TEMPLATE = "%s" + "%s"; /** * Instantiates a new journal iterator. * * @param request * the request */ public DatasetsIterator(final RequestField request, final String projectCordaId, final PangaeaJournalInfo info) { this.request = request; this.setProjectCordaId(projectCordaId); if (info != null) { this.setJournalId(info.getJournalId()); this.setJournalName(StringEscapeUtils.escapeXml(info.getJournalName())); this.setJournalISSN(info.getJournalISSN()); this.setOpenaireDatasource(info.getDatasourceId()); } log.debug("Start Iterator"); } /** * Execute query. * * @param from * the from * @param size * the size * @return the string */ private String executeQuery(final int from, final int size) { log.debug("executing query " + this.request.getQuery().getTerm()); log.debug(String.format("from:%d size:%d", from, size)); CloseableHttpResponse response = null; InputStream responseBody = null; CloseableHttpClient httpclient = HttpClients.createDefault(); try { HttpPost post = new HttpPost(String.format(BASE_URL_TEMPLATE, size, from)); Gson g = new GsonBuilder().disableHtmlEscaping().create(); StringEntity entry = new StringEntity(g.toJson(this.request)); post.setEntity(entry); long start = System.currentTimeMillis(); response = httpclient.execute(post); int statusCode = response.getStatusLine().getStatusCode(); if (statusCode == 200) { responseBody = response.getEntity().getContent(); String s = IOUtils.toString(responseBody); log.debug("Request done in " + (System.currentTimeMillis() - start) + " ms"); responseBody.close(); return s; } return null; } catch (Exception e) { log.error("Error on executing query :" + request.getQuery().getTerm(), e); return null; } finally { try { responseBody.close(); response.close(); httpclient.close(); } catch (IOException e) { log.error("Can't close connections gracefully", e); } } } /** * Gets the journal id. * * @return the journalId */ public String getJournalId() { return journalId; } /** * Sets the journal id. * * @param journalId * the journalId to set */ public void setJournalId(final String journalId) { this.journalId = journalId; } /* * (non-Javadoc) * * @see java.util.Iterator#hasNext() */ @Override public boolean hasNext() { return (from + currentIterator) < total; } /* * (non-Javadoc) * * @see java.util.Iterator#next() */ @Override public String next() { String xml = String.format(RECORD_TEMPLATE, this.projectCordaId, this.journalName, this.journalISSN, this.openaireDatasource, currentResponse .getXmlRecords().get(currentIterator)); currentIterator++; if (currentIterator == DEFAULT_SIZE) { getNextItem(); } return xml; } /* * (non-Javadoc) * * @see java.util.Iterator#remove() */ @Override public void remove() { throw new UnsupportedOperationException(); } /* * (non-Javadoc) * * @see java.lang.Iterable#iterator() */ @Override public Iterator iterator() { from = 0; total = 0; getNextItem(); return this; } /** * Gets the next item. * * @return the next item */ private void getNextItem() { from += currentIterator; currentResponse = ElasticSearchResponse.createNewResponse(executeQuery(from, DEFAULT_SIZE)); total = currentResponse == null ? 0 : currentResponse.getTotal(); log.debug("from : " + from + " total of the request is " + total); currentIterator = 0; } /** * @return the projectCordaId */ public String getProjectCordaId() { return projectCordaId; } /** * @param projectCordaId * the projectCordaId to set */ public void setProjectCordaId(final String projectCordaId) { this.projectCordaId = projectCordaId; } /** * @return the journalName */ public String getJournalName() { return journalName; } /** * @param journalName * the journalName to set */ public void setJournalName(final String journalName) { this.journalName = journalName; } /** * @return the journalISSN */ public String getJournalISSN() { return journalISSN; } /** * @param journalISSN * the journalISSN to set */ public void setJournalISSN(final String journalISSN) { this.journalISSN = journalISSN; } /** * @return the openaireDatasource */ public String getOpenaireDatasource() { return openaireDatasource; } /** * @param openaireDatasource * the openaireDatasource to set */ public void setOpenaireDatasource(final String openaireDatasource) { this.openaireDatasource = openaireDatasource; } }