275 lines
6.4 KiB
Java
275 lines
6.4 KiB
Java
package eu.dnetlib.data.collector.plugins.datasets;
|
|
|
|
import java.io.IOException;
|
|
import java.io.InputStream;
|
|
import java.util.Iterator;
|
|
|
|
import org.apache.commons.io.IOUtils;
|
|
import org.apache.commons.lang3.StringEscapeUtils;
|
|
import org.apache.commons.logging.Log;
|
|
import org.apache.commons.logging.LogFactory;
|
|
import org.apache.http.client.methods.CloseableHttpResponse;
|
|
import org.apache.http.client.methods.HttpPost;
|
|
import org.apache.http.entity.StringEntity;
|
|
import org.apache.http.impl.client.CloseableHttpClient;
|
|
import org.apache.http.impl.client.HttpClients;
|
|
|
|
import com.google.gson.Gson;
|
|
import com.google.gson.GsonBuilder;
|
|
|
|
/**
|
|
* The Class JournalIterator.
|
|
*/
|
|
public class DatasetsIterator implements Iterable<String>, Iterator<String> {
|
|
|
|
/** The logger. */
|
|
private static final Log log = LogFactory.getLog(DatasetsIterator.class);
|
|
|
|
/** The base url template. */
|
|
private static String BASE_URL_TEMPLATE = "http://ws.pangaea.de/es/pangaea/panmd/_search?_source=xml&size=%d&from=%d";
|
|
|
|
/** The journal id. */
|
|
private String journalId = "";
|
|
|
|
/** The journal name. */
|
|
private String journalName = "";
|
|
|
|
/** The journal issn. */
|
|
private String journalISSN = "";
|
|
|
|
/** The openaire datasource. */
|
|
private String openaireDatasource = "";
|
|
|
|
/** The total. */
|
|
private long total;
|
|
|
|
/** The from. */
|
|
private int from;
|
|
|
|
/** The current iterator. */
|
|
private int currentIterator;
|
|
|
|
/** The current response. */
|
|
private ElasticSearchResponse currentResponse;
|
|
|
|
/** The request. */
|
|
private RequestField request;
|
|
|
|
/** The default size. */
|
|
private static int DEFAULT_SIZE = 10;
|
|
|
|
private String projectCordaId;
|
|
|
|
private static String RECORD_TEMPLATE = "<datasetsRecord><oaf:projectid xmlns:oaf=\"http://namespace.openaire.eu/oaf\">%s</oaf:projectid>"
|
|
+ "<journal name='%s' issn='%s' datasourceid = '%s'/><metadata>%s</metadata></datasetsRecord>";
|
|
|
|
/**
|
|
* Instantiates a new journal iterator.
|
|
*
|
|
* @param request
|
|
* the request
|
|
*/
|
|
public DatasetsIterator(final RequestField request, final String projectCordaId, final PangaeaJournalInfo info) {
|
|
this.request = request;
|
|
this.setProjectCordaId(projectCordaId);
|
|
|
|
if (info != null) {
|
|
this.setJournalId(info.getJournalId());
|
|
this.setJournalName(StringEscapeUtils.escapeXml(info.getJournalName()));
|
|
this.setJournalISSN(info.getJournalISSN());
|
|
this.setOpenaireDatasource(info.getDatasourceId());
|
|
}
|
|
log.debug("Start Iterator");
|
|
}
|
|
|
|
/**
|
|
* Execute query.
|
|
*
|
|
* @param from
|
|
* the from
|
|
* @param size
|
|
* the size
|
|
* @return the string
|
|
*/
|
|
private String executeQuery(final int from, final int size) {
|
|
log.debug("executing query " + this.request.getQuery().getTerm());
|
|
log.debug(String.format("from:%d size:%d", from, size));
|
|
CloseableHttpResponse response = null;
|
|
InputStream responseBody = null;
|
|
CloseableHttpClient httpclient = HttpClients.createDefault();
|
|
try {
|
|
|
|
HttpPost post = new HttpPost(String.format(BASE_URL_TEMPLATE, size, from));
|
|
Gson g = new GsonBuilder().disableHtmlEscaping().create();
|
|
StringEntity entry = new StringEntity(g.toJson(this.request));
|
|
post.setEntity(entry);
|
|
long start = System.currentTimeMillis();
|
|
response = httpclient.execute(post);
|
|
int statusCode = response.getStatusLine().getStatusCode();
|
|
if (statusCode == 200) {
|
|
responseBody = response.getEntity().getContent();
|
|
String s = IOUtils.toString(responseBody);
|
|
log.debug("Request done in " + (System.currentTimeMillis() - start) + " ms");
|
|
responseBody.close();
|
|
return s;
|
|
}
|
|
return null;
|
|
} catch (Exception e) {
|
|
log.error("Error on executing query :" + request.getQuery().getTerm(), e);
|
|
return null;
|
|
} finally {
|
|
try {
|
|
responseBody.close();
|
|
response.close();
|
|
httpclient.close();
|
|
} catch (IOException e) {
|
|
log.error("Can't close connections gracefully", e);
|
|
}
|
|
}
|
|
|
|
}
|
|
|
|
/**
|
|
* Gets the journal id.
|
|
*
|
|
* @return the journalId
|
|
*/
|
|
public String getJournalId() {
|
|
return journalId;
|
|
}
|
|
|
|
/**
|
|
* Sets the journal id.
|
|
*
|
|
* @param journalId
|
|
* the journalId to set
|
|
*/
|
|
public void setJournalId(final String journalId) {
|
|
this.journalId = journalId;
|
|
}
|
|
|
|
/*
|
|
* (non-Javadoc)
|
|
*
|
|
* @see java.util.Iterator#hasNext()
|
|
*/
|
|
@Override
|
|
public boolean hasNext() {
|
|
return (from + currentIterator) < total;
|
|
}
|
|
|
|
/*
|
|
* (non-Javadoc)
|
|
*
|
|
* @see java.util.Iterator#next()
|
|
*/
|
|
@Override
|
|
public String next() {
|
|
String xml = String.format(RECORD_TEMPLATE, this.projectCordaId, this.journalName, this.journalISSN, this.openaireDatasource, currentResponse
|
|
.getXmlRecords().get(currentIterator));
|
|
currentIterator++;
|
|
if (currentIterator == DEFAULT_SIZE) {
|
|
getNextItem();
|
|
}
|
|
return xml;
|
|
}
|
|
|
|
/*
|
|
* (non-Javadoc)
|
|
*
|
|
* @see java.util.Iterator#remove()
|
|
*/
|
|
@Override
|
|
public void remove() {
|
|
throw new UnsupportedOperationException();
|
|
|
|
}
|
|
|
|
/*
|
|
* (non-Javadoc)
|
|
*
|
|
* @see java.lang.Iterable#iterator()
|
|
*/
|
|
@Override
|
|
public Iterator<String> iterator() {
|
|
from = 0;
|
|
total = 0;
|
|
getNextItem();
|
|
return this;
|
|
}
|
|
|
|
/**
|
|
* Gets the next item.
|
|
*
|
|
* @return the next item
|
|
*/
|
|
private void getNextItem() {
|
|
from += currentIterator;
|
|
currentResponse = ElasticSearchResponse.createNewResponse(executeQuery(from, DEFAULT_SIZE));
|
|
total = currentResponse == null ? 0 : currentResponse.getTotal();
|
|
log.debug("from : " + from + " total of the request is " + total);
|
|
currentIterator = 0;
|
|
}
|
|
|
|
/**
|
|
* @return the projectCordaId
|
|
*/
|
|
public String getProjectCordaId() {
|
|
return projectCordaId;
|
|
}
|
|
|
|
/**
|
|
* @param projectCordaId
|
|
* the projectCordaId to set
|
|
*/
|
|
public void setProjectCordaId(final String projectCordaId) {
|
|
this.projectCordaId = projectCordaId;
|
|
}
|
|
|
|
/**
|
|
* @return the journalName
|
|
*/
|
|
public String getJournalName() {
|
|
return journalName;
|
|
}
|
|
|
|
/**
|
|
* @param journalName
|
|
* the journalName to set
|
|
*/
|
|
public void setJournalName(final String journalName) {
|
|
this.journalName = journalName;
|
|
}
|
|
|
|
/**
|
|
* @return the journalISSN
|
|
*/
|
|
public String getJournalISSN() {
|
|
return journalISSN;
|
|
}
|
|
|
|
/**
|
|
* @param journalISSN
|
|
* the journalISSN to set
|
|
*/
|
|
public void setJournalISSN(final String journalISSN) {
|
|
this.journalISSN = journalISSN;
|
|
}
|
|
|
|
/**
|
|
* @return the openaireDatasource
|
|
*/
|
|
public String getOpenaireDatasource() {
|
|
return openaireDatasource;
|
|
}
|
|
|
|
/**
|
|
* @param openaireDatasource
|
|
* the openaireDatasource to set
|
|
*/
|
|
public void setOpenaireDatasource(final String openaireDatasource) {
|
|
this.openaireDatasource = openaireDatasource;
|
|
}
|
|
|
|
}
|