137 lines
4.0 KiB
Java
137 lines
4.0 KiB
Java
package eu.dnetlib.data.collector.plugins.projects.grist;
|
|
|
|
import java.io.IOException;
import java.io.InputStream;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.Iterator;
import java.util.List;
import java.util.NoSuchElementException;
import java.util.Queue;
import java.util.concurrent.PriorityBlockingQueue;

import eu.dnetlib.data.collector.rmi.CollectorServiceException;
import eu.dnetlib.data.collector.rmi.CollectorServiceRuntimeException;
import eu.dnetlib.enabling.resultset.SizedIterable;
import org.apache.commons.io.IOUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.dom4j.Document;
import org.dom4j.DocumentException;
import org.dom4j.Element;
import org.dom4j.io.SAXReader;
|
|
|
|
public class GristProjectsIterable implements SizedIterable<String> {
|
|
|
|
private static final Log log = LogFactory.getLog(GristProjectsIterable.class); // NOPMD by marko on 11/24/08 5:02 PM
|
|
|
|
private String queryURL;
|
|
private int total;
|
|
private SAXReader reader;
|
|
|
|
public GristProjectsIterable(String baseURL) throws CollectorServiceException {
|
|
queryURL = baseURL;
|
|
reader = new SAXReader();
|
|
total = getTotalCount();
|
|
}
|
|
|
|
@Override
|
|
public int getNumberOfElements() {
|
|
return total;
|
|
}
|
|
|
|
private int getTotalCount() throws CollectorServiceException {
|
|
try {
|
|
URL pageUrl = new URL(queryURL);
|
|
log.debug("Getting hit count from: " + pageUrl.toString());
|
|
String resultPage = IOUtils.toString(pageUrl);
|
|
Document doc = reader.read(IOUtils.toInputStream(resultPage));
|
|
String hitCount = doc.selectSingleNode("/Response/HitCount").getText();
|
|
return Integer.parseInt(hitCount);
|
|
} catch (NumberFormatException e) {
|
|
log.warn("Cannot set the total count from '/Response/HitCount'");
|
|
} catch (DocumentException e) {
|
|
throw new CollectorServiceException(e);
|
|
} catch (MalformedURLException e) {
|
|
throw new CollectorServiceException(e);
|
|
} catch (IOException e) {
|
|
throw new CollectorServiceException(e);
|
|
}
|
|
return -1;
|
|
}
|
|
|
|
@Override
|
|
public Iterator<String> iterator() {
|
|
return new Iterator<String>() {
|
|
|
|
private Queue<String> projects = new PriorityBlockingQueue<String>();
|
|
private boolean morePages = true;
|
|
private int pageNumber = 0;
|
|
private SAXReader reader = new SAXReader();
|
|
//The following is for debug only
|
|
private int nextCounter = 0;
|
|
|
|
@Override
|
|
public boolean hasNext() {
|
|
try {
|
|
fillProjectListIfNeeded();
|
|
} catch (CollectorServiceException e) {
|
|
throw new CollectorServiceRuntimeException(e);
|
|
}
|
|
return !projects.isEmpty();
|
|
}
|
|
|
|
@Override
|
|
public String next() {
|
|
nextCounter++;
|
|
log.debug(String.format("Calling next %s times. projects queue has %s elements", nextCounter, projects.size()));
|
|
try {
|
|
fillProjectListIfNeeded();
|
|
return projects.poll();
|
|
} catch (CollectorServiceException e) {
|
|
throw new CollectorServiceRuntimeException(e);
|
|
}
|
|
}
|
|
|
|
@Override
|
|
public void remove() {
|
|
throw new UnsupportedOperationException();
|
|
}
|
|
|
|
private boolean fillProjectListIfNeeded() throws CollectorServiceException {
|
|
if (morePages && projects.isEmpty()) {
|
|
String resultPage = getNextPage();
|
|
Document doc = null;
|
|
try {
|
|
doc = reader.read(IOUtils.toInputStream(resultPage));
|
|
List<Element> records = doc.selectNodes("//RecordList/Record");
|
|
if (records != null && !records.isEmpty()) {
|
|
for (Element p : records) {
|
|
|
|
projects.add(p.asXML());
|
|
}
|
|
return true;
|
|
} else {
|
|
log.info("No more projects to read at page nr. " + pageNumber);
|
|
morePages = false;
|
|
return false;
|
|
}
|
|
} catch (DocumentException e) {
|
|
throw new CollectorServiceException(e);
|
|
}
|
|
} else return false;
|
|
}
|
|
|
|
private String getNextPage() {
|
|
pageNumber++;
|
|
try {
|
|
URL pageUrl = new URL(queryURL + "&page=" + pageNumber);
|
|
log.debug("Getting page at: " + pageUrl.toString());
|
|
return IOUtils.toString(pageUrl);
|
|
} catch (Exception e) {
|
|
throw new CollectorServiceRuntimeException("Error on page " + pageNumber, e);
|
|
}
|
|
}
|
|
};
|
|
}
|
|
|
|
}
|