package eu.dnetlib.data.collector.plugins.projects.grist;

import java.io.IOException;
import java.io.InputStream;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.Iterator;
import java.util.List;
import java.util.NoSuchElementException;
import java.util.Queue;
import java.util.concurrent.PriorityBlockingQueue;

import eu.dnetlib.data.collector.rmi.CollectorServiceException;
import eu.dnetlib.data.collector.rmi.CollectorServiceRuntimeException;
import eu.dnetlib.enabling.resultset.SizedIterable;
import org.apache.commons.io.IOUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.dom4j.Document;
import org.dom4j.DocumentException;
import org.dom4j.Element;
import org.dom4j.Node;
import org.dom4j.io.SAXReader;

/**
 * Iterable over the project records exposed by a paged XML query URL.
 *
 * <p>On construction the query URL is fetched once and the total number of hits is read from
 * {@code /Response/HitCount}. Each iterator then fetches {@code queryURL + "&page=" + n} for
 * n = 1, 2, ... and returns every {@code //RecordList/Record} element serialized as an XML string,
 * stopping at the first page that contains no records.
 *
 * <p>NOTE(review): pages are addressed by appending {@code "&page="}, so the base URL presumably
 * already contains a query string - confirm against the callers.
 */
public class GristProjectsIterable implements SizedIterable<String> {

	private static final Log log = LogFactory.getLog(GristProjectsIterable.class); // NOPMD by marko on 11/24/08 5:02 PM

	/** XPath of the element holding the total number of hits. */
	private static final String HIT_COUNT_XPATH = "/Response/HitCount";

	/** XPath selecting the records of a result page. */
	private static final String RECORDS_XPATH = "//RecordList/Record";

	private final String queryURL;
	private final int total;
	private final SAXReader reader;

	/**
	 * Creates the iterable and eagerly reads the total hit count from the given URL.
	 *
	 * @param baseURL the query URL; page requests are built by appending {@code "&page=<n>"} to it
	 * @throws CollectorServiceException if the URL is malformed, cannot be read, or does not return parsable XML
	 */
	public GristProjectsIterable(String baseURL) throws CollectorServiceException {
		queryURL = baseURL;
		reader = new SAXReader();
		total = getTotalCount();
	}

	/**
	 * @return the hit count read at construction time, or -1 when it was missing or not a valid integer
	 */
	@Override
	public int getNumberOfElements() {
		return total;
	}

	/**
	 * Fetches the query URL and extracts the value of {@code /Response/HitCount}.
	 *
	 * @return the parsed hit count, or -1 when the element is absent or its text is not an integer
	 * @throws CollectorServiceException if the URL is malformed or the response cannot be read or parsed
	 */
	private int getTotalCount() throws CollectorServiceException {
		try {
			URL pageUrl = new URL(queryURL);
			log.debug("Getting hit count from: " + pageUrl.toString());
			Document doc = readDocument(reader, pageUrl);
			Node hitCountNode = doc.selectSingleNode(HIT_COUNT_XPATH);
			if (hitCountNode == null) {
				// A missing element is treated like an unparsable one instead of failing with a NullPointerException.
				log.warn("Cannot set the total count from '/Response/HitCount'");
				return -1;
			}
			return Integer.parseInt(hitCountNode.getText().trim());
		} catch (NumberFormatException e) {
			log.warn("Cannot set the total count from '/Response/HitCount'");
		} catch (DocumentException e) {
			throw new CollectorServiceException(e);
		} catch (MalformedURLException e) {
			throw new CollectorServiceException(e);
		} catch (IOException e) {
			throw new CollectorServiceException(e);
		}
		return -1;
	}

	/**
	 * Parses the XML served at the given URL directly from its byte stream, so that the parser
	 * applies the encoding it detects from the document rather than the platform default charset.
	 *
	 * @param saxReader the reader used for parsing
	 * @param url the location of the XML document
	 * @return the parsed document
	 * @throws IOException if the URL cannot be opened
	 * @throws DocumentException if the content cannot be parsed
	 */
	private static Document readDocument(SAXReader saxReader, URL url) throws IOException, DocumentException {
		InputStream in = url.openStream();
		try {
			return saxReader.read(in);
		} finally {
			IOUtils.closeQuietly(in);
		}
	}

	/**
	 * Returns a new iterator that starts again from the first page.
	 *
	 * <p>{@code hasNext()} and {@code next()} may trigger a page fetch; fetch and parse failures are
	 * reported as {@link CollectorServiceRuntimeException}.
	 */
	@Override
	public Iterator<String> iterator() {
		return new Iterator<String>() {

			// NOTE(review): a PriorityBlockingQueue orders its elements by natural String ordering,
			// so the records of a page are returned sorted by their XML text, not in document order.
			// Kept as-is to preserve the existing output order.
			private final Queue<String> projects = new PriorityBlockingQueue<String>();
			private boolean morePages = true;
			private int pageNumber = 0;
			private final SAXReader pageReader = new SAXReader();
			// The following is for debug only
			private int nextCounter = 0;

			@Override
			public boolean hasNext() {
				try {
					fillProjectListIfNeeded();
				} catch (CollectorServiceException e) {
					throw new CollectorServiceRuntimeException(e);
				}
				return !projects.isEmpty();
			}

			/**
			 * @throws NoSuchElementException if there are no more records
			 */
			@Override
			public String next() {
				nextCounter++;
				if (log.isDebugEnabled()) {
					log.debug(String.format("Calling next %s times. projects queue has %s elements", nextCounter, projects.size()));
				}
				try {
					fillProjectListIfNeeded();
				} catch (CollectorServiceException e) {
					throw new CollectorServiceRuntimeException(e);
				}
				String project = projects.poll();
				if (project == null) {
					// Honour the Iterator contract instead of handing out null when exhausted.
					throw new NoSuchElementException("No more projects after page nr. " + pageNumber);
				}
				return project;
			}

			@Override
			public void remove() {
				throw new UnsupportedOperationException();
			}

			/**
			 * Loads the next page into the queue when the queue is empty and more pages may exist.
			 * A page without records marks the end of the iteration.
			 */
			private void fillProjectListIfNeeded() throws CollectorServiceException {
				if (!morePages || !projects.isEmpty()) {
					return;
				}
				Document doc = fetchNextPage();
				List<?> records = doc.selectNodes(RECORDS_XPATH);
				if (records == null || records.isEmpty()) {
					log.info("No more projects to read at page nr. " + pageNumber);
					morePages = false;
					return;
				}
				for (Object record : records) {
					// The XPath selects Record elements only, so the cast is safe.
					projects.add(((Element) record).asXML());
				}
			}

			/**
			 * Advances the page counter and downloads and parses the corresponding page.
			 */
			private Document fetchNextPage() throws CollectorServiceException {
				pageNumber++;
				try {
					URL pageUrl = new URL(queryURL + "&page=" + pageNumber);
					log.debug("Getting page at: " + pageUrl.toString());
					return readDocument(pageReader, pageUrl);
				} catch (DocumentException e) {
					throw new CollectorServiceException(e);
				} catch (IOException e) {
					// Also covers MalformedURLException, which is an IOException.
					throw new CollectorServiceRuntimeException("Error on page " + pageNumber, e);
				}
			}
		};
	}
}