package eu.dnetlib.data.collector.plugins.datasources; import java.io.InputStream; import java.util.Iterator; import java.util.NoSuchElementException; import javax.xml.stream.XMLInputFactory; import javax.xml.stream.XMLStreamConstants; import javax.xml.stream.XMLStreamException; import javax.xml.stream.XMLStreamReader; import eu.dnetlib.data.collector.plugins.HttpConnector; import eu.dnetlib.data.collector.rmi.CollectorServiceException; import eu.dnetlib.data.collector.rmi.CollectorServiceRuntimeException; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import eu.dnetlib.data.collector.plugins.oai.engine.XmlCleaner; public class Re3DataRepositoriesIterator implements Iterator, Iterable { private static final Log log = LogFactory.getLog(Re3DataRepositoriesIterator.class); // NOPMD by marko on 11/24/08 5:02 PM private String baseURL; private XMLStreamReader reader; private int countedRepos = 0; private String currentRepoPath = null; private HttpConnector httpConnector; @Override public boolean hasNext() { return currentRepoPath != null; } @Override public String next() { if (currentRepoPath == null) throw new NoSuchElementException(); try { String repoInfo = getRepositoryInfo(currentRepoPath); return repoInfo; } finally { currentRepoPath = moveToNextRepo(); } } @Override public void remove() { throw new UnsupportedOperationException(); } @Override public Iterator iterator() { return this; } public Re3DataRepositoriesIterator(final InputStream xmlInputStream, final String baseUrl, final HttpConnector httpConnector) throws CollectorServiceException { this.httpConnector = httpConnector; XMLInputFactory factory = XMLInputFactory.newInstance(); try { reader = factory.createXMLStreamReader(xmlInputStream); } catch (XMLStreamException e) { throw new CollectorServiceException(e); } baseURL = baseUrl; // try to fetch the 1st currentRepoPath = moveToNextRepo(); } private String getNextRepositoryPath() { return reader.getAttributeValue(null, "href"); } private String moveToNextRepo() { try { while (reader.hasNext()) { int event = reader.next(); if (event == XMLStreamConstants.START_ELEMENT) { String elementName = reader.getLocalName(); if (elementName.equals("link")) { String repoPath = getNextRepositoryPath(); log.debug(String.format("Found %s repositories. The last has link %s", ++countedRepos, repoPath)); return repoPath; } } } log.info("Seems there are no more repository to iterate on. Total: " + countedRepos); return null; } catch (XMLStreamException e) { throw new CollectorServiceRuntimeException(e); } } private String getRepositoryInfo(final String repositoryPath) throws CollectorServiceRuntimeException { String targetURL = repositoryPath; if(!repositoryPath.startsWith(baseURL)) targetURL = baseURL + repositoryPath; try { log.info(targetURL); String inputSource = getHttpConnector().getInputSource(targetURL); return XmlCleaner.cleanAllEntities(inputSource); } catch (CollectorServiceException e) { throw new CollectorServiceRuntimeException("OOOPS something bad happen getting repo info from " + targetURL, e); } } // public String testAccess(){ // return getRepositoryInfo("/api/v1/repository/r3d100012823"); // } public String getBaseURL() { return baseURL; } public void setBaseURL(final String baseURL) { this.baseURL = baseURL; } public int getCountedRepos() { return countedRepos; } public void setCountedRepos(final int countedRepos) { this.countedRepos = countedRepos; } public XMLStreamReader getReader() { return reader; } public void setReader(final XMLStreamReader reader) { this.reader = reader; } public String getCurrentRepoPath() { return currentRepoPath; } public void setCurrentRepoPath(final String currentRepoPath) { this.currentRepoPath = currentRepoPath; } public HttpConnector getHttpConnector() { return httpConnector; } public void setHttpConnector(final HttpConnector httpConnector) { this.httpConnector = httpConnector; } }