package eu.dnetlib.data.collector.plugins.datasets;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.Iterator;
import java.util.Map;
import java.util.NoSuchElementException;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

import com.google.common.collect.Maps;

/**
 * Iterates over the datasets of every project listed in a CSV stream.
 *
 * <p>
 * Each CSV line is split on {@code ;} and must yield exactly four fields: a numeric id (field 0), a field that is
 * not used by this class (field 1), the project name (field 2) and the Corda id (field 3). For every project a
 * {@link DatasetsIterator} is built and its elements are returned one after the other; projects without datasets
 * are skipped transparently.
 * </p>
 *
 * <p>
 * The object is its own iterator, therefore it can be traversed only once: every call to {@link #iterator()}
 * returns the same underlying cursor.
 * </p>
 */
public class DatasetsByProjectIterator implements Iterable<String>, Iterator<String> {

	/** The logger. */
	private static final Log log = LogFactory.getLog(DatasetsByProjectIterator.class);

	/** Separator (regular expression) between the fields of a CSV line. */
	private static final String SPLIT_REGEX = ";";

	/** Number of fields a well-formed CSV line must contain. */
	private static final int EXPECTED_FIELDS = 4;

	/** The project id key. */
	public static final String PROJECT_ID_KEY = "id";

	/** The project name key. */
	public static final String PROJECT_NAME_KEY = "name";

	/** The project corda id key. */
	public static final String PROJECT_CORDA_ID_KEY = "corda_id";

	/**
	 * Iterator over the datasets of {@link #currentProject}; {@code null} until it has been created for that
	 * project.
	 */
	private Iterator<String> currentIterator;

	/** The csv reader. */
	private final BufferedReader csvReader;

	/** The project currently being processed; {@code null} once the CSV is exhausted or unreadable. */
	private Map<String, String> currentProject;

	/**
	 * Instantiates a new datasets by project iterator and positions it on the first well-formed CSV line.
	 *
	 * @param csvInputStream
	 *            the csv input stream
	 * @throws IOException
	 *             declared for backward compatibility with existing callers; read errors are logged and end the
	 *             iteration instead of being propagated.
	 */
	public DatasetsByProjectIterator(final InputStreamReader csvInputStream) throws IOException {
		this.csvReader = new BufferedReader(csvInputStream);
		this.currentProject = extractNextLine();
	}

	/**
	 * Tells whether another dataset is available, moving forward through the CSV until a project with at least one
	 * dataset is found.
	 *
	 * @return {@code true} if {@link #next()} can return an element, {@code false} when the CSV has been fully
	 *         consumed (or could not be read any further)
	 * @see java.util.Iterator#hasNext()
	 */
	@Override
	public boolean hasNext() {
		while (this.currentProject != null) {
			// The datasets iterator is created lazily, so the first project is not lost when the caller uses
			// this object directly as an Iterator without calling iterator() first.
			if (this.currentIterator == null) {
				this.currentIterator = getNextIterator();
			}
			if (this.currentIterator != null && this.currentIterator.hasNext()) {
				return true;
			}
			// The current project has no (more) datasets: move to the next CSV line.
			this.currentProject = extractNextLine();
			this.currentIterator = null;
		}
		return false;
	}

	/**
	 * Returns the next dataset of the current project.
	 *
	 * @return the next element produced by the underlying datasets iterator
	 * @throws NoSuchElementException
	 *             if there are no more datasets
	 * @see java.util.Iterator#next()
	 */
	@Override
	public String next() {
		if (!hasNext()) {
			throw new NoSuchElementException("No more datasets available");
		}
		return this.currentIterator.next();
	}

	/**
	 * Removal is not supported: this method intentionally does nothing.
	 *
	 * @see java.util.Iterator#remove()
	 */
	@Override
	public void remove() {}

	/**
	 * Returns this object as iterator. It never returns {@code null}: when the CSV holds no valid project the
	 * returned iterator simply reports that it has no elements.
	 *
	 * @return this instance
	 * @see java.lang.Iterable#iterator()
	 */
	@Override
	public Iterator<String> iterator() {
		return this;
	}

	/**
	 * Builds the datasets iterator for the current project. The request carries the term {@code ft-techkeyword}
	 * set to the project id, and the Corda id is handed to the {@link DatasetsIterator}.
	 *
	 * @return the iterator over the datasets of the current project
	 */
	private Iterator<String> getNextIterator() {
		final QueryField query = new QueryField();
		final RequestField request = new RequestField();
		request.setQuery(query);
		query.getTerm().put("ft-techkeyword", this.currentProject.get(PROJECT_ID_KEY));
		// NOTE(review): the meaning of the third constructor argument is not visible here; null is kept as is.
		return new DatasetsIterator(request, this.currentProject.get(PROJECT_CORDA_ID_KEY), null).iterator();
	}

	/**
	 * Reads CSV lines until a well-formed one is found. Malformed lines are logged and skipped, so that a single
	 * bad row (for instance a header) does not truncate the whole iteration.
	 *
	 * @return the map describing the next project, or {@code null} when the end of the CSV is reached or the
	 *         reader fails
	 */
	private Map<String, String> extractNextLine() {
		while (true) {
			String line;
			try {
				line = this.csvReader.readLine();
			} catch (IOException e) {
				log.error("Error reading the projects CSV, the iteration stops here", e);
				return null;
			}
			// WE REACH THE END OF THE CSV
			if (line == null) {
				return null;
			}
			final Map<String, String> project = parseLine(line);
			if (project != null) {
				return project;
			}
		}
	}

	/**
	 * Converts one CSV line into the map describing a project.
	 *
	 * @param line
	 *            the raw CSV line
	 * @return a map with the keys {@link #PROJECT_ID_KEY}, {@link #PROJECT_NAME_KEY} and
	 *         {@link #PROJECT_CORDA_ID_KEY}, or {@code null} if the line is malformed
	 */
	private Map<String, String> parseLine(final String line) {
		log.debug("splitting line: " + line);
		final String[] values = line.split(SPLIT_REGEX);
		if (values.length != EXPECTED_FIELDS) {
			log.error("Error on splitting line, the length must be 4: " + line);
			return null;
		}

		final int id;
		try {
			id = Integer.parseInt(values[0]);
		} catch (NumberFormatException e) {
			log.error("Error on parsing line, the first field must be an integer: " + line);
			return null;
		}
		final String apiId = "project" + id;
		final String projectName = values[2];
		final String cordaId = values[3];

		final Map<String, String> project = Maps.newHashMap();
		project.put(PROJECT_CORDA_ID_KEY, cordaId);
		project.put(PROJECT_ID_KEY, apiId);
		project.put(PROJECT_NAME_KEY, projectName);
		log.debug(String.format("found project %s with id Corda: %s and id for API: %s", projectName, cordaId, apiId));
		return project;
	}
}