dnet-core/dnet-data-services/src/main/java/eu/dnetlib/data/collector/plugins/datasets/DatasetsByProjectIterator.java

159 lines
4.1 KiB
Java

package eu.dnetlib.data.collector.plugins.datasets;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.Iterator;
import java.util.Map;
import java.util.NoSuchElementException;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import com.google.common.collect.Maps;
/**
 * Iterates over the datasets of every project listed in a CSV stream.
 * <p>
 * Every CSV line must consist of exactly four {@code ;}-separated fields: field 0 is the numeric
 * project id, field 2 the project name and field 3 the CORDA id (field 1 is not used). For each
 * project a {@link DatasetsIterator} is built and its items are returned one after the other;
 * projects without datasets are skipped.
 * <p>
 * The instance is its own {@link Iterator}: {@link #iterator()} returns {@code this}, so the
 * datasets can be traversed only once. Reading stops at the end of the CSV, at the first line that
 * cannot be parsed, or at the first I/O error (the last two are logged as errors).
 */
public class DatasetsByProjectIterator implements Iterable<String>, Iterator<String> {

    /** The logger. */
    private static final Log log = LogFactory.getLog(DatasetsByProjectIterator.class);

    /** Separator between the fields of a CSV line. */
    private static final String SPLIT_REGEX = ";";

    /** Number of fields every CSV line must have. */
    private static final int EXPECTED_FIELDS = 4;

    /** The project id key. */
    public static final String PROJECT_ID_KEY = "id";

    /** The project name key. */
    public static final String PROJECT_NAME_KEY = "name";

    /** The project corda id key. */
    public static final String PROJECT_CORDA_ID_KEY = "corda_id";

    /** The csv reader. */
    private final BufferedReader csvReader;

    /** The project currently being iterated, or {@code null} when no project is left. */
    private Map<String, String> currentProject;

    /**
     * The datasets iterator of {@link #currentProject}, or {@code null} when it has not been
     * created yet.
     */
    private Iterator<String> currentIterator;

    /**
     * Instantiates a new datasets by project iterator and reads the first project from the CSV.
     *
     * @param csvInputStream
     *            the csv input stream
     * @throws IOException
     *             kept in the signature for callers; read errors are logged and end the
     *             iteration instead of being propagated.
     */
    public DatasetsByProjectIterator(final InputStreamReader csvInputStream) throws IOException {
        this.csvReader = new BufferedReader(csvInputStream);
        this.currentProject = extractNextLine();
    }

    /**
     * Tells whether another dataset is available, moving on to the following projects of the CSV
     * as long as the current one has no (more) datasets.
     *
     * @return {@code true} if {@link #next()} will return a dataset, {@code false} when the CSV is
     *         exhausted or a line could not be read or parsed
     * @see java.util.Iterator#hasNext()
     */
    @Override
    public boolean hasNext() {
        while (this.currentProject != null) {
            // The datasets iterator is created lazily, so the project loaded by the constructor
            // is served even when iterator() was never invoked.
            if (this.currentIterator == null) {
                this.currentIterator = getNextIterator();
            }
            if (this.currentIterator.hasNext()) {
                return true;
            }
            // The current project is exhausted: move on to the next CSV line.
            this.currentIterator = null;
            this.currentProject = extractNextLine();
        }
        return false;
    }

    /**
     * Returns the next dataset.
     *
     * @return the next dataset
     * @throws NoSuchElementException
     *             if no dataset is left
     * @see java.util.Iterator#next()
     */
    @Override
    public String next() {
        if (!hasNext()) {
            throw new NoSuchElementException("no more datasets available");
        }
        return this.currentIterator.next();
    }

    /**
     * Removal is not supported.
     *
     * @throws UnsupportedOperationException
     *             always
     * @see java.util.Iterator#remove()
     */
    @Override
    public void remove() {
        throw new UnsupportedOperationException("remove");
    }

    /**
     * Returns this instance, which is its own iterator.
     *
     * @return this object, never {@code null}; it simply has no elements when the CSV contains
     *         no readable project
     * @see java.lang.Iterable#iterator()
     */
    @Override
    public Iterator<String> iterator() {
        return this;
    }

    /**
     * Builds the datasets iterator of the current project: the project id is used as the
     * {@code ft-techkeyword} query term and the CORDA id is handed to the {@link DatasetsIterator}.
     *
     * @return the iterator over the datasets of the current project
     */
    private Iterator<String> getNextIterator() {
        final QueryField query = new QueryField();
        final RequestField request = new RequestField();
        request.setQuery(query);
        query.getTerm().put("ft-techkeyword", this.currentProject.get(PROJECT_ID_KEY));
        return new DatasetsIterator(request, this.currentProject.get(PROJECT_CORDA_ID_KEY), null).iterator();
    }

    /**
     * Reads the next CSV line and converts it into a project description.
     *
     * @return a map holding {@link #PROJECT_ID_KEY}, {@link #PROJECT_NAME_KEY} and
     *         {@link #PROJECT_CORDA_ID_KEY}, or {@code null} when the end of the CSV is reached,
     *         the line cannot be read, or the line is malformed
     */
    private Map<String, String> extractNextLine() {
        final String line;
        try {
            line = this.csvReader.readLine();
        } catch (IOException e) {
            log.error("Error reading the next line of the projects CSV", e);
            return null;
        }
        // End of the CSV.
        if (line == null) {
            return null;
        }

        log.debug("splitting line: " + line);
        final String[] values = line.split(SPLIT_REGEX);
        if (values.length != EXPECTED_FIELDS) {
            log.error("Error on splitting line, the length must be 4");
            return null;
        }

        final int id;
        try {
            id = Integer.parseInt(values[0]);
        } catch (NumberFormatException e) {
            // Handled like any other malformed line rather than escaping from hasNext().
            log.error("Error on parsing line, the project id must be an integer: " + values[0], e);
            return null;
        }
        final String apiId = "project" + id;
        final String projectName = values[2];
        final String cordaId = values[3];

        final Map<String, String> project = Maps.newHashMap();
        project.put(PROJECT_CORDA_ID_KEY, cordaId);
        project.put(PROJECT_ID_KEY, apiId);
        project.put(PROJECT_NAME_KEY, projectName);
        log.debug(String.format("found project %s with id Corda: %s and id for API: %s", projectName, cordaId, apiId));
        return project;
    }
}