dnet-core/dnet-data-services/src/main/java/eu/dnetlib/data/collector/plugins/opentrial/OpenTrialIterator.java

118 lines
2.9 KiB
Java

package eu.dnetlib.data.collector.plugins.opentrial;
/**
* Created by miriam on 07/03/2017.
*/
import eu.dnetlib.data.collector.rmi.CollectorServiceException;
import eu.dnetlib.data.collector.rmi.CollectorServiceRuntimeException;
import java.io.InputStream;
import java.net.*;
import java.util.Iterator;
import java.util.NoSuchElementException;
import java.util.concurrent.ArrayBlockingQueue;
//import java.util.function.Consumer;
import org.apache.commons.io.IOUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.json.*;
/**
 * Iterates over the records returned by a paged JSON search endpoint, handing each record
 * back as an XML string.
 *
 * <p>The first page is fetched eagerly by the constructor; it supplies {@code total_count}
 * (the number of records the server announces) and the first batch of {@code items}.
 * Further pages are fetched lazily, one at a time, whenever the local buffer runs dry.
 *
 * <p>The buffer, the counters and the page cursor all belong to this instance, so every
 * {@link Iterator} obtained from {@link #iterator()} drains the same underlying sequence.
 * The counters are plain fields: instances are not meant to be shared between threads.
 */
public class OpenTrialIterator implements Iterable<String> {

    private static final Log log = LogFactory.getLog(OpenTrialIterator.class);

    /** Number of records requested per page; the buffer is sized to hold exactly one page. */
    private static final int PAGE_SIZE = 100;

    /** Endpoint plus query string, without the trailing {@code &page=N} parameter. */
    private final String base_url;

    /** Number of records the server announced; lowered if the server runs out earlier. */
    private int total;

    /** Records already downloaded but not yet handed to the caller. */
    private ArrayBlockingQueue<String> trials = new ArrayBlockingQueue<String>(PAGE_SIZE);

    /** Number of records downloaded so far (not the number already consumed). */
    private int current = 0;

    /** Next page to request; page 1 is consumed by the constructor. */
    private int nextPage = 2;

    /**
     * Builds the query URL and downloads the first page.
     *
     * @param base_url  endpoint prefix; the query string is appended to it verbatim, so it
     *                  presumably has to end with {@code ?} or {@code &} already (TODO confirm
     *                  against the caller)
     * @param from_date lower bound for {@code registration_date}; when {@code null} no date
     *                  filter is applied and {@code to_date} is ignored
     * @param to_date   upper bound for {@code registration_date}; when {@code null} the range
     *                  is open-ended ({@code *})
     * @throws CollectorServiceException if the first page cannot be downloaded or parsed
     */
    public OpenTrialIterator(String base_url, String from_date, String to_date) throws CollectorServiceException {
        String q = "per_page=" + PAGE_SIZE;
        if (from_date != null) {
            String upperBound = (to_date != null) ? to_date : "*";
            q = "q=registration_date%3A%5B" + from_date + "%20TO%20" + upperBound + "%5D&" + q;
        }
        this.base_url = base_url + q;
        log.info("url from which to collect " + this.base_url);
        try {
            prepare();
        } catch (CollectorServiceException ex) {
            // Already the declared type: rethrow as-is instead of wrapping it a second time.
            throw ex;
        } catch (Exception ex) {
            throw new CollectorServiceException(ex);
        }
    }

    /**
     * Downloads page 1, records the announced {@code total_count} and buffers its items.
     *
     * @throws Exception if the page cannot be downloaded or is not the expected JSON
     */
    private void prepare() throws Exception {
        JSONObject json = new JSONObject(getPage(1));
        total = json.getInt("total_count");
        log.info("Total number of entries to collect: " + total);
        fillTrials(json);
    }

    /**
     * Returns an iterator over the remaining records.
     *
     * <p>{@code hasNext()} may trigger the download of the next page and therefore may throw
     * {@link CollectorServiceRuntimeException}; {@code next()} throws
     * {@link NoSuchElementException} once the sequence is exhausted.
     */
    @Override
    public Iterator<String> iterator() {
        return new Iterator<String>() {

            /** Removal is not meaningful for a remote source; kept as a silent no-op. */
            @Override
            public void remove() {
            }

            @Override
            public String next() {
                if (!bufferNextPageIfNeeded()) {
                    throw new NoSuchElementException();
                }
                return trials.poll();
            }

            @Override
            public boolean hasNext() {
                log.debug("More entries to collect: (" + current + "<" + total + "=" + (current < total));
                return bufferNextPageIfNeeded();
            }

            /** Same as {@link OpenTrialIterator#ensureBuffered()} with failures made unchecked. */
            private boolean bufferNextPageIfNeeded() {
                try {
                    return ensureBuffered();
                } catch (Exception ex) {
                    throw new CollectorServiceRuntimeException(ex);
                }
            }
        };
    }

    /**
     * Makes sure at least one record is buffered, downloading the next page if necessary.
     *
     * @return {@code true} if a record is available, {@code false} if the source is exhausted
     * @throws CollectorServiceException if the next page cannot be downloaded or converted
     */
    private boolean ensureBuffered() throws CollectorServiceException {
        if (!trials.isEmpty()) {
            return true;
        }
        if (current >= total) {
            return false;
        }
        int downloadedBefore = current;
        fillTrials(new JSONObject(getPage(nextPage)));
        nextPage++;
        if (current == downloadedBefore) {
            // The server announced more records than it delivers. Without this the iterator
            // would keep requesting pages forever and hand out null records.
            log.warn("Empty page received after " + current + " of " + total + " announced entries; stopping");
            total = current;
            return false;
        }
        return true;
    }

    /**
     * Converts every element of the page's {@code items} array to XML and buffers it.
     *
     * @param json one parsed page of results
     * @throws CollectorServiceException if an item cannot be converted, or if the page holds
     *                                   more items than the buffer can take
     */
    private void fillTrials(JSONObject json) throws CollectorServiceException {
        JSONArray entries = json.getJSONArray("items");
        for (Object entry : entries) {
            String xml;
            try {
                xml = XML.toString(entry);
            } catch (Exception ex) {
                throw new CollectorServiceException(ex);
            }
            // offer() instead of put(): the producer and the consumer are the same thread, so a
            // blocking put() on a full queue could never be released.
            if (!trials.offer(xml)) {
                throw new CollectorServiceException(
                        new IllegalStateException("Page holds more than " + PAGE_SIZE + " items; buffer is full"));
            }
            current++;
        }
    }

    /**
     * Downloads one page of results.
     *
     * @param page_number 1-based page index, sent as the {@code page} query parameter
     * @return the response body decoded as UTF-8
     * @throws CollectorServiceException if the URL is malformed or the request fails
     */
    private String getPage(int page_number) throws CollectorServiceException {
        InputStream in = null;
        try {
            URL url = new URL(base_url + "&page=" + page_number);
            URLConnection conn = url.openConnection();
            conn.setRequestProperty("User-Agent", "Mozilla/5.0");
            in = conn.getInputStream();
            // Explicit charset: the one-argument overload falls back to the platform default.
            return IOUtils.toString(in, "UTF-8");
        } catch (Exception ex) {
            throw new CollectorServiceException(ex);
        } finally {
            // Null-safe; releases the connection's stream on both success and failure.
            IOUtils.closeQuietly(in);
        }
    }
}