118 lines
2.9 KiB
Java
118 lines
2.9 KiB
Java
|
package eu.dnetlib.data.collector.plugins.opentrial;
|
||
|
|
||
|
/**
|
||
|
* Created by miriam on 07/03/2017.
|
||
|
*/
|
||
|
import eu.dnetlib.data.collector.rmi.CollectorServiceException;
|
||
|
import eu.dnetlib.data.collector.rmi.CollectorServiceRuntimeException;
|
||
|
import org.apache.commons.io.IOUtils;
|
||
|
import java.net.*;
|
||
|
import java.util.Iterator;
|
||
|
import java.util.concurrent.ArrayBlockingQueue;
|
||
|
//import java.util.function.Consumer;
|
||
|
|
||
|
import org.apache.commons.logging.Log;
|
||
|
import org.apache.commons.logging.LogFactory;
|
||
|
import org.json.*;
|
||
|
|
||
|
|
||
|
|
||
|
public class OpenTrialIterator implements Iterable<String> {
|
||
|
|
||
|
private final String base_url;
|
||
|
private int total ;
|
||
|
private ArrayBlockingQueue<String> trials = new ArrayBlockingQueue<String>(100);
|
||
|
private int current = 0;
|
||
|
private static final Log log = LogFactory.getLog(OpenTrialIterator.class);
|
||
|
|
||
|
public OpenTrialIterator(String base_url, String from_date, String to_date)throws CollectorServiceException{
|
||
|
try {
|
||
|
String q = "per_page=100";
|
||
|
if (!(from_date == null)) {
|
||
|
if (!(to_date == null)) {
|
||
|
q = "q=registration_date%3A%5B" + from_date + "%20TO%20" + to_date + "%5D&" + q;
|
||
|
|
||
|
} else
|
||
|
q = "q=registration_date%3A%5B" + from_date + "%20TO%20*%5D&" + q;
|
||
|
}
|
||
|
this.base_url = base_url+ q;
|
||
|
log.info("url from which to collect " + this.base_url);
|
||
|
prepare();
|
||
|
}catch(Exception ex){
|
||
|
throw new CollectorServiceException(ex);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
private void prepare()throws Exception {
|
||
|
JSONObject json = new JSONObject(getPage(1));
|
||
|
total = json.getInt("total_count");
|
||
|
log.info("Total number of entries to collect: " + total);
|
||
|
fillTrials(json);
|
||
|
}
|
||
|
|
||
|
|
||
|
@Override
|
||
|
public Iterator<String> iterator() {
|
||
|
return new Iterator<String>(){
|
||
|
|
||
|
private int page_number = 2;
|
||
|
|
||
|
|
||
|
@Override
|
||
|
public void remove(){
|
||
|
|
||
|
}
|
||
|
|
||
|
@Override
|
||
|
public String next() {
|
||
|
try {
|
||
|
if (trials.isEmpty()) {
|
||
|
JSONObject json = new JSONObject(getPage(page_number));
|
||
|
fillTrials(json);
|
||
|
page_number++;
|
||
|
}
|
||
|
return trials.poll();
|
||
|
}catch(Exception ex){
|
||
|
throw new CollectorServiceRuntimeException(ex);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
@Override
|
||
|
public boolean hasNext(){
|
||
|
log.debug("More entries to collect: (" + current + "<" + total + "=" + (current < total));
|
||
|
return (current < total || !trials.isEmpty());
|
||
|
}
|
||
|
|
||
|
|
||
|
};
|
||
|
|
||
|
}
|
||
|
|
||
|
private void fillTrials(JSONObject json)throws CollectorServiceException{
|
||
|
|
||
|
JSONArray entries = json.getJSONArray("items");
|
||
|
for(Object entry: entries) {
|
||
|
try {
|
||
|
trials.put(XML.toString(entry));
|
||
|
}catch(Exception ex){
|
||
|
throw new CollectorServiceException(ex);
|
||
|
}
|
||
|
current++;
|
||
|
}
|
||
|
|
||
|
}
|
||
|
private String getPage(int page_number)throws CollectorServiceException {
|
||
|
|
||
|
try {
|
||
|
URL url = new URL(base_url + "&page=" + page_number);
|
||
|
URLConnection conn = url.openConnection();
|
||
|
conn.setRequestProperty("User-Agent", "Mozilla/5.0");
|
||
|
return (IOUtils.toString(conn.getInputStream()));
|
||
|
}catch(Exception ex){
|
||
|
throw new CollectorServiceException(ex);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
|
||
|
}
|