package eu.dnetlib.data.collector.plugins.ariadneplus.thanados;

import com.google.gson.Gson;
import com.google.gson.reflect.TypeToken;
import eu.dnetlib.data.collector.ThreadSafeIterator;
import eu.dnetlib.rmi.data.CollectorServiceRuntimeException;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.http.HttpStatus;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;

import java.io.BufferedInputStream;
import java.io.IOException;
import java.lang.reflect.Type;
import java.util.List;

import net.minidev.json.JSONArray;
import net.minidev.json.JSONObject;
import net.minidev.json.parser.JSONParser;
import net.minidev.json.parser.ParseException;

/**
 * Iterator that first downloads a list of identifiers from a listing URL and then, on each
 * call to {@link #doNext()}, downloads the record for the next identifier from
 * {@code <baseUrl>/<identifier>?format=xml} and returns the response body as a string.
 */
public class ThanadosIterator extends ThreadSafeIterator {

	private static final Log log = LogFactory.getLog(ThanadosIterator.class);

	/** Pattern of a single-record URL: base URL, then identifier. */
	private static final String URL_FORMAT = "%s/%s?format=xml";

	// NOTE(review): this client is never closed by this class; closing it needs a lifecycle
	// hook that is not visible here.
	private final CloseableHttpClient client = HttpClients.createDefault();

	private final String baseUrl;
	private final String fromDate;
	private List<String> identifiers;
	/** Index into {@link #identifiers} of the next identifier to download. */
	private int counter = 0;

	/**
	 * Creates the iterator and eagerly downloads the identifier list.
	 *
	 * @param baseUrl     prefix used to build the URL of each single record
	 * @param listAddress URL returning the JSON document that contains the identifiers
	 * @param fromDate    optional value appended to {@code listAddress} as a path segment;
	 *                    ignored when blank
	 * @throws CollectorServiceRuntimeException if the identifier list cannot be downloaded or parsed
	 */
	public ThanadosIterator(final String baseUrl, final String listAddress, final String fromDate) {
		this.baseUrl = baseUrl;
		this.fromDate = fromDate;
		this.counter = 0;
		try {
			// NOTE(review): downloadIdentifierList is overridable and is called from the
			// constructor; subclasses must not rely on their own fields being initialised.
			this.identifiers = downloadIdentifierList(listAddress, fromDate);
		} catch (Exception e) {
			throw new CollectorServiceRuntimeException("Error creating iterator", e);
		}
	}

	/**
	 * @return {@code true} while there are identifiers that have not been consumed yet
	 */
	@Override
	public boolean doHasNext() {
		return counter < this.identifiers.size();
	}

	/**
	 * Downloads the record of the next identifier.
	 *
	 * @return the body of the HTTP response for the next record
	 * @throws CollectorServiceRuntimeException if the iterator is exhausted or the download fails
	 */
	@Override
	public String doNext() {
		try {
			if (counter >= this.identifiers.size()) {
				throw new CollectorServiceRuntimeException("Iterator has reached the end");
			}
			final String toDownload = String.format(URL_FORMAT, baseUrl, identifiers.get(counter));
			log.debug("Downloading " + toDownload);
			return download(toDownload);
		} finally {
			// Always advance, even on failure, so that a failing record is skipped on the next call.
			counter++;
		}
	}

	/**
	 * Downloads and parses the list of identifiers.
	 *
	 * @param listUrl  URL of the listing endpoint
	 * @param fromDate optional value appended to {@code listUrl} as {@code "/" + fromDate};
	 *                 ignored when blank
	 * @return the identifiers found in the response, never {@code null}
	 * @throws ParseException                   if the response is not parseable as JSON
	 * @throws CollectorServiceRuntimeException if the download fails or no identifier list
	 *                                          can be extracted from the response
	 */
	protected List<String> downloadIdentifierList(final String listUrl, final String fromDate) throws ParseException {
		final String urlToListItems = StringUtils.isNotBlank(fromDate) ? listUrl + "/" + fromDate : listUrl;
		log.info("Getting list of items from " + urlToListItems);

		final String response = download(urlToListItems);
		final JSONObject map = (JSONObject) new JSONParser(JSONParser.MODE_PERMISSIVE).parse(response);
		// NOTE(review): the lookup key contains literal double quotes; presumably this matches
		// the shape of the remote response - confirm against the service.
		final String sites = map.getAsString("\"sites\"");

		final Type type = new TypeToken<List<String>>() {}.getType();
		final List<String> ids = new Gson().fromJson(sites, type);
		if (ids == null) {
			// Fail here with a clear message instead of a NullPointerException in doHasNext().
			throw new CollectorServiceRuntimeException("No identifier list found in response from " + urlToListItems);
		}
		return ids;
	}

	/**
	 * Performs an HTTP GET and returns the response body decoded as UTF-8.
	 *
	 * @param url the URL to fetch
	 * @return the response body
	 * @throws CollectorServiceRuntimeException if the status code is not 200 or an I/O error occurs
	 */
	private String download(final String url) {
		log.debug("download: " + url);
		final HttpGet method = new HttpGet(url);
		try (CloseableHttpResponse response = client.execute(method)) {
			final int statusCode = response.getStatusLine().getStatusCode();
			if (HttpStatus.SC_OK != statusCode) {
				throw new CollectorServiceRuntimeException("Error " + statusCode + " dowloading url: " + url);
			}
			return IOUtils.toString(new BufferedInputStream(response.getEntity().getContent()), "utf-8");
		} catch (IOException e) {
			// Keep the cause so the underlying network error is not lost.
			throw new CollectorServiceRuntimeException("Error downloading url: " + url, e);
		}
	}

	/**
	 * @return the list of identifiers this iterator walks over
	 */
	public List<String> getIdentifiers() {
		return identifiers;
	}

	/**
	 * Replaces the list of identifiers; the current position is not reset.
	 *
	 * @param identifiers the new list of identifiers
	 */
	public void setIdentifiers(List<String> identifiers) {
		this.identifiers = identifiers;
	}
}