112 lines
3.5 KiB
Java
112 lines
3.5 KiB
Java
package eu.dnetlib.data.collector.plugins.ariadneplus.thanados;
|
|
|
|
import com.google.gson.Gson;
|
|
import com.google.gson.reflect.TypeToken;
|
|
import eu.dnetlib.data.collector.ThreadSafeIterator;
|
|
import eu.dnetlib.rmi.data.CollectorServiceRuntimeException;
|
|
import org.apache.commons.io.IOUtils;
|
|
import org.apache.commons.lang3.StringUtils;
|
|
import org.apache.commons.logging.Log;
|
|
import org.apache.commons.logging.LogFactory;
|
|
import org.apache.http.HttpStatus;
|
|
import org.apache.http.client.methods.CloseableHttpResponse;
|
|
import org.apache.http.client.methods.HttpGet;
|
|
import org.apache.http.impl.client.CloseableHttpClient;
|
|
import org.apache.http.impl.client.HttpClients;
|
|
|
|
import java.io.BufferedInputStream;
|
|
import java.io.IOException;
|
|
import java.lang.reflect.Type;
|
|
import java.util.List;
|
|
|
|
import net.minidev.json.JSONArray;
|
|
import net.minidev.json.JSONObject;
|
|
import net.minidev.json.parser.JSONParser;
|
|
import net.minidev.json.parser.ParseException;
|
|
|
|
public class ThanadosIterator extends ThreadSafeIterator {
|
|
|
|
private static final Log log = LogFactory.getLog(ThanadosIterator.class);
|
|
private final CloseableHttpClient client = HttpClients.createDefault();
|
|
|
|
private String baseUrl;
|
|
private List<String> identifiers;
|
|
private int counter = 0;
|
|
private String urlFormat = "%s/%s?format=xml";
|
|
private String fromDate;
|
|
|
|
|
|
public ThanadosIterator(final String baseUrl, final String listAddress, final String fromDate) {
|
|
try {
|
|
this.baseUrl = baseUrl;
|
|
this.identifiers = downloadIdentifierList(listAddress, fromDate);
|
|
this.counter = 0;
|
|
this.fromDate = fromDate;
|
|
} catch (Exception e) {
|
|
throw new CollectorServiceRuntimeException("Error creating iterator", e);
|
|
}
|
|
}
|
|
|
|
@Override
|
|
public boolean doHasNext() {
|
|
return counter < this.identifiers.size();
|
|
}
|
|
|
|
@Override
|
|
public String doNext() {
|
|
try {
|
|
if (counter < this.identifiers.size()) {
|
|
String toDownload = String.format(urlFormat, baseUrl, identifiers.get(counter));
|
|
log.debug("Downloading "+toDownload);
|
|
System.out.println("Downloading "+toDownload);
|
|
return download(toDownload);
|
|
} else {
|
|
throw new CollectorServiceRuntimeException("Iterator has reached the end");
|
|
}
|
|
} finally {
|
|
counter++;
|
|
}
|
|
}
|
|
|
|
protected List<String> downloadIdentifierList(final String listUrl, final String fromDate) throws ParseException {
|
|
String urlToListItems = listUrl;
|
|
if(StringUtils.isNotBlank(fromDate))
|
|
urlToListItems = listUrl+"/"+fromDate;
|
|
log.info("Getting list of items from "+urlToListItems);
|
|
String response = download(urlToListItems);
|
|
JSONObject map = (JSONObject)(new JSONParser(JSONParser.MODE_PERMISSIVE).parse(response));
|
|
final String sites = map.getAsString("\"sites\"");
|
|
Gson converter = new Gson();
|
|
Type type = new TypeToken<List<String>>(){}.getType();
|
|
return converter.fromJson(sites, type );
|
|
|
|
}
|
|
|
|
private String download(final String url) {
|
|
log.debug("download: "+url);
|
|
final HttpGet method = new HttpGet(url);
|
|
|
|
try(CloseableHttpResponse response = client.execute(method)) {
|
|
int statusCode = response.getStatusLine().getStatusCode();
|
|
|
|
if (HttpStatus.SC_OK == statusCode) {
|
|
return IOUtils.toString(new BufferedInputStream(response.getEntity().getContent()), "utf-8");
|
|
} else {
|
|
throw new CollectorServiceRuntimeException("Error " + statusCode + " dowloading url: " + url);
|
|
}
|
|
|
|
} catch (IOException e) {
|
|
throw new CollectorServiceRuntimeException("Error downloading url: " + url);
|
|
}
|
|
}
|
|
|
|
|
|
public List<String> getIdentifiers() {
|
|
return identifiers;
|
|
}
|
|
|
|
public void setIdentifiers(List<String> identifiers) {
|
|
this.identifiers = identifiers;
|
|
}
|
|
}
|