Collector Plugin for Thanados and support for collections in collections #3
|
@ -36,6 +36,11 @@
|
||||||
<artifactId>dnet-msro-service</artifactId>
|
<artifactId>dnet-msro-service</artifactId>
|
||||||
<version>[7.0.0-SAXONHE-SOLR772-SNAPSHOT, 8.0.0-SAXONHE)</version>
|
<version>[7.0.0-SAXONHE-SOLR772-SNAPSHOT, 8.0.0-SAXONHE)</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>net.minidev</groupId>
|
||||||
|
<artifactId>json-smart</artifactId>
|
||||||
|
<version>2.4.8</version>
|
||||||
|
</dependency>
|
||||||
</dependencies>
|
</dependencies>
|
||||||
|
|
||||||
<properties>
|
<properties>
|
||||||
|
|
|
@ -4,6 +4,10 @@ import eu.dnetlib.data.collector.plugins.httplist.HttpListIterator;
|
||||||
import eu.dnetlib.rmi.data.CollectorServiceException;
|
import eu.dnetlib.rmi.data.CollectorServiceException;
|
||||||
import eu.dnetlib.rmi.data.InterfaceDescriptor;
|
import eu.dnetlib.rmi.data.InterfaceDescriptor;
|
||||||
import eu.dnetlib.rmi.data.plugin.AbstractCollectorPlugin;
|
import eu.dnetlib.rmi.data.plugin.AbstractCollectorPlugin;
|
||||||
|
import org.apache.commons.lang3.StringUtils;
|
||||||
|
import org.apache.commons.lang3.time.DateUtils;
|
||||||
|
|
||||||
|
import java.util.Date;
|
||||||
|
|
||||||
public class ThanadosCollectorPlugin extends AbstractCollectorPlugin {
|
public class ThanadosCollectorPlugin extends AbstractCollectorPlugin {
|
||||||
|
|
||||||
|
@ -12,6 +16,6 @@ public class ThanadosCollectorPlugin extends AbstractCollectorPlugin {
|
||||||
final String baseUrl = interfaceDescriptor.getBaseUrl();
|
final String baseUrl = interfaceDescriptor.getBaseUrl();
|
||||||
final String listAddress = interfaceDescriptor.getParams().get("listUrl");
|
final String listAddress = interfaceDescriptor.getParams().get("listUrl");
|
||||||
|
|
||||||
return () -> new ThanadosIterator(baseUrl, listAddress);
|
return () -> new ThanadosIterator(baseUrl, listAddress, fromDate);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -5,6 +5,7 @@ import com.google.gson.reflect.TypeToken;
|
||||||
import eu.dnetlib.data.collector.ThreadSafeIterator;
|
import eu.dnetlib.data.collector.ThreadSafeIterator;
|
||||||
import eu.dnetlib.rmi.data.CollectorServiceRuntimeException;
|
import eu.dnetlib.rmi.data.CollectorServiceRuntimeException;
|
||||||
import org.apache.commons.io.IOUtils;
|
import org.apache.commons.io.IOUtils;
|
||||||
|
import org.apache.commons.lang3.StringUtils;
|
||||||
import org.apache.commons.logging.Log;
|
import org.apache.commons.logging.Log;
|
||||||
import org.apache.commons.logging.LogFactory;
|
import org.apache.commons.logging.LogFactory;
|
||||||
import org.apache.http.HttpStatus;
|
import org.apache.http.HttpStatus;
|
||||||
|
@ -18,6 +19,11 @@ import java.io.IOException;
|
||||||
import java.lang.reflect.Type;
|
import java.lang.reflect.Type;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
|
import net.minidev.json.JSONArray;
|
||||||
|
import net.minidev.json.JSONObject;
|
||||||
|
import net.minidev.json.parser.JSONParser;
|
||||||
|
import net.minidev.json.parser.ParseException;
|
||||||
|
|
||||||
public class ThanadosIterator extends ThreadSafeIterator {
|
public class ThanadosIterator extends ThreadSafeIterator {
|
||||||
|
|
||||||
private static final Log log = LogFactory.getLog(ThanadosIterator.class);
|
private static final Log log = LogFactory.getLog(ThanadosIterator.class);
|
||||||
|
@ -27,12 +33,15 @@ public class ThanadosIterator extends ThreadSafeIterator {
|
||||||
private List<String> identifiers;
|
private List<String> identifiers;
|
||||||
private int counter = 0;
|
private int counter = 0;
|
||||||
private String urlFormat = "%s/%s?format=xml";
|
private String urlFormat = "%s/%s?format=xml";
|
||||||
|
private String fromDate;
|
||||||
|
|
||||||
public ThanadosIterator(final String baseUrl, final String listAddress) {
|
|
||||||
|
public ThanadosIterator(final String baseUrl, final String listAddress, final String fromDate) {
|
||||||
try {
|
try {
|
||||||
this.baseUrl = baseUrl;
|
this.baseUrl = baseUrl;
|
||||||
this.identifiers = downloadIdentifierList(listAddress);
|
this.identifiers = downloadIdentifierList(listAddress, fromDate);
|
||||||
this.counter = 0;
|
this.counter = 0;
|
||||||
|
this.fromDate = fromDate;
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
throw new CollectorServiceRuntimeException("Error creating iterator", e);
|
throw new CollectorServiceRuntimeException("Error creating iterator", e);
|
||||||
}
|
}
|
||||||
|
@ -59,12 +68,17 @@ public class ThanadosIterator extends ThreadSafeIterator {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
protected List<String> downloadIdentifierList(final String listUrl) {
|
protected List<String> downloadIdentifierList(final String listUrl, final String fromDate) throws ParseException {
|
||||||
|
String urlToListItems = listUrl;
|
||||||
String list = download(listUrl);
|
if(StringUtils.isNotBlank(fromDate))
|
||||||
|
urlToListItems = listUrl+"/"+fromDate;
|
||||||
|
log.info("Getting list of items from "+urlToListItems);
|
||||||
|
String response = download(urlToListItems);
|
||||||
|
JSONObject map = (JSONObject)(new JSONParser(JSONParser.MODE_PERMISSIVE).parse(response));
|
||||||
|
final String sites = map.getAsString("\"sites\"");
|
||||||
Gson converter = new Gson();
|
Gson converter = new Gson();
|
||||||
Type type = new TypeToken<List<String>>(){}.getType();
|
Type type = new TypeToken<List<String>>(){}.getType();
|
||||||
return converter.fromJson(list, type );
|
return converter.fromJson(sites, type );
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -11,7 +11,7 @@ public class ThanadosIteratorTest {
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testDownloadList(){
|
public void testDownloadList(){
|
||||||
it = new ThanadosIterator("", sitelist);
|
it = new ThanadosIterator("", sitelist, "");
|
||||||
it.getIdentifiers().stream().forEach(id -> System.out.println(id));
|
it.getIdentifiers().stream().forEach(id -> System.out.println(id));
|
||||||
System.out.println(it.getIdentifiers().size());
|
System.out.println(it.getIdentifiers().size());
|
||||||
}
|
}
|
||||||
|
@ -19,7 +19,7 @@ public class ThanadosIteratorTest {
|
||||||
@Test
|
@Test
|
||||||
public void testDownload(){
|
public void testDownload(){
|
||||||
int count = 0;
|
int count = 0;
|
||||||
it = new ThanadosIterator("https://thanados.openatlas.eu/api/0.3/subunits", sitelist);
|
it = new ThanadosIterator("https://thanados.openatlas.eu/api/0.3/subunits", sitelist, "");
|
||||||
while(it.hasNext()){
|
while(it.hasNext()){
|
||||||
it.next();
|
it.next();
|
||||||
count++;
|
count++;
|
||||||
|
@ -29,4 +29,11 @@ public class ThanadosIteratorTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testIncremental(){
|
||||||
|
it = new ThanadosIterator("", sitelist, "2022-03-12");
|
||||||
|
it.getIdentifiers().stream().forEach(id -> System.out.println(id));
|
||||||
|
System.out.println(it.getIdentifiers().size());
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue