AriadnePlus/dnet-ariadneplus/src/main/java/eu/dnetlib/data/collector/plugins/ariadneplus/ads/FileJSONCollectorPlugin.java

139 lines
3.9 KiB
Java

package eu.dnetlib.data.collector.plugins.ariadneplus.ads;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.Reader;
import java.net.MalformedURLException;
import java.net.URL;
import java.nio.charset.Charset;
import java.util.Iterator;
import java.util.Map;
import java.util.Map.Entry;
import java.util.NoSuchElementException;
import java.util.Set;
import org.apache.commons.lang3.StringEscapeUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.dom4j.Document;
import org.dom4j.DocumentHelper;
import org.dom4j.Element;
import com.google.gson.JsonArray;
import com.google.gson.JsonElement;
import com.google.gson.JsonObject;
import com.google.gson.JsonParser;
import com.google.gson.stream.JsonReader;
import eu.dnetlib.data.collector.plugins.FileCollectorPlugin;
import eu.dnetlib.rmi.data.CollectorServiceException;
import eu.dnetlib.rmi.data.InterfaceDescriptor;
/**
 * Collector plugin that reads a local JSON file whose top-level value is an array of objects and
 * exposes every array element as one XML record.
 *
 * <p>Each record has an {@code Entry} root element with one child element per non-null, non-empty
 * property of the JSON object; the child element is named after the property key and contains the
 * property value as text.
 */
public class FileJSONCollectorPlugin extends FileCollectorPlugin {

    private static final Log log = LogFactory.getLog(FileJSONCollectorPlugin.class);

    /** JSON text is decoded as UTF-8 explicitly instead of relying on the platform default charset. */
    private static final Charset JSON_CHARSET = Charset.forName("UTF-8");

    /**
     * Builds an {@link Iterable} over the XML records derived from the JSON file referenced by the
     * path component of the interface base URL.
     *
     * <p>The file is opened and parsed each time {@link Iterable#iterator()} is called, so the
     * returned object can be iterated more than once and no file handle is held when it is never
     * iterated.
     *
     * @param interfaceDescriptor supplies the base URL (its path is used as the local file path)
     *                            and the {@code identifierFieldKey} parameter
     * @param fromDate            not used by this plugin
     * @param untilDate           not used by this plugin
     * @return an iterable producing one XML string per JSON array element
     * @throws CollectorServiceException if the base URL is malformed or the file is missing or
     *                                   unreadable
     */
    @Override
    public Iterable<String> collect(final InterfaceDescriptor interfaceDescriptor, final String fromDate, final String untilDate)
            throws CollectorServiceException {
        log.info("FileJSONCollectorPlugin");
        final String identifierFieldKey = interfaceDescriptor.getParams().get("identifierFieldKey");

        final URL u;
        try {
            u = new URL(interfaceDescriptor.getBaseUrl());
        } catch (MalformedURLException e1) {
            throw new CollectorServiceException(e1);
        }

        final String baseUrl = u.getPath();
        log.info("base URL = " + baseUrl);

        // Fail early, as before, when the file cannot be read; the actual open happens per iteration.
        final File jsonFile = new File(baseUrl);
        if (!jsonFile.isFile() || !jsonFile.canRead()) {
            throw new CollectorServiceException(
                    new FileNotFoundException("JSON file not found or not readable: " + baseUrl));
        }

        return new Iterable<String>() {
            @Override
            public Iterator<String> iterator() {
                try {
                    final Reader reader = new InputStreamReader(new FileInputStream(jsonFile), JSON_CHARSET);
                    return new FileJSONIterator(reader, identifierFieldKey);
                } catch (FileNotFoundException e) {
                    // iterator() cannot throw checked exceptions; keep the cause for diagnosis.
                    throw new IllegalStateException("Unable to open JSON file " + jsonFile, e);
                }
            }
        };
    }

    /**
     * Iterator that parses the whole JSON array up front and then converts its elements to XML
     * records one at a time, always keeping the next record pre-computed.
     */
    class FileJSONIterator implements Iterator<String> {

        /** Pre-computed record returned by the next call to {@link #next()}; null when exhausted. */
        private String next;

        /** Received from the caller; not read anywhere in this class. */
        private final String identifierFieldKey;

        /** Iterator over the elements of the parsed top-level JSON array. */
        private final Iterator<JsonElement> aatInfosIterator;

        /**
         * Parses the JSON document from the given reader and positions the iterator on the first
         * record. The reader is always closed before this constructor returns, because the parsed
         * array is held fully in memory.
         *
         * @param fileReader         source of the JSON text; closed by this constructor
         * @param identifierFieldKey stored but currently unused
         * @throws IllegalStateException if the top-level JSON value is not an array (raised by Gson)
         */
        public FileJSONIterator(final Reader fileReader, final String identifierFieldKey) {
            this.identifierFieldKey = identifierFieldKey;
            final JsonReader jsonReader = new JsonReader(fileReader);
            final JsonArray entries;
            try {
                entries = new JsonParser().parse(jsonReader).getAsJsonArray();
            } finally {
                try {
                    jsonReader.close();
                } catch (IOException e) {
                    log.warn("Error closing JSON reader", e);
                }
            }
            this.aatInfosIterator = entries.iterator();
            this.next = calculateNext();
        }

        @Override
        public boolean hasNext() {
            return next != null;
        }

        /**
         * Returns the pre-computed record and computes the following one.
         *
         * @throws NoSuchElementException if there are no more records
         */
        @Override
        public String next() {
            if (next == null) {
                throw new NoSuchElementException();
            }
            final String current = next;
            next = calculateNext();
            return current;
        }

        /**
         * Produces the XML for the next convertible JSON entry.
         *
         * <p>An entry that cannot be converted (e.g. it is not a JSON object, or one of its values
         * is a nested object) is logged and skipped, so a single bad entry does not end the
         * iteration.
         *
         * @return the next XML record, or null when the JSON array is exhausted
         */
        private String calculateNext() {
            while (aatInfosIterator.hasNext()) {
                final JsonElement aatInfos = aatInfosIterator.next();
                try {
                    return toXmlRecord(aatInfos);
                } catch (RuntimeException e) {
                    log.error("Error calculating next json element", e);
                }
            }
            log.info("json entries finished, closing RESULT SET");
            return null;
        }

        /**
         * Converts one JSON object into an {@code Entry} XML document serialized as a string.
         * Null and empty values are omitted; carriage returns and tabs in values become spaces.
         */
        private String toXmlRecord(final JsonElement aatInfos) {
            final Document document = DocumentHelper.createDocument();
            final Element root = document.addElement("Entry");
            final JsonObject aatInfoJsonObj = aatInfos.getAsJsonObject();

            for (final Map.Entry<String, JsonElement> entry : aatInfoJsonObj.entrySet()) {
                final JsonElement jsonElement = entry.getValue();
                if (jsonElement.isJsonNull()) {
                    continue;
                }
                final String value = jsonElement.getAsString();
                if (StringUtils.isEmpty(value)) {
                    continue;
                }
                // NOTE(review): the value is XML-escaped here and then added as dom4j text, which
                // presumably escapes again on asXML(); kept as-is because downstream consumers may
                // rely on the current output - confirm before changing.
                final String escaped = StringEscapeUtils.escapeXml11(value.replace('\r', ' ').replace('\t', ' '));
                root.addElement(entry.getKey()).addText(escaped);
            }

            final String xmlRecord = document.asXML();
            log.debug(xmlRecord);
            return xmlRecord;
        }

        @Override
        public void remove() {
            throw new UnsupportedOperationException();
        }
    }
}