plugin to collect one json file

This commit is contained in:
Enrico Ottonello 2020-02-14 13:00:09 +01:00
parent ae91617d1b
commit c5945c386b
2 changed files with 171 additions and 0 deletions

View File

@ -0,0 +1,159 @@
package eu.dnetlib.data.collector.plugins.ariadneplus.ads;
import java.io.File;
import java.io.FileReader;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.Iterator;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import org.apache.commons.lang3.StringEscapeUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.dom4j.Document;
import org.dom4j.DocumentHelper;
import org.dom4j.Element;
import com.google.gson.JsonArray;
import com.google.gson.JsonElement;
import com.google.gson.JsonObject;
import com.google.gson.JsonParser;
import com.google.gson.stream.JsonReader;
import eu.dnetlib.data.collector.plugins.FileCollectorPlugin;
import eu.dnetlib.rmi.data.CollectorServiceException;
import eu.dnetlib.rmi.data.InterfaceDescriptor;
/**
 * Collector plugin that reads a single local JSON file whose top-level value is an array of
 * objects and exposes every array entry as one XML record of the form
 * {@code <jsonRecord><Entry name="...">value</Entry>...</jsonRecord>}.
 *
 * <p>The file location is taken from the path component of the interface base URL. The optional
 * {@code identifierFieldKey} interface parameter names the field whose {@code Entry} element is
 * flagged with {@code isID="true"}.
 */
public class FileJSONCollectorPlugin extends FileCollectorPlugin {

    private static final Log log = LogFactory.getLog(FileJSONCollectorPlugin.class);

    /**
     * Builds an iterable over the XML records derived from the JSON file.
     *
     * @param interfaceDescriptor supplies the base URL (file location) and the
     *        {@code identifierFieldKey} parameter
     * @param fromDate not used by this plugin
     * @param untilDate not used by this plugin
     * @return an iterable producing one XML string per JSON array entry; every call to
     *         {@code iterator()} re-reads the file from the beginning
     * @throws CollectorServiceException if the base URL is malformed or the file cannot be read
     */
    @Override
    public Iterable<String> collect(final InterfaceDescriptor interfaceDescriptor, final String fromDate, final String untilDate)
            throws CollectorServiceException {
        log.info("FileJSONCollectorPlugin");
        final String identifierFieldKey = interfaceDescriptor.getParams().get("identifierFieldKey");

        final URL u;
        try {
            u = new URL(interfaceDescriptor.getBaseUrl());
        } catch (MalformedURLException e) {
            throw new CollectorServiceException(e);
        }
        final String baseUrl = u.getPath();
        log.info("base URL = " + baseUrl);

        // Fail fast here (as the original did by opening the file eagerly) so that a missing
        // file is still reported to the caller as a CollectorServiceException.
        final File file = new File(baseUrl);
        if (!file.isFile() || !file.canRead()) {
            throw new CollectorServiceException(new java.io.FileNotFoundException(baseUrl));
        }

        return new Iterable<String>() {
            @Override
            public Iterator<String> iterator() {
                // A fresh reader per iterator: a shared reader would already be consumed (and
                // closed) on the second iteration and would leak if never iterated at all.
                final FileReader fileReader;
                try {
                    fileReader = new FileReader(file);
                } catch (java.io.FileNotFoundException e) {
                    throw new IllegalStateException("Cannot open JSON file " + baseUrl, e);
                }
                return new FileJSONIterator(fileReader, identifierFieldKey);
            }
        };
    }

    /**
     * Iterator turning each element of the parsed JSON array into an XML record string.
     * The whole file is parsed up front, so the underlying reader is released in the constructor.
     */
    class FileJSONIterator implements Iterator<String> {

        private final String identifierFieldKey;
        private final Iterator<JsonElement> aatInfosIterator;
        /** Pre-computed record returned by the following {@link #next()} call; null when exhausted. */
        private String next;

        /**
         * Parses the complete JSON document and positions the iterator on the first record.
         *
         * @param fileReader reader over the JSON file; it is always closed before this
         *        constructor returns
         * @param identifierFieldKey name of the field to flag with {@code isID="true"}, may be null
         * @throws RuntimeException (Gson parse errors, or IllegalStateException when the
         *         top-level value is not an array) if the document cannot be used
         */
        public FileJSONIterator(final FileReader fileReader, final String identifierFieldKey) {
            this.identifierFieldKey = identifierFieldKey;
            final JsonReader jsonReader = new JsonReader(fileReader);
            try {
                final JsonArray jsonArray = new JsonParser().parse(jsonReader).getAsJsonArray();
                log.info("json entries to collect: " + jsonArray.size());
                this.aatInfosIterator = jsonArray.iterator();
            } finally {
                // Everything is in memory once parsing returns (or has failed): release the file
                // handle now instead of relying on the consumer to exhaust the iterator.
                try {
                    jsonReader.close();
                } catch (java.io.IOException e) {
                    log.warn("Error closing json reader", e);
                }
            }
            this.next = calculateNext();
        }

        @Override
        public boolean hasNext() {
            return next != null;
        }

        /**
         * @return the next XML record
         * @throws java.util.NoSuchElementException if no record is left
         */
        @Override
        public String next() {
            if (next == null) {
                throw new java.util.NoSuchElementException();
            }
            final String current = next;
            next = calculateNext();
            return current;
        }

        /**
         * Produces the XML for the next convertible array entry.
         *
         * <p>An entry that cannot be converted (not a JSON object, or holding a nested value that
         * has no string form) is logged and skipped; previously such an entry silently ended the
         * whole collection.
         *
         * @return the XML record, or null when the array is exhausted
         */
        private String calculateNext() {
            while (aatInfosIterator.hasNext()) {
                final JsonElement aatInfos = aatInfosIterator.next();
                try {
                    return toXmlRecord(aatInfos.getAsJsonObject());
                } catch (RuntimeException e) {
                    log.error("Error calculating next json element", e);
                }
            }
            log.info("json entries finished, closing RESULT SET");
            return null;
        }

        /** Serializes the non-null, non-empty fields of one JSON object as a jsonRecord document. */
        private String toXmlRecord(final JsonObject jsonObject) {
            final Document document = DocumentHelper.createDocument();
            final Element root = document.addElement("jsonRecord");
            for (final Map.Entry<String, JsonElement> field : jsonObject.entrySet()) {
                final JsonElement jsonValue = field.getValue();
                if (jsonValue.isJsonNull()) {
                    continue;
                }
                final String rawValue = jsonValue.getAsString();
                if (StringUtils.isEmpty(rawValue)) {
                    continue;
                }
                final String name = toEntryName(field.getKey());
                // NOTE(review): the value is escaped here and then added as a dom4j text node;
                // dom4j appears to escape text again in asXML(), which would double-escape
                // characters such as '&'. Kept as-is because downstream consumers may rely on
                // the current output - confirm before changing.
                final String value = StringEscapeUtils.escapeXml11(rawValue.replace('\r', ' ').replace('\t', ' '));
                final Element element = root.addElement("Entry");
                // The identifier key is compared against the (possibly renamed) entry name.
                if (name.equals(identifierFieldKey)) {
                    element.addAttribute("isID", "true");
                }
                element.addAttribute("name", name).addText(value);
            }
            final String xmlRecord = document.asXML();
            log.debug(xmlRecord);
            return xmlRecord;
        }

        /** Maps the JSON keys that are published under a different name; other keys pass through. */
        private String toEntryName(final String key) {
            if (key.equals("sourceURI")) {
                return "Source_ID";
            }
            if (key.equals("targetURI")) {
                return "Target_URI";
            }
            return key;
        }

        @Override
        public void remove() {
            throw new UnsupportedOperationException();
        }
    }
}

View File

@ -17,4 +17,16 @@
</bean>
</property>
</bean>
<!--
  Registers FileJSONCollectorPlugin as the collector bean "aatCollectorPlugin".
  Its protocol descriptor is named "aat" and declares a single protocol parameter,
  "identifierFieldKey"; the plugin looks up a parameter with that same name in the
  interface descriptor it receives when collecting.
-->
<bean id="aatCollectorPlugin" class="eu.dnetlib.data.collector.plugins.ariadneplus.ads.FileJSONCollectorPlugin">
<property name="protocolDescriptor">
<bean class="eu.dnetlib.rmi.data.ProtocolDescriptor" p:name="aat">
<property name="params">
<list>
<bean class="eu.dnetlib.rmi.data.ProtocolParameter"
p:name="identifierFieldKey"/>
</list>
</property>
</bean>
</property>
</bean>
</beans>