diff --git a/dnet-ariadneplus/src/main/java/eu/dnetlib/data/collector/plugins/ariadneplus/ads/FileJSONCollectorPlugin.java b/dnet-ariadneplus/src/main/java/eu/dnetlib/data/collector/plugins/ariadneplus/ads/FileJSONCollectorPlugin.java
new file mode 100644
index 0000000..a7b93e1
--- /dev/null
+++ b/dnet-ariadneplus/src/main/java/eu/dnetlib/data/collector/plugins/ariadneplus/ads/FileJSONCollectorPlugin.java
@@ -0,0 +1,239 @@
+package eu.dnetlib.data.collector.plugins.ariadneplus.ads;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileNotFoundException;
+import java.io.FileReader;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.io.Reader;
+import java.net.MalformedURLException;
+import java.net.URL;
+import java.nio.charset.StandardCharsets;
+import java.util.Iterator;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.NoSuchElementException;
+import java.util.Set;
+
+import org.apache.commons.lang3.StringEscapeUtils;
+import org.apache.commons.lang3.StringUtils;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.dom4j.Document;
+import org.dom4j.DocumentHelper;
+import org.dom4j.Element;
+
+import com.google.gson.JsonArray;
+import com.google.gson.JsonElement;
+import com.google.gson.JsonObject;
+import com.google.gson.JsonParser;
+import com.google.gson.stream.JsonReader;
+
+import eu.dnetlib.data.collector.plugins.FileCollectorPlugin;
+import eu.dnetlib.rmi.data.CollectorServiceException;
+import eu.dnetlib.rmi.data.InterfaceDescriptor;
+
+/**
+ * Collector plugin that reads a JSON file from the local file system and turns every
+ * element of its top-level JSON array into one XML record.
+ * <p>
+ * Each record is a {@code jsonRecord} document holding one {@code Entry} element per
+ * non-null, non-empty field of the source JSON object. The field key is stored in the
+ * {@code name} attribute; the keys {@code sourceURI} and {@code targetURI} are renamed
+ * to {@code Source_ID} and {@code Target_URI}. The entry whose (renamed) name equals
+ * the {@code identifierFieldKey} interface parameter is flagged with {@code isID="true"}.
+ */
+public class FileJSONCollectorPlugin extends FileCollectorPlugin {
+
+    private static final Log log = LogFactory.getLog(FileJSONCollectorPlugin.class);
+
+    /**
+     * Builds a lazy, re-iterable view over the records of the JSON file addressed by the
+     * path component of the interface base URL.
+     *
+     * @param interfaceDescriptor supplies the base URL and the optional
+     *            {@code identifierFieldKey} parameter
+     * @param fromDate not used by this plugin
+     * @param untilDate not used by this plugin
+     * @return an iterable producing one XML string per JSON array element
+     * @throws CollectorServiceException if the base URL is malformed or the file it
+     *             points to cannot be read
+     */
+    @Override
+    public Iterable<String> collect(final InterfaceDescriptor interfaceDescriptor, final String fromDate, final String untilDate)
+            throws CollectorServiceException {
+        log.info("FileJSONCollectorPlugin");
+        final String identifierFieldKey = interfaceDescriptor.getParams().get("identifierFieldKey");
+
+        final URL url;
+        try {
+            url = new URL(interfaceDescriptor.getBaseUrl());
+        } catch (MalformedURLException e) {
+            throw new CollectorServiceException(e);
+        }
+        final String baseUrl = url.getPath();
+        log.info("base URL = " + baseUrl);
+
+        // Fail fast, as before, when the file is missing or unreadable; the stream itself
+        // is opened per iterator() call so the Iterable can be traversed more than once.
+        final File jsonFile = new File(baseUrl);
+        if (!jsonFile.isFile() || !jsonFile.canRead()) {
+            throw new CollectorServiceException(new FileNotFoundException("Cannot read JSON file: " + baseUrl));
+        }
+
+        return new Iterable<String>() {
+
+            @Override
+            public Iterator<String> iterator() {
+                try {
+                    // JSON is read as UTF-8 instead of relying on the platform default charset.
+                    final Reader reader = new InputStreamReader(new FileInputStream(jsonFile), StandardCharsets.UTF_8);
+                    return new FileJSONIterator(reader, identifierFieldKey);
+                } catch (FileNotFoundException e) {
+                    throw new IllegalStateException("Cannot open JSON file: " + jsonFile, e);
+                }
+            }
+        };
+    }
+
+    /**
+     * Iterator over the XML records derived from the elements of a JSON array.
+     * <p>
+     * The whole JSON document is parsed when the iterator is created and the reader is
+     * closed immediately afterwards, so no file handle stays open if the consumer
+     * abandons the iteration.
+     */
+    class FileJSONIterator implements Iterator<String> {
+
+        private final String identifierFieldKey;
+        private final Iterator<JsonElement> aatInfosIterator;
+        /** Record returned by the next call to {@link #next()}; {@code null} once exhausted. */
+        private String next;
+
+        /**
+         * @param fileReader source of the JSON document; always closed by this constructor
+         * @param identifierFieldKey name of the entry to flag as identifier, may be {@code null}
+         * @throws IllegalStateException if the top-level JSON element is not an array
+         */
+        public FileJSONIterator(final Reader fileReader, final String identifierFieldKey) {
+            this.identifierFieldKey = identifierFieldKey;
+            final JsonArray jsonArray;
+            final JsonReader jsonReader = new JsonReader(fileReader);
+            try {
+                log.debug("parsing ...");
+                jsonArray = new JsonParser().parse(jsonReader).getAsJsonArray();
+            } finally {
+                closeQuietly(jsonReader);
+            }
+            log.info("size: " + jsonArray.size());
+            aatInfosIterator = jsonArray.iterator();
+            next = calculateNext();
+        }
+
+        @Override
+        public boolean hasNext() {
+            return next != null;
+        }
+
+        /**
+         * @throws NoSuchElementException if no record is left
+         */
+        @Override
+        public String next() {
+            if (next == null) {
+                throw new NoSuchElementException();
+            }
+            final String current = next;
+            next = calculateNext();
+            return current;
+        }
+
+        @Override
+        public void remove() {
+            throw new UnsupportedOperationException();
+        }
+
+        /**
+         * Converts the next convertible JSON array element to XML.
+         * <p>
+         * An element that cannot be converted (for example because it is not a JSON
+         * object) is logged and skipped, so a single bad entry no longer ends the
+         * collection silently.
+         *
+         * @return the next XML record, or {@code null} when the array is exhausted
+         */
+        private String calculateNext() {
+            while (aatInfosIterator.hasNext()) {
+                final JsonElement aatInfos = aatInfosIterator.next();
+                try {
+                    final String xmlRecord = toXmlRecord(aatInfos.getAsJsonObject());
+                    log.debug(xmlRecord);
+                    return xmlRecord;
+                } catch (RuntimeException e) {
+                    // Best effort: report the broken entry and carry on with the following one.
+                    log.error("Error calculating next json element", e);
+                }
+            }
+            log.info("json entries finished, closing RESULT SET");
+            return null;
+        }
+
+        /**
+         * Renders one JSON object as a {@code jsonRecord} XML document.
+         *
+         * @param aatInfoJsonObj the JSON object to convert
+         * @return the serialized XML document
+         */
+        private String toXmlRecord(final JsonObject aatInfoJsonObj) {
+            final Document document = DocumentHelper.createDocument();
+            final Element root = document.addElement("jsonRecord");
+            for (final Map.Entry<String, JsonElement> entry : aatInfoJsonObj.entrySet()) {
+                final JsonElement jsonElement = entry.getValue();
+                if (jsonElement.isJsonNull()) {
+                    continue;
+                }
+                final String rawValue = jsonElement.getAsString();
+                if (StringUtils.isEmpty(rawValue)) {
+                    continue;
+                }
+                final String name = toEntryName(entry.getKey());
+                // NOTE(review): the value is XML-escaped here and then passed to addText();
+                // if dom4j escapes text on serialization as well, it ends up escaped twice.
+                // Kept unchanged because downstream consumers may depend on it - confirm.
+                final String value = StringEscapeUtils.escapeXml11(rawValue.replace('\r', ' ').replace('\t', ' '));
+                if (log.isDebugEnabled()) {
+                    log.debug("key: " + entry.getKey() + " value: " + rawValue);
+                }
+                final Element element = root.addElement("Entry");
+                if (name.equals(identifierFieldKey)) {
+                    element.addAttribute("isID", "true");
+                }
+                element.addAttribute("name", name).addText(value);
+            }
+            return document.asXML();
+        }
+
+        /** Maps a JSON key to the entry name used in the XML record. */
+        private String toEntryName(final String key) {
+            if ("sourceURI".equals(key)) {
+                return "Source_ID";
+            }
+            if ("targetURI".equals(key)) {
+                return "Target_URI";
+            }
+            return key;
+        }
+
+        /** Closes the reader, logging instead of propagating a failure to close. */
+        private void closeQuietly(final JsonReader jsonReader) {
+            try {
+                jsonReader.close();
+            } catch (IOException e) {
+                log.warn("Unable to close the JSON reader", e);
+            }
+        }
+
+    }
+
+}
diff --git a/dnet-ariadneplus/src/main/resources/eu/dnetlib/data/collector/plugins/ariadneplus/applicationContext-ariadneplus-collector-plugins.xml b/dnet-ariadneplus/src/main/resources/eu/dnetlib/data/collector/plugins/ariadneplus/applicationContext-ariadneplus-collector-plugins.xml
index f86d2f4..bc3fe25 100644
--- a/dnet-ariadneplus/src/main/resources/eu/dnetlib/data/collector/plugins/ariadneplus/applicationContext-ariadneplus-collector-plugins.xml
+++ b/dnet-ariadneplus/src/main/resources/eu/dnetlib/data/collector/plugins/ariadneplus/applicationContext-ariadneplus-collector-plugins.xml
@@ -17,4 +17,16 @@
+
+
+
+
+
+
+
+
+
+
+