diff --git a/dmp-backend/web/src/main/java/eu/eudat/logic/proxy/config/DataFieldsUrlConfiguration.java b/dmp-backend/web/src/main/java/eu/eudat/logic/proxy/config/DataFieldsUrlConfiguration.java index 0323e8011..0cd8c559b 100644 --- a/dmp-backend/web/src/main/java/eu/eudat/logic/proxy/config/DataFieldsUrlConfiguration.java +++ b/dmp-backend/web/src/main/java/eu/eudat/logic/proxy/config/DataFieldsUrlConfiguration.java @@ -12,6 +12,8 @@ public class DataFieldsUrlConfiguration { private String description; private String source; private String count; + private String path; + private String host; public String getId() { return id; @@ -67,4 +69,20 @@ public class DataFieldsUrlConfiguration { public void setCount(String count) { this.count = count; } + + public String getPath() { + return path; + } + @XmlElement(name = "path") + public void setPath(String path) { + this.path = path; + } + + public String getHost() { + return host; + } + @XmlElement(name = "host") + public void setHost(String host) { + this.host = host; + } } diff --git a/dmp-backend/web/src/main/java/eu/eudat/logic/proxy/config/DataUrlConfiguration.java b/dmp-backend/web/src/main/java/eu/eudat/logic/proxy/config/DataUrlConfiguration.java index d098f62cf..e787eafaa 100644 --- a/dmp-backend/web/src/main/java/eu/eudat/logic/proxy/config/DataUrlConfiguration.java +++ b/dmp-backend/web/src/main/java/eu/eudat/logic/proxy/config/DataUrlConfiguration.java @@ -1,6 +1,8 @@ package eu.eudat.logic.proxy.config; import javax.xml.bind.annotation.XmlElement; +import javax.xml.bind.annotation.XmlElementWrapper; +import java.util.List; /** * Created by ikalyvas on 6/29/2018. @@ -8,6 +10,11 @@ import javax.xml.bind.annotation.XmlElement; public class DataUrlConfiguration { private String path; private DataFieldsUrlConfiguration fieldsUrlConfiguration; + private UrlConfiguration urlConfiguration; + private String parseClass; + private String parseField; + private List mergedFields; + private String mergedFieldName; public String getPath() { return path; @@ -26,4 +33,50 @@ public class DataUrlConfiguration { public void setFieldsUrlConfiguration(DataFieldsUrlConfiguration fieldsUrlConfiguration) { this.fieldsUrlConfiguration = fieldsUrlConfiguration; } + + public UrlConfiguration getUrlConfiguration() { + return urlConfiguration; + } + + @XmlElement(name = "urlConfig") + public void setUrlConfiguration(UrlConfiguration urlConfiguration) { + this.urlConfiguration = urlConfiguration; + } + + public String getParseClass() { + return parseClass; + } + + @XmlElement(name = "parse-class") + public void setParseClass(String parseClass) { + this.parseClass = parseClass; + } + + public String getParseField() { + return parseField; + } + + @XmlElement(name = "parse-field") + public void setParseField(String parseField) { + this.parseField = parseField; + } + + public List getMergedFields() { + return mergedFields; + } + + @XmlElementWrapper(name = "merge-fields") + @XmlElement(name = "field") + public void setMergedFields(List mergedFields) { + this.mergedFields = mergedFields; + } + + public String getMergedFieldName() { + return mergedFieldName; + } + + @XmlElement(name = "merge-field-name") + public void setMergedFieldName(String mergedFieldName) { + this.mergedFieldName = mergedFieldName; + } } diff --git a/dmp-backend/web/src/main/java/eu/eudat/logic/proxy/config/ExternalUrlCriteria.java b/dmp-backend/web/src/main/java/eu/eudat/logic/proxy/config/ExternalUrlCriteria.java index fd0ea091c..5d837eea2 100644 --- a/dmp-backend/web/src/main/java/eu/eudat/logic/proxy/config/ExternalUrlCriteria.java +++ b/dmp-backend/web/src/main/java/eu/eudat/logic/proxy/config/ExternalUrlCriteria.java @@ -5,6 +5,8 @@ public class ExternalUrlCriteria { private String page; private String pageSize; private String funderId; + private String path; + private String host; public String getLike() { return like; @@ -34,6 +36,22 @@ public class ExternalUrlCriteria { this.funderId = funderId; } + public String getPath() { + return path; + } + + public void setPath(String path) { + this.path = path; + } + + public String getHost() { + return host; + } + + public void setHost(String host) { + this.host = host; + } + public ExternalUrlCriteria(String like) { this.like = like; } diff --git a/dmp-backend/web/src/main/java/eu/eudat/logic/proxy/fetching/RemoteFetcher.java b/dmp-backend/web/src/main/java/eu/eudat/logic/proxy/fetching/RemoteFetcher.java index e7a53c40f..fcc4dbcd9 100644 --- a/dmp-backend/web/src/main/java/eu/eudat/logic/proxy/fetching/RemoteFetcher.java +++ b/dmp-backend/web/src/main/java/eu/eudat/logic/proxy/fetching/RemoteFetcher.java @@ -4,7 +4,10 @@ import com.fasterxml.jackson.core.type.TypeReference; import com.fasterxml.jackson.databind.ObjectMapper; import com.jayway.jsonpath.DocumentContext; import com.jayway.jsonpath.JsonPath; -import eu.eudat.logic.proxy.config.*; +import eu.eudat.logic.proxy.config.DataUrlConfiguration; +import eu.eudat.logic.proxy.config.ExternalUrlCriteria; +import eu.eudat.logic.proxy.config.FetchStrategy; +import eu.eudat.logic.proxy.config.UrlConfiguration; import eu.eudat.logic.proxy.config.configloaders.ConfigLoader; import eu.eudat.logic.proxy.config.exceptions.HugeResultSet; import eu.eudat.logic.proxy.config.exceptions.NoURLFound; @@ -14,9 +17,11 @@ import org.springframework.beans.factory.annotation.Autowired; import org.springframework.cache.annotation.Cacheable; import org.springframework.stereotype.Service; -import java.io.File; -import java.io.IOException; -import java.io.UnsupportedEncodingException; +import javax.xml.bind.JAXBContext; +import javax.xml.bind.Unmarshaller; +import java.beans.PropertyDescriptor; +import java.io.*; +import java.lang.reflect.Method; import java.net.HttpURLConnection; import java.net.MalformedURLException; import java.net.URL; @@ -184,6 +189,16 @@ public class RemoteFetcher { } else { completedPath = completedPath.replace("{pageSize}", "60"); } + if (externalUrlCriteria.getHost() != null) { + completedPath = completedPath.replace("{host}", externalUrlCriteria.getHost()); + } else { + completedPath = completedPath.replace("{host}", ""); + } + if (externalUrlCriteria.getPath() != null) { + completedPath = completedPath.replace("{path}", externalUrlCriteria.getPath()); + } else { + completedPath = completedPath.replace("{path}", ""); + } return completedPath; } @@ -225,32 +240,82 @@ public class RemoteFetcher { HttpURLConnection con = (HttpURLConnection) url.openConnection(); con.setRequestMethod("GET"); - con.setRequestProperty("Accept", contentType); + if (contentType != null && !contentType.isEmpty()) { + con.setRequestProperty("Accept", contentType); + } int responseCode = con.getResponseCode(); if (responseCode == HttpURLConnection.HTTP_OK) { // success //do here all the parsing - DocumentContext jsonContext = JsonPath.parse(con.getInputStream()); - Results results; - if (jsonDataPath.getFieldsUrlConfiguration().getSource() != null) { - results = new Results(jsonContext.read(jsonDataPath.getPath() - + "[" + jsonDataPath.getFieldsUrlConfiguration().getName() + "," + jsonDataPath.getFieldsUrlConfiguration().getDescription() - + "," + jsonDataPath.getFieldsUrlConfiguration().getUri() + "," + jsonDataPath.getFieldsUrlConfiguration().getId() - + "," + jsonDataPath.getFieldsUrlConfiguration().getSource() + "]"), - new HashMap<>(1, 1)); - } else if (jsonDataPath.getFieldsUrlConfiguration().getCount() != null) { // parsing services.openaire.eu - results = new Results(jsonContext.read(jsonDataPath.getPath() - + "[" + jsonDataPath.getFieldsUrlConfiguration().getName() - + "," + jsonDataPath.getFieldsUrlConfiguration().getId() + "]"), - new HashMap<>(1, 1)); - } else { - results = new Results(jsonContext.read(jsonDataPath.getPath() - + "[" + jsonDataPath.getFieldsUrlConfiguration().getName() + "," + jsonDataPath.getFieldsUrlConfiguration().getDescription() - + "," + jsonDataPath.getFieldsUrlConfiguration().getUri() + "," + jsonDataPath.getFieldsUrlConfiguration().getId() + "]"), - new HashMap<>(1, 1)); + Results results = new Results(); + if (con.getHeaderField("Content-Type").contains("json")) { + DocumentContext jsonContext = JsonPath.parse(con.getInputStream()); + + if (jsonDataPath.getFieldsUrlConfiguration().getSource() != null) { + results = new Results(jsonContext.read(jsonDataPath.getPath() + + "[" + jsonDataPath.getFieldsUrlConfiguration().getName() + "," + jsonDataPath.getFieldsUrlConfiguration().getDescription() + + "," + jsonDataPath.getFieldsUrlConfiguration().getUri() + "," + jsonDataPath.getFieldsUrlConfiguration().getId() + + "," + jsonDataPath.getFieldsUrlConfiguration().getSource() + "]"), + new HashMap<>(1, 1)); + } else if (jsonDataPath.getFieldsUrlConfiguration().getCount() != null) { // parsing services.openaire.eu + results = new Results(jsonContext.read(jsonDataPath.getPath() + + "[" + jsonDataPath.getFieldsUrlConfiguration().getName() + + "," + jsonDataPath.getFieldsUrlConfiguration().getId() + "]"), + new HashMap<>(1, 1)); + } else if (jsonDataPath.getFieldsUrlConfiguration().getPath() != null) { + results = new Results(jsonContext.read(jsonDataPath.getPath() + + "[" + jsonDataPath.getFieldsUrlConfiguration().getPath() + + "," + jsonDataPath.getFieldsUrlConfiguration().getHost() + "]"), + new HashMap<>(1, 1)); + + List> multiResults = results.results.stream().map(result -> { + ExternalUrlCriteria externalUrlCriteria = new ExternalUrlCriteria(); + externalUrlCriteria.setPath(result.get("path")); + externalUrlCriteria.setHost(result.get("host")); + String replacedPath = replaceCriteriaOnUrl(jsonDataPath.getUrlConfiguration().getUrl(), externalUrlCriteria, jsonDataPath.getUrlConfiguration().getFirstpage()); + return getResultsFromUrl(replacedPath, jsonDataPath.getUrlConfiguration().getData(), jsonDataPath.getUrlConfiguration().getData().getPath(), jsonDataPath.getUrlConfiguration().getContentType()); + }).filter(Objects::nonNull).map(results1 -> results1.results.get(0)).collect(Collectors.toList()); + results = new Results(multiResults, new HashMap<>(1, 1)); + } else { + results = new Results(jsonContext.read(jsonDataPath.getPath() + + "[" + jsonDataPath.getFieldsUrlConfiguration().getName() + "," + jsonDataPath.getFieldsUrlConfiguration().getDescription() + + "," + jsonDataPath.getFieldsUrlConfiguration().getUri() + "," + jsonDataPath.getFieldsUrlConfiguration().getId() + "]"), + new HashMap<>(1, 1)); + } + results.results = results.results.stream().map(e -> e.entrySet().stream().collect(Collectors.toMap(x -> this.transformKey(jsonDataPath,x.getKey()), Map.Entry::getValue))) + .collect(Collectors.toList()); + } else if (con.getHeaderField("Content-Type").contains("xml")) { + Class aClass = Class.forName(jsonDataPath.getParseClass()); + JAXBContext jaxbContext = JAXBContext.newInstance(aClass); + Unmarshaller unmarshaller = jaxbContext.createUnmarshaller(); + Object data = unmarshaller.unmarshal(con.getInputStream()); + Method reader = null; + if (jsonDataPath.getParseField() != null && !jsonDataPath.getParseField().isEmpty()) { + reader = new PropertyDescriptor(jsonDataPath.getParseField(), aClass).getReadMethod(); + } + ObjectMapper objectMapper = new ObjectMapper(); + List> values = new ArrayList<>(); + Map map = objectMapper.convertValue( reader != null ? reader.invoke(data) : data, Map.class); + if (jsonDataPath.getMergedFields() != null && !jsonDataPath.getMergedFields().isEmpty() && jsonDataPath.getMergedFieldName() != null && !jsonDataPath.getMergedFieldName().isEmpty()) { + Map finalMap = new HashMap<>(); + for (Map.Entry entry : map.entrySet()) { + if (jsonDataPath.getMergedFields().contains(entry.getKey())) { + if (!finalMap.containsKey(jsonDataPath.getMergedFieldName())) { + finalMap.put(jsonDataPath.getMergedFieldName(), entry.getValue()); + } else { + finalMap.put(jsonDataPath.getMergedFieldName(), finalMap.get(jsonDataPath.getMergedFieldName())+ " " + entry.getValue()); + } + } else { + finalMap.put(entry.getKey(), entry.getValue()); + } + } + values.add(finalMap); + } else { + values.add(map); + } + results = new Results(values, new HashMap<>(1, 1)); } - results.results = results.results.stream().map(e -> e.entrySet().stream().collect(Collectors.toMap(x -> this.transformKey(jsonDataPath,x.getKey()), Map.Entry::getValue))) - .collect(Collectors.toList()); + return results; } } catch (MalformedURLException e1) { @@ -302,6 +367,8 @@ public class RemoteFetcher { if (dataUrlConfiguration.getFieldsUrlConfiguration().getName() != null && key.equals(dataUrlConfiguration.getFieldsUrlConfiguration().getName().replace("'",""))) return "name"; if (dataUrlConfiguration.getFieldsUrlConfiguration().getSource() != null && key.equals(dataUrlConfiguration.getFieldsUrlConfiguration().getSource().replace("'",""))) return "source"; if (dataUrlConfiguration.getFieldsUrlConfiguration().getCount() != null && key.equals(dataUrlConfiguration.getFieldsUrlConfiguration().getCount().replace("'",""))) return "count"; + if (dataUrlConfiguration.getFieldsUrlConfiguration().getPath() != null && key.equals(dataUrlConfiguration.getFieldsUrlConfiguration().getPath().replace("'",""))) return "path"; + if (dataUrlConfiguration.getFieldsUrlConfiguration().getHost() != null && key.equals(dataUrlConfiguration.getFieldsUrlConfiguration().getHost().replace("'",""))) return "host"; return null; } @@ -311,8 +378,8 @@ public class RemoteFetcher { Map pagination; Results() { - this.results = new ArrayList>(); - this.pagination = new HashMap(); + this.results = new ArrayList<>(); + this.pagination = new HashMap<>(); } Results(List> results, Map pagination) {