Add support for RemoteFetcher redirection and xml content types

This commit is contained in:
George Kalampokis 2020-02-10 18:24:15 +02:00
parent 4e5a48e6c4
commit 2116838c45
4 changed files with 183 additions and 27 deletions

View File

@ -12,6 +12,8 @@ public class DataFieldsUrlConfiguration {
private String description;
private String source;
private String count;
private String path;
private String host;
public String getId() {
return id;
@ -67,4 +69,20 @@ public class DataFieldsUrlConfiguration {
public void setCount(String count) {
this.count = count;
}
/**
 * Returns the configured "path" field selector.
 * RemoteFetcher inserts this value into the JsonPath bracket selection of a
 * response and maps the matching result key to {@code "path"}.
 *
 * @return the stored value, or {@code null} if it was never set
 */
public String getPath() {
return path;
}
/**
 * Stores the "path" field selector; mapped to the {@code path} XML element.
 *
 * @param path value to store, kept as given without validation
 */
@XmlElement(name = "path")
public void setPath(String path) {
this.path = path;
}
/**
 * Returns the configured "host" field selector.
 * RemoteFetcher inserts this value into the JsonPath bracket selection of a
 * response and maps the matching result key to {@code "host"}.
 *
 * @return the stored value, or {@code null} if it was never set
 */
public String getHost() {
return host;
}
/**
 * Stores the "host" field selector; mapped to the {@code host} XML element.
 *
 * @param host value to store, kept as given without validation
 */
@XmlElement(name = "host")
public void setHost(String host) {
this.host = host;
}
}

View File

@ -1,6 +1,8 @@
package eu.eudat.logic.proxy.config;
import javax.xml.bind.annotation.XmlElement;
import javax.xml.bind.annotation.XmlElementWrapper;
import java.util.List;
/**
* Created by ikalyvas on 6/29/2018.
@ -8,6 +10,11 @@ import javax.xml.bind.annotation.XmlElement;
public class DataUrlConfiguration {
private String path;
private DataFieldsUrlConfiguration fieldsUrlConfiguration;
private UrlConfiguration urlConfiguration;
private String parseClass;
private String parseField;
private List<String> mergedFields;
private String mergedFieldName;
public String getPath() {
return path;
@ -26,4 +33,50 @@ public class DataUrlConfiguration {
public void setFieldsUrlConfiguration(DataFieldsUrlConfiguration fieldsUrlConfiguration) {
this.fieldsUrlConfiguration = fieldsUrlConfiguration;
}
/**
 * Returns the nested URL configuration.
 * RemoteFetcher uses it to build and issue one follow-up request per result
 * when the fields configuration defines a "path" selector.
 *
 * @return the nested configuration, or {@code null} if it was never set
 */
public UrlConfiguration getUrlConfiguration() {
return urlConfiguration;
}
/**
 * Stores the nested URL configuration; mapped to the {@code urlConfig} XML element.
 *
 * @param urlConfiguration configuration to store, kept as given
 */
@XmlElement(name = "urlConfig")
public void setUrlConfiguration(UrlConfiguration urlConfiguration) {
this.urlConfiguration = urlConfiguration;
}
/**
 * Returns the name of the class used to unmarshal XML responses.
 * RemoteFetcher passes this value to {@code Class.forName}, so it is
 * expected to be a loadable class name.
 *
 * @return the stored class name, or {@code null} if it was never set
 */
public String getParseClass() {
return parseClass;
}
/**
 * Stores the XML parse class name; mapped to the {@code parse-class} XML element.
 *
 * @param parseClass class name to store, kept as given without validation
 */
@XmlElement(name = "parse-class")
public void setParseClass(String parseClass) {
this.parseClass = parseClass;
}
/**
 * Returns the name of the property to read from the unmarshalled XML object.
 * When this is {@code null} or empty, RemoteFetcher converts the whole
 * unmarshalled object instead of a single property.
 *
 * @return the stored property name, or {@code null} if it was never set
 */
public String getParseField() {
return parseField;
}
/**
 * Stores the parse property name; mapped to the {@code parse-field} XML element.
 *
 * @param parseField property name to store, kept as given without validation
 */
@XmlElement(name = "parse-field")
public void setParseField(String parseField) {
this.parseField = parseField;
}
/**
 * Returns the keys whose values should be merged into a single entry.
 * RemoteFetcher joins the values of these keys with a single space and
 * stores them under the merged field name.
 *
 * @return the stored list itself (not a copy), or {@code null} if it was never set
 */
public List<String> getMergedFields() {
return mergedFields;
}
/**
 * Stores the keys to merge; mapped to {@code field} elements wrapped in a
 * {@code merge-fields} XML element.
 *
 * @param mergedFields list to store; the reference is kept, no copy is made
 */
@XmlElementWrapper(name = "merge-fields")
@XmlElement(name = "field")
public void setMergedFields(List<String> mergedFields) {
this.mergedFields = mergedFields;
}
/**
 * Returns the key under which merged values are stored.
 * RemoteFetcher only merges when both this name and the merged field list
 * are non-null and non-empty.
 *
 * @return the stored key name, or {@code null} if it was never set
 */
public String getMergedFieldName() {
return mergedFieldName;
}
/**
 * Stores the merged field key; mapped to the {@code merge-field-name} XML element.
 *
 * @param mergedFieldName key name to store, kept as given without validation
 */
@XmlElement(name = "merge-field-name")
public void setMergedFieldName(String mergedFieldName) {
this.mergedFieldName = mergedFieldName;
}
}

View File

@ -5,6 +5,8 @@ public class ExternalUrlCriteria {
private String page;
private String pageSize;
private String funderId;
private String path;
private String host;
public String getLike() {
return like;
@ -34,6 +36,22 @@ public class ExternalUrlCriteria {
this.funderId = funderId;
}
/**
 * Returns the value substituted for the {@code {path}} placeholder of a URL.
 * RemoteFetcher substitutes an empty string when this is {@code null}.
 *
 * @return the stored path, or {@code null} if it was never set
 */
public String getPath() {
return path;
}
/**
 * Stores the value to substitute for the {@code {path}} URL placeholder.
 *
 * @param path value to store, kept as given without validation
 */
public void setPath(String path) {
this.path = path;
}
/**
 * Returns the value substituted for the {@code {host}} placeholder of a URL.
 * RemoteFetcher substitutes an empty string when this is {@code null}.
 *
 * @return the stored host, or {@code null} if it was never set
 */
public String getHost() {
return host;
}
/**
 * Stores the value to substitute for the {@code {host}} URL placeholder.
 *
 * @param host value to store, kept as given without validation
 */
public void setHost(String host) {
this.host = host;
}
/**
 * Creates criteria carrying only a {@code like} value; no other field is
 * assigned by this constructor.
 *
 * @param like value to store in the {@code like} field, kept as given
 */
public ExternalUrlCriteria(String like) {
this.like = like;
}

View File

@ -4,7 +4,10 @@ import com.fasterxml.jackson.core.type.TypeReference;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.jayway.jsonpath.DocumentContext;
import com.jayway.jsonpath.JsonPath;
import eu.eudat.logic.proxy.config.*;
import eu.eudat.logic.proxy.config.DataUrlConfiguration;
import eu.eudat.logic.proxy.config.ExternalUrlCriteria;
import eu.eudat.logic.proxy.config.FetchStrategy;
import eu.eudat.logic.proxy.config.UrlConfiguration;
import eu.eudat.logic.proxy.config.configloaders.ConfigLoader;
import eu.eudat.logic.proxy.config.exceptions.HugeResultSet;
import eu.eudat.logic.proxy.config.exceptions.NoURLFound;
@ -14,9 +17,11 @@ import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.cache.annotation.Cacheable;
import org.springframework.stereotype.Service;
import java.io.File;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import javax.xml.bind.JAXBContext;
import javax.xml.bind.Unmarshaller;
import java.beans.PropertyDescriptor;
import java.io.*;
import java.lang.reflect.Method;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.URL;
@ -184,6 +189,16 @@ public class RemoteFetcher {
} else {
completedPath = completedPath.replace("{pageSize}", "60");
}
if (externalUrlCriteria.getHost() != null) {
completedPath = completedPath.replace("{host}", externalUrlCriteria.getHost());
} else {
completedPath = completedPath.replace("{host}", "");
}
if (externalUrlCriteria.getPath() != null) {
completedPath = completedPath.replace("{path}", externalUrlCriteria.getPath());
} else {
completedPath = completedPath.replace("{path}", "");
}
return completedPath;
}
@ -225,32 +240,82 @@ public class RemoteFetcher {
HttpURLConnection con = (HttpURLConnection) url.openConnection();
con.setRequestMethod("GET");
con.setRequestProperty("Accept", contentType);
if (contentType != null && !contentType.isEmpty()) {
con.setRequestProperty("Accept", contentType);
}
int responseCode = con.getResponseCode();
if (responseCode == HttpURLConnection.HTTP_OK) { // success
//do here all the parsing
DocumentContext jsonContext = JsonPath.parse(con.getInputStream());
Results results;
if (jsonDataPath.getFieldsUrlConfiguration().getSource() != null) {
results = new Results(jsonContext.read(jsonDataPath.getPath()
+ "[" + jsonDataPath.getFieldsUrlConfiguration().getName() + "," + jsonDataPath.getFieldsUrlConfiguration().getDescription()
+ "," + jsonDataPath.getFieldsUrlConfiguration().getUri() + "," + jsonDataPath.getFieldsUrlConfiguration().getId()
+ "," + jsonDataPath.getFieldsUrlConfiguration().getSource() + "]"),
new HashMap<>(1, 1));
} else if (jsonDataPath.getFieldsUrlConfiguration().getCount() != null) { // parsing services.openaire.eu
results = new Results(jsonContext.read(jsonDataPath.getPath()
+ "[" + jsonDataPath.getFieldsUrlConfiguration().getName()
+ "," + jsonDataPath.getFieldsUrlConfiguration().getId() + "]"),
new HashMap<>(1, 1));
} else {
results = new Results(jsonContext.read(jsonDataPath.getPath()
+ "[" + jsonDataPath.getFieldsUrlConfiguration().getName() + "," + jsonDataPath.getFieldsUrlConfiguration().getDescription()
+ "," + jsonDataPath.getFieldsUrlConfiguration().getUri() + "," + jsonDataPath.getFieldsUrlConfiguration().getId() + "]"),
new HashMap<>(1, 1));
Results results = new Results();
if (con.getHeaderField("Content-Type").contains("json")) {
DocumentContext jsonContext = JsonPath.parse(con.getInputStream());
if (jsonDataPath.getFieldsUrlConfiguration().getSource() != null) {
results = new Results(jsonContext.read(jsonDataPath.getPath()
+ "[" + jsonDataPath.getFieldsUrlConfiguration().getName() + "," + jsonDataPath.getFieldsUrlConfiguration().getDescription()
+ "," + jsonDataPath.getFieldsUrlConfiguration().getUri() + "," + jsonDataPath.getFieldsUrlConfiguration().getId()
+ "," + jsonDataPath.getFieldsUrlConfiguration().getSource() + "]"),
new HashMap<>(1, 1));
} else if (jsonDataPath.getFieldsUrlConfiguration().getCount() != null) { // parsing services.openaire.eu
results = new Results(jsonContext.read(jsonDataPath.getPath()
+ "[" + jsonDataPath.getFieldsUrlConfiguration().getName()
+ "," + jsonDataPath.getFieldsUrlConfiguration().getId() + "]"),
new HashMap<>(1, 1));
} else if (jsonDataPath.getFieldsUrlConfiguration().getPath() != null) {
results = new Results(jsonContext.read(jsonDataPath.getPath()
+ "[" + jsonDataPath.getFieldsUrlConfiguration().getPath()
+ "," + jsonDataPath.getFieldsUrlConfiguration().getHost() + "]"),
new HashMap<>(1, 1));
List<Map<String, String>> multiResults = results.results.stream().map(result -> {
ExternalUrlCriteria externalUrlCriteria = new ExternalUrlCriteria();
externalUrlCriteria.setPath(result.get("path"));
externalUrlCriteria.setHost(result.get("host"));
String replacedPath = replaceCriteriaOnUrl(jsonDataPath.getUrlConfiguration().getUrl(), externalUrlCriteria, jsonDataPath.getUrlConfiguration().getFirstpage());
return getResultsFromUrl(replacedPath, jsonDataPath.getUrlConfiguration().getData(), jsonDataPath.getUrlConfiguration().getData().getPath(), jsonDataPath.getUrlConfiguration().getContentType());
}).filter(Objects::nonNull).map(results1 -> results1.results.get(0)).collect(Collectors.toList());
results = new Results(multiResults, new HashMap<>(1, 1));
} else {
results = new Results(jsonContext.read(jsonDataPath.getPath()
+ "[" + jsonDataPath.getFieldsUrlConfiguration().getName() + "," + jsonDataPath.getFieldsUrlConfiguration().getDescription()
+ "," + jsonDataPath.getFieldsUrlConfiguration().getUri() + "," + jsonDataPath.getFieldsUrlConfiguration().getId() + "]"),
new HashMap<>(1, 1));
}
results.results = results.results.stream().map(e -> e.entrySet().stream().collect(Collectors.toMap(x -> this.transformKey(jsonDataPath,x.getKey()), Map.Entry::getValue)))
.collect(Collectors.toList());
} else if (con.getHeaderField("Content-Type").contains("xml")) {
Class<?> aClass = Class.forName(jsonDataPath.getParseClass());
JAXBContext jaxbContext = JAXBContext.newInstance(aClass);
Unmarshaller unmarshaller = jaxbContext.createUnmarshaller();
Object data = unmarshaller.unmarshal(con.getInputStream());
Method reader = null;
if (jsonDataPath.getParseField() != null && !jsonDataPath.getParseField().isEmpty()) {
reader = new PropertyDescriptor(jsonDataPath.getParseField(), aClass).getReadMethod();
}
ObjectMapper objectMapper = new ObjectMapper();
List<Map<String, String>> values = new ArrayList<>();
Map<String, String> map = objectMapper.convertValue( reader != null ? reader.invoke(data) : data, Map.class);
if (jsonDataPath.getMergedFields() != null && !jsonDataPath.getMergedFields().isEmpty() && jsonDataPath.getMergedFieldName() != null && !jsonDataPath.getMergedFieldName().isEmpty()) {
Map<String, String> finalMap = new HashMap<>();
for (Map.Entry<String, String> entry : map.entrySet()) {
if (jsonDataPath.getMergedFields().contains(entry.getKey())) {
if (!finalMap.containsKey(jsonDataPath.getMergedFieldName())) {
finalMap.put(jsonDataPath.getMergedFieldName(), entry.getValue());
} else {
finalMap.put(jsonDataPath.getMergedFieldName(), finalMap.get(jsonDataPath.getMergedFieldName())+ " " + entry.getValue());
}
} else {
finalMap.put(entry.getKey(), entry.getValue());
}
}
values.add(finalMap);
} else {
values.add(map);
}
results = new Results(values, new HashMap<>(1, 1));
}
results.results = results.results.stream().map(e -> e.entrySet().stream().collect(Collectors.toMap(x -> this.transformKey(jsonDataPath,x.getKey()), Map.Entry::getValue)))
.collect(Collectors.toList());
return results;
}
} catch (MalformedURLException e1) {
@ -302,6 +367,8 @@ public class RemoteFetcher {
if (dataUrlConfiguration.getFieldsUrlConfiguration().getName() != null && key.equals(dataUrlConfiguration.getFieldsUrlConfiguration().getName().replace("'",""))) return "name";
if (dataUrlConfiguration.getFieldsUrlConfiguration().getSource() != null && key.equals(dataUrlConfiguration.getFieldsUrlConfiguration().getSource().replace("'",""))) return "source";
if (dataUrlConfiguration.getFieldsUrlConfiguration().getCount() != null && key.equals(dataUrlConfiguration.getFieldsUrlConfiguration().getCount().replace("'",""))) return "count";
if (dataUrlConfiguration.getFieldsUrlConfiguration().getPath() != null && key.equals(dataUrlConfiguration.getFieldsUrlConfiguration().getPath().replace("'",""))) return "path";
if (dataUrlConfiguration.getFieldsUrlConfiguration().getHost() != null && key.equals(dataUrlConfiguration.getFieldsUrlConfiguration().getHost().replace("'",""))) return "host";
return null;
}
@ -311,8 +378,8 @@ public class RemoteFetcher {
Map<String, Integer> pagination;
/**
 * Creates an empty result holder: an empty result list and an empty
 * pagination map.
 */
Results() {
this.results = new ArrayList<Map<String, String>>();
this.pagination = new HashMap<String, Integer>();
// NOTE(review): the two assignments below repeat the two above using the
// diamond operator; presumably the old and new sides of a diff hunk shown
// together - confirm that only one pair exists in the actual file.
this.results = new ArrayList<>();
this.pagination = new HashMap<>();
}
Results(List<Map<String, String>> results, Map<String, Integer> pagination) {