Add support for RemoteFetcher redirection and xml content types

This commit is contained in:
George Kalampokis 2020-02-10 18:24:15 +02:00
parent 4e5a48e6c4
commit 2116838c45
4 changed files with 183 additions and 27 deletions

View File

@ -12,6 +12,8 @@ public class DataFieldsUrlConfiguration {
private String description; private String description;
private String source; private String source;
private String count; private String count;
private String path;
private String host;
public String getId() { public String getId() {
return id; return id;
@ -67,4 +69,20 @@ public class DataFieldsUrlConfiguration {
public void setCount(String count) { public void setCount(String count) {
this.count = count; this.count = count;
} }
/**
 * Returns the configured selector for the "path" field of a remote response.
 * RemoteFetcher places this value inside the JSONPath bracket expression it builds
 * and exposes the matching result key (single quotes stripped) as {@code "path"}.
 *
 * @return the configured path field selector, as last set
 */
public String getPath() {
    return this.path;
}
/**
 * Sets the selector for the "path" field of a remote response.
 * Bound to the {@code path} XML element of the configuration.
 *
 * @param path the field selector to store
 */
@XmlElement(name = "path")
public void setPath(String path) {
this.path = path;
}
/**
 * Returns the configured selector for the "host" field of a remote response.
 * RemoteFetcher places this value inside the JSONPath bracket expression it builds
 * and exposes the matching result key (single quotes stripped) as {@code "host"}.
 *
 * @return the configured host field selector, as last set
 */
public String getHost() {
    return this.host;
}
/**
 * Sets the selector for the "host" field of a remote response.
 * Bound to the {@code host} XML element of the configuration.
 *
 * @param host the field selector to store
 */
@XmlElement(name = "host")
public void setHost(String host) {
this.host = host;
}
} }

View File

@ -1,6 +1,8 @@
package eu.eudat.logic.proxy.config; package eu.eudat.logic.proxy.config;
import javax.xml.bind.annotation.XmlElement; import javax.xml.bind.annotation.XmlElement;
import javax.xml.bind.annotation.XmlElementWrapper;
import java.util.List;
/** /**
* Created by ikalyvas on 6/29/2018. * Created by ikalyvas on 6/29/2018.
@ -8,6 +10,11 @@ import javax.xml.bind.annotation.XmlElement;
public class DataUrlConfiguration { public class DataUrlConfiguration {
private String path; private String path;
private DataFieldsUrlConfiguration fieldsUrlConfiguration; private DataFieldsUrlConfiguration fieldsUrlConfiguration;
private UrlConfiguration urlConfiguration;
private String parseClass;
private String parseField;
private List<String> mergedFields;
private String mergedFieldName;
public String getPath() { public String getPath() {
return path; return path;
@ -26,4 +33,50 @@ public class DataUrlConfiguration {
public void setFieldsUrlConfiguration(DataFieldsUrlConfiguration fieldsUrlConfiguration) { public void setFieldsUrlConfiguration(DataFieldsUrlConfiguration fieldsUrlConfiguration) {
this.fieldsUrlConfiguration = fieldsUrlConfiguration; this.fieldsUrlConfiguration = fieldsUrlConfiguration;
} }
/**
 * Returns the nested URL configuration used for follow-up requests.
 * RemoteFetcher fills this configuration's URL with the path/host taken from each
 * first-pass result and fetches it with the nested data settings and content type.
 *
 * @return the nested URL configuration, as last set
 */
public UrlConfiguration getUrlConfiguration() {
    return this.urlConfiguration;
}
/**
 * Sets the nested URL configuration used for follow-up requests.
 * Bound to the {@code urlConfig} XML element of the configuration.
 *
 * @param urlConfiguration the nested URL configuration to store
 */
@XmlElement(name = "urlConfig")
public void setUrlConfiguration(UrlConfiguration urlConfiguration) {
this.urlConfiguration = urlConfiguration;
}
/**
 * Returns the name of the class used to parse XML responses.
 * RemoteFetcher loads it with {@code Class.forName} and builds a JAXB context
 * from it to unmarshal the response stream.
 *
 * @return the configured class name, as last set
 */
public String getParseClass() {
    return this.parseClass;
}
/**
 * Sets the name of the class used to parse XML responses.
 * Bound to the {@code parse-class} XML element of the configuration.
 *
 * @param parseClass the class name to store
 */
@XmlElement(name = "parse-class")
public void setParseClass(String parseClass) {
this.parseClass = parseClass;
}
/**
 * Returns the name of the property to read from the unmarshalled XML object.
 * When it is non-null and non-empty, RemoteFetcher invokes that property's read
 * method on the parsed object and converts the returned value; otherwise the
 * parsed object itself is converted.
 *
 * @return the configured property name, as last set
 */
public String getParseField() {
    return this.parseField;
}
/**
 * Sets the name of the property to read from the unmarshalled XML object.
 * Bound to the {@code parse-field} XML element of the configuration.
 *
 * @param parseField the property name to store
 */
@XmlElement(name = "parse-field")
public void setParseField(String parseField) {
this.parseField = parseField;
}
/**
 * Returns the keys whose values are merged into a single entry.
 * RemoteFetcher joins the values of these keys with a single space under the
 * merged field name; a null or empty list disables merging.
 *
 * @return the stored list of keys to merge (the same instance that was set)
 */
public List<String> getMergedFields() {
    return this.mergedFields;
}
/**
 * Sets the keys whose values are merged into a single entry.
 * Bound to {@code field} elements nested in a {@code merge-fields} wrapper element.
 * The list is stored by reference, not copied.
 *
 * @param mergedFields the keys to merge
 */
@XmlElementWrapper(name = "merge-fields")
@XmlElement(name = "field")
public void setMergedFields(List<String> mergedFields) {
this.mergedFields = mergedFields;
}
/**
 * Returns the key under which merged field values are stored.
 * RemoteFetcher only merges when this name is non-null and non-empty.
 *
 * @return the configured merged field name, as last set
 */
public String getMergedFieldName() {
    return this.mergedFieldName;
}
/**
 * Sets the key under which merged field values are stored.
 * Bound to the {@code merge-field-name} XML element of the configuration.
 *
 * @param mergedFieldName the merged field name to store
 */
@XmlElement(name = "merge-field-name")
public void setMergedFieldName(String mergedFieldName) {
this.mergedFieldName = mergedFieldName;
}
} }

View File

@ -5,6 +5,8 @@ public class ExternalUrlCriteria {
private String page; private String page;
private String pageSize; private String pageSize;
private String funderId; private String funderId;
private String path;
private String host;
public String getLike() { public String getLike() {
return like; return like;
@ -34,6 +36,22 @@ public class ExternalUrlCriteria {
this.funderId = funderId; this.funderId = funderId;
} }
/**
 * Returns the value substituted for the {@code {path}} placeholder of a URL.
 * RemoteFetcher substitutes an empty string when this is null.
 *
 * @return the path criterion, possibly null
 */
public String getPath() {
    return this.path;
}
/**
 * Sets the value substituted for the {@code {path}} placeholder of a URL.
 *
 * @param path the path criterion to store
 */
public void setPath(String path) {
this.path = path;
}
/**
 * Returns the value substituted for the {@code {host}} placeholder of a URL.
 * RemoteFetcher substitutes an empty string when this is null.
 *
 * @return the host criterion, possibly null
 */
public String getHost() {
    return this.host;
}
/**
 * Sets the value substituted for the {@code {host}} placeholder of a URL.
 *
 * @param host the host criterion to store
 */
public void setHost(String host) {
this.host = host;
}
public ExternalUrlCriteria(String like) { public ExternalUrlCriteria(String like) {
this.like = like; this.like = like;
} }

View File

@ -4,7 +4,10 @@ import com.fasterxml.jackson.core.type.TypeReference;
import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.ObjectMapper;
import com.jayway.jsonpath.DocumentContext; import com.jayway.jsonpath.DocumentContext;
import com.jayway.jsonpath.JsonPath; import com.jayway.jsonpath.JsonPath;
import eu.eudat.logic.proxy.config.*; import eu.eudat.logic.proxy.config.DataUrlConfiguration;
import eu.eudat.logic.proxy.config.ExternalUrlCriteria;
import eu.eudat.logic.proxy.config.FetchStrategy;
import eu.eudat.logic.proxy.config.UrlConfiguration;
import eu.eudat.logic.proxy.config.configloaders.ConfigLoader; import eu.eudat.logic.proxy.config.configloaders.ConfigLoader;
import eu.eudat.logic.proxy.config.exceptions.HugeResultSet; import eu.eudat.logic.proxy.config.exceptions.HugeResultSet;
import eu.eudat.logic.proxy.config.exceptions.NoURLFound; import eu.eudat.logic.proxy.config.exceptions.NoURLFound;
@ -14,9 +17,11 @@ import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.cache.annotation.Cacheable; import org.springframework.cache.annotation.Cacheable;
import org.springframework.stereotype.Service; import org.springframework.stereotype.Service;
import java.io.File; import javax.xml.bind.JAXBContext;
import java.io.IOException; import javax.xml.bind.Unmarshaller;
import java.io.UnsupportedEncodingException; import java.beans.PropertyDescriptor;
import java.io.*;
import java.lang.reflect.Method;
import java.net.HttpURLConnection; import java.net.HttpURLConnection;
import java.net.MalformedURLException; import java.net.MalformedURLException;
import java.net.URL; import java.net.URL;
@ -184,6 +189,16 @@ public class RemoteFetcher {
} else { } else {
completedPath = completedPath.replace("{pageSize}", "60"); completedPath = completedPath.replace("{pageSize}", "60");
} }
if (externalUrlCriteria.getHost() != null) {
completedPath = completedPath.replace("{host}", externalUrlCriteria.getHost());
} else {
completedPath = completedPath.replace("{host}", "");
}
if (externalUrlCriteria.getPath() != null) {
completedPath = completedPath.replace("{path}", externalUrlCriteria.getPath());
} else {
completedPath = completedPath.replace("{path}", "");
}
return completedPath; return completedPath;
} }
@ -225,32 +240,82 @@ public class RemoteFetcher {
HttpURLConnection con = (HttpURLConnection) url.openConnection(); HttpURLConnection con = (HttpURLConnection) url.openConnection();
con.setRequestMethod("GET"); con.setRequestMethod("GET");
con.setRequestProperty("Accept", contentType); if (contentType != null && !contentType.isEmpty()) {
con.setRequestProperty("Accept", contentType);
}
int responseCode = con.getResponseCode(); int responseCode = con.getResponseCode();
if (responseCode == HttpURLConnection.HTTP_OK) { // success if (responseCode == HttpURLConnection.HTTP_OK) { // success
//do here all the parsing //do here all the parsing
DocumentContext jsonContext = JsonPath.parse(con.getInputStream()); Results results = new Results();
Results results; if (con.getHeaderField("Content-Type").contains("json")) {
if (jsonDataPath.getFieldsUrlConfiguration().getSource() != null) { DocumentContext jsonContext = JsonPath.parse(con.getInputStream());
results = new Results(jsonContext.read(jsonDataPath.getPath()
+ "[" + jsonDataPath.getFieldsUrlConfiguration().getName() + "," + jsonDataPath.getFieldsUrlConfiguration().getDescription() if (jsonDataPath.getFieldsUrlConfiguration().getSource() != null) {
+ "," + jsonDataPath.getFieldsUrlConfiguration().getUri() + "," + jsonDataPath.getFieldsUrlConfiguration().getId() results = new Results(jsonContext.read(jsonDataPath.getPath()
+ "," + jsonDataPath.getFieldsUrlConfiguration().getSource() + "]"), + "[" + jsonDataPath.getFieldsUrlConfiguration().getName() + "," + jsonDataPath.getFieldsUrlConfiguration().getDescription()
new HashMap<>(1, 1)); + "," + jsonDataPath.getFieldsUrlConfiguration().getUri() + "," + jsonDataPath.getFieldsUrlConfiguration().getId()
} else if (jsonDataPath.getFieldsUrlConfiguration().getCount() != null) { // parsing services.openaire.eu + "," + jsonDataPath.getFieldsUrlConfiguration().getSource() + "]"),
results = new Results(jsonContext.read(jsonDataPath.getPath() new HashMap<>(1, 1));
+ "[" + jsonDataPath.getFieldsUrlConfiguration().getName() } else if (jsonDataPath.getFieldsUrlConfiguration().getCount() != null) { // parsing services.openaire.eu
+ "," + jsonDataPath.getFieldsUrlConfiguration().getId() + "]"), results = new Results(jsonContext.read(jsonDataPath.getPath()
new HashMap<>(1, 1)); + "[" + jsonDataPath.getFieldsUrlConfiguration().getName()
} else { + "," + jsonDataPath.getFieldsUrlConfiguration().getId() + "]"),
results = new Results(jsonContext.read(jsonDataPath.getPath() new HashMap<>(1, 1));
+ "[" + jsonDataPath.getFieldsUrlConfiguration().getName() + "," + jsonDataPath.getFieldsUrlConfiguration().getDescription() } else if (jsonDataPath.getFieldsUrlConfiguration().getPath() != null) {
+ "," + jsonDataPath.getFieldsUrlConfiguration().getUri() + "," + jsonDataPath.getFieldsUrlConfiguration().getId() + "]"), results = new Results(jsonContext.read(jsonDataPath.getPath()
new HashMap<>(1, 1)); + "[" + jsonDataPath.getFieldsUrlConfiguration().getPath()
+ "," + jsonDataPath.getFieldsUrlConfiguration().getHost() + "]"),
new HashMap<>(1, 1));
List<Map<String, String>> multiResults = results.results.stream().map(result -> {
ExternalUrlCriteria externalUrlCriteria = new ExternalUrlCriteria();
externalUrlCriteria.setPath(result.get("path"));
externalUrlCriteria.setHost(result.get("host"));
String replacedPath = replaceCriteriaOnUrl(jsonDataPath.getUrlConfiguration().getUrl(), externalUrlCriteria, jsonDataPath.getUrlConfiguration().getFirstpage());
return getResultsFromUrl(replacedPath, jsonDataPath.getUrlConfiguration().getData(), jsonDataPath.getUrlConfiguration().getData().getPath(), jsonDataPath.getUrlConfiguration().getContentType());
}).filter(Objects::nonNull).map(results1 -> results1.results.get(0)).collect(Collectors.toList());
results = new Results(multiResults, new HashMap<>(1, 1));
} else {
results = new Results(jsonContext.read(jsonDataPath.getPath()
+ "[" + jsonDataPath.getFieldsUrlConfiguration().getName() + "," + jsonDataPath.getFieldsUrlConfiguration().getDescription()
+ "," + jsonDataPath.getFieldsUrlConfiguration().getUri() + "," + jsonDataPath.getFieldsUrlConfiguration().getId() + "]"),
new HashMap<>(1, 1));
}
results.results = results.results.stream().map(e -> e.entrySet().stream().collect(Collectors.toMap(x -> this.transformKey(jsonDataPath,x.getKey()), Map.Entry::getValue)))
.collect(Collectors.toList());
} else if (con.getHeaderField("Content-Type").contains("xml")) {
Class<?> aClass = Class.forName(jsonDataPath.getParseClass());
JAXBContext jaxbContext = JAXBContext.newInstance(aClass);
Unmarshaller unmarshaller = jaxbContext.createUnmarshaller();
Object data = unmarshaller.unmarshal(con.getInputStream());
Method reader = null;
if (jsonDataPath.getParseField() != null && !jsonDataPath.getParseField().isEmpty()) {
reader = new PropertyDescriptor(jsonDataPath.getParseField(), aClass).getReadMethod();
}
ObjectMapper objectMapper = new ObjectMapper();
List<Map<String, String>> values = new ArrayList<>();
Map<String, String> map = objectMapper.convertValue( reader != null ? reader.invoke(data) : data, Map.class);
if (jsonDataPath.getMergedFields() != null && !jsonDataPath.getMergedFields().isEmpty() && jsonDataPath.getMergedFieldName() != null && !jsonDataPath.getMergedFieldName().isEmpty()) {
Map<String, String> finalMap = new HashMap<>();
for (Map.Entry<String, String> entry : map.entrySet()) {
if (jsonDataPath.getMergedFields().contains(entry.getKey())) {
if (!finalMap.containsKey(jsonDataPath.getMergedFieldName())) {
finalMap.put(jsonDataPath.getMergedFieldName(), entry.getValue());
} else {
finalMap.put(jsonDataPath.getMergedFieldName(), finalMap.get(jsonDataPath.getMergedFieldName())+ " " + entry.getValue());
}
} else {
finalMap.put(entry.getKey(), entry.getValue());
}
}
values.add(finalMap);
} else {
values.add(map);
}
results = new Results(values, new HashMap<>(1, 1));
} }
results.results = results.results.stream().map(e -> e.entrySet().stream().collect(Collectors.toMap(x -> this.transformKey(jsonDataPath,x.getKey()), Map.Entry::getValue)))
.collect(Collectors.toList());
return results; return results;
} }
} catch (MalformedURLException e1) { } catch (MalformedURLException e1) {
@ -302,6 +367,8 @@ public class RemoteFetcher {
if (dataUrlConfiguration.getFieldsUrlConfiguration().getName() != null && key.equals(dataUrlConfiguration.getFieldsUrlConfiguration().getName().replace("'",""))) return "name"; if (dataUrlConfiguration.getFieldsUrlConfiguration().getName() != null && key.equals(dataUrlConfiguration.getFieldsUrlConfiguration().getName().replace("'",""))) return "name";
if (dataUrlConfiguration.getFieldsUrlConfiguration().getSource() != null && key.equals(dataUrlConfiguration.getFieldsUrlConfiguration().getSource().replace("'",""))) return "source"; if (dataUrlConfiguration.getFieldsUrlConfiguration().getSource() != null && key.equals(dataUrlConfiguration.getFieldsUrlConfiguration().getSource().replace("'",""))) return "source";
if (dataUrlConfiguration.getFieldsUrlConfiguration().getCount() != null && key.equals(dataUrlConfiguration.getFieldsUrlConfiguration().getCount().replace("'",""))) return "count"; if (dataUrlConfiguration.getFieldsUrlConfiguration().getCount() != null && key.equals(dataUrlConfiguration.getFieldsUrlConfiguration().getCount().replace("'",""))) return "count";
if (dataUrlConfiguration.getFieldsUrlConfiguration().getPath() != null && key.equals(dataUrlConfiguration.getFieldsUrlConfiguration().getPath().replace("'",""))) return "path";
if (dataUrlConfiguration.getFieldsUrlConfiguration().getHost() != null && key.equals(dataUrlConfiguration.getFieldsUrlConfiguration().getHost().replace("'",""))) return "host";
return null; return null;
} }
@ -311,8 +378,8 @@ public class RemoteFetcher {
Map<String, Integer> pagination; Map<String, Integer> pagination;
Results() { Results() {
this.results = new ArrayList<Map<String, String>>(); this.results = new ArrayList<>();
this.pagination = new HashMap<String, Integer>(); this.pagination = new HashMap<>();
} }
Results(List<Map<String, String>> results, Map<String, Integer> pagination) { Results(List<Map<String, String>> results, Map<String, Integer> pagination) {