Refactor remote fetcher and made it independent of sun libraries

This commit is contained in:
George Kalampokis 2021-10-13 17:48:46 +03:00
parent 65644c3a35
commit d0d918682b
4 changed files with 202 additions and 153 deletions
dmp-backend/web/src/main/java/eu/eudat/logic/proxy/fetching

View File

@ -8,7 +8,6 @@ import org.springframework.stereotype.Component;
import javax.xml.bind.JAXBContext;
import javax.xml.bind.Unmarshaller;
import java.beans.PropertyDescriptor;
import java.io.InputStream;
import java.lang.reflect.Method;
import java.util.*;
@ -43,7 +42,8 @@ public class LocalFetcher {
is.close();
Method reader = null;
if (configSingle.getParseField() != null && !configSingle.getParseField().isEmpty()) {
reader = new PropertyDescriptor(configSingle.getParseField(), aClass).getReadMethod();
String camelCaseGetter =configSingle.getParseField() != null && !configSingle.getParseField().isEmpty() ? "get" + configSingle.getParseField().substring(0, 1).toUpperCase() + configSingle.getParseField().substring(1) : "";
reader = aClass.getMethod(camelCaseGetter);
}
ObjectMapper objectMapper = new ObjectMapper();
List<Map<String, String>> values = new ArrayList<>();

View File

@ -12,29 +12,20 @@ import eu.eudat.logic.proxy.config.UrlConfiguration;
import eu.eudat.logic.proxy.config.configloaders.ConfigLoader;
import eu.eudat.logic.proxy.config.exceptions.HugeResultSet;
import eu.eudat.logic.proxy.config.exceptions.NoURLFound;
import net.minidev.json.JSONArray;
import eu.eudat.logic.proxy.fetching.entities.Results;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.cache.annotation.Cacheable;
import org.springframework.http.HttpEntity;
import org.springframework.http.HttpHeaders;
import org.springframework.http.HttpMethod;
import org.springframework.http.HttpStatus;
import org.springframework.http.*;
import org.springframework.stereotype.Service;
import org.springframework.web.client.RestTemplate;
import org.springframework.http.MediaType;
import org.springframework.http.ResponseEntity;
import javax.xml.bind.JAXBContext;
import javax.xml.bind.Unmarshaller;
import java.beans.PropertyDescriptor;
import java.io.*;
import java.io.File;
import java.io.StringReader;
import java.lang.reflect.Method;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLEncoder;
import java.nio.file.Paths;
import java.util.*;
import java.util.stream.Collectors;
@ -153,22 +144,36 @@ public class RemoteFetcher {
}
private List<Map<String, String>> getAll(List<UrlConfiguration> urlConfigs, FetchStrategy fetchStrategy, ExternalUrlCriteria externalUrlCriteria) throws NoURLFound, HugeResultSet {
private List<Map<String, String>> getAll(List<UrlConfiguration> urlConfigs, FetchStrategy fetchStrategy, ExternalUrlCriteria externalUrlCriteria) {
List<Map<String, String>> results = new LinkedList<>();
if (urlConfigs == null || urlConfigs.isEmpty()) return results;
if (urlConfigs == null || urlConfigs.isEmpty()) {
return results;
}
// throw new NoURLFound("No Repository urls found in configuration");
urlConfigs.sort(Comparator.comparing(UrlConfiguration::getOrdinal));
for (UrlConfiguration urlConfig : urlConfigs) {
urlConfigs.forEach(urlConfiguration -> {
ifFunderQueryExist(urlConfiguration, externalUrlCriteria);
if (urlConfiguration.getType() == null || urlConfiguration.getType().equals("External")) {
try {
results.addAll(getAllResultsFromUrl(urlConfiguration.getUrl(), fetchStrategy, urlConfiguration.getData(), urlConfiguration.getPaginationPath(), externalUrlCriteria, urlConfiguration.getLabel(), urlConfiguration.getKey(), urlConfiguration.getContentType(), urlConfiguration.getFirstpage(), urlConfiguration.getRequestBody(), urlConfiguration.getRequestType()));
} catch (Exception e) {
logger.error(e.getLocalizedMessage(), e);
}
} else if (urlConfiguration.getType() != null && urlConfiguration.getType().equals("Internal")) {
results.addAll(getAllResultsFromMockUpJson(urlConfiguration.getUrl(), externalUrlCriteria.getLike()));
}
});
/* for (UrlConfiguration urlConfig : urlConfigs) {
ifFunderQueryExist(urlConfig, externalUrlCriteria);
if (urlConfig.getType() == null || urlConfig.getType().equals("External")) {
results.addAll(getAllResultsFromUrl(urlConfig.getUrl(), fetchStrategy, urlConfig.getData(), urlConfig.getPaginationPath(), externalUrlCriteria, urlConfig.getLabel(), urlConfig.getKey(), urlConfig.getContentType(), urlConfig.getFirstpage(), urlConfig.getRequestBody(), urlConfig.getRequestType()));
} else if (urlConfig.getType() != null && urlConfig.getType().equals("Internal")) {
results.addAll(getAllResultsFromMockUpJson(urlConfig.getUrl(), externalUrlCriteria.getLike()));
}
}
}*/
return results;
}
@ -183,7 +188,7 @@ public class RemoteFetcher {
}
}
private String replaceCriteriaOnUrl(String path, ExternalUrlCriteria externalUrlCriteria, String firstPage) {
protected String replaceCriteriaOnUrl(String path, ExternalUrlCriteria externalUrlCriteria, String firstPage) {
String completedPath = path;
if (externalUrlCriteria.getLike() != null) {
if ((path.contains("openaire") || path.contains("orcid") || path.contains("ror")) && externalUrlCriteria.getLike().equals("")) {
@ -233,7 +238,7 @@ public class RemoteFetcher {
return completedPath;
}
private List<Map<String, String>> getAllResultsFromUrl(String path, FetchStrategy fetchStrategy, final DataUrlConfiguration jsonDataPath, final String jsonPaginationPath, ExternalUrlCriteria externalUrlCriteria, String tag, String key, String contentType, String firstPage, String requestBody, String requestType) throws HugeResultSet {
private List<Map<String, String>> getAllResultsFromUrl(String path, FetchStrategy fetchStrategy, final DataUrlConfiguration jsonDataPath, final String jsonPaginationPath, ExternalUrlCriteria externalUrlCriteria, String tag, String key, String contentType, String firstPage, String requestBody, String requestType) throws Exception {
Set<Integer> pages = new HashSet<>();
String replacedPath = replaceCriteriaOnUrl(path, externalUrlCriteria, firstPage);
@ -253,6 +258,7 @@ public class RemoteFetcher {
Optional<Results> optionalResults = pages.parallelStream()
.map(page -> getResultsFromUrl(path + "&page=" + page, jsonDataPath, jsonPaginationPath, contentType, replacedBody, requestType))
.filter(Objects::nonNull)
.reduce((result1, result2) -> {
result1.getResults().addAll(result2.getResults());
return result1;
@ -264,7 +270,7 @@ public class RemoteFetcher {
}
private Results getResultsFromUrl(String urlString, DataUrlConfiguration jsonDataPath, String jsonPaginationPath, String contentType, String requestBody, String requestType) {
protected Results getResultsFromUrl(String urlString, DataUrlConfiguration jsonDataPath, String jsonPaginationPath, String contentType, String requestBody, String requestType) {
try {
RestTemplate restTemplate = new RestTemplate();
@ -292,103 +298,23 @@ public class RemoteFetcher {
DocumentContext jsonContext = JsonPath.parse(response.getBody());
if (jsonDataPath.getFieldsUrlConfiguration().getSource() != null) {
results = new Results(jsonContext.read(jsonDataPath.getPath()
+ "[" + jsonDataPath.getFieldsUrlConfiguration().getName() + "," + jsonDataPath.getFieldsUrlConfiguration().getDescription()
+ "," + jsonDataPath.getFieldsUrlConfiguration().getUri() + "," + jsonDataPath.getFieldsUrlConfiguration().getId()
+ "," + jsonDataPath.getFieldsUrlConfiguration().getSource() + "]"),
new HashMap<>(1, 1));
results = RemoteFetcherUtils.getFromJsonWithSource(jsonContext, jsonDataPath);
} else if (jsonDataPath.getFieldsUrlConfiguration().getCount() != null) { // parsing services.openaire.eu
results = new Results(jsonContext.read(jsonDataPath.getPath()
+ "[" + jsonDataPath.getFieldsUrlConfiguration().getName()
+ "," + jsonDataPath.getFieldsUrlConfiguration().getId() + "]"),
new HashMap<>(1, 1));
List<Map<String, String>> fixedResults = results.getResults().stream().map(item -> {
for (int i = 0; i < 2; i++) {
String id;
if (i == 0) {
id = jsonDataPath.getFieldsUrlConfiguration().getId().replace("'", "");
} else {
id = jsonDataPath.getFieldsUrlConfiguration().getName().replace("'", "");
}
if (!(item.get(id) instanceof String)) {
Object obj = item.get(id);
if (obj instanceof JSONArray) {
JSONArray jarr = (JSONArray) obj;
if (jarr.get(0) instanceof String) {
item.put(id, jarr.get(0).toString());
} else {
for (int j = 0; j < jarr.size(); j++) {
mapToMap(id, (Map<String, String>)jarr.get(j), item, i == 1);
}
}
} else {
if (obj instanceof Map) {
mapToMap(id, (Map<String, String>) obj, item, i == 1);
} else if (obj != null){
item.put(id, obj.toString());
}
}
}
}
return item;
}).collect(Collectors.toList());
results = new Results(fixedResults, new HashMap<>(1, 1));
results = RemoteFetcherUtils.getFromJsonWithParsedData(jsonContext, jsonDataPath);
} else if (jsonDataPath.getFieldsUrlConfiguration().getPath() != null) {
results = new Results(jsonContext.read(jsonDataPath.getPath()
+ "[" + jsonDataPath.getFieldsUrlConfiguration().getPath()
+ "," + jsonDataPath.getFieldsUrlConfiguration().getHost() + "]"),
new HashMap<>(1, 1));
List<Map<String, String>> multiResults = results.results.stream().map(result -> {
ExternalUrlCriteria externalUrlCriteria = new ExternalUrlCriteria();
externalUrlCriteria.setPath(result.get("path"));
externalUrlCriteria.setHost(result.get("host"));
String replacedPath = replaceCriteriaOnUrl(jsonDataPath.getUrlConfiguration().getUrl(), externalUrlCriteria, jsonDataPath.getUrlConfiguration().getFirstpage());
return getResultsFromUrl(replacedPath, jsonDataPath.getUrlConfiguration().getData(), jsonDataPath.getUrlConfiguration().getData().getPath(), jsonDataPath.getUrlConfiguration().getContentType(), requestBody, requestType);
}).filter(Objects::nonNull).map(results1 -> results1.results.get(0)).collect(Collectors.toList());
results = new Results(multiResults, new HashMap<>(1, 1));
results = RemoteFetcherUtils.getFromJsonWithRecursiveFetching(jsonContext, jsonDataPath, this, requestBody, requestType);
} else if (jsonDataPath.getFieldsUrlConfiguration().getTypes() != null) {
List<Map<String, Object>> tempRes = jsonContext.read(jsonDataPath.getPath()
+ "[" + jsonDataPath.getFieldsUrlConfiguration().getId() + "," + jsonDataPath.getFieldsUrlConfiguration().getName()
+ "," + jsonDataPath.getFieldsUrlConfiguration().getTypes() + "," + jsonDataPath.getFieldsUrlConfiguration().getUri() + "]");
List<Map<String, String>> finalRes = new ArrayList<>();
tempRes.forEach(map -> {
Map<String, String> resMap = new HashMap<>();
map.forEach((key, value) -> {
if (key.equals(jsonDataPath.getFieldsUrlConfiguration().getTypes().substring(1, jsonDataPath.getFieldsUrlConfiguration().getTypes().length() - 1))) {
resMap.put("tags", ((JSONArray) value).toJSONString());
} else if (key.equals(jsonDataPath.getFieldsUrlConfiguration().getUri().substring(1, jsonDataPath.getFieldsUrlConfiguration().getTypes().length() - 1))) {
resMap.put(key, ((JSONArray) value).toJSONString());
} else {
resMap.put(key, (String) value);
}
});
finalRes.add(resMap);
});
results = new Results(finalRes,
new HashMap<>(1, 1));
results = RemoteFetcherUtils.getFromJsonWithType(jsonContext, jsonDataPath);
} else if (jsonDataPath.getFieldsUrlConfiguration().getFirstName() != null) {
results = new Results(jsonContext.read(jsonDataPath.getPath()
+ "[" + jsonDataPath.getFieldsUrlConfiguration().getId() + "," + jsonDataPath.getFieldsUrlConfiguration().getFirstName()
+ "," + jsonDataPath.getFieldsUrlConfiguration().getLastName() + "]"),
new HashMap<>(1, 1));
results.getResults().stream().forEach(entry -> {
String name = entry.get(jsonDataPath.getFieldsUrlConfiguration().getFirstName().replace("'", "")) + " " + entry.get(jsonDataPath.getFieldsUrlConfiguration().getLastName().replace("'", ""));
entry.put("name", name);
entry.remove(jsonDataPath.getFieldsUrlConfiguration().getFirstName().replace("'", ""));
entry.remove(jsonDataPath.getFieldsUrlConfiguration().getLastName().replace("'", ""));
});
results = RemoteFetcherUtils.getFromJsonWithFirstAndLastName(jsonContext, jsonDataPath);
} else {
results = new Results(jsonContext.read(jsonDataPath.getPath()
+ "[" + jsonDataPath.getFieldsUrlConfiguration().getName() + "," + jsonDataPath.getFieldsUrlConfiguration().getDescription()
+ "," + jsonDataPath.getFieldsUrlConfiguration().getUri() + "," + jsonDataPath.getFieldsUrlConfiguration().getId() + "]"),
new HashMap<>(1, 1));
}
results.results = results.results.stream().map(e -> e.entrySet().stream().collect(Collectors.toMap(x -> this.transformKey(jsonDataPath,x.getKey()), Map.Entry::getValue)))
.collect(Collectors.toList());
results.setResults(results.getResults().stream().map(e -> e.entrySet().stream().collect(Collectors.toMap(x -> this.transformKey(jsonDataPath,x.getKey()), Map.Entry::getValue)))
.collect(Collectors.toList()));
}
else if (response.getHeaders().get("Content-Type").get(0).contains("xml")) {
Class<?> aClass = Class.forName(jsonDataPath.getParseClass());
@ -398,7 +324,8 @@ public class RemoteFetcher {
Object data = unmarshaller.unmarshal(stringReader);
Method reader = null;
if (jsonDataPath.getParseField() != null && !jsonDataPath.getParseField().isEmpty()) {
reader = new PropertyDescriptor(jsonDataPath.getParseField(), aClass).getReadMethod();
String camelCaseGetter = jsonDataPath.getParseField() != null && !jsonDataPath.getParseField().isEmpty() ? "get" + jsonDataPath.getParseField().substring(0, 1).toUpperCase() + jsonDataPath.getParseField().substring(1) : "";
reader = aClass.getMethod(camelCaseGetter);
}
ObjectMapper objectMapper = new ObjectMapper();
List<Map<String, String>> values = new ArrayList<>();
@ -448,8 +375,6 @@ public class RemoteFetcher {
} catch (Exception exception) {
logger.error(exception.getMessage(), exception);
} //maybe print smth...
finally {
}
return null;
}
@ -494,48 +419,7 @@ public class RemoteFetcher {
}
static class Results {
List<Map<String, String>> results;
Map<String, Integer> pagination;
Results() {
this.results = new ArrayList<>();
this.pagination = new HashMap<>();
}
Results(List<Map<String, String>> results, Map<String, Integer> pagination) {
this.results = results;
this.pagination = pagination;
}
List<Map<String, String>> getResults() {
return results;
}
public void setResults(List<Map<String, String>> results) {
this.results = results;
}
Map<String, Integer> getPagination() {
return pagination;
}
public void setPagination(Map<String, Integer> pagination) {
this.pagination = pagination;
}
}
private void mapToMap(String key, Map<String, String> source, Map<String, String> destination, boolean isTitle) {
if (source != null) {
String content = source.get("content");
/*if (isTitle) {
String classId = source.get("classid");
if (classId.equals("main title")) {
destination.put(key, content);
}
} else {*/
destination.put(key, content);
// }
}
}
}

View File

@ -0,0 +1,128 @@
package eu.eudat.logic.proxy.fetching;
import com.jayway.jsonpath.DocumentContext;
import eu.eudat.logic.proxy.config.DataUrlConfiguration;
import eu.eudat.logic.proxy.config.ExternalUrlCriteria;
import eu.eudat.logic.proxy.fetching.entities.Results;
import net.minidev.json.JSONArray;
import java.util.*;
import java.util.stream.Collectors;
public class RemoteFetcherUtils {
public static Results getFromJsonWithSource(DocumentContext jsonContext, DataUrlConfiguration jsonDataPath) {
return new Results(jsonContext.read(jsonDataPath.getPath()
+ "[" + jsonDataPath.getFieldsUrlConfiguration().getName() + "," + jsonDataPath.getFieldsUrlConfiguration().getDescription()
+ "," + jsonDataPath.getFieldsUrlConfiguration().getUri() + "," + jsonDataPath.getFieldsUrlConfiguration().getId()
+ "," + jsonDataPath.getFieldsUrlConfiguration().getSource() + "]"),
new HashMap<>(1, 1));
}
public static Results getFromJsonWithParsedData(DocumentContext jsonContext, DataUrlConfiguration jsonDataPath) {
Results results = new Results(jsonContext.read(jsonDataPath.getPath()
+ "[" + jsonDataPath.getFieldsUrlConfiguration().getName()
+ "," + jsonDataPath.getFieldsUrlConfiguration().getId() + "]"),
new HashMap<>(1, 1));
List<Map<String, String>> fixedResults = results.getResults().stream().map(item -> {
for (int i = 0; i < 2; i++) {
String id;
if (i == 0) {
id = jsonDataPath.getFieldsUrlConfiguration().getId().replace("'", "");
} else {
id = jsonDataPath.getFieldsUrlConfiguration().getName().replace("'", "");
}
if (!(item.get(id) instanceof String)) {
Object obj = item.get(id);
if (obj instanceof JSONArray) {
JSONArray jarr = (JSONArray) obj;
if (jarr.get(0) instanceof String) {
item.put(id, jarr.get(0).toString());
} else {
for (int j = 0; j < jarr.size(); j++) {
mapToMap(id, (Map<String, String>)jarr.get(j), item, i == 1);
}
}
} else {
if (obj instanceof Map) {
mapToMap(id, (Map<String, String>) obj, item, i == 1);
} else if (obj != null){
item.put(id, obj.toString());
}
}
}
}
return item;
}).collect(Collectors.toList());
return new Results(fixedResults, new HashMap<>(1, 1));
}
public static Results getFromJsonWithRecursiveFetching(DocumentContext jsonContext, DataUrlConfiguration jsonDataPath, RemoteFetcher remoteFetcher, String requestBody, String requestType) {
Results results = new Results(jsonContext.read(jsonDataPath.getPath()
+ "[" + jsonDataPath.getFieldsUrlConfiguration().getPath()
+ "," + jsonDataPath.getFieldsUrlConfiguration().getHost() + "]"),
new HashMap<>(1, 1));
List<Map<String, String>> multiResults = results.getResults().stream().map(result -> {
ExternalUrlCriteria externalUrlCriteria = new ExternalUrlCriteria();
externalUrlCriteria.setPath(result.get("path"));
externalUrlCriteria.setHost(result.get("host"));
String replacedPath = remoteFetcher.replaceCriteriaOnUrl(jsonDataPath.getUrlConfiguration().getUrl(), externalUrlCriteria, jsonDataPath.getUrlConfiguration().getFirstpage());
return remoteFetcher.getResultsFromUrl(replacedPath, jsonDataPath.getUrlConfiguration().getData(), jsonDataPath.getUrlConfiguration().getData().getPath(), jsonDataPath.getUrlConfiguration().getContentType(), requestBody, requestType);
}).filter(Objects::nonNull).map(results1 -> results1.getResults().get(0)).collect(Collectors.toList());
return new Results(multiResults, new HashMap<>(1, 1));
}
public static Results getFromJsonWithType(DocumentContext jsonContext, DataUrlConfiguration jsonDataPath) {
List<Map<String, Object>> tempRes = jsonContext.read(jsonDataPath.getPath()
+ "[" + jsonDataPath.getFieldsUrlConfiguration().getId() + "," + jsonDataPath.getFieldsUrlConfiguration().getName()
+ "," + jsonDataPath.getFieldsUrlConfiguration().getTypes() + "," + jsonDataPath.getFieldsUrlConfiguration().getUri() + "]");
List<Map<String, String>> finalRes = new ArrayList<>();
tempRes.forEach(map -> {
Map<String, String> resMap = new HashMap<>();
map.forEach((key, value) -> {
if (key.equals(jsonDataPath.getFieldsUrlConfiguration().getTypes().substring(1, jsonDataPath.getFieldsUrlConfiguration().getTypes().length() - 1))) {
resMap.put("tags", ((JSONArray) value).toJSONString());
} else if (key.equals(jsonDataPath.getFieldsUrlConfiguration().getUri().substring(1, jsonDataPath.getFieldsUrlConfiguration().getTypes().length() - 1))) {
resMap.put(key, ((JSONArray) value).toJSONString());
} else {
resMap.put(key, (String) value);
}
});
finalRes.add(resMap);
});
return new Results(finalRes,
new HashMap<>(1, 1));
}
public static Results getFromJsonWithFirstAndLastName(DocumentContext jsonContext, DataUrlConfiguration jsonDataPath) {
Results results = new Results(jsonContext.read(jsonDataPath.getPath()
+ "[" + jsonDataPath.getFieldsUrlConfiguration().getId() + "," + jsonDataPath.getFieldsUrlConfiguration().getFirstName()
+ "," + jsonDataPath.getFieldsUrlConfiguration().getLastName() + "]"),
new HashMap<>(1, 1));
results.getResults().stream().forEach(entry -> {
String name = entry.get(jsonDataPath.getFieldsUrlConfiguration().getFirstName().replace("'", "")) + " " + entry.get(jsonDataPath.getFieldsUrlConfiguration().getLastName().replace("'", ""));
entry.put("name", name);
entry.remove(jsonDataPath.getFieldsUrlConfiguration().getFirstName().replace("'", ""));
entry.remove(jsonDataPath.getFieldsUrlConfiguration().getLastName().replace("'", ""));
});
return results;
}
private static void mapToMap(String key, Map<String, String> source, Map<String, String> destination, boolean isTitle) {
if (source != null) {
String content = source.get("content");
/*if (isTitle) {
String classId = source.get("classid");
if (classId.equals("main title")) {
destination.put(key, content);
}
} else {*/
destination.put(key, content);
// }
}
}
}

View File

@ -0,0 +1,37 @@
package eu.eudat.logic.proxy.fetching.entities;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
public class Results {
List<Map<String, String>> results;
Map<String, Integer> pagination;
public Results() {
this.results = new ArrayList<>();
this.pagination = new HashMap<>();
}
public Results(List<Map<String, String>> results, Map<String, Integer> pagination) {
this.results = results;
this.pagination = pagination;
}
public List<Map<String, String>> getResults() {
return results;
}
public void setResults(List<Map<String, String>> results) {
this.results = results;
}
public Map<String, Integer> getPagination() {
return pagination;
}
public void setPagination(Map<String, Integer> pagination) {
this.pagination = pagination;
}
}