argos/dmp-backend/web/src/main/java/eu/eudat/logic/proxy/fetching/RemoteFetcher.java

428 lines
26 KiB
Java

package eu.eudat.logic.proxy.fetching;
import com.fasterxml.jackson.core.type.TypeReference;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.jayway.jsonpath.DocumentContext;
import com.jayway.jsonpath.JsonPath;
import eu.eudat.logic.proxy.config.DataUrlConfiguration;
import eu.eudat.logic.proxy.config.ExternalUrlCriteria;
import eu.eudat.logic.proxy.config.FetchStrategy;
import eu.eudat.logic.proxy.config.UrlConfiguration;
import eu.eudat.logic.proxy.config.configloaders.ConfigLoader;
import eu.eudat.logic.proxy.config.entities.GenericUrls;
import eu.eudat.logic.proxy.config.exceptions.HugeResultSet;
import eu.eudat.logic.proxy.config.exceptions.NoURLFound;
import eu.eudat.logic.proxy.fetching.entities.Results;
import eu.eudat.logic.services.ApiContext;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.cache.annotation.Cacheable;
import org.springframework.http.*;
import org.springframework.stereotype.Service;
import org.springframework.web.client.RestTemplate;
import jakarta.xml.bind.JAXBContext;
import jakarta.xml.bind.Unmarshaller;
import java.io.File;
import java.io.StringReader;
import java.lang.reflect.Method;
import java.nio.file.Paths;
import java.util.*;
import java.util.stream.Collectors;
@Service
public class RemoteFetcher {
private static final Logger logger = LoggerFactory.getLogger(RemoteFetcher.class);
private final static ObjectMapper mapper = new ObjectMapper();
private final ConfigLoader configLoader;
@Autowired
public RemoteFetcher(ConfigLoader configLoader) {
this.configLoader = configLoader;
}
@Cacheable(value = "repositories", keyGenerator = "externalUrlsKeyGenerator")
public List<Map<String, String>> getRepositories(ExternalUrlCriteria externalUrlCriteria, String key) throws NoURLFound, HugeResultSet {
List<UrlConfiguration> urlConfigs =
key != null && !key.isEmpty() ? configLoader.getExternalUrls().getRepositories().getUrls().stream().filter(item -> item.getKey().equals(key)).collect(Collectors.toList())
: configLoader.getExternalUrls().getRepositories().getUrls();
FetchStrategy fetchStrategy = configLoader.getExternalUrls().getRepositories().getFetchMode();
return getAll(urlConfigs, fetchStrategy, externalUrlCriteria);
}
@Cacheable(value = "grants", keyGenerator = "externalUrlsKeyGenerator")
public List<Map<String, String>> getGrants(ExternalUrlCriteria externalUrlCriteria) throws NoURLFound, HugeResultSet {
List<UrlConfiguration> urlConfigs = configLoader.getExternalUrls().getGrants().getUrls();
FetchStrategy fetchStrategy = configLoader.getExternalUrls().getGrants().getFetchMode();
return getAll(urlConfigs, fetchStrategy, externalUrlCriteria);
}
@Cacheable(value = "projects", keyGenerator = "externalUrlsKeyGenerator")
public List<Map<String, String>> getProjects(ExternalUrlCriteria externalUrlCriteria) throws NoURLFound, HugeResultSet {
List<UrlConfiguration> urlConfigs = configLoader.getExternalUrls().getProjects().getUrls();
FetchStrategy fetchStrategy = configLoader.getExternalUrls().getProjects().getFetchMode();
return getAll(urlConfigs, fetchStrategy, externalUrlCriteria);
}
@Cacheable(value = "funders", keyGenerator = "externalUrlsKeyGenerator")
public List<Map<String, String>> getFunders(ExternalUrlCriteria externalUrlCriteria) throws NoURLFound, HugeResultSet {
List<UrlConfiguration> urlConfigs = configLoader.getExternalUrls().getFunders().getUrls();
FetchStrategy fetchStrategy = configLoader.getExternalUrls().getFunders().getFetchMode();
return getAll(urlConfigs, fetchStrategy, externalUrlCriteria);
}
@Cacheable(value = "organisations", keyGenerator = "externalUrlsKeyGenerator")
public List<Map<String, String>> getOrganisations(ExternalUrlCriteria externalUrlCriteria, String key) throws NoURLFound, HugeResultSet {
List<UrlConfiguration> urlConfigs =
key != null && !key.isEmpty() ? configLoader.getExternalUrls().getOrganisations().getUrls().stream().filter(item -> item.getKey().equals(key)).collect(Collectors.toList())
: configLoader.getExternalUrls().getOrganisations().getUrls();
FetchStrategy fetchStrategy = configLoader.getExternalUrls().getOrganisations().getFetchMode();
return getAll(urlConfigs, fetchStrategy, externalUrlCriteria);
}
@Cacheable(value = "registries", keyGenerator = "externalUrlsKeyGenerator")
public List<Map<String, String>> getRegistries(ExternalUrlCriteria externalUrlCriteria, String key) throws NoURLFound, HugeResultSet {
List<UrlConfiguration> urlConfigs =
key != null && !key.isEmpty() ? configLoader.getExternalUrls().getRegistries().getUrls().stream().filter(item -> item.getKey().equals(key)).collect(Collectors.toList())
: configLoader.getExternalUrls().getRegistries().getUrls();
FetchStrategy fetchStrategy = configLoader.getExternalUrls().getRegistries().getFetchMode();
return getAll(urlConfigs, fetchStrategy, externalUrlCriteria);
}
@Cacheable(value = "services", keyGenerator = "externalUrlsKeyGenerator")
public List<Map<String, String>> getServices(ExternalUrlCriteria externalUrlCriteria, String key) throws NoURLFound, HugeResultSet {
List<UrlConfiguration> urlConfigs =
key != null && !key.isEmpty() ? configLoader.getExternalUrls().getServices().getUrls().stream().filter(item -> item.getKey().equals(key)).collect(Collectors.toList())
: configLoader.getExternalUrls().getServices().getUrls();
FetchStrategy fetchStrategy = configLoader.getExternalUrls().getServices().getFetchMode();
return getAll(urlConfigs, fetchStrategy, externalUrlCriteria);
}
@Cacheable(value = "researchers", keyGenerator = "externalUrlsKeyGenerator")
public List<Map<String, String>> getResearchers(ExternalUrlCriteria externalUrlCriteria, String key) throws NoURLFound, HugeResultSet {
List<UrlConfiguration> urlConfigs =
key != null && !key.isEmpty() ? configLoader.getExternalUrls().getResearchers().getUrls().stream().filter(item -> item.getKey().equals(key)).collect(Collectors.toList())
: configLoader.getExternalUrls().getResearchers().getUrls();
FetchStrategy fetchStrategy = configLoader.getExternalUrls().getResearchers().getFetchMode();
return getAll(urlConfigs, fetchStrategy, externalUrlCriteria);
}
/*@Cacheable("tags")
public List<Map<String, String>> getTags(ExternalUrlCriteria externalUrlCriteria, String key) throws NoURLFound, HugeResultSet {
List<UrlConfiguration> urlConfigs =
key != null && !key.isEmpty() ? configLoader.getExternalUrls().getTags().getUrls().stream().filter(item -> item.getKey().equals(key)).collect(Collectors.toList())
: configLoader.getExternalUrls().getTags().getUrls();
FetchStrategy fetchStrategy = configLoader.getExternalUrls().getTags().getFetchMode();
return getAll(urlConfigs, fetchStrategy, externalUrlCriteria);
}*/
@Cacheable(value = "externalDatasets", keyGenerator = "externalUrlsKeyGenerator")
public List<Map<String, String>> getDatasets(ExternalUrlCriteria externalUrlCriteria, String key) throws NoURLFound, HugeResultSet {
List<UrlConfiguration> urlConfigs =
key != null && !key.isEmpty() ? configLoader.getExternalUrls().getDatasets().getUrls().stream().filter(item -> item.getKey().equals(key)).collect(Collectors.toList())
: configLoader.getExternalUrls().getDatasets().getUrls();
FetchStrategy fetchStrategy = configLoader.getExternalUrls().getDatasets().getFetchMode();
return getAll(urlConfigs, fetchStrategy, externalUrlCriteria);
}
@Cacheable(value = "licenses", keyGenerator = "externalUrlsKeyGenerator")
public List<Map<String, String>> getlicenses(ExternalUrlCriteria externalUrlCriteria, String key) throws NoURLFound, HugeResultSet {
List<UrlConfiguration> urlConfigs =
key != null && !key.isEmpty() ? configLoader.getExternalUrls().getLicenses().getUrls().stream().filter(item -> item.getKey().equals(key)).collect(Collectors.toList())
: configLoader.getExternalUrls().getLicenses().getUrls();
FetchStrategy fetchStrategy = configLoader.getExternalUrls().getLicenses().getFetchMode();
return getAll(urlConfigs, fetchStrategy, externalUrlCriteria);
}
public Integer findEntries(ExternalUrlCriteria externalUrlCriteria, String key) throws NoURLFound, HugeResultSet {
List<UrlConfiguration> urlConfigs =
key != null && !key.isEmpty() ? configLoader.getExternalUrls().getValidations().getUrls().stream().filter(item -> item.getKey().equals(key)).collect(Collectors.toList())
: configLoader.getExternalUrls().getValidations().getUrls();
FetchStrategy fetchStrategy = configLoader.getExternalUrls().getValidations().getFetchMode();
List<Map<String, String>> data = this.getAll(urlConfigs, fetchStrategy, externalUrlCriteria);
return data.size();
}
public List<Map<String, String>> getExternalGeneric(ExternalUrlCriteria externalUrlCriteria, GenericUrls genericUrls) {
List<UrlConfiguration> urlConfigurations = genericUrls.getUrls();
FetchStrategy fetchStrategy = genericUrls.getFetchMode();
return getAll(urlConfigurations, fetchStrategy, externalUrlCriteria);
}
private List<Map<String, String>> getAll(List<UrlConfiguration> urlConfigs, FetchStrategy fetchStrategy, ExternalUrlCriteria externalUrlCriteria) {
List<Map<String, String>> results = new LinkedList<>();
if (urlConfigs == null || urlConfigs.isEmpty()) {
return results;
}
// throw new NoURLFound("No Repository urls found in configuration");
urlConfigs.sort(Comparator.comparing(UrlConfiguration::getOrdinal));
urlConfigs.forEach(urlConfiguration -> {
ifFunderQueryExist(urlConfiguration, externalUrlCriteria);
if (urlConfiguration.getType() == null || urlConfiguration.getType().equals("External")) {
try {
results.addAll(getAllResultsFromUrl(urlConfiguration.getUrl(), fetchStrategy, urlConfiguration.getData(), urlConfiguration.getPaginationPath(), externalUrlCriteria, urlConfiguration.getLabel(), urlConfiguration.getKey(), urlConfiguration.getContentType(), urlConfiguration.getFirstpage(), urlConfiguration.getRequestBody(), urlConfiguration.getRequestType()));
} catch (Exception e) {
logger.error(e.getLocalizedMessage(), e);
}
} else if (urlConfiguration.getType() != null && urlConfiguration.getType().equals("Internal")) {
results.addAll(getAllResultsFromMockUpJson(urlConfiguration.getUrl(), externalUrlCriteria.getLike()));
}
});
/* for (UrlConfiguration urlConfig : urlConfigs) {
ifFunderQueryExist(urlConfig, externalUrlCriteria);
if (urlConfig.getType() == null || urlConfig.getType().equals("External")) {
results.addAll(getAllResultsFromUrl(urlConfig.getUrl(), fetchStrategy, urlConfig.getData(), urlConfig.getPaginationPath(), externalUrlCriteria, urlConfig.getLabel(), urlConfig.getKey(), urlConfig.getContentType(), urlConfig.getFirstpage(), urlConfig.getRequestBody(), urlConfig.getRequestType()));
} else if (urlConfig.getType() != null && urlConfig.getType().equals("Internal")) {
results.addAll(getAllResultsFromMockUpJson(urlConfig.getUrl(), externalUrlCriteria.getLike()));
}
}*/
return results;
}
private void ifFunderQueryExist(UrlConfiguration urlConfiguration, ExternalUrlCriteria externalUrlCriteria) {
if (urlConfiguration.getFunderQuery() != null) {
if (externalUrlCriteria.getFunderId() != null && !urlConfiguration.getFunderQuery().startsWith("dmp:")) {
urlConfiguration.setUrl(urlConfiguration.getUrl().replace("{funderQuery}", urlConfiguration.getFunderQuery()));
}
else {
urlConfiguration.setUrl(urlConfiguration.getUrl().replace("{funderQuery}", ""));
}
}
}
protected String replaceCriteriaOnUrl(String path, ExternalUrlCriteria externalUrlCriteria, String firstPage) {
String completedPath = path;
if (externalUrlCriteria.getLike() != null) {
if ((path.contains("openaire") || path.contains("orcid") || path.contains("ror")) && externalUrlCriteria.getLike().equals("")) {
completedPath = completedPath.replaceAll("\\{like}", "*");
} else {
completedPath = completedPath.replaceAll("\\{like}", externalUrlCriteria.getLike());
}
} else {
completedPath = completedPath.replace("{like}", "");
}
if (externalUrlCriteria.getFunderId() != null) {
String funderPrefix = externalUrlCriteria.getFunderId().split(":")[0];
String funderId = externalUrlCriteria.getFunderId().replace(funderPrefix + ":", "");
if (funderId.toCharArray()[0] == ':') {
funderId = externalUrlCriteria.getFunderId();
}
/*
try { funderId = URLEncoder.encode(funderId, "UTF-8"); } catch
(UnsupportedEncodingException e) { logger.error(e.getMessage(), e); }
*/
completedPath = completedPath.replace("{funderId}", funderId);
}
if (externalUrlCriteria.getPage() != null) {
completedPath = completedPath.replace("{page}", externalUrlCriteria.getPage());
} else {
if (firstPage != null) {
completedPath = completedPath.replace("{page}", firstPage);
} else {
completedPath = completedPath.replace("{page}", "1");
}
}
if (externalUrlCriteria.getPageSize() != null) {
completedPath = completedPath.replace("{pageSize}", externalUrlCriteria.getPageSize());
} else {
completedPath = completedPath.replace("{pageSize}", "60");
}
if (externalUrlCriteria.getHost() != null) {
completedPath = completedPath.replace("{host}", externalUrlCriteria.getHost());
} else {
completedPath = completedPath.replace("{host}", "");
}
if (externalUrlCriteria.getPath() != null) {
completedPath = completedPath.replace("{path}", externalUrlCriteria.getPath());
} else {
completedPath = completedPath.replace("{path}", "");
}
return completedPath;
}
private List<Map<String, String>> getAllResultsFromUrl(String path, FetchStrategy fetchStrategy, final DataUrlConfiguration jsonDataPath, final String jsonPaginationPath, ExternalUrlCriteria externalUrlCriteria, String tag, String key, String contentType, String firstPage, String requestBody, String requestType) throws Exception {
Set<Integer> pages = new HashSet<>();
String replacedPath = replaceCriteriaOnUrl(path, externalUrlCriteria, firstPage);
String replacedBody = replaceCriteriaOnUrl(requestBody, externalUrlCriteria, firstPage);
Results results = getResultsFromUrl(replacedPath, jsonDataPath, jsonPaginationPath, contentType, replacedBody, requestType);
if (fetchStrategy == FetchStrategy.FIRST)
return results == null ? new LinkedList<>() : results.getResults().stream().peek(x -> x.put("tag", tag)).peek(x -> x.put("key", key)).collect(Collectors.toList());
if (results != null && results.getPagination() != null && results.getPagination().get("pages") != null) //if has more pages, add them to the pages set
for (int i = 2; i <= results.getPagination().get("pages"); i++)
pages.add(i);
Long maxResults = configLoader.getExternalUrls().getMaxresults();
if ((maxResults > 0 && results != null) && (results.getPagination().get("count") > maxResults))
throw new HugeResultSet("The submitted search query " + externalUrlCriteria.getLike() + " is about to return " + results.getPagination().get("count") + " results... Please submit a more detailed search query");
Optional<Results> optionalResults = pages.parallelStream()
.map(page -> getResultsFromUrl(path + "&page=" + page, jsonDataPath, jsonPaginationPath, contentType, replacedBody, requestType))
.filter(Objects::nonNull)
.reduce((result1, result2) -> {
result1.getResults().addAll(result2.getResults());
return result1;
});
Results remainingResults = optionalResults.orElseGet(Results::new);
remainingResults.getResults().addAll(results.getResults());
return remainingResults.getResults().stream().peek(x -> x.put("tag", tag)).collect(Collectors.toList());
}
protected Results getResultsFromUrl(String urlString, DataUrlConfiguration jsonDataPath, String jsonPaginationPath, String contentType, String requestBody, String requestType) {
try {
RestTemplate restTemplate = new RestTemplate();
HttpHeaders headers = new HttpHeaders();
HttpEntity<JsonNode> entity;
ResponseEntity<String> response;
/*
* URL url = new URL(urlString.replaceAll(" ", "%20"));
*
* HttpURLConnection con = (HttpURLConnection) url.openConnection();
* con.setRequestMethod("GET");
*/
if (contentType != null && !contentType.isEmpty()) {
headers.setAccept(Collections.singletonList(MediaType.valueOf(contentType)));
headers.setContentType(MediaType.valueOf(contentType));
}
JsonNode jsonBody = mapper.readTree(requestBody);
entity = new HttpEntity<>(jsonBody, headers);
response = restTemplate.exchange(urlString, HttpMethod.resolve(requestType), entity, String.class);
if (response.getStatusCode() == HttpStatus.OK) { // success
//do here all the parsing
Results results = new Results();
if (response.getHeaders().get("Content-Type").get(0).contains("json")) {
DocumentContext jsonContext = JsonPath.parse(response.getBody());
if (jsonDataPath.getFieldsUrlConfiguration().getPath() != null) {
results = RemoteFetcherUtils.getFromJsonWithRecursiveFetching(jsonContext, jsonDataPath, this, requestBody, requestType);
} else if (jsonDataPath.getFieldsUrlConfiguration().getFirstName() != null) {
results = RemoteFetcherUtils.getFromJsonWithFirstAndLastName(jsonContext, jsonDataPath);
} else {
results = RemoteFetcherUtils.getFromJson(jsonContext, jsonDataPath);
}
results.setResults(results.getResults().stream().map(e -> e.entrySet().stream().collect(Collectors.toMap(x -> this.transformKey(jsonDataPath,x.getKey()), Map.Entry::getValue)))
.collect(Collectors.toList()));
}
else if (response.getHeaders().get("Content-Type").get(0).contains("xml")) {
Class<?> aClass = Class.forName(jsonDataPath.getParseClass());
JAXBContext jaxbContext = JAXBContext.newInstance(aClass);
Unmarshaller unmarshaller = jaxbContext.createUnmarshaller();
StringReader stringReader = new StringReader(response.getBody());
Object data = unmarshaller.unmarshal(stringReader);
Method reader = null;
if (jsonDataPath.getParseField() != null && !jsonDataPath.getParseField().isEmpty()) {
String camelCaseGetter = jsonDataPath.getParseField() != null && !jsonDataPath.getParseField().isEmpty() ? "get" + jsonDataPath.getParseField().substring(0, 1).toUpperCase() + jsonDataPath.getParseField().substring(1) : "";
reader = aClass.getMethod(camelCaseGetter);
}
List<Map<String, String>> values = new ArrayList<>();
int max = 1;
if (reader != null) {
Object invokedField = reader.invoke(data);
if (invokedField instanceof Collection) {
max = ((Collection) invokedField).size();
}
}
for (int i = 0; i< max; i++) {
Object value;
if (reader != null) {
Object invokedField = reader.invoke(data);
if (invokedField instanceof Collection) {
value = ((Collection) invokedField).toArray()[i];
} else {
value = invokedField;
}
} else {
value = data;
}
Map<String, String> map = mapper.convertValue(value, Map.class);
if (jsonDataPath.getMergedFields() != null && !jsonDataPath.getMergedFields().isEmpty() && jsonDataPath.getMergedFieldName() != null && !jsonDataPath.getMergedFieldName().isEmpty()) {
Map<String, String> finalMap = new HashMap<>();
for (Map.Entry<String, String> entry : map.entrySet()) {
if (jsonDataPath.getMergedFields().contains(entry.getKey())) {
if (!finalMap.containsKey(jsonDataPath.getMergedFieldName())) {
finalMap.put(jsonDataPath.getMergedFieldName(), entry.getValue());
} else {
finalMap.put(jsonDataPath.getMergedFieldName(), finalMap.get(jsonDataPath.getMergedFieldName()) + " " + entry.getValue());
}
} else {
finalMap.put(entry.getKey(), entry.getValue());
}
}
values.add(finalMap);
} else {
values.add(map);
}
}
results = new Results(values, new HashMap<>(1, 1));
}
if (results.getPagination().size() == 0) {
results.getPagination().put("pages", 1);
results.getPagination().put("count", results.getResults().size());
}
return results;
}
} catch (Exception exception) {
logger.error(exception.getMessage(), exception);
} //maybe print smth...
return null;
}
private List<Map<String, String>> getAllResultsFromMockUpJson(String path, String query) {
List<Map<String, String>> internalResults;
try {
String filePath = Paths.get(path).toUri().toURL().toString();
internalResults = mapper.readValue(new File(filePath), new TypeReference<List<Map<String, String>>>(){});
return searchListMap(internalResults, query);
} catch (Exception e) {
logger.error(e.getMessage(), e);
return new LinkedList<>();
}
}
private List<Map<String, String>> searchListMap(List<Map<String, String>> internalResults, String query) {
List<Map<String, String>> list = new LinkedList<>();
for (Map<String, String> map : internalResults)
{
if (map.get("name") != null && map.get("name").toUpperCase().contains(query.toUpperCase())) {
list.add(map);
}
if (map.get("label") != null && map.get("label").toUpperCase().contains(query.toUpperCase())) {
list.add(map);
}
}
return list;
}
private String transformKey(DataUrlConfiguration dataUrlConfiguration, String key) {
if (dataUrlConfiguration.getFieldsUrlConfiguration().getId() != null && key.equals(dataUrlConfiguration.getFieldsUrlConfiguration().getId().replace("'",""))) return "pid";
if (dataUrlConfiguration.getFieldsUrlConfiguration().getDescription() != null && key.equals(dataUrlConfiguration.getFieldsUrlConfiguration().getDescription().replace("'",""))) return "description";
if (dataUrlConfiguration.getFieldsUrlConfiguration().getUri() != null && key.equals(dataUrlConfiguration.getFieldsUrlConfiguration().getUri().replace("'",""))) return "uri";
if (dataUrlConfiguration.getFieldsUrlConfiguration().getName() != null && key.equals(dataUrlConfiguration.getFieldsUrlConfiguration().getName().replace("'",""))) return "name";
if (dataUrlConfiguration.getFieldsUrlConfiguration().getSource() != null && key.equals(dataUrlConfiguration.getFieldsUrlConfiguration().getSource().replace("'",""))) return "source";
if (dataUrlConfiguration.getFieldsUrlConfiguration().getCount() != null && key.equals(dataUrlConfiguration.getFieldsUrlConfiguration().getCount().replace("'",""))) return "count";
if (dataUrlConfiguration.getFieldsUrlConfiguration().getPath() != null && key.equals(dataUrlConfiguration.getFieldsUrlConfiguration().getPath().replace("'",""))) return "path";
if (dataUrlConfiguration.getFieldsUrlConfiguration().getHost() != null && key.equals(dataUrlConfiguration.getFieldsUrlConfiguration().getHost().replace("'",""))) return "host";
return null;
}
}