package eu.eudat.logic.proxy.fetching; import com.fasterxml.jackson.core.type.TypeReference; import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.ObjectMapper; import com.jayway.jsonpath.DocumentContext; import com.jayway.jsonpath.JsonPath; import eu.eudat.logic.proxy.config.DataUrlConfiguration; import eu.eudat.logic.proxy.config.ExternalUrlCriteria; import eu.eudat.logic.proxy.config.FetchStrategy; import eu.eudat.logic.proxy.config.UrlConfiguration; import eu.eudat.logic.proxy.config.configloaders.ConfigLoader; import eu.eudat.logic.proxy.config.exceptions.HugeResultSet; import eu.eudat.logic.proxy.config.exceptions.NoURLFound; import eu.eudat.logic.proxy.fetching.entities.Results; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.cache.annotation.Cacheable; import org.springframework.http.*; import org.springframework.stereotype.Service; import org.springframework.web.client.RestTemplate; import javax.xml.bind.JAXBContext; import javax.xml.bind.Unmarshaller; import java.io.File; import java.io.StringReader; import java.lang.reflect.Method; import java.nio.file.Paths; import java.util.*; import java.util.stream.Collectors; @Service public class RemoteFetcher { private static final Logger logger = LoggerFactory.getLogger(RemoteFetcher.class); private ConfigLoader configLoader; @Autowired public RemoteFetcher(ConfigLoader configLoader) { this.configLoader = configLoader; } @Cacheable(value = "repositories", keyGenerator = "externalUrlsKeyGenerator") public List> getRepositories(ExternalUrlCriteria externalUrlCriteria, String key) throws NoURLFound, HugeResultSet { List urlConfigs = key != null && !key.isEmpty() ? configLoader.getExternalUrls().getRepositories().getUrls().stream().filter(item -> item.getKey().equals(key)).collect(Collectors.toList()) : configLoader.getExternalUrls().getRepositories().getUrls(); FetchStrategy fetchStrategy = configLoader.getExternalUrls().getRepositories().getFetchMode(); return getAll(urlConfigs, fetchStrategy, externalUrlCriteria); } @Cacheable(value = "grants", keyGenerator = "externalUrlsKeyGenerator") public List> getGrants(ExternalUrlCriteria externalUrlCriteria) throws NoURLFound, HugeResultSet { List urlConfigs = configLoader.getExternalUrls().getGrants().getUrls(); FetchStrategy fetchStrategy = configLoader.getExternalUrls().getGrants().getFetchMode(); return getAll(urlConfigs, fetchStrategy, externalUrlCriteria); } @Cacheable(value = "projects", keyGenerator = "externalUrlsKeyGenerator") public List> getProjects(ExternalUrlCriteria externalUrlCriteria) throws NoURLFound, HugeResultSet { List urlConfigs = configLoader.getExternalUrls().getProjects().getUrls(); FetchStrategy fetchStrategy = configLoader.getExternalUrls().getProjects().getFetchMode(); return getAll(urlConfigs, fetchStrategy, externalUrlCriteria); } @Cacheable(value = "funders", keyGenerator = "externalUrlsKeyGenerator") public List> getFunders(ExternalUrlCriteria externalUrlCriteria) throws NoURLFound, HugeResultSet { List urlConfigs = configLoader.getExternalUrls().getFunders().getUrls(); FetchStrategy fetchStrategy = configLoader.getExternalUrls().getFunders().getFetchMode(); return getAll(urlConfigs, fetchStrategy, externalUrlCriteria); } @Cacheable(value = "organisations", keyGenerator = "externalUrlsKeyGenerator") public List> getOrganisations(ExternalUrlCriteria externalUrlCriteria, String key) throws NoURLFound, HugeResultSet { List urlConfigs = key != null && !key.isEmpty() ? configLoader.getExternalUrls().getOrganisations().getUrls().stream().filter(item -> item.getKey().equals(key)).collect(Collectors.toList()) : configLoader.getExternalUrls().getOrganisations().getUrls(); FetchStrategy fetchStrategy = configLoader.getExternalUrls().getOrganisations().getFetchMode(); return getAll(urlConfigs, fetchStrategy, externalUrlCriteria); } @Cacheable(value = "registries", keyGenerator = "externalUrlsKeyGenerator") public List> getRegistries(ExternalUrlCriteria externalUrlCriteria, String key) throws NoURLFound, HugeResultSet { List urlConfigs = key != null && !key.isEmpty() ? configLoader.getExternalUrls().getRegistries().getUrls().stream().filter(item -> item.getKey().equals(key)).collect(Collectors.toList()) : configLoader.getExternalUrls().getRegistries().getUrls(); FetchStrategy fetchStrategy = configLoader.getExternalUrls().getRegistries().getFetchMode(); return getAll(urlConfigs, fetchStrategy, externalUrlCriteria); } @Cacheable(value = "services", keyGenerator = "externalUrlsKeyGenerator") public List> getServices(ExternalUrlCriteria externalUrlCriteria, String key) throws NoURLFound, HugeResultSet { List urlConfigs = key != null && !key.isEmpty() ? configLoader.getExternalUrls().getServices().getUrls().stream().filter(item -> item.getKey().equals(key)).collect(Collectors.toList()) : configLoader.getExternalUrls().getServices().getUrls(); FetchStrategy fetchStrategy = configLoader.getExternalUrls().getServices().getFetchMode(); return getAll(urlConfigs, fetchStrategy, externalUrlCriteria); } @Cacheable(value = "researchers", keyGenerator = "externalUrlsKeyGenerator") public List> getResearchers(ExternalUrlCriteria externalUrlCriteria, String key) throws NoURLFound, HugeResultSet { List urlConfigs = key != null && !key.isEmpty() ? configLoader.getExternalUrls().getResearchers().getUrls().stream().filter(item -> item.getKey().equals(key)).collect(Collectors.toList()) : configLoader.getExternalUrls().getResearchers().getUrls(); FetchStrategy fetchStrategy = configLoader.getExternalUrls().getResearchers().getFetchMode(); return getAll(urlConfigs, fetchStrategy, externalUrlCriteria); } /*@Cacheable("tags") public List> getTags(ExternalUrlCriteria externalUrlCriteria, String key) throws NoURLFound, HugeResultSet { List urlConfigs = key != null && !key.isEmpty() ? configLoader.getExternalUrls().getTags().getUrls().stream().filter(item -> item.getKey().equals(key)).collect(Collectors.toList()) : configLoader.getExternalUrls().getTags().getUrls(); FetchStrategy fetchStrategy = configLoader.getExternalUrls().getTags().getFetchMode(); return getAll(urlConfigs, fetchStrategy, externalUrlCriteria); }*/ @Cacheable(value = "externalDatasets", keyGenerator = "externalUrlsKeyGenerator") public List> getDatasets(ExternalUrlCriteria externalUrlCriteria, String key) throws NoURLFound, HugeResultSet { List urlConfigs = key != null && !key.isEmpty() ? configLoader.getExternalUrls().getDatasets().getUrls().stream().filter(item -> item.getKey().equals(key)).collect(Collectors.toList()) : configLoader.getExternalUrls().getDatasets().getUrls(); FetchStrategy fetchStrategy = configLoader.getExternalUrls().getDatasets().getFetchMode(); return getAll(urlConfigs, fetchStrategy, externalUrlCriteria); } @Cacheable(value = "licenses", keyGenerator = "externalUrlsKeyGenerator") public List> getlicenses(ExternalUrlCriteria externalUrlCriteria, String key) throws NoURLFound, HugeResultSet { List urlConfigs = key != null && !key.isEmpty() ? configLoader.getExternalUrls().getLicenses().getUrls().stream().filter(item -> item.getKey().equals(key)).collect(Collectors.toList()) : configLoader.getExternalUrls().getLicenses().getUrls(); FetchStrategy fetchStrategy = configLoader.getExternalUrls().getLicenses().getFetchMode(); return getAll(urlConfigs, fetchStrategy, externalUrlCriteria); } public Integer findEntries(ExternalUrlCriteria externalUrlCriteria, String key) throws NoURLFound, HugeResultSet { List urlConfigs = key != null && !key.isEmpty() ? configLoader.getExternalUrls().getValidations().getUrls().stream().filter(item -> item.getKey().equals(key)).collect(Collectors.toList()) : configLoader.getExternalUrls().getValidations().getUrls(); FetchStrategy fetchStrategy = configLoader.getExternalUrls().getValidations().getFetchMode(); List> data = this.getAll(urlConfigs, fetchStrategy, externalUrlCriteria); return data.size(); } private List> getAll(List urlConfigs, FetchStrategy fetchStrategy, ExternalUrlCriteria externalUrlCriteria) { List> results = new LinkedList<>(); if (urlConfigs == null || urlConfigs.isEmpty()) { return results; } // throw new NoURLFound("No Repository urls found in configuration"); urlConfigs.sort(Comparator.comparing(UrlConfiguration::getOrdinal)); urlConfigs.forEach(urlConfiguration -> { ifFunderQueryExist(urlConfiguration, externalUrlCriteria); if (urlConfiguration.getType() == null || urlConfiguration.getType().equals("External")) { try { results.addAll(getAllResultsFromUrl(urlConfiguration.getUrl(), fetchStrategy, urlConfiguration.getData(), urlConfiguration.getPaginationPath(), externalUrlCriteria, urlConfiguration.getLabel(), urlConfiguration.getKey(), urlConfiguration.getContentType(), urlConfiguration.getFirstpage(), urlConfiguration.getRequestBody(), urlConfiguration.getRequestType())); } catch (Exception e) { logger.error(e.getLocalizedMessage(), e); } } else if (urlConfiguration.getType() != null && urlConfiguration.getType().equals("Internal")) { results.addAll(getAllResultsFromMockUpJson(urlConfiguration.getUrl(), externalUrlCriteria.getLike())); } }); /* for (UrlConfiguration urlConfig : urlConfigs) { ifFunderQueryExist(urlConfig, externalUrlCriteria); if (urlConfig.getType() == null || urlConfig.getType().equals("External")) { results.addAll(getAllResultsFromUrl(urlConfig.getUrl(), fetchStrategy, urlConfig.getData(), urlConfig.getPaginationPath(), externalUrlCriteria, urlConfig.getLabel(), urlConfig.getKey(), urlConfig.getContentType(), urlConfig.getFirstpage(), urlConfig.getRequestBody(), urlConfig.getRequestType())); } else if (urlConfig.getType() != null && urlConfig.getType().equals("Internal")) { results.addAll(getAllResultsFromMockUpJson(urlConfig.getUrl(), externalUrlCriteria.getLike())); } }*/ return results; } private void ifFunderQueryExist(UrlConfiguration urlConfiguration, ExternalUrlCriteria externalUrlCriteria) { if (urlConfiguration.getFunderQuery() != null) { if (externalUrlCriteria.getFunderId() != null && !urlConfiguration.getFunderQuery().startsWith("dmp:")) { urlConfiguration.setUrl(urlConfiguration.getUrl().replace("{funderQuery}", urlConfiguration.getFunderQuery())); } else { urlConfiguration.setUrl(urlConfiguration.getUrl().replace("{funderQuery}", "")); } } } protected String replaceCriteriaOnUrl(String path, ExternalUrlCriteria externalUrlCriteria, String firstPage) { String completedPath = path; if (externalUrlCriteria.getLike() != null) { if ((path.contains("openaire") || path.contains("orcid") || path.contains("ror")) && externalUrlCriteria.getLike().equals("")) { completedPath = completedPath.replaceAll("\\{like}", "*"); } else { completedPath = completedPath.replaceAll("\\{like}", externalUrlCriteria.getLike()); } } else { completedPath = completedPath.replace("{like}", ""); } if (externalUrlCriteria.getFunderId() != null) { String funderPrefix = externalUrlCriteria.getFunderId().split(":")[0]; String funderId = externalUrlCriteria.getFunderId().replace(funderPrefix + ":", ""); if (funderId.toCharArray()[0] == ':') { funderId = externalUrlCriteria.getFunderId(); } /* try { funderId = URLEncoder.encode(funderId, "UTF-8"); } catch (UnsupportedEncodingException e) { logger.error(e.getMessage(), e); } */ completedPath = completedPath.replace("{funderId}", funderId); } if (externalUrlCriteria.getPage() != null) { completedPath = completedPath.replace("{page}", externalUrlCriteria.getPage()); } else { if (firstPage != null) { completedPath = completedPath.replace("{page}", firstPage); } else { completedPath = completedPath.replace("{page}", "1"); } } if (externalUrlCriteria.getPageSize() != null) { completedPath = completedPath.replace("{pageSize}", externalUrlCriteria.getPageSize()); } else { completedPath = completedPath.replace("{pageSize}", "60"); } if (externalUrlCriteria.getHost() != null) { completedPath = completedPath.replace("{host}", externalUrlCriteria.getHost()); } else { completedPath = completedPath.replace("{host}", ""); } if (externalUrlCriteria.getPath() != null) { completedPath = completedPath.replace("{path}", externalUrlCriteria.getPath()); } else { completedPath = completedPath.replace("{path}", ""); } return completedPath; } private List> getAllResultsFromUrl(String path, FetchStrategy fetchStrategy, final DataUrlConfiguration jsonDataPath, final String jsonPaginationPath, ExternalUrlCriteria externalUrlCriteria, String tag, String key, String contentType, String firstPage, String requestBody, String requestType) throws Exception { Set pages = new HashSet<>(); String replacedPath = replaceCriteriaOnUrl(path, externalUrlCriteria, firstPage); String replacedBody = replaceCriteriaOnUrl(requestBody, externalUrlCriteria, firstPage); Results results = getResultsFromUrl(replacedPath, jsonDataPath, jsonPaginationPath, contentType, replacedBody, requestType); if (fetchStrategy == FetchStrategy.FIRST) return results == null ? new LinkedList<>() : results.getResults().stream().peek(x -> x.put("tag", tag)).peek(x -> x.put("key", key)).collect(Collectors.toList()); if (results != null && results.getPagination() != null && results.getPagination().get("pages") != null) //if has more pages, add them to the pages set for (int i = 2; i <= results.getPagination().get("pages"); i++) pages.add(i); Long maxResults = configLoader.getExternalUrls().getMaxresults(); if ((maxResults > 0 && results != null) && (results.getPagination().get("count") > maxResults)) throw new HugeResultSet("The submitted search query " + externalUrlCriteria.getLike() + " is about to return " + results.getPagination().get("count") + " results... Please submit a more detailed search query"); Optional optionalResults = pages.parallelStream() .map(page -> getResultsFromUrl(path + "&page=" + page, jsonDataPath, jsonPaginationPath, contentType, replacedBody, requestType)) .filter(Objects::nonNull) .reduce((result1, result2) -> { result1.getResults().addAll(result2.getResults()); return result1; }); Results remainingResults = optionalResults.orElseGet(Results::new); remainingResults.getResults().addAll(results.getResults()); return remainingResults.getResults().stream().peek(x -> x.put("tag", tag)).collect(Collectors.toList()); } protected Results getResultsFromUrl(String urlString, DataUrlConfiguration jsonDataPath, String jsonPaginationPath, String contentType, String requestBody, String requestType) { try { RestTemplate restTemplate = new RestTemplate(); HttpHeaders headers = new HttpHeaders(); HttpEntity entity; ResponseEntity response; /* * URL url = new URL(urlString.replaceAll(" ", "%20")); * * HttpURLConnection con = (HttpURLConnection) url.openConnection(); * con.setRequestMethod("GET"); */ if (contentType != null && !contentType.isEmpty()) { headers.setAccept(Collections.singletonList(MediaType.valueOf(contentType))); headers.setContentType(MediaType.valueOf(contentType)); } JsonNode jsonBody = new ObjectMapper().readTree(requestBody); entity = new HttpEntity<>(jsonBody, headers); response = restTemplate.exchange(urlString, HttpMethod.resolve(requestType), entity, String.class); if (response.getStatusCode() == HttpStatus.OK) { // success //do here all the parsing Results results = new Results(); if (response.getHeaders().get("Content-Type").get(0).contains("json")) { DocumentContext jsonContext = JsonPath.parse(response.getBody()); if (jsonDataPath.getFieldsUrlConfiguration().getSource() != null) { results = RemoteFetcherUtils.getFromJsonWithSource(jsonContext, jsonDataPath); } else if (jsonDataPath.getFieldsUrlConfiguration().getCount() != null) { // parsing services.openaire.eu results = RemoteFetcherUtils.getFromJsonWithParsedData(jsonContext, jsonDataPath); } else if (jsonDataPath.getFieldsUrlConfiguration().getPath() != null) { results = RemoteFetcherUtils.getFromJsonWithRecursiveFetching(jsonContext, jsonDataPath, this, requestBody, requestType); } else if (jsonDataPath.getFieldsUrlConfiguration().getTypes() != null) { results = RemoteFetcherUtils.getFromJsonWithType(jsonContext, jsonDataPath); } else if (jsonDataPath.getFieldsUrlConfiguration().getFirstName() != null) { results = RemoteFetcherUtils.getFromJsonWithFirstAndLastName(jsonContext, jsonDataPath); } else { results = new Results(jsonContext.read(jsonDataPath.getPath() + "[" + jsonDataPath.getFieldsUrlConfiguration().getName() + "," + jsonDataPath.getFieldsUrlConfiguration().getDescription() + "," + jsonDataPath.getFieldsUrlConfiguration().getUri() + "," + jsonDataPath.getFieldsUrlConfiguration().getId() + "]"), new HashMap<>(1, 1)); } results.setResults(results.getResults().stream().map(e -> e.entrySet().stream().collect(Collectors.toMap(x -> this.transformKey(jsonDataPath,x.getKey()), Map.Entry::getValue))) .collect(Collectors.toList())); } else if (response.getHeaders().get("Content-Type").get(0).contains("xml")) { Class aClass = Class.forName(jsonDataPath.getParseClass()); JAXBContext jaxbContext = JAXBContext.newInstance(aClass); Unmarshaller unmarshaller = jaxbContext.createUnmarshaller(); StringReader stringReader = new StringReader(response.getBody()); Object data = unmarshaller.unmarshal(stringReader); Method reader = null; if (jsonDataPath.getParseField() != null && !jsonDataPath.getParseField().isEmpty()) { String camelCaseGetter = jsonDataPath.getParseField() != null && !jsonDataPath.getParseField().isEmpty() ? "get" + jsonDataPath.getParseField().substring(0, 1).toUpperCase() + jsonDataPath.getParseField().substring(1) : ""; reader = aClass.getMethod(camelCaseGetter); } ObjectMapper objectMapper = new ObjectMapper(); List> values = new ArrayList<>(); int max = 1; if (reader != null) { Object invokedField = reader.invoke(data); if (invokedField instanceof Collection) { max = ((Collection) invokedField).size(); } } for (int i = 0; i< max; i++) { Object value; if (reader != null) { Object invokedField = reader.invoke(data); if (invokedField instanceof Collection) { value = ((Collection) invokedField).toArray()[i]; } else { value = invokedField; } } else { value = data; } Map map = objectMapper.convertValue(value, Map.class); if (jsonDataPath.getMergedFields() != null && !jsonDataPath.getMergedFields().isEmpty() && jsonDataPath.getMergedFieldName() != null && !jsonDataPath.getMergedFieldName().isEmpty()) { Map finalMap = new HashMap<>(); for (Map.Entry entry : map.entrySet()) { if (jsonDataPath.getMergedFields().contains(entry.getKey())) { if (!finalMap.containsKey(jsonDataPath.getMergedFieldName())) { finalMap.put(jsonDataPath.getMergedFieldName(), entry.getValue()); } else { finalMap.put(jsonDataPath.getMergedFieldName(), finalMap.get(jsonDataPath.getMergedFieldName()) + " " + entry.getValue()); } } else { finalMap.put(entry.getKey(), entry.getValue()); } } values.add(finalMap); } else { values.add(map); } } results = new Results(values, new HashMap<>(1, 1)); } return results; } } catch (Exception exception) { logger.error(exception.getMessage(), exception); } //maybe print smth... return null; } private List> getAllResultsFromMockUpJson(String path, String query) { List> internalResults; try { String filePath = Paths.get(path).toUri().toURL().toString(); ObjectMapper mapper = new ObjectMapper(); internalResults = mapper.readValue(new File(filePath), new TypeReference>>(){}); return searchListMap(internalResults, query); } catch (Exception e) { logger.error(e.getMessage(), e); return new LinkedList<>(); } } private List> searchListMap(List> internalResults, String query) { List> list = new LinkedList<>(); for (Map map : internalResults) { if (map.get("name") != null && map.get("name").toUpperCase().contains(query.toUpperCase())) { list.add(map); } if (map.get("label") != null && map.get("label").toUpperCase().contains(query.toUpperCase())) { list.add(map); } } return list; } private String transformKey(DataUrlConfiguration dataUrlConfiguration, String key) { if (dataUrlConfiguration.getFieldsUrlConfiguration().getId() != null && key.equals(dataUrlConfiguration.getFieldsUrlConfiguration().getId().replace("'",""))) return "pid"; if (dataUrlConfiguration.getFieldsUrlConfiguration().getDescription() != null && key.equals(dataUrlConfiguration.getFieldsUrlConfiguration().getDescription().replace("'",""))) return "description"; if (dataUrlConfiguration.getFieldsUrlConfiguration().getUri() != null && key.equals(dataUrlConfiguration.getFieldsUrlConfiguration().getUri().replace("'",""))) return "uri"; if (dataUrlConfiguration.getFieldsUrlConfiguration().getName() != null && key.equals(dataUrlConfiguration.getFieldsUrlConfiguration().getName().replace("'",""))) return "name"; if (dataUrlConfiguration.getFieldsUrlConfiguration().getSource() != null && key.equals(dataUrlConfiguration.getFieldsUrlConfiguration().getSource().replace("'",""))) return "source"; if (dataUrlConfiguration.getFieldsUrlConfiguration().getCount() != null && key.equals(dataUrlConfiguration.getFieldsUrlConfiguration().getCount().replace("'",""))) return "count"; if (dataUrlConfiguration.getFieldsUrlConfiguration().getPath() != null && key.equals(dataUrlConfiguration.getFieldsUrlConfiguration().getPath().replace("'",""))) return "path"; if (dataUrlConfiguration.getFieldsUrlConfiguration().getHost() != null && key.equals(dataUrlConfiguration.getFieldsUrlConfiguration().getHost().replace("'",""))) return "host"; return null; } }