package eu.eudat.logic.proxy.fetching; import com.fasterxml.jackson.core.type.TypeReference; import com.fasterxml.jackson.databind.ObjectMapper; import com.jayway.jsonpath.DocumentContext; import com.jayway.jsonpath.JsonPath; import eu.eudat.logic.proxy.config.*; import eu.eudat.logic.proxy.config.configloaders.ConfigLoader; import eu.eudat.logic.proxy.config.exceptions.HugeResultSet; import eu.eudat.logic.proxy.config.exceptions.NoURLFound; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.cache.annotation.Cacheable; import org.springframework.stereotype.Service; import java.io.File; import java.io.IOException; import java.io.UnsupportedEncodingException; import java.net.HttpURLConnection; import java.net.MalformedURLException; import java.net.URL; import java.net.URLEncoder; import java.nio.file.Paths; import java.util.*; import java.util.stream.Collectors; @Service public class RemoteFetcher { private static final Logger logger = LoggerFactory.getLogger(RemoteFetcher.class); private ConfigLoader configLoader; @Autowired public RemoteFetcher(ConfigLoader configLoader) { this.configLoader = configLoader; } @Cacheable("repositories") public List> getRepositories(ExternalUrlCriteria externalUrlCriteria, String key) throws NoURLFound, HugeResultSet { List urlConfigs = key != null && !key.isEmpty() ? configLoader.getExternalUrls().getRepositories().getUrls().stream().filter(item -> item.getKey().equals(key)).collect(Collectors.toList()) : configLoader.getExternalUrls().getRepositories().getUrls(); FetchStrategy fetchStrategy = configLoader.getExternalUrls().getRepositories().getFetchMode(); return getAll(urlConfigs, fetchStrategy, externalUrlCriteria); } @Cacheable("grants") public List> getGrants(ExternalUrlCriteria externalUrlCriteria) throws NoURLFound, HugeResultSet { List urlConfigs = configLoader.getExternalUrls().getGrants().getUrls(); FetchStrategy fetchStrategy = configLoader.getExternalUrls().getGrants().getFetchMode(); return getAll(urlConfigs, fetchStrategy, externalUrlCriteria); } @Cacheable("projects") public List> getProjects(ExternalUrlCriteria externalUrlCriteria) throws NoURLFound, HugeResultSet { List urlConfigs = configLoader.getExternalUrls().getProjects().getUrls(); FetchStrategy fetchStrategy = configLoader.getExternalUrls().getProjects().getFetchMode(); return getAll(urlConfigs, fetchStrategy, externalUrlCriteria); } @Cacheable("funders") public List> getFunders(ExternalUrlCriteria externalUrlCriteria) throws NoURLFound, HugeResultSet { List urlConfigs = configLoader.getExternalUrls().getFunders().getUrls(); FetchStrategy fetchStrategy = configLoader.getExternalUrls().getFunders().getFetchMode(); return getAll(urlConfigs, fetchStrategy, externalUrlCriteria); } @Cacheable("organisations") public List> getOrganisations(ExternalUrlCriteria externalUrlCriteria, String key) throws NoURLFound, HugeResultSet { List urlConfigs = key != null && !key.isEmpty() ? configLoader.getExternalUrls().getOrganisations().getUrls().stream().filter(item -> item.getKey().equals(key)).collect(Collectors.toList()) : configLoader.getExternalUrls().getOrganisations().getUrls(); FetchStrategy fetchStrategy = configLoader.getExternalUrls().getOrganisations().getFetchMode(); return getAll(urlConfigs, fetchStrategy, externalUrlCriteria); } @Cacheable("registries") public List> getRegistries(ExternalUrlCriteria externalUrlCriteria, String key) throws NoURLFound, HugeResultSet { List urlConfigs = key != null && !key.isEmpty() ? configLoader.getExternalUrls().getRegistries().getUrls().stream().filter(item -> item.getKey().equals(key)).collect(Collectors.toList()) : configLoader.getExternalUrls().getRegistries().getUrls(); FetchStrategy fetchStrategy = configLoader.getExternalUrls().getRegistries().getFetchMode(); return getAll(urlConfigs, fetchStrategy, externalUrlCriteria); } @Cacheable("services") public List> getServices(ExternalUrlCriteria externalUrlCriteria, String key) throws NoURLFound, HugeResultSet { List urlConfigs = key != null && !key.isEmpty() ? configLoader.getExternalUrls().getServices().getUrls().stream().filter(item -> item.getKey().equals(key)).collect(Collectors.toList()) : configLoader.getExternalUrls().getServices().getUrls(); FetchStrategy fetchStrategy = configLoader.getExternalUrls().getServices().getFetchMode(); return getAll(urlConfigs, fetchStrategy, externalUrlCriteria); } @Cacheable("researchers") public List> getResearchers(ExternalUrlCriteria externalUrlCriteria, String key) throws NoURLFound, HugeResultSet { List urlConfigs = key != null && !key.isEmpty() ? configLoader.getExternalUrls().getResearchers().getUrls().stream().filter(item -> item.getKey().equals(key)).collect(Collectors.toList()) : configLoader.getExternalUrls().getResearchers().getUrls(); FetchStrategy fetchStrategy = configLoader.getExternalUrls().getResearchers().getFetchMode(); return getAll(urlConfigs, fetchStrategy, externalUrlCriteria); } @Cacheable("tags") public List> getTags(ExternalUrlCriteria externalUrlCriteria, String key) throws NoURLFound, HugeResultSet { List urlConfigs = key != null && !key.isEmpty() ? configLoader.getExternalUrls().getTags().getUrls().stream().filter(item -> item.getKey().equals(key)).collect(Collectors.toList()) : configLoader.getExternalUrls().getTags().getUrls(); FetchStrategy fetchStrategy = configLoader.getExternalUrls().getTags().getFetchMode(); return getAll(urlConfigs, fetchStrategy, externalUrlCriteria); } @Cacheable("externalDatasets") public List> getDatasets(ExternalUrlCriteria externalUrlCriteria, String key) throws NoURLFound, HugeResultSet { List urlConfigs = key != null && !key.isEmpty() ? configLoader.getExternalUrls().getDatasets().getUrls().stream().filter(item -> item.getKey().equals(key)).collect(Collectors.toList()) : configLoader.getExternalUrls().getDatasets().getUrls(); FetchStrategy fetchStrategy = configLoader.getExternalUrls().getDatasets().getFetchMode(); return getAll(urlConfigs, fetchStrategy, externalUrlCriteria); } private List> getAll(List urlConfigs, FetchStrategy fetchStrategy, ExternalUrlCriteria externalUrlCriteria) throws NoURLFound, HugeResultSet { List> results = new LinkedList<>(); if (urlConfigs == null || urlConfigs.isEmpty()) return results; // throw new NoURLFound("No Repository urls found in configuration"); urlConfigs.sort(Comparator.comparing(UrlConfiguration::getOrdinal)); for (UrlConfiguration urlConfig : urlConfigs) { ifFunderQueryExist(urlConfig, externalUrlCriteria); if (urlConfig.getType() == null || urlConfig.getType().equals("External")) { results.addAll(getAllResultsFromUrl(urlConfig.getUrl(), fetchStrategy, urlConfig.getData(), urlConfig.getPaginationPath(), externalUrlCriteria, urlConfig.getLabel(), urlConfig.getKey(), urlConfig.getContentType(), urlConfig.getFirstpage())); } else if (urlConfig.getType() != null && urlConfig.getType().equals("Internal")) { results.addAll(getAllResultsFromMockUpJson(urlConfig.getUrl(), externalUrlCriteria.getLike())); } } return results; } private void ifFunderQueryExist(UrlConfiguration urlConfiguration, ExternalUrlCriteria externalUrlCriteria) { if (urlConfiguration.getFunderQuery() != null) { if (externalUrlCriteria.getFunderId() != null && !urlConfiguration.getFunderQuery().startsWith("dmp:")) { urlConfiguration.setUrl(urlConfiguration.getUrl().replace("{funderQuery}", urlConfiguration.getFunderQuery())); } else { urlConfiguration.setUrl(urlConfiguration.getUrl().replace("{funderQuery}", "")); } } } private String replaceCriteriaOnUrl(String path, ExternalUrlCriteria externalUrlCriteria, String firstPage) { String completedPath = path; if (externalUrlCriteria.getLike() != null) { if (path.contains("openaire") && externalUrlCriteria.getLike().equals("")) completedPath = completedPath.replaceAll("\\{like}", "*"); else completedPath = completedPath.replaceAll("\\{like}", externalUrlCriteria.getLike()); } else { completedPath = completedPath.replace("{like}", ""); } if (externalUrlCriteria.getFunderId() != null) { String funderId = externalUrlCriteria.getFunderId(); try { funderId = URLEncoder.encode(externalUrlCriteria.getFunderId(), "UTF-8"); } catch (UnsupportedEncodingException e) { logger.error(e.getMessage(), e); } completedPath = completedPath.replace("{funderId}", funderId); } if (externalUrlCriteria.getPage() != null) { completedPath = completedPath.replace("{page}", externalUrlCriteria.getPage()); } else { if (firstPage != null) { completedPath = completedPath.replace("{page}", firstPage); } else { completedPath = completedPath.replace("{page}", "1"); } } if (externalUrlCriteria.getPageSize() != null) { completedPath = completedPath.replace("{pageSize}", externalUrlCriteria.getPageSize()); } else { completedPath = completedPath.replace("{pageSize}", "60"); } return completedPath; } private List> getAllResultsFromUrl(String path, FetchStrategy fetchStrategy, final DataUrlConfiguration jsonDataPath, final String jsonPaginationPath, ExternalUrlCriteria externalUrlCriteria, String tag, String key, String contentType, String firstPage) throws HugeResultSet { Set pages = new HashSet<>(); String replacedPath = replaceCriteriaOnUrl(path, externalUrlCriteria, firstPage); Results results = getResultsFromUrl(replacedPath, jsonDataPath, jsonPaginationPath, contentType); if (fetchStrategy == FetchStrategy.FIRST) return results == null ? new LinkedList<>() : results.getResults().stream().peek(x -> x.put("tag", tag)).peek(x -> x.put("key", key)).collect(Collectors.toList()); if (results != null && results.getPagination() != null && results.getPagination().get("pages") != null) //if has more pages, add them to the pages set for (int i = 2; i <= results.getPagination().get("pages"); i++) pages.add(i); Long maxResults = configLoader.getExternalUrls().getMaxresults(); if ((maxResults > 0 && results != null) && (results.getPagination().get("count") > maxResults)) throw new HugeResultSet("The submitted search query " + externalUrlCriteria.getLike() + " is about to return " + results.getPagination().get("count") + " results... Please submit a more detailed search query"); Optional optionalResults = pages.parallelStream() .map(page -> getResultsFromUrl(path + "&page=" + page, jsonDataPath, jsonPaginationPath, contentType)) .reduce((result1, result2) -> { result1.getResults().addAll(result2.getResults()); return result1; }); Results remainingResults = optionalResults.orElseGet(Results::new); remainingResults.getResults().addAll(results.getResults()); return remainingResults.getResults().stream().peek(x -> x.put("tag", tag)).collect(Collectors.toList()); } private Results getResultsFromUrl(String urlString, DataUrlConfiguration jsonDataPath, String jsonPaginationPath, String contentType) { try { URL url = new URL(urlString.replace(" ", "%20")); HttpURLConnection con = (HttpURLConnection) url.openConnection(); con.setRequestMethod("GET"); con.setRequestProperty("Accept", contentType); int responseCode = con.getResponseCode(); if (responseCode == HttpURLConnection.HTTP_OK) { // success //do here all the parsing DocumentContext jsonContext = JsonPath.parse(con.getInputStream()); Results results; if (jsonDataPath.getFieldsUrlConfiguration().getSource() != null) { results = new Results(jsonContext.read(jsonDataPath.getPath() + "[" + jsonDataPath.getFieldsUrlConfiguration().getName() + "," + jsonDataPath.getFieldsUrlConfiguration().getDescription() + "," + jsonDataPath.getFieldsUrlConfiguration().getUri() + "," + jsonDataPath.getFieldsUrlConfiguration().getId() + "," + jsonDataPath.getFieldsUrlConfiguration().getSource() + "]"), new HashMap<>(1, 1)); } else if (jsonDataPath.getFieldsUrlConfiguration().getCount() != null) { // parsing services.openaire.eu results = new Results(jsonContext.read(jsonDataPath.getPath() + "[" + jsonDataPath.getFieldsUrlConfiguration().getName() + "," + jsonDataPath.getFieldsUrlConfiguration().getId() + "]"), new HashMap<>(1, 1)); } else { results = new Results(jsonContext.read(jsonDataPath.getPath() + "[" + jsonDataPath.getFieldsUrlConfiguration().getName() + "," + jsonDataPath.getFieldsUrlConfiguration().getDescription() + "," + jsonDataPath.getFieldsUrlConfiguration().getUri() + "," + jsonDataPath.getFieldsUrlConfiguration().getId() + "]"), new HashMap<>(1, 1)); } results.results = results.results.stream().map(e -> e.entrySet().stream().collect(Collectors.toMap(x -> this.transformKey(jsonDataPath,x.getKey()), Map.Entry::getValue))) .collect(Collectors.toList()); return results; } } catch (MalformedURLException e1) { logger.error(e1.getMessage(), e1); } //maybe print smth... catch (IOException e2) { logger.error(e2.getMessage(), e2); } //maybe print smth... catch (Exception exception) { logger.error(exception.getMessage(), exception); } //maybe print smth... finally { } return null; } private List> getAllResultsFromMockUpJson(String path, String query) { List> internalResults; try { String filePath = Paths.get(path).toUri().toURL().toString(); ObjectMapper mapper = new ObjectMapper(); internalResults = mapper.readValue(new File(filePath), new TypeReference>>(){}); return searchListMap(internalResults, query); } catch (Exception e) { logger.error(e.getMessage(), e); return new LinkedList<>(); } } private List> searchListMap(List> internalResults, String query) { List> list = new LinkedList<>(); for (Map map : internalResults) { if (map.get("name") != null && map.get("name").toUpperCase().contains(query.toUpperCase())) { list.add(map); } if (map.get("label") != null && map.get("label").toUpperCase().contains(query.toUpperCase())) { list.add(map); } } return list; } private String transformKey(DataUrlConfiguration dataUrlConfiguration, String key) { if (dataUrlConfiguration.getFieldsUrlConfiguration().getId() != null && key.equals(dataUrlConfiguration.getFieldsUrlConfiguration().getId().replace("'",""))) return "pid"; if (dataUrlConfiguration.getFieldsUrlConfiguration().getDescription() != null && key.equals(dataUrlConfiguration.getFieldsUrlConfiguration().getDescription().replace("'",""))) return "description"; if (dataUrlConfiguration.getFieldsUrlConfiguration().getUri() != null && key.equals(dataUrlConfiguration.getFieldsUrlConfiguration().getUri().replace("'",""))) return "uri"; if (dataUrlConfiguration.getFieldsUrlConfiguration().getName() != null && key.equals(dataUrlConfiguration.getFieldsUrlConfiguration().getName().replace("'",""))) return "name"; if (dataUrlConfiguration.getFieldsUrlConfiguration().getSource() != null && key.equals(dataUrlConfiguration.getFieldsUrlConfiguration().getSource().replace("'",""))) return "source"; if (dataUrlConfiguration.getFieldsUrlConfiguration().getCount() != null && key.equals(dataUrlConfiguration.getFieldsUrlConfiguration().getCount().replace("'",""))) return "count"; return null; } static class Results { List> results; Map pagination; Results() { this.results = new ArrayList>(); this.pagination = new HashMap(); } Results(List> results, Map pagination) { this.results = results; this.pagination = pagination; } List> getResults() { return results; } public void setResults(List> results) { this.results = results; } Map getPagination() { return pagination; } public void setPagination(Map pagination) { this.pagination = pagination; } } }