340 lines
18 KiB
Java
340 lines
18 KiB
Java
package eu.eudat.logic.proxy.fetching;
|
|
|
|
import com.fasterxml.jackson.core.type.TypeReference;
|
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
|
import com.jayway.jsonpath.DocumentContext;
|
|
import com.jayway.jsonpath.JsonPath;
|
|
import eu.eudat.logic.proxy.config.*;
|
|
import eu.eudat.logic.proxy.config.configloaders.ConfigLoader;
|
|
import eu.eudat.logic.proxy.config.exceptions.HugeResultSet;
|
|
import eu.eudat.logic.proxy.config.exceptions.NoURLFound;
|
|
import org.slf4j.Logger;
|
|
import org.slf4j.LoggerFactory;
|
|
import org.springframework.beans.factory.annotation.Autowired;
|
|
import org.springframework.cache.annotation.Cacheable;
|
|
import org.springframework.stereotype.Service;
|
|
|
|
import java.io.File;
|
|
import java.io.IOException;
|
|
import java.io.UnsupportedEncodingException;
|
|
import java.net.HttpURLConnection;
|
|
import java.net.MalformedURLException;
|
|
import java.net.URL;
|
|
import java.net.URLEncoder;
|
|
import java.nio.file.Paths;
|
|
import java.util.*;
|
|
import java.util.stream.Collectors;
|
|
|
|
@Service
|
|
public class RemoteFetcher {
|
|
private static final Logger logger = LoggerFactory.getLogger(RemoteFetcher.class);
|
|
|
|
private ConfigLoader configLoader;
|
|
|
|
/**
 * Creates the fetcher.
 *
 * @param configLoader loader whose external URL configuration is read by every fetch method
 */
@Autowired
public RemoteFetcher(ConfigLoader configLoader) {
    this.configLoader = configLoader;
}
|
|
|
|
@Cacheable("repositories")
|
|
public List<Map<String, String>> getRepositories(ExternalUrlCriteria externalUrlCriteria, String key) throws NoURLFound, HugeResultSet {
|
|
List<UrlConfiguration> urlConfigs =
|
|
key != null && !key.isEmpty() ? configLoader.getExternalUrls().getRepositories().getUrls().stream().filter(item -> item.getKey().equals(key)).collect(Collectors.toList())
|
|
: configLoader.getExternalUrls().getRepositories().getUrls();
|
|
FetchStrategy fetchStrategy = configLoader.getExternalUrls().getRepositories().getFetchMode();
|
|
return getAll(urlConfigs, fetchStrategy, externalUrlCriteria);
|
|
}
|
|
|
|
@Cacheable("grants")
|
|
public List<Map<String, String>> getGrants(ExternalUrlCriteria externalUrlCriteria) throws NoURLFound, HugeResultSet {
|
|
List<UrlConfiguration> urlConfigs = configLoader.getExternalUrls().getGrants().getUrls();
|
|
FetchStrategy fetchStrategy = configLoader.getExternalUrls().getGrants().getFetchMode();
|
|
return getAll(urlConfigs, fetchStrategy, externalUrlCriteria);
|
|
}
|
|
|
|
@Cacheable("projects")
|
|
public List<Map<String, String>> getProjects(ExternalUrlCriteria externalUrlCriteria) throws NoURLFound, HugeResultSet {
|
|
List<UrlConfiguration> urlConfigs = configLoader.getExternalUrls().getProjects().getUrls();
|
|
FetchStrategy fetchStrategy = configLoader.getExternalUrls().getProjects().getFetchMode();
|
|
return getAll(urlConfigs, fetchStrategy, externalUrlCriteria);
|
|
}
|
|
|
|
@Cacheable("funders")
|
|
public List<Map<String, String>> getFunders(ExternalUrlCriteria externalUrlCriteria) throws NoURLFound, HugeResultSet {
|
|
List<UrlConfiguration> urlConfigs = configLoader.getExternalUrls().getFunders().getUrls();
|
|
FetchStrategy fetchStrategy = configLoader.getExternalUrls().getFunders().getFetchMode();
|
|
return getAll(urlConfigs, fetchStrategy, externalUrlCriteria);
|
|
}
|
|
|
|
@Cacheable("organisations")
|
|
public List<Map<String, String>> getOrganisations(ExternalUrlCriteria externalUrlCriteria, String key) throws NoURLFound, HugeResultSet {
|
|
List<UrlConfiguration> urlConfigs =
|
|
key != null && !key.isEmpty() ? configLoader.getExternalUrls().getOrganisations().getUrls().stream().filter(item -> item.getKey().equals(key)).collect(Collectors.toList())
|
|
: configLoader.getExternalUrls().getOrganisations().getUrls();
|
|
FetchStrategy fetchStrategy = configLoader.getExternalUrls().getOrganisations().getFetchMode();
|
|
return getAll(urlConfigs, fetchStrategy, externalUrlCriteria);
|
|
}
|
|
|
|
@Cacheable("registries")
|
|
public List<Map<String, String>> getRegistries(ExternalUrlCriteria externalUrlCriteria, String key) throws NoURLFound, HugeResultSet {
|
|
List<UrlConfiguration> urlConfigs =
|
|
key != null && !key.isEmpty() ? configLoader.getExternalUrls().getRegistries().getUrls().stream().filter(item -> item.getKey().equals(key)).collect(Collectors.toList())
|
|
: configLoader.getExternalUrls().getRegistries().getUrls();
|
|
FetchStrategy fetchStrategy = configLoader.getExternalUrls().getRegistries().getFetchMode();
|
|
return getAll(urlConfigs, fetchStrategy, externalUrlCriteria);
|
|
}
|
|
|
|
@Cacheable("services")
|
|
public List<Map<String, String>> getServices(ExternalUrlCriteria externalUrlCriteria, String key) throws NoURLFound, HugeResultSet {
|
|
List<UrlConfiguration> urlConfigs =
|
|
key != null && !key.isEmpty() ? configLoader.getExternalUrls().getServices().getUrls().stream().filter(item -> item.getKey().equals(key)).collect(Collectors.toList())
|
|
: configLoader.getExternalUrls().getServices().getUrls();
|
|
FetchStrategy fetchStrategy = configLoader.getExternalUrls().getServices().getFetchMode();
|
|
return getAll(urlConfigs, fetchStrategy, externalUrlCriteria);
|
|
}
|
|
|
|
@Cacheable("researchers")
|
|
public List<Map<String, String>> getResearchers(ExternalUrlCriteria externalUrlCriteria, String key) throws NoURLFound, HugeResultSet {
|
|
List<UrlConfiguration> urlConfigs =
|
|
key != null && !key.isEmpty() ? configLoader.getExternalUrls().getResearchers().getUrls().stream().filter(item -> item.getKey().equals(key)).collect(Collectors.toList())
|
|
: configLoader.getExternalUrls().getResearchers().getUrls();
|
|
FetchStrategy fetchStrategy = configLoader.getExternalUrls().getResearchers().getFetchMode();
|
|
return getAll(urlConfigs, fetchStrategy, externalUrlCriteria);
|
|
}
|
|
|
|
@Cacheable("tags")
|
|
public List<Map<String, String>> getTags(ExternalUrlCriteria externalUrlCriteria, String key) throws NoURLFound, HugeResultSet {
|
|
List<UrlConfiguration> urlConfigs =
|
|
key != null && !key.isEmpty() ? configLoader.getExternalUrls().getTags().getUrls().stream().filter(item -> item.getKey().equals(key)).collect(Collectors.toList())
|
|
: configLoader.getExternalUrls().getTags().getUrls();
|
|
FetchStrategy fetchStrategy = configLoader.getExternalUrls().getTags().getFetchMode();
|
|
return getAll(urlConfigs, fetchStrategy, externalUrlCriteria);
|
|
}
|
|
|
|
@Cacheable("externalDatasets")
|
|
public List<Map<String, String>> getDatasets(ExternalUrlCriteria externalUrlCriteria, String key) throws NoURLFound, HugeResultSet {
|
|
List<UrlConfiguration> urlConfigs =
|
|
key != null && !key.isEmpty() ? configLoader.getExternalUrls().getDatasets().getUrls().stream().filter(item -> item.getKey().equals(key)).collect(Collectors.toList())
|
|
: configLoader.getExternalUrls().getDatasets().getUrls();
|
|
FetchStrategy fetchStrategy = configLoader.getExternalUrls().getDatasets().getFetchMode();
|
|
return getAll(urlConfigs, fetchStrategy, externalUrlCriteria);
|
|
}
|
|
|
|
|
|
private List<Map<String, String>> getAll(List<UrlConfiguration> urlConfigs, FetchStrategy fetchStrategy, ExternalUrlCriteria externalUrlCriteria) throws NoURLFound, HugeResultSet {
|
|
|
|
List<Map<String, String>> results = new LinkedList<>();
|
|
|
|
if (urlConfigs == null || urlConfigs.isEmpty()) return results;
|
|
// throw new NoURLFound("No Repository urls found in configuration");
|
|
|
|
urlConfigs.sort(Comparator.comparing(UrlConfiguration::getOrdinal));
|
|
for (UrlConfiguration urlConfig : urlConfigs) {
|
|
ifFunderQueryExist(urlConfig, externalUrlCriteria);
|
|
if (urlConfig.getType() == null || urlConfig.getType().equals("External")) {
|
|
results.addAll(getAllResultsFromUrl(urlConfig.getUrl(), fetchStrategy, urlConfig.getData(), urlConfig.getPaginationPath(), externalUrlCriteria, urlConfig.getLabel(), urlConfig.getKey(), urlConfig.getContentType(), urlConfig.getFirstpage()));
|
|
} else if (urlConfig.getType() != null && urlConfig.getType().equals("Internal")) {
|
|
results.addAll(getAllResultsFromMockUpJson(urlConfig.getUrl(), externalUrlCriteria.getLike()));
|
|
}
|
|
}
|
|
return results;
|
|
}
|
|
|
|
private void ifFunderQueryExist(UrlConfiguration urlConfiguration, ExternalUrlCriteria externalUrlCriteria) {
|
|
if (urlConfiguration.getFunderQuery() != null) {
|
|
if (externalUrlCriteria.getFunderId() != null && !urlConfiguration.getFunderQuery().startsWith("dmp:")) {
|
|
urlConfiguration.setUrl(urlConfiguration.getUrl().replace("{funderQuery}", urlConfiguration.getFunderQuery()));
|
|
}
|
|
else {
|
|
urlConfiguration.setUrl(urlConfiguration.getUrl().replace("{funderQuery}", ""));
|
|
}
|
|
}
|
|
}
|
|
|
|
private String replaceCriteriaOnUrl(String path, ExternalUrlCriteria externalUrlCriteria, String firstPage) {
|
|
String completedPath = path;
|
|
if (externalUrlCriteria.getLike() != null) {
|
|
if (path.contains("openaire") && externalUrlCriteria.getLike().equals(""))
|
|
completedPath = completedPath.replaceAll("\\{like}", "*");
|
|
else
|
|
completedPath = completedPath.replaceAll("\\{like}", externalUrlCriteria.getLike());
|
|
} else {
|
|
completedPath = completedPath.replace("{like}", "");
|
|
}
|
|
if (externalUrlCriteria.getFunderId() != null) {
|
|
String funderId = externalUrlCriteria.getFunderId();
|
|
try {
|
|
funderId = URLEncoder.encode(externalUrlCriteria.getFunderId(), "UTF-8");
|
|
} catch (UnsupportedEncodingException e) {
|
|
logger.error(e.getMessage(), e);
|
|
}
|
|
completedPath = completedPath.replace("{funderId}", funderId);
|
|
}
|
|
if (externalUrlCriteria.getPage() != null) {
|
|
completedPath = completedPath.replace("{page}", externalUrlCriteria.getPage());
|
|
} else {
|
|
if (firstPage != null) {
|
|
completedPath = completedPath.replace("{page}", firstPage);
|
|
} else {
|
|
completedPath = completedPath.replace("{page}", "1");
|
|
}
|
|
}
|
|
if (externalUrlCriteria.getPageSize() != null) {
|
|
completedPath = completedPath.replace("{pageSize}", externalUrlCriteria.getPageSize());
|
|
} else {
|
|
completedPath = completedPath.replace("{pageSize}", "60");
|
|
}
|
|
return completedPath;
|
|
}
|
|
|
|
private List<Map<String, String>> getAllResultsFromUrl(String path, FetchStrategy fetchStrategy, final DataUrlConfiguration jsonDataPath, final String jsonPaginationPath, ExternalUrlCriteria externalUrlCriteria, String tag, String key, String contentType, String firstPage) throws HugeResultSet {
|
|
Set<Integer> pages = new HashSet<>();
|
|
|
|
String replacedPath = replaceCriteriaOnUrl(path, externalUrlCriteria, firstPage);
|
|
|
|
Results results = getResultsFromUrl(replacedPath, jsonDataPath, jsonPaginationPath, contentType);
|
|
if (fetchStrategy == FetchStrategy.FIRST)
|
|
return results == null ? new LinkedList<>() : results.getResults().stream().peek(x -> x.put("tag", tag)).peek(x -> x.put("key", key)).collect(Collectors.toList());
|
|
|
|
if (results != null && results.getPagination() != null && results.getPagination().get("pages") != null) //if has more pages, add them to the pages set
|
|
for (int i = 2; i <= results.getPagination().get("pages"); i++)
|
|
pages.add(i);
|
|
|
|
Long maxResults = configLoader.getExternalUrls().getMaxresults();
|
|
if ((maxResults > 0 && results != null) && (results.getPagination().get("count") > maxResults))
|
|
throw new HugeResultSet("The submitted search query " + externalUrlCriteria.getLike() + " is about to return " + results.getPagination().get("count") + " results... Please submit a more detailed search query");
|
|
|
|
Optional<Results> optionalResults = pages.parallelStream()
|
|
.map(page -> getResultsFromUrl(path + "&page=" + page, jsonDataPath, jsonPaginationPath, contentType))
|
|
.reduce((result1, result2) -> {
|
|
result1.getResults().addAll(result2.getResults());
|
|
return result1;
|
|
});
|
|
Results remainingResults = optionalResults.orElseGet(Results::new);
|
|
remainingResults.getResults().addAll(results.getResults());
|
|
|
|
return remainingResults.getResults().stream().peek(x -> x.put("tag", tag)).collect(Collectors.toList());
|
|
}
|
|
|
|
|
|
private Results getResultsFromUrl(String urlString, DataUrlConfiguration jsonDataPath, String jsonPaginationPath, String contentType) {
|
|
|
|
try {
|
|
|
|
URL url = new URL(urlString.replace(" ", "%20"));
|
|
|
|
HttpURLConnection con = (HttpURLConnection) url.openConnection();
|
|
con.setRequestMethod("GET");
|
|
con.setRequestProperty("Accept", contentType);
|
|
|
|
int responseCode = con.getResponseCode();
|
|
if (responseCode == HttpURLConnection.HTTP_OK) { // success
|
|
//do here all the parsing
|
|
DocumentContext jsonContext = JsonPath.parse(con.getInputStream());
|
|
Results results;
|
|
if (jsonDataPath.getFieldsUrlConfiguration().getSource() != null) {
|
|
results = new Results(jsonContext.read(jsonDataPath.getPath()
|
|
+ "[" + jsonDataPath.getFieldsUrlConfiguration().getName() + "," + jsonDataPath.getFieldsUrlConfiguration().getDescription()
|
|
+ "," + jsonDataPath.getFieldsUrlConfiguration().getUri() + "," + jsonDataPath.getFieldsUrlConfiguration().getId()
|
|
+ "," + jsonDataPath.getFieldsUrlConfiguration().getSource() + "]"),
|
|
new HashMap<>(1, 1));
|
|
} else if (jsonDataPath.getFieldsUrlConfiguration().getCount() != null) { // parsing services.openaire.eu
|
|
results = new Results(jsonContext.read(jsonDataPath.getPath()
|
|
+ "[" + jsonDataPath.getFieldsUrlConfiguration().getName()
|
|
+ "," + jsonDataPath.getFieldsUrlConfiguration().getId() + "]"),
|
|
new HashMap<>(1, 1));
|
|
} else {
|
|
results = new Results(jsonContext.read(jsonDataPath.getPath()
|
|
+ "[" + jsonDataPath.getFieldsUrlConfiguration().getName() + "," + jsonDataPath.getFieldsUrlConfiguration().getDescription()
|
|
+ "," + jsonDataPath.getFieldsUrlConfiguration().getUri() + "," + jsonDataPath.getFieldsUrlConfiguration().getId() + "]"),
|
|
new HashMap<>(1, 1));
|
|
}
|
|
results.results = results.results.stream().map(e -> e.entrySet().stream().collect(Collectors.toMap(x -> this.transformKey(jsonDataPath,x.getKey()), Map.Entry::getValue)))
|
|
.collect(Collectors.toList());
|
|
return results;
|
|
}
|
|
} catch (MalformedURLException e1) {
|
|
logger.error(e1.getMessage(), e1);
|
|
} //maybe print smth...
|
|
catch (IOException e2) {
|
|
logger.error(e2.getMessage(), e2);
|
|
} //maybe print smth...
|
|
catch (Exception exception) {
|
|
logger.error(exception.getMessage(), exception);
|
|
} //maybe print smth...
|
|
finally {
|
|
}
|
|
|
|
return null;
|
|
}
|
|
|
|
private List<Map<String, String>> getAllResultsFromMockUpJson(String path, String query) {
|
|
List<Map<String, String>> internalResults;
|
|
try {
|
|
String filePath = Paths.get(path).toUri().toURL().toString();
|
|
ObjectMapper mapper = new ObjectMapper();
|
|
internalResults = mapper.readValue(new File(filePath), new TypeReference<List<Map<String, Object>>>(){});
|
|
return searchListMap(internalResults, query);
|
|
} catch (Exception e) {
|
|
logger.error(e.getMessage(), e);
|
|
return new LinkedList<>();
|
|
}
|
|
}
|
|
|
|
private List<Map<String, String>> searchListMap(List<Map<String, String>> internalResults, String query) {
|
|
List<Map<String, String>> list = new LinkedList<>();
|
|
for (Map<String, String> map : internalResults)
|
|
{
|
|
if (map.get("name") != null && map.get("name").toUpperCase().contains(query.toUpperCase())) {
|
|
list.add(map);
|
|
}
|
|
if (map.get("label") != null && map.get("label").toUpperCase().contains(query.toUpperCase())) {
|
|
list.add(map);
|
|
}
|
|
}
|
|
return list;
|
|
}
|
|
|
|
private String transformKey(DataUrlConfiguration dataUrlConfiguration, String key) {
|
|
if (dataUrlConfiguration.getFieldsUrlConfiguration().getId() != null && key.equals(dataUrlConfiguration.getFieldsUrlConfiguration().getId().replace("'",""))) return "pid";
|
|
if (dataUrlConfiguration.getFieldsUrlConfiguration().getDescription() != null && key.equals(dataUrlConfiguration.getFieldsUrlConfiguration().getDescription().replace("'",""))) return "description";
|
|
if (dataUrlConfiguration.getFieldsUrlConfiguration().getUri() != null && key.equals(dataUrlConfiguration.getFieldsUrlConfiguration().getUri().replace("'",""))) return "uri";
|
|
if (dataUrlConfiguration.getFieldsUrlConfiguration().getName() != null && key.equals(dataUrlConfiguration.getFieldsUrlConfiguration().getName().replace("'",""))) return "name";
|
|
if (dataUrlConfiguration.getFieldsUrlConfiguration().getSource() != null && key.equals(dataUrlConfiguration.getFieldsUrlConfiguration().getSource().replace("'",""))) return "source";
|
|
if (dataUrlConfiguration.getFieldsUrlConfiguration().getCount() != null && key.equals(dataUrlConfiguration.getFieldsUrlConfiguration().getCount().replace("'",""))) return "count";
|
|
return null;
|
|
}
|
|
|
|
|
|
static class Results {
|
|
List<Map<String, String>> results;
|
|
Map<String, Integer> pagination;
|
|
|
|
Results() {
|
|
this.results = new ArrayList<Map<String, String>>();
|
|
this.pagination = new HashMap<String, Integer>();
|
|
}
|
|
|
|
Results(List<Map<String, String>> results, Map<String, Integer> pagination) {
|
|
this.results = results;
|
|
this.pagination = pagination;
|
|
}
|
|
|
|
List<Map<String, String>> getResults() {
|
|
return results;
|
|
}
|
|
|
|
public void setResults(List<Map<String, String>> results) {
|
|
this.results = results;
|
|
}
|
|
|
|
Map<String, Integer> getPagination() {
|
|
return pagination;
|
|
}
|
|
|
|
public void setPagination(Map<String, Integer> pagination) {
|
|
this.pagination = pagination;
|
|
}
|
|
}
|
|
}
|