2018-06-27 12:29:21 +02:00
package eu.eudat.logic.proxy.fetching ;
2017-11-21 17:29:16 +01:00
2019-09-05 16:44:25 +02:00
import com.fasterxml.jackson.core.type.TypeReference ;
import com.fasterxml.jackson.databind.ObjectMapper ;
2017-11-21 17:29:16 +01:00
import com.jayway.jsonpath.DocumentContext ;
import com.jayway.jsonpath.JsonPath ;
2020-02-10 17:24:15 +01:00
import eu.eudat.logic.proxy.config.DataUrlConfiguration ;
import eu.eudat.logic.proxy.config.ExternalUrlCriteria ;
import eu.eudat.logic.proxy.config.FetchStrategy ;
import eu.eudat.logic.proxy.config.UrlConfiguration ;
2018-06-27 12:29:21 +02:00
import eu.eudat.logic.proxy.config.configloaders.ConfigLoader ;
import eu.eudat.logic.proxy.config.exceptions.HugeResultSet ;
import eu.eudat.logic.proxy.config.exceptions.NoURLFound ;
2020-03-13 11:04:20 +01:00
import net.minidev.json.JSONArray ;
2020-01-16 16:46:24 +01:00
import org.slf4j.Logger ;
import org.slf4j.LoggerFactory ;
2018-02-16 11:34:02 +01:00
import org.springframework.beans.factory.annotation.Autowired ;
import org.springframework.cache.annotation.Cacheable ;
2020-10-05 10:26:35 +02:00
import org.springframework.http.HttpEntity ;
import org.springframework.http.HttpHeaders ;
import org.springframework.http.HttpMethod ;
import org.springframework.http.HttpStatus ;
2018-02-16 11:34:02 +01:00
import org.springframework.stereotype.Service ;
2020-10-05 10:26:35 +02:00
import org.springframework.web.client.RestTemplate ;
import org.springframework.http.MediaType ;
import org.springframework.http.ResponseEntity ;
2018-02-16 11:34:02 +01:00
2020-02-10 17:24:15 +01:00
import javax.xml.bind.JAXBContext ;
import javax.xml.bind.Unmarshaller ;
import java.beans.PropertyDescriptor ;
import java.io.* ;
import java.lang.reflect.Method ;
2018-02-16 11:34:02 +01:00
import java.net.HttpURLConnection ;
import java.net.MalformedURLException ;
import java.net.URL ;
2019-10-03 13:06:44 +02:00
import java.net.URLEncoder ;
2019-11-08 14:53:46 +01:00
import java.nio.file.Paths ;
2018-02-16 11:34:02 +01:00
import java.util.* ;
2018-05-28 11:50:42 +02:00
import java.util.stream.Collectors ;
2017-11-21 17:29:16 +01:00
2017-11-22 09:57:51 +01:00
@Service
2017-11-21 17:29:16 +01:00
public class RemoteFetcher {
2020-01-16 16:46:24 +01:00
/** Logger shared by every method of this service. */
private static final Logger logger = LoggerFactory.getLogger(RemoteFetcher.class);

/** Loader that supplies the external URL configuration read on each fetch. */
private final ConfigLoader configLoader;

/**
 * Creates the fetcher.
 *
 * @param configLoader loader whose {@code getExternalUrls()} result is consulted by the fetch methods
 */
@Autowired
public RemoteFetcher(ConfigLoader configLoader) {
    this.configLoader = configLoader;
}
2018-01-17 16:06:35 +01:00
@Cacheable ( " repositories " )
2019-10-03 13:06:44 +02:00
public List < Map < String , String > > getRepositories ( ExternalUrlCriteria externalUrlCriteria , String key ) throws NoURLFound , HugeResultSet {
2018-06-29 10:29:43 +02:00
List < UrlConfiguration > urlConfigs =
key ! = null & & ! key . isEmpty ( ) ? configLoader . getExternalUrls ( ) . getRepositories ( ) . getUrls ( ) . stream ( ) . filter ( item - > item . getKey ( ) . equals ( key ) ) . collect ( Collectors . toList ( ) )
2018-05-28 11:50:42 +02:00
: configLoader . getExternalUrls ( ) . getRepositories ( ) . getUrls ( ) ;
2018-01-17 16:06:35 +01:00
FetchStrategy fetchStrategy = configLoader . getExternalUrls ( ) . getRepositories ( ) . getFetchMode ( ) ;
2019-10-03 13:06:44 +02:00
return getAll ( urlConfigs , fetchStrategy , externalUrlCriteria ) ;
2018-01-17 16:06:35 +01:00
}
2019-07-31 16:57:34 +02:00
@Cacheable ( " grants " )
2019-10-03 13:06:44 +02:00
public List < Map < String , String > > getGrants ( ExternalUrlCriteria externalUrlCriteria ) throws NoURLFound , HugeResultSet {
2019-09-05 16:44:25 +02:00
List < UrlConfiguration > urlConfigs = configLoader . getExternalUrls ( ) . getGrants ( ) . getUrls ( ) ;
2019-07-31 16:57:34 +02:00
FetchStrategy fetchStrategy = configLoader . getExternalUrls ( ) . getGrants ( ) . getFetchMode ( ) ;
2019-10-03 13:06:44 +02:00
return getAll ( urlConfigs , fetchStrategy , externalUrlCriteria ) ;
2018-01-17 16:06:35 +01:00
}
2019-08-02 10:27:12 +02:00
@Cacheable ( " projects " )
2019-10-03 13:06:44 +02:00
public List < Map < String , String > > getProjects ( ExternalUrlCriteria externalUrlCriteria ) throws NoURLFound , HugeResultSet {
2019-09-05 16:44:25 +02:00
List < UrlConfiguration > urlConfigs = configLoader . getExternalUrls ( ) . getProjects ( ) . getUrls ( ) ;
2019-08-02 10:27:12 +02:00
FetchStrategy fetchStrategy = configLoader . getExternalUrls ( ) . getProjects ( ) . getFetchMode ( ) ;
2019-10-03 13:06:44 +02:00
return getAll ( urlConfigs , fetchStrategy , externalUrlCriteria ) ;
2019-08-02 10:27:12 +02:00
}
2019-08-20 17:22:53 +02:00
@Cacheable ( " funders " )
2019-10-03 13:06:44 +02:00
public List < Map < String , String > > getFunders ( ExternalUrlCriteria externalUrlCriteria ) throws NoURLFound , HugeResultSet {
2019-09-05 16:44:25 +02:00
List < UrlConfiguration > urlConfigs = configLoader . getExternalUrls ( ) . getFunders ( ) . getUrls ( ) ;
2019-10-01 12:19:39 +02:00
FetchStrategy fetchStrategy = configLoader . getExternalUrls ( ) . getFunders ( ) . getFetchMode ( ) ;
2019-10-03 13:06:44 +02:00
return getAll ( urlConfigs , fetchStrategy , externalUrlCriteria ) ;
2019-08-20 17:22:53 +02:00
}
2018-01-17 16:06:35 +01:00
@Cacheable ( " organisations " )
2019-10-03 13:06:44 +02:00
public List < Map < String , String > > getOrganisations ( ExternalUrlCriteria externalUrlCriteria , String key ) throws NoURLFound , HugeResultSet {
2018-06-29 10:29:43 +02:00
List < UrlConfiguration > urlConfigs =
key ! = null & & ! key . isEmpty ( ) ? configLoader . getExternalUrls ( ) . getOrganisations ( ) . getUrls ( ) . stream ( ) . filter ( item - > item . getKey ( ) . equals ( key ) ) . collect ( Collectors . toList ( ) )
2018-05-28 11:50:42 +02:00
: configLoader . getExternalUrls ( ) . getOrganisations ( ) . getUrls ( ) ;
2018-01-17 16:06:35 +01:00
FetchStrategy fetchStrategy = configLoader . getExternalUrls ( ) . getOrganisations ( ) . getFetchMode ( ) ;
2019-10-03 13:06:44 +02:00
return getAll ( urlConfigs , fetchStrategy , externalUrlCriteria ) ;
2018-01-17 16:06:35 +01:00
}
@Cacheable ( " registries " )
2019-10-03 13:06:44 +02:00
public List < Map < String , String > > getRegistries ( ExternalUrlCriteria externalUrlCriteria , String key ) throws NoURLFound , HugeResultSet {
2018-06-29 10:29:43 +02:00
List < UrlConfiguration > urlConfigs =
key ! = null & & ! key . isEmpty ( ) ? configLoader . getExternalUrls ( ) . getRegistries ( ) . getUrls ( ) . stream ( ) . filter ( item - > item . getKey ( ) . equals ( key ) ) . collect ( Collectors . toList ( ) )
2018-05-28 11:50:42 +02:00
: configLoader . getExternalUrls ( ) . getRegistries ( ) . getUrls ( ) ;
2018-01-17 16:06:35 +01:00
FetchStrategy fetchStrategy = configLoader . getExternalUrls ( ) . getRegistries ( ) . getFetchMode ( ) ;
2019-10-03 13:06:44 +02:00
return getAll ( urlConfigs , fetchStrategy , externalUrlCriteria ) ;
2018-01-17 16:06:35 +01:00
}
@Cacheable ( " services " )
2019-10-03 13:06:44 +02:00
public List < Map < String , String > > getServices ( ExternalUrlCriteria externalUrlCriteria , String key ) throws NoURLFound , HugeResultSet {
2018-06-29 10:29:43 +02:00
List < UrlConfiguration > urlConfigs =
key ! = null & & ! key . isEmpty ( ) ? configLoader . getExternalUrls ( ) . getServices ( ) . getUrls ( ) . stream ( ) . filter ( item - > item . getKey ( ) . equals ( key ) ) . collect ( Collectors . toList ( ) )
2018-05-28 11:50:42 +02:00
: configLoader . getExternalUrls ( ) . getServices ( ) . getUrls ( ) ;
2018-01-17 16:06:35 +01:00
FetchStrategy fetchStrategy = configLoader . getExternalUrls ( ) . getServices ( ) . getFetchMode ( ) ;
2019-10-03 13:06:44 +02:00
return getAll ( urlConfigs , fetchStrategy , externalUrlCriteria ) ;
2018-01-17 16:06:35 +01:00
}
@Cacheable ( " researchers " )
2019-10-03 13:06:44 +02:00
public List < Map < String , String > > getResearchers ( ExternalUrlCriteria externalUrlCriteria , String key ) throws NoURLFound , HugeResultSet {
2018-06-29 10:29:43 +02:00
List < UrlConfiguration > urlConfigs =
key ! = null & & ! key . isEmpty ( ) ? configLoader . getExternalUrls ( ) . getResearchers ( ) . getUrls ( ) . stream ( ) . filter ( item - > item . getKey ( ) . equals ( key ) ) . collect ( Collectors . toList ( ) )
2018-05-28 11:50:42 +02:00
: configLoader . getExternalUrls ( ) . getResearchers ( ) . getUrls ( ) ;
2018-01-17 16:06:35 +01:00
FetchStrategy fetchStrategy = configLoader . getExternalUrls ( ) . getResearchers ( ) . getFetchMode ( ) ;
2019-10-03 13:06:44 +02:00
return getAll ( urlConfigs , fetchStrategy , externalUrlCriteria ) ;
2018-01-17 16:06:35 +01:00
}
2020-04-01 17:17:17 +02:00
/* @Cacheable("tags")
public List<Map<String, String>> getTags(ExternalUrlCriteria externalUrlCriteria, String key) throws NoURLFound, HugeResultSet {
    List<UrlConfiguration> urlConfigs =
            key != null && !key.isEmpty() ? configLoader.getExternalUrls().getTags().getUrls().stream().filter(item -> item.getKey().equals(key)).collect(Collectors.toList())
                    : configLoader.getExternalUrls().getTags().getUrls();
    FetchStrategy fetchStrategy = configLoader.getExternalUrls().getTags().getFetchMode();
    return getAll(urlConfigs, fetchStrategy, externalUrlCriteria);
} */
2018-07-11 15:47:36 +02:00
2019-11-19 10:01:02 +01:00
@Cacheable ( " externalDatasets " )
2019-10-03 13:06:44 +02:00
public List < Map < String , String > > getDatasets ( ExternalUrlCriteria externalUrlCriteria , String key ) throws NoURLFound , HugeResultSet {
2018-06-29 10:29:43 +02:00
List < UrlConfiguration > urlConfigs =
key ! = null & & ! key . isEmpty ( ) ? configLoader . getExternalUrls ( ) . getDatasets ( ) . getUrls ( ) . stream ( ) . filter ( item - > item . getKey ( ) . equals ( key ) ) . collect ( Collectors . toList ( ) )
2018-05-28 11:50:42 +02:00
: configLoader . getExternalUrls ( ) . getDatasets ( ) . getUrls ( ) ;
2018-01-17 16:06:35 +01:00
FetchStrategy fetchStrategy = configLoader . getExternalUrls ( ) . getDatasets ( ) . getFetchMode ( ) ;
2019-10-03 13:06:44 +02:00
return getAll ( urlConfigs , fetchStrategy , externalUrlCriteria ) ;
2018-01-17 16:06:35 +01:00
}
2020-06-26 10:46:18 +02:00
@Cacheable ( " licenses " )
public List < Map < String , String > > getlicenses ( ExternalUrlCriteria externalUrlCriteria , String key ) throws NoURLFound , HugeResultSet {
List < UrlConfiguration > urlConfigs =
key ! = null & & ! key . isEmpty ( ) ? configLoader . getExternalUrls ( ) . getLicenses ( ) . getUrls ( ) . stream ( ) . filter ( item - > item . getKey ( ) . equals ( key ) ) . collect ( Collectors . toList ( ) )
: configLoader . getExternalUrls ( ) . getLicenses ( ) . getUrls ( ) ;
FetchStrategy fetchStrategy = configLoader . getExternalUrls ( ) . getLicenses ( ) . getFetchMode ( ) ;
return getAll ( urlConfigs , fetchStrategy , externalUrlCriteria ) ;
}
2018-01-17 16:06:35 +01:00
2019-10-03 13:06:44 +02:00
private List < Map < String , String > > getAll ( List < UrlConfiguration > urlConfigs , FetchStrategy fetchStrategy , ExternalUrlCriteria externalUrlCriteria ) throws NoURLFound , HugeResultSet {
2018-01-17 16:06:35 +01:00
2019-12-12 10:26:19 +01:00
List < Map < String , String > > results = new LinkedList < > ( ) ;
if ( urlConfigs = = null | | urlConfigs . isEmpty ( ) ) return results ;
// throw new NoURLFound("No Repository urls found in configuration");
2018-01-17 16:06:35 +01:00
2019-10-03 13:06:44 +02:00
urlConfigs . sort ( Comparator . comparing ( UrlConfiguration : : getOrdinal ) ) ;
2018-06-29 10:29:43 +02:00
for ( UrlConfiguration urlConfig : urlConfigs ) {
2019-10-03 13:06:44 +02:00
ifFunderQueryExist ( urlConfig , externalUrlCriteria ) ;
2019-09-05 16:44:25 +02:00
if ( urlConfig . getType ( ) = = null | | urlConfig . getType ( ) . equals ( " External " ) ) {
2020-01-14 12:00:02 +01:00
results . addAll ( getAllResultsFromUrl ( urlConfig . getUrl ( ) , fetchStrategy , urlConfig . getData ( ) , urlConfig . getPaginationPath ( ) , externalUrlCriteria , urlConfig . getLabel ( ) , urlConfig . getKey ( ) , urlConfig . getContentType ( ) , urlConfig . getFirstpage ( ) ) ) ;
2019-10-01 12:19:39 +02:00
} else if ( urlConfig . getType ( ) ! = null & & urlConfig . getType ( ) . equals ( " Internal " ) ) {
2019-10-03 13:06:44 +02:00
results . addAll ( getAllResultsFromMockUpJson ( urlConfig . getUrl ( ) , externalUrlCriteria . getLike ( ) ) ) ;
2019-09-05 16:44:25 +02:00
}
2018-06-29 10:29:43 +02:00
}
return results ;
2018-01-17 16:06:35 +01:00
}
2019-10-03 13:06:44 +02:00
private void ifFunderQueryExist ( UrlConfiguration urlConfiguration , ExternalUrlCriteria externalUrlCriteria ) {
if ( urlConfiguration . getFunderQuery ( ) ! = null ) {
2019-12-10 10:58:15 +01:00
if ( externalUrlCriteria . getFunderId ( ) ! = null & & ! urlConfiguration . getFunderQuery ( ) . startsWith ( " dmp: " ) ) {
2019-10-03 13:06:44 +02:00
urlConfiguration . setUrl ( urlConfiguration . getUrl ( ) . replace ( " {funderQuery} " , urlConfiguration . getFunderQuery ( ) ) ) ;
}
else {
urlConfiguration . setUrl ( urlConfiguration . getUrl ( ) . replace ( " {funderQuery} " , " " ) ) ;
}
2019-10-01 12:19:39 +02:00
}
2019-10-03 13:06:44 +02:00
}
2018-01-17 16:06:35 +01:00
2019-10-04 13:33:38 +02:00
private String replaceCriteriaOnUrl ( String path , ExternalUrlCriteria externalUrlCriteria , String firstPage ) {
2019-10-03 13:06:44 +02:00
String completedPath = path ;
if ( externalUrlCriteria . getLike ( ) ! = null ) {
2020-03-13 11:04:20 +01:00
if ( ( path . contains ( " openaire " ) | | path . contains ( " orcid " ) | | path . contains ( " ror " ) ) & & externalUrlCriteria . getLike ( ) . equals ( " " ) ) {
2019-10-03 13:06:44 +02:00
completedPath = completedPath . replaceAll ( " \\ {like} " , " * " ) ;
2020-02-10 17:27:38 +01:00
} else {
2019-10-03 13:06:44 +02:00
completedPath = completedPath . replaceAll ( " \\ {like} " , externalUrlCriteria . getLike ( ) ) ;
2020-02-10 17:27:38 +01:00
}
2019-10-01 12:19:39 +02:00
} else {
2019-10-03 13:06:44 +02:00
completedPath = completedPath . replace ( " {like} " , " " ) ;
}
if ( externalUrlCriteria . getFunderId ( ) ! = null ) {
2020-05-06 17:02:19 +02:00
String funderPrefix = externalUrlCriteria . getFunderId ( ) . split ( " : " ) [ 0 ] ;
String funderId = externalUrlCriteria . getFunderId ( ) . replace ( funderPrefix + " : " , " " ) ;
if ( funderId . toCharArray ( ) [ 0 ] = = ':' ) {
funderId = externalUrlCriteria . getFunderId ( ) ;
}
2019-10-03 13:06:44 +02:00
try {
2020-05-06 17:02:19 +02:00
funderId = URLEncoder . encode ( funderId , " UTF-8 " ) ;
2019-10-03 13:06:44 +02:00
} catch ( UnsupportedEncodingException e ) {
2020-01-16 16:46:24 +01:00
logger . error ( e . getMessage ( ) , e ) ;
2019-10-03 13:06:44 +02:00
}
completedPath = completedPath . replace ( " {funderId} " , funderId ) ;
2019-10-01 12:19:39 +02:00
}
2019-10-03 13:06:44 +02:00
if ( externalUrlCriteria . getPage ( ) ! = null ) {
completedPath = completedPath . replace ( " {page} " , externalUrlCriteria . getPage ( ) ) ;
} else {
2019-10-04 13:33:38 +02:00
if ( firstPage ! = null ) {
completedPath = completedPath . replace ( " {page} " , firstPage ) ;
} else {
completedPath = completedPath . replace ( " {page} " , " 1 " ) ;
}
2019-10-03 13:06:44 +02:00
}
if ( externalUrlCriteria . getPageSize ( ) ! = null ) {
completedPath = completedPath . replace ( " {pageSize} " , externalUrlCriteria . getPageSize ( ) ) ;
} else {
2019-12-13 12:08:32 +01:00
completedPath = completedPath . replace ( " {pageSize} " , " 60 " ) ;
2019-10-03 13:06:44 +02:00
}
2020-02-10 17:24:15 +01:00
if ( externalUrlCriteria . getHost ( ) ! = null ) {
completedPath = completedPath . replace ( " {host} " , externalUrlCriteria . getHost ( ) ) ;
} else {
completedPath = completedPath . replace ( " {host} " , " " ) ;
}
if ( externalUrlCriteria . getPath ( ) ! = null ) {
completedPath = completedPath . replace ( " {path} " , externalUrlCriteria . getPath ( ) ) ;
} else {
completedPath = completedPath . replace ( " {path} " , " " ) ;
}
2019-10-03 13:06:44 +02:00
return completedPath ;
}
2018-01-17 16:06:35 +01:00
2020-01-14 12:00:02 +01:00
private List < Map < String , String > > getAllResultsFromUrl ( String path , FetchStrategy fetchStrategy , final DataUrlConfiguration jsonDataPath , final String jsonPaginationPath , ExternalUrlCriteria externalUrlCriteria , String tag , String key , String contentType , String firstPage ) throws HugeResultSet {
2019-10-03 13:06:44 +02:00
Set < Integer > pages = new HashSet < > ( ) ;
2019-10-04 13:33:38 +02:00
String replacedPath = replaceCriteriaOnUrl ( path , externalUrlCriteria , firstPage ) ;
2019-10-03 13:06:44 +02:00
Results results = getResultsFromUrl ( replacedPath , jsonDataPath , jsonPaginationPath , contentType ) ;
2018-01-17 16:06:35 +01:00
if ( fetchStrategy = = FetchStrategy . FIRST )
2020-01-14 12:00:02 +01:00
return results = = null ? new LinkedList < > ( ) : results . getResults ( ) . stream ( ) . peek ( x - > x . put ( " tag " , tag ) ) . peek ( x - > x . put ( " key " , key ) ) . collect ( Collectors . toList ( ) ) ;
2018-01-17 16:06:35 +01:00
2019-10-01 12:19:39 +02:00
if ( results ! = null & & results . getPagination ( ) ! = null & & results . getPagination ( ) . get ( " pages " ) ! = null ) //if has more pages, add them to the pages set
2018-01-17 16:06:35 +01:00
for ( int i = 2 ; i < = results . getPagination ( ) . get ( " pages " ) ; i + + )
pages . add ( i ) ;
Long maxResults = configLoader . getExternalUrls ( ) . getMaxresults ( ) ;
2019-10-01 12:19:39 +02:00
if ( ( maxResults > 0 & & results ! = null ) & & ( results . getPagination ( ) . get ( " count " ) > maxResults ) )
2019-10-03 13:06:44 +02:00
throw new HugeResultSet ( " The submitted search query " + externalUrlCriteria . getLike ( ) + " is about to return " + results . getPagination ( ) . get ( " count " ) + " results... Please submit a more detailed search query " ) ;
2018-01-17 16:06:35 +01:00
Optional < Results > optionalResults = pages . parallelStream ( )
2019-10-03 13:06:44 +02:00
. map ( page - > getResultsFromUrl ( path + " &page= " + page , jsonDataPath , jsonPaginationPath , contentType ) )
2018-01-17 16:06:35 +01:00
. reduce ( ( result1 , result2 ) - > {
result1 . getResults ( ) . addAll ( result2 . getResults ( ) ) ;
return result1 ;
} ) ;
2019-10-01 12:19:39 +02:00
Results remainingResults = optionalResults . orElseGet ( Results : : new ) ;
2018-01-17 16:06:35 +01:00
remainingResults . getResults ( ) . addAll ( results . getResults ( ) ) ;
2020-01-14 12:00:02 +01:00
return remainingResults . getResults ( ) . stream ( ) . peek ( x - > x . put ( " tag " , tag ) ) . collect ( Collectors . toList ( ) ) ;
2018-01-17 16:06:35 +01:00
}
2019-10-01 12:19:39 +02:00
private Results getResultsFromUrl ( String urlString , DataUrlConfiguration jsonDataPath , String jsonPaginationPath , String contentType ) {
2018-01-17 16:06:35 +01:00
try {
2020-10-05 10:26:35 +02:00
RestTemplate restTemplate = new RestTemplate ( ) ;
HttpHeaders headers = new HttpHeaders ( ) ;
HttpEntity < String > entity ;
ResponseEntity response ;
/ *
URL url = new URL ( urlString . replaceAll ( " " , " %20 " ) ) ;
HttpURLConnection con = ( HttpURLConnection ) url . openConnection ( ) ;
con . setRequestMethod ( " GET " ) ;
* /
2020-02-10 17:24:15 +01:00
if ( contentType ! = null & & ! contentType . isEmpty ( ) ) {
2020-10-05 10:26:35 +02:00
headers . setAccept ( Collections . singletonList ( MediaType . valueOf ( contentType ) ) ) ;
2020-02-10 17:24:15 +01:00
}
2020-10-05 10:26:35 +02:00
entity = new HttpEntity < > ( " parameters " , headers ) ;
2018-01-17 16:06:35 +01:00
2020-10-05 10:26:35 +02:00
if ( contentType . contains ( " json " ) ) {
response = restTemplate . exchange ( urlString , HttpMethod . GET , entity , Object . class ) ;
} else {
response = restTemplate . exchange ( urlString , HttpMethod . GET , entity , String . class ) ;
}
if ( response . getStatusCode ( ) = = HttpStatus . OK ) { // success
2018-01-17 16:06:35 +01:00
//do here all the parsing
2020-02-10 17:24:15 +01:00
Results results = new Results ( ) ;
2020-10-05 10:26:35 +02:00
if ( response . getHeaders ( ) . get ( " Content-Type " ) . get ( 0 ) . contains ( " json " ) ) {
DocumentContext jsonContext = JsonPath . parse ( response . getBody ( ) ) ;
2020-02-10 17:24:15 +01:00
if ( jsonDataPath . getFieldsUrlConfiguration ( ) . getSource ( ) ! = null ) {
results = new Results ( jsonContext . read ( jsonDataPath . getPath ( )
+ " [ " + jsonDataPath . getFieldsUrlConfiguration ( ) . getName ( ) + " , " + jsonDataPath . getFieldsUrlConfiguration ( ) . getDescription ( )
+ " , " + jsonDataPath . getFieldsUrlConfiguration ( ) . getUri ( ) + " , " + jsonDataPath . getFieldsUrlConfiguration ( ) . getId ( )
+ " , " + jsonDataPath . getFieldsUrlConfiguration ( ) . getSource ( ) + " ] " ) ,
new HashMap < > ( 1 , 1 ) ) ;
} else if ( jsonDataPath . getFieldsUrlConfiguration ( ) . getCount ( ) ! = null ) { // parsing services.openaire.eu
results = new Results ( jsonContext . read ( jsonDataPath . getPath ( )
+ " [ " + jsonDataPath . getFieldsUrlConfiguration ( ) . getName ( )
+ " , " + jsonDataPath . getFieldsUrlConfiguration ( ) . getId ( ) + " ] " ) ,
new HashMap < > ( 1 , 1 ) ) ;
2020-05-29 16:10:18 +02:00
List < Map < String , String > > fixedResults = results . getResults ( ) . stream ( ) . map ( item - > {
2020-09-09 16:50:47 +02:00
for ( int i = 0 ; i < 2 ; i + + ) {
String id ;
if ( i = = 0 ) {
id = jsonDataPath . getFieldsUrlConfiguration ( ) . getId ( ) . replace ( " ' " , " " ) ;
} else {
id = jsonDataPath . getFieldsUrlConfiguration ( ) . getName ( ) . replace ( " ' " , " " ) ;
}
if ( ! ( item . get ( id ) instanceof String ) ) {
Object obj = item . get ( id ) ;
if ( obj instanceof JSONArray ) {
JSONArray jarr = ( JSONArray ) obj ;
if ( jarr . get ( 0 ) instanceof String ) {
item . put ( id , jarr . get ( 0 ) . toString ( ) ) ;
} else {
for ( int j = 0 ; j < jarr . size ( ) ; j + + ) {
mapToMap ( id , ( Map < String , String > ) jarr . get ( j ) , item , i = = 1 ) ;
}
}
} else {
2020-09-11 13:29:12 +02:00
if ( obj instanceof Map ) {
mapToMap ( id , ( Map < String , String > ) obj , item , i = = 1 ) ;
} else if ( obj ! = null ) {
item . put ( id , obj . toString ( ) ) ;
}
2020-09-09 16:50:47 +02:00
}
}
2020-05-29 16:10:18 +02:00
}
return item ;
} ) . collect ( Collectors . toList ( ) ) ;
results = new Results ( fixedResults , new HashMap < > ( 1 , 1 ) ) ;
2020-02-10 17:24:15 +01:00
} else if ( jsonDataPath . getFieldsUrlConfiguration ( ) . getPath ( ) ! = null ) {
results = new Results ( jsonContext . read ( jsonDataPath . getPath ( )
+ " [ " + jsonDataPath . getFieldsUrlConfiguration ( ) . getPath ( )
+ " , " + jsonDataPath . getFieldsUrlConfiguration ( ) . getHost ( ) + " ] " ) ,
new HashMap < > ( 1 , 1 ) ) ;
List < Map < String , String > > multiResults = results . results . stream ( ) . map ( result - > {
ExternalUrlCriteria externalUrlCriteria = new ExternalUrlCriteria ( ) ;
externalUrlCriteria . setPath ( result . get ( " path " ) ) ;
externalUrlCriteria . setHost ( result . get ( " host " ) ) ;
String replacedPath = replaceCriteriaOnUrl ( jsonDataPath . getUrlConfiguration ( ) . getUrl ( ) , externalUrlCriteria , jsonDataPath . getUrlConfiguration ( ) . getFirstpage ( ) ) ;
return getResultsFromUrl ( replacedPath , jsonDataPath . getUrlConfiguration ( ) . getData ( ) , jsonDataPath . getUrlConfiguration ( ) . getData ( ) . getPath ( ) , jsonDataPath . getUrlConfiguration ( ) . getContentType ( ) ) ;
} ) . filter ( Objects : : nonNull ) . map ( results1 - > results1 . results . get ( 0 ) ) . collect ( Collectors . toList ( ) ) ;
results = new Results ( multiResults , new HashMap < > ( 1 , 1 ) ) ;
2020-03-13 11:04:20 +01:00
} else if ( jsonDataPath . getFieldsUrlConfiguration ( ) . getTypes ( ) ! = null ) {
List < Map < String , Object > > tempRes = jsonContext . read ( jsonDataPath . getPath ( )
+ " [ " + jsonDataPath . getFieldsUrlConfiguration ( ) . getId ( ) + " , " + jsonDataPath . getFieldsUrlConfiguration ( ) . getName ( )
+ " , " + jsonDataPath . getFieldsUrlConfiguration ( ) . getTypes ( ) + " , " + jsonDataPath . getFieldsUrlConfiguration ( ) . getUri ( ) + " ] " ) ;
List < Map < String , String > > finalRes = new ArrayList < > ( ) ;
tempRes . forEach ( map - > {
Map < String , String > resMap = new HashMap < > ( ) ;
map . forEach ( ( key , value ) - > {
if ( key . equals ( jsonDataPath . getFieldsUrlConfiguration ( ) . getTypes ( ) . substring ( 1 , jsonDataPath . getFieldsUrlConfiguration ( ) . getTypes ( ) . length ( ) - 1 ) ) ) {
resMap . put ( " tags " , ( ( JSONArray ) value ) . toJSONString ( ) ) ;
} else if ( key . equals ( jsonDataPath . getFieldsUrlConfiguration ( ) . getUri ( ) . substring ( 1 , jsonDataPath . getFieldsUrlConfiguration ( ) . getTypes ( ) . length ( ) - 1 ) ) ) {
resMap . put ( key , ( ( JSONArray ) value ) . toJSONString ( ) ) ;
} else {
resMap . put ( key , ( String ) value ) ;
}
} ) ;
finalRes . add ( resMap ) ;
} ) ;
results = new Results ( finalRes ,
new HashMap < > ( 1 , 1 ) ) ;
2020-09-11 11:07:49 +02:00
} else if ( jsonDataPath . getFieldsUrlConfiguration ( ) . getFirstName ( ) ! = null ) {
results = new Results ( jsonContext . read ( jsonDataPath . getPath ( )
+ " [ " + jsonDataPath . getFieldsUrlConfiguration ( ) . getId ( ) + " , " + jsonDataPath . getFieldsUrlConfiguration ( ) . getFirstName ( )
+ " , " + jsonDataPath . getFieldsUrlConfiguration ( ) . getLastName ( ) + " ] " ) ,
new HashMap < > ( 1 , 1 ) ) ;
results . getResults ( ) . stream ( ) . forEach ( entry - > {
String name = entry . get ( jsonDataPath . getFieldsUrlConfiguration ( ) . getFirstName ( ) . replace ( " ' " , " " ) ) + " " + entry . get ( jsonDataPath . getFieldsUrlConfiguration ( ) . getLastName ( ) . replace ( " ' " , " " ) ) ;
entry . put ( " name " , name ) ;
entry . remove ( jsonDataPath . getFieldsUrlConfiguration ( ) . getFirstName ( ) . replace ( " ' " , " " ) ) ;
entry . remove ( jsonDataPath . getFieldsUrlConfiguration ( ) . getLastName ( ) . replace ( " ' " , " " ) ) ;
} ) ;
2020-02-10 17:24:15 +01:00
} else {
results = new Results ( jsonContext . read ( jsonDataPath . getPath ( )
+ " [ " + jsonDataPath . getFieldsUrlConfiguration ( ) . getName ( ) + " , " + jsonDataPath . getFieldsUrlConfiguration ( ) . getDescription ( )
+ " , " + jsonDataPath . getFieldsUrlConfiguration ( ) . getUri ( ) + " , " + jsonDataPath . getFieldsUrlConfiguration ( ) . getId ( ) + " ] " ) ,
new HashMap < > ( 1 , 1 ) ) ;
}
results . results = results . results . stream ( ) . map ( e - > e . entrySet ( ) . stream ( ) . collect ( Collectors . toMap ( x - > this . transformKey ( jsonDataPath , x . getKey ( ) ) , Map . Entry : : getValue ) ) )
. collect ( Collectors . toList ( ) ) ;
2020-05-29 16:10:18 +02:00
}
2020-10-05 10:26:35 +02:00
else if ( response . getHeaders ( ) . get ( " Content-Type " ) . get ( 0 ) . contains ( " xml " ) ) {
2020-02-10 17:24:15 +01:00
Class < ? > aClass = Class . forName ( jsonDataPath . getParseClass ( ) ) ;
JAXBContext jaxbContext = JAXBContext . newInstance ( aClass ) ;
Unmarshaller unmarshaller = jaxbContext . createUnmarshaller ( ) ;
2020-10-05 10:26:35 +02:00
StringReader stringReader = new StringReader ( response . getBody ( ) . toString ( ) ) ;
Object data = unmarshaller . unmarshal ( stringReader ) ;
2020-02-10 17:24:15 +01:00
Method reader = null ;
if ( jsonDataPath . getParseField ( ) ! = null & & ! jsonDataPath . getParseField ( ) . isEmpty ( ) ) {
reader = new PropertyDescriptor ( jsonDataPath . getParseField ( ) , aClass ) . getReadMethod ( ) ;
}
ObjectMapper objectMapper = new ObjectMapper ( ) ;
List < Map < String , String > > values = new ArrayList < > ( ) ;
2020-03-27 13:34:11 +01:00
int max = 1 ;
if ( reader ! = null ) {
Object invokedField = reader . invoke ( data ) ;
if ( invokedField instanceof Collection ) {
max = ( ( Collection ) invokedField ) . size ( ) ;
}
}
for ( int i = 0 ; i < max ; i + + ) {
Object value ;
if ( reader ! = null ) {
Object invokedField = reader . invoke ( data ) ;
if ( invokedField instanceof Collection ) {
value = ( ( Collection ) invokedField ) . toArray ( ) [ i ] ;
} else {
value = invokedField ;
}
} else {
value = data ;
}
Map < String , String > map = objectMapper . convertValue ( value , Map . class ) ;
if ( jsonDataPath . getMergedFields ( ) ! = null & & ! jsonDataPath . getMergedFields ( ) . isEmpty ( ) & & jsonDataPath . getMergedFieldName ( ) ! = null & & ! jsonDataPath . getMergedFieldName ( ) . isEmpty ( ) ) {
Map < String , String > finalMap = new HashMap < > ( ) ;
for ( Map . Entry < String , String > entry : map . entrySet ( ) ) {
if ( jsonDataPath . getMergedFields ( ) . contains ( entry . getKey ( ) ) ) {
if ( ! finalMap . containsKey ( jsonDataPath . getMergedFieldName ( ) ) ) {
finalMap . put ( jsonDataPath . getMergedFieldName ( ) , entry . getValue ( ) ) ;
} else {
finalMap . put ( jsonDataPath . getMergedFieldName ( ) , finalMap . get ( jsonDataPath . getMergedFieldName ( ) ) + " " + entry . getValue ( ) ) ;
}
2020-02-10 17:24:15 +01:00
} else {
2020-03-27 13:34:11 +01:00
finalMap . put ( entry . getKey ( ) , entry . getValue ( ) ) ;
2020-02-10 17:24:15 +01:00
}
}
2020-03-27 13:34:11 +01:00
values . add ( finalMap ) ;
} else {
values . add ( map ) ;
2020-02-10 17:24:15 +01:00
}
}
results = new Results ( values , new HashMap < > ( 1 , 1 ) ) ;
2019-08-27 13:24:55 +02:00
}
2020-02-10 17:24:15 +01:00
2018-01-17 16:06:35 +01:00
return results ;
}
2020-10-05 10:26:35 +02:00
} catch ( Exception exception ) {
2020-01-16 16:46:24 +01:00
logger . error ( exception . getMessage ( ) , exception ) ;
2018-01-17 16:06:35 +01:00
} //maybe print smth...
finally {
}
return null ;
2019-09-05 16:44:25 +02:00
}
2018-01-17 16:06:35 +01:00
2019-09-05 16:44:25 +02:00
private List < Map < String , String > > getAllResultsFromMockUpJson ( String path , String query ) {
List < Map < String , String > > internalResults ;
try {
2019-11-08 14:53:46 +01:00
String filePath = Paths . get ( path ) . toUri ( ) . toURL ( ) . toString ( ) ;
2019-09-05 16:44:25 +02:00
ObjectMapper mapper = new ObjectMapper ( ) ;
internalResults = mapper . readValue ( new File ( filePath ) , new TypeReference < List < Map < String , Object > > > ( ) { } ) ;
2020-01-14 12:00:02 +01:00
return searchListMap ( internalResults , query ) ;
2019-09-05 16:44:25 +02:00
} catch ( Exception e ) {
2020-01-16 16:46:24 +01:00
logger . error ( e . getMessage ( ) , e ) ;
2019-09-05 16:44:25 +02:00
return new LinkedList < > ( ) ;
}
}
private List < Map < String , String > > searchListMap ( List < Map < String , String > > internalResults , String query ) {
List < Map < String , String > > list = new LinkedList < > ( ) ;
for ( Map < String , String > map : internalResults )
{
2019-09-16 17:26:18 +02:00
if ( map . get ( " name " ) ! = null & & map . get ( " name " ) . toUpperCase ( ) . contains ( query . toUpperCase ( ) ) ) {
list . add ( map ) ;
}
if ( map . get ( " label " ) ! = null & & map . get ( " label " ) . toUpperCase ( ) . contains ( query . toUpperCase ( ) ) ) {
2019-09-05 16:44:25 +02:00
list . add ( map ) ;
}
}
return list ;
2018-01-17 16:06:35 +01:00
}
2018-06-29 10:29:43 +02:00
private String transformKey ( DataUrlConfiguration dataUrlConfiguration , String key ) {
2019-10-03 13:06:44 +02:00
if ( dataUrlConfiguration . getFieldsUrlConfiguration ( ) . getId ( ) ! = null & & key . equals ( dataUrlConfiguration . getFieldsUrlConfiguration ( ) . getId ( ) . replace ( " ' " , " " ) ) ) return " pid " ;
2019-10-01 12:19:39 +02:00
if ( dataUrlConfiguration . getFieldsUrlConfiguration ( ) . getDescription ( ) ! = null & & key . equals ( dataUrlConfiguration . getFieldsUrlConfiguration ( ) . getDescription ( ) . replace ( " ' " , " " ) ) ) return " description " ;
2019-10-03 13:06:44 +02:00
if ( dataUrlConfiguration . getFieldsUrlConfiguration ( ) . getUri ( ) ! = null & & key . equals ( dataUrlConfiguration . getFieldsUrlConfiguration ( ) . getUri ( ) . replace ( " ' " , " " ) ) ) return " uri " ;
if ( dataUrlConfiguration . getFieldsUrlConfiguration ( ) . getName ( ) ! = null & & key . equals ( dataUrlConfiguration . getFieldsUrlConfiguration ( ) . getName ( ) . replace ( " ' " , " " ) ) ) return " name " ;
2019-10-01 12:19:39 +02:00
if ( dataUrlConfiguration . getFieldsUrlConfiguration ( ) . getSource ( ) ! = null & & key . equals ( dataUrlConfiguration . getFieldsUrlConfiguration ( ) . getSource ( ) . replace ( " ' " , " " ) ) ) return " source " ;
2019-10-03 13:06:44 +02:00
if ( dataUrlConfiguration . getFieldsUrlConfiguration ( ) . getCount ( ) ! = null & & key . equals ( dataUrlConfiguration . getFieldsUrlConfiguration ( ) . getCount ( ) . replace ( " ' " , " " ) ) ) return " count " ;
2020-02-10 17:24:15 +01:00
if ( dataUrlConfiguration . getFieldsUrlConfiguration ( ) . getPath ( ) ! = null & & key . equals ( dataUrlConfiguration . getFieldsUrlConfiguration ( ) . getPath ( ) . replace ( " ' " , " " ) ) ) return " path " ;
if ( dataUrlConfiguration . getFieldsUrlConfiguration ( ) . getHost ( ) ! = null & & key . equals ( dataUrlConfiguration . getFieldsUrlConfiguration ( ) . getHost ( ) . replace ( " ' " , " " ) ) ) return " host " ;
2018-06-29 10:29:43 +02:00
return null ;
}
2018-01-17 16:06:35 +01:00
2019-09-05 16:44:25 +02:00
static class Results {
2018-01-17 16:06:35 +01:00
List < Map < String , String > > results ;
Map < String , Integer > pagination ;
2019-09-05 16:44:25 +02:00
Results ( ) {
2020-02-10 17:24:15 +01:00
this . results = new ArrayList < > ( ) ;
this . pagination = new HashMap < > ( ) ;
2018-01-17 16:06:35 +01:00
}
2019-09-05 16:44:25 +02:00
Results ( List < Map < String , String > > results , Map < String , Integer > pagination ) {
2018-01-17 16:06:35 +01:00
this . results = results ;
this . pagination = pagination ;
}
2019-09-05 16:44:25 +02:00
List < Map < String , String > > getResults ( ) {
2018-01-17 16:06:35 +01:00
return results ;
}
public void setResults ( List < Map < String , String > > results ) {
this . results = results ;
}
2019-09-05 16:44:25 +02:00
Map < String , Integer > getPagination ( ) {
2018-01-17 16:06:35 +01:00
return pagination ;
}
public void setPagination ( Map < String , Integer > pagination ) {
this . pagination = pagination ;
}
}
2020-09-09 16:50:47 +02:00
private void mapToMap ( String key , Map < String , String > source , Map < String , String > destination , boolean isTitle ) {
2020-09-11 13:29:12 +02:00
if ( source ! = null ) {
String content = source . get ( " content " ) ;
if ( isTitle ) {
String classId = source . get ( " classid " ) ;
if ( classId . equals ( " main title " ) ) {
destination . put ( key , content ) ;
}
} else {
2020-09-09 16:50:47 +02:00
destination . put ( key , content ) ;
}
}
}
2017-11-21 17:29:16 +01:00
}