forked from D-Net/dnet-hadoop
apply commits from master
This commit is contained in:
parent
88fef367b9
commit
455f2e1e07
|
@ -45,15 +45,22 @@ public class BaseCollectorPlugin implements CollectorPlugin {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Stream<String> collect(final ApiDescriptor api, final AggregatorReport report) throws CollectorException {
|
public Stream<String> collect(final ApiDescriptor api, final AggregatorReport report) throws CollectorException {
|
||||||
// get path to file
|
// the path of the dump file on HDFS
|
||||||
|
// http://oai.base-search.net/initial_load/base_oaipmh_dump-current.tar
|
||||||
|
// it can be downloaded from iis-cdh5-test-gw.ocean.icm.edu.pl and then copied to HDFS
|
||||||
final Path filePath = Optional
|
final Path filePath = Optional
|
||||||
.ofNullable(api.getBaseUrl())
|
.ofNullable(api.getBaseUrl())
|
||||||
.map(Path::new)
|
.map(Path::new)
|
||||||
.orElseThrow(() -> new CollectorException("missing baseUrl"));
|
.orElseThrow(() -> new CollectorException("missing baseUrl"));
|
||||||
|
|
||||||
|
// get the parameters for the connection to the OpenAIRE database.
|
||||||
|
// the database is used to obtain the list of the datasources that the plugin will collect
|
||||||
final String dbUrl = api.getParams().get("dbUrl");
|
final String dbUrl = api.getParams().get("dbUrl");
|
||||||
final String dbUser = api.getParams().get("dbUser");
|
final String dbUser = api.getParams().get("dbUser");
|
||||||
final String dbPassword = api.getParams().get("dbPassword");
|
final String dbPassword = api.getParams().get("dbPassword");
|
||||||
|
|
||||||
|
// the types (comma separated, empty value for all) that the plugin will collect;
|
||||||
|
// the types should be expressed in the format of the normalized types of BASE (for example 1,121,...)
|
||||||
final String acceptedNormTypesString = api.getParams().get("acceptedNormTypes");
|
final String acceptedNormTypesString = api.getParams().get("acceptedNormTypes");
|
||||||
|
|
||||||
log.info("baseUrl: {}", filePath);
|
log.info("baseUrl: {}", filePath);
|
||||||
|
|
|
@ -108,7 +108,7 @@ INSERT INTO dsm_apiparams(
|
||||||
'api_________::openaire____::base_search::dump@@acceptedNormTypes',
|
'api_________::openaire____::base_search::dump@@acceptedNormTypes',
|
||||||
'api_________::openaire____::base_search::dump',
|
'api_________::openaire____::base_search::dump',
|
||||||
'acceptedNormTypes',
|
'acceptedNormTypes',
|
||||||
''
|
'1,11,111,121,13,14,15,18,181,182,183,1A,6,7'
|
||||||
);
|
);
|
||||||
|
|
||||||
COMMIT;
|
COMMIT;
|
|
@ -2,6 +2,8 @@ select s.id as id
|
||||||
from dsm_services s
|
from dsm_services s
|
||||||
where collectedfrom = 'openaire____::opendoar'
|
where collectedfrom = 'openaire____::opendoar'
|
||||||
and jurisdiction = 'Institutional'
|
and jurisdiction = 'Institutional'
|
||||||
and s.id not in (
|
and s.id in (
|
||||||
select service from dsm_api where coalesce(compatibility_override, compatibility) like '%openaire%' or last_collection_total > 0
|
select service from dsm_api where coalesce(compatibility_override, compatibility) = 'driver' or coalesce(compatibility_override, compatibility) = 'UNKNOWN'
|
||||||
);
|
) and s.id not in (
|
||||||
|
select service from dsm_api where coalesce(compatibility_override, compatibility) like '%openaire%'
|
||||||
|
);
|
||||||
|
|
Loading…
Reference in New Issue