2020-06-11 16:40:40 +02:00
|
|
|
from elasticsearch import Elasticsearch
|
|
|
|
from elasticsearch_dsl import *
|
|
|
|
import logging
|
|
|
|
|
2020-06-15 16:13:20 +02:00
|
|
|
# Module-level logger for this connector; records below INFO are dropped.
log = logging.getLogger('ES connector')
log.setLevel(logging.INFO)
|
|
|
|
|
2020-06-15 16:13:20 +02:00
|
|
|
def load_properties():
    """Read ``configuration.properties`` from the current working directory.

    Every ``key=value`` line becomes one dictionary entry; surrounding
    whitespace is stripped from both the key and the value. Only the first
    ``=`` separates key from value, so values may themselves contain ``=``.
    Blank lines, comment lines (first non-blank character is ``#``) and
    lines without any ``=`` are skipped.

    Returns:
        dict: property names mapped to their (string) values.

    Raises:
        OSError: if ``configuration.properties`` cannot be opened.
    """
    properties = {}
    with open('configuration.properties') as f:
        for raw_line in f:
            line = raw_line.strip()
            # Skip empty lines and comments (also when the '#' is indented).
            if not line or line.startswith("#"):
                continue
            # partition() splits on the first '=' only, keeping any further
            # '=' characters as part of the value (e.g. URLs with a query).
            key, separator, value = line.partition("=")
            if not separator:
                # Not a key=value pair; ignoring it avoids the IndexError
                # the naive split-and-index approach would raise here.
                continue
            properties[key.strip()] = value.strip()
    return properties
|
2020-06-11 16:40:40 +02:00
|
|
|
|
|
|
|
class ESObject(object):
    """Plain value holder for the fields of a single search hit.

    The constructor stores its six arguments unchanged as attributes of the
    same name: ``id``, ``pid``, ``type``, ``title``, ``abstract`` and
    ``propagated_abstract``.
    """

    def __init__(self, id, pid, type, title, abstract, propagated_abstract):
        # ``id`` and ``type`` shadow builtins, but they are part of the
        # constructor's public signature and therefore kept as they are.
        self.id, self.pid, self.type = id, pid, type
        self.title = title
        self.abstract, self.propagated_abstract = abstract, propagated_abstract
|
|
|
|
|
|
|
|
|
|
|
|
class ESResponse(object):
    """Container for a search result: a count plus a list of hits.

    Args:
        count: value stored as ``self.count`` (defaults to ``0``).
        hits: list stored as ``self.hits``. When omitted (``None``) every
            instance gets its own fresh empty list.
    """

    def __init__(self, count=0, hits=None):
        self.count = count
        # A literal ``[]`` default would be one list object shared by every
        # instance created without ``hits``; build a new list per instance.
        self.hits = [] if hits is None else hits
|
|
|
|
|
|
|
|
|
|
|
|
class ESConnector(object):
    """Thin wrapper around an Elasticsearch client configured from properties.

    The hosts are read from the ``ES_INDEX`` entry returned by
    ``load_properties()``; the value is treated as a comma-separated list.
    """

    def __init__(self):
        # Split the comma-separated host list and drop surrounding whitespace.
        self.index_host = [x.strip() for x in load_properties()['ES_INDEX'].split(',')]
        # NOTE(review): elasticsearch-py documents ``timeout`` in seconds;
        # 600000 looks like a millisecond value -- confirm the intent.
        self.client = Elasticsearch(hosts=self.index_host, timeout=600000)

    def query_after(self, query_string=None, start=0, i='propagation-after', size=10):
        """Run a paginated search and wrap the hits in an ``ESResponse``.

        Args:
            query_string: optional text for a ``query_string`` query; when
                ``None`` no query clause is added to the search.
            start: offset of the first hit to return.
            i: name of the index to search.
            size: maximum number of hits to return (page size); defaults to
                10, the previously hard-coded value.

        Returns:
            ESResponse: ``hits`` is a list of ``ESObject`` built from the
            returned documents (missing ``abstract`` / ``propagated_abstract``
            fields become ``''``); ``count`` is ``response.hits.total`` as
            reported by the client.
        """
        s = Search(using=self.client, index=i)
        if query_string is not None:
            s = s.query(Q('query_string', query=query_string))

        # Slicing the Search object sets the from/size window of the request.
        s = s[start:start + size]
        response = s.execute()

        hits = [
            ESObject(
                hit.id,
                hit.pid,
                hit.type,
                hit.title,
                hit.abstract if 'abstract' in hit else '',
                hit.propagated_abstract if 'propagated_abstract' in hit else '',
            )
            for hit in response.hits
        ]

        return ESResponse(hits=hits, count=response.hits.total)
|