# TPDL2020-context-propagation/es_connector.py
#
# 55 lines
# 1.8 KiB
# Python

from elasticsearch import Elasticsearch
from elasticsearch_dsl import *
import logging
# Module-level logger registered under the name 'ES connector'; its
# threshold is set to INFO.
log = logging.getLogger('ES connector')
log.setLevel(logging.INFO)
def load_properties():
    """Parse ``configuration.properties`` into a dict of string settings.

    The file is opened relative to the current working directory and is
    read as one ``key = value`` pair per line:

    * blank lines and lines whose first non-blank character is ``#`` are
      ignored;
    * only the first ``=`` separates key from value, so a value may itself
      contain ``=`` (e.g. a URL with a query string);
    * a line with no ``=`` at all maps its text to an empty string instead
      of raising ``IndexError``.

    Returns:
        dict: stripped keys mapped to stripped values.

    Raises:
        OSError: if ``configuration.properties`` cannot be opened.
    """
    properties = {}
    with open('configuration.properties') as f:
        for raw_line in f:
            line = raw_line.strip()
            # Skip empty lines and comments (also when the '#' is indented).
            if not line or line.startswith("#"):
                continue
            # partition() splits on the first '=' only and never raises when
            # the separator is missing (value is then '').
            key, _, value = line.partition("=")
            properties[key.strip()] = value.strip()
    return properties
class ESObject(object):
    """Plain value holder for the fields of a single search hit.

    Every constructor argument is stored unchanged on the attribute of the
    same name.
    """

    def __init__(self, id, pid, type, title, abstract, propagated_abstract):
        # The parameter names (including ``id`` and ``type``) are part of the
        # keyword interface of this constructor, so they are kept as they are.
        self.id, self.pid, self.type = id, pid, type
        self.title, self.abstract = title, abstract
        self.propagated_abstract = propagated_abstract
class ESResponse(object):
    """A page of search results together with the reported total.

    Attributes:
        count: the total passed in by the caller (``0`` by default).
        hits: the list of result objects for this page.
    """

    def __init__(self, count=0, hits=None):
        """Store ``count`` and ``hits`` on the instance.

        Args:
            count: total number of matches; defaults to ``0``.
            hits: list of result objects.  When omitted (or ``None``) a new
                empty list is created for this instance.
        """
        self.count = count
        # A literal ``[]`` default is evaluated once and would be shared by
        # every instance built without ``hits``; allocate per instance.
        self.hits = [] if hits is None else hits
class ESConnector(object):
    """Small query wrapper around an Elasticsearch client.

    The hosts to contact come from the ``ES_INDEX`` entry returned by
    ``load_properties()``, written as a comma-separated list.
    """

    def __init__(self):
        """Build the client from the ``ES_INDEX`` property.

        Raises:
            KeyError: if the loaded properties have no ``ES_INDEX`` entry.
        """
        hosts_setting = load_properties()['ES_INDEX']
        # Drop empty entries so a stray or trailing comma does not produce
        # an empty host name.
        self.index_host = [x.strip() for x in hosts_setting.split(',') if x.strip()]
        # NOTE(review): elasticsearch-py appears to interpret ``timeout`` in
        # seconds, which would make 600000 roughly a week; it may have been
        # meant as milliseconds. Left unchanged - confirm before adjusting.
        self.client = Elasticsearch(hosts=self.index_host, timeout=600000)

    def query_after(self, query_string=None, start=0, i='propagation-after', size=10):
        """Run a search and return one page of results.

        Args:
            query_string: text for a ``query_string`` query; when ``None`` no
                query clause is added to the search.
            start: offset of the first hit to return.
            i: name of the index to search.
            size: number of hits per page (``10`` by default, the value that
                was previously hard-coded).

        Returns:
            ESResponse: ``hits`` holds one ``ESObject`` per returned hit and
            ``count`` holds ``response.hits.total`` as reported by the
            search response.
        """
        s = Search(using=self.client, index=i)
        if query_string is not None:
            s = s.query(Q('query_string', query=query_string))
        # Slicing the Search object selects the requested window of hits.
        s = s[start:start + size]
        response = s.execute()
        hits = [
            ESObject(
                hit.id,
                hit.pid,
                hit.type,
                hit.title,
                # Both abstract fields are optional on a hit; fall back to ''.
                hit.abstract if 'abstract' in hit else '',
                hit.propagated_abstract if 'propagated_abstract' in hit else '',
            )
            for hit in response.hits
        ]
        # NOTE(review): depending on the Elasticsearch / elasticsearch-dsl
        # version, ``hits.total`` may be an object carrying ``value`` and
        # ``relation`` rather than a plain integer; it is passed through
        # unchanged here - confirm what callers expect.
        return ESResponse(hits=hits, count=response.hits.total)