Merge pull request #1 from sosguns2002/master

Suggested statements support with parameters
This commit is contained in:
sosguns2002 2017-06-04 16:10:50 +03:00 committed by GitHub
commit 8ebe2b9f32
3 changed files with 61 additions and 8 deletions

View File

@ -100,7 +100,7 @@ def auth_callback(request, realm, username, password):
def numberOfGrantsUploaded(user_id, cookie_set):
if cookie_set and user_id:
file_name = "/tmp/p%s.csv" % (user_id)
file_name = "/tmp/p%s.tsv" % (user_id)
if os.path.isfile(file_name):
num_lines = sum(1 for line in open(file_name))
if str(num_lines) == cookie_set:
@ -371,10 +371,40 @@ class madAppQueryGenerator(BaseHandler):
# get the database cursor
cursor=msettings.Connection.cursor()
# clean grants and acknowledgments
ackn_filters = "regexpr(\"\\'\", c2,'')"
if 'ackn-keywords' in self.request.arguments and self.request.arguments['ackn-keywords'][0] in trueset:
ackn_filters = 'keywords('+ackn_filters+')'
if 'ackn-lowercase' in self.request.arguments and self.request.arguments['ackn-lowercase'][0] in trueset:
ackn_filters = 'lower('+ackn_filters+')'
if 'ackn-stopwords' in self.request.arguments and self.request.arguments['ackn-stopwords'][0] in trueset:
ackn_filters = 'filterstopwords('+ackn_filters+')'
list(cursor.execute("drop table if exists grantstemp"+user_id, parse=False))
query_pre_grants = "create temp table grantstemp{0} as select stripchars(c1) as c1, case when c2 is null then null else {1} end as c2 from (setschema 'c1,c2' file '/tmp/p{0}.tsv' dialect:tsv)".format(user_id, ackn_filters)
cursor.execute(query_pre_grants)
# create temp table with grants
# select c1, jmergeregexp(jgroup('(\b\s)'||middle||'(\b\s)')) from (select c1,textwindow2s(regexpr("([\-\[\]\/\{\}\(\)\*\+\?\.\\\^\$\|])", regexpr("\'",c2,""),'\\\1'),0,2,0) from (setschema 'c1,c2' file '/tmp/p{1}.csv' dialect:tsv) where c1 or c1!='') group by c1;
# select c1, jmergeregexp(jgroup(case when middle=" " then '(?!.*)' else '(\b\s)'||middle||'(\b\s)' end)) as c2 from (select c1,textwindow2s(regexpr("([\-\[\]\/\{\}\(\)\*\+\?\.\\\^\$\|])", regexpr("\'",case when c2 is null then " " else c2 end,""),'\\\1'),0,2,0) from (setschema 'c1,c2' file 'fp7grants.csv' dialect:tsv) where c1 or c1!='') group by c1;
# select c1, jmergeregexp(jgroup('(\b\s)'||middle||'(\b\s)')) as c2 from (setschema 'c1,prev,middle,next' select c1,textwindow2s(regexpr("([\-\[\]\/\{\}\(\)\*\+\?\.\\\^\$\|])", regexpr("\'",c2,""),'\\\1'),0,2,0) from grantstemp{0} where (c1 or c1!='') and c2 not null) group by c1 union all select distinct c1, "(?!.*)" as c2 from grantstemp{0} where (c1 or c1!='') and c2 is null;
list(cursor.execute("drop table if exists grants"+user_id, parse=False))
query0 = "create temp table grants{0} as select stripchars(c1) as c3 from (file '/tmp/p{1}.csv')".format(user_id, user_id)
# string concatenation workaround because of the special characters conflicts
if 'word-split' in self.request.arguments and self.request.arguments['word-split'][0] != '':
words_split = int(self.request.arguments['word-split'][0])
if 0 < words_split and words_split <= 10:
acknowledgment_split = r'textwindow2s(regexpr("([\-\[\]\/\{\}\(\)\*\+\?\.\\\^\$\|])", c2, "\\\1"),0,'+str(words_split)+r',0)'
else:
acknowledgment_split = r'"prev" as prev, regexpr("([\-\[\]\/\{\}\(\)\*\+\?\.\\\^\$\|])", c2, "\\\1") as middle, "next" as next'
query0 = r"create temp table grants"+user_id+r' as select c1 as c3, jmergeregexp(jgroup("(?<=[\s\b])"||middle||"(?=[\s\b])")) as c4 from '+r"(setschema 'c1,prev,middle,next' select c1, "+acknowledgment_split+r' from grantstemp'+user_id+r' where (c1 or c1!="") and c2 not null) group by c1 union all select distinct c1 as c3, "(?!.*)" as c4 from grantstemp'+user_id+r" where (c1 or c1!='') and c2 is null"
cursor.execute(query0)
# for r in cursor.execute("select * from grants"+user_id):
# print r[0], r[1]
# create temp table with grants
# list(cursor.execute("drop table if exists grants"+user_id, parse=False))
# query0 = "create temp table grants{0} as select stripchars(c1) as c3, c2 as c4 from (setschema 'c1,c2' file '/tmp/p{1}.csv' dialect:tsv) where c1 or c1!=''".format(user_id, user_id)
# cursor.execute(query0)
if 'document' in self.request.arguments and self.request.arguments['document'][0] != '':
doc_filters = "regexpr('[\n|\r]',?,' ')"
if 'keywords' in self.request.arguments and self.request.arguments['keywords'][0] in trueset:
@ -386,7 +416,8 @@ class madAppQueryGenerator(BaseHandler):
list(cursor.execute("select var('doc"+user_id+"', "+doc_filters+")", (doc,), parse=False))
#print 'query', [r for r in cursor.execute("select middle, j2s(prev,middle,next) %s from (select textwindow2s(var('doc%s'),10,1,5))" % (conf, user_id))]
query1 = "select c1 {0} from (select textwindow2s(var('doc{1}'),10,1,5)), (cache select stripchars(c1) as c1 from (file '/tmp/p{2}.csv')) T where middle = T.c1 {3}".format(conf, user_id, user_id, whr_conf)
query1 = "select c1, max(confidence) as confidence from (select c1, regexpcountuniquematches(c2, j2s(prev,middle,next)) as confidence {0} from (select textwindow2s(var('doc{1}'),10,1,5)), (cache select c3 as c1, c4 as c2 from grants{1}) T where middle = T.c1 {2}) group by c1".format(conf, user_id, whr_conf)
# query1 = "select c1, regexpcountuniquematches(c2, j2s(prev,middle,next)) as confidence {0} from (select textwindow2s(var('doc{1}'),10,1,5)), (cache select c3 as c1, c4 as c2 from grants{1}) T where middle = T.c1 {2}".format(conf, user_id, whr_conf)
data['funding_info'] = [{"code": r[0]} for r in cursor.execute(query1)]
elif numberOfDocsUploaded(user_id) != 0:
doc_filters = "regexpr('[\n|\r]',c2,' ')"
@ -397,10 +428,11 @@ class madAppQueryGenerator(BaseHandler):
if 'stopwords' in self.request.arguments and self.request.arguments['stopwords'][0] in trueset:
doc_filters = 'filterstopwords('+doc_filters+')'
list(cursor.execute("drop table if exists docs"+user_id, parse=False))
query1 = "create temp table docs{0} as select c1, {1} as c2 from (setschema 'c1,c2' select jsonpath(c1, '$.id', '$.text') from (file '/tmp/docs{2}.json'))".format(user_id, doc_filters, user_id)
query1 = "create temp table docs{0} as select c1, {1} as c2 from (setschema 'c1,c2' select jsonpath(c1, '$.id', '$.text') from (file '/tmp/docs{0}.json'))".format(user_id, doc_filters)
cursor.execute(query1)
query2 = "select c1, c3 {0} from (select c1, textwindow2s(c2,10,1,5) from (select * from docs{1})), (select c3 from grants{2}) T where middle = T.c3 {3}".format(conf, user_id, user_id, whr_conf)
query2 = "select c1, c3, max(confidence) as confidence from (select c1, c3, regexpcountuniquematches(c4, j2s(prev,middle,next)) as confidence {0} from (select c1, textwindow2s(c2,10,1,5) from (select * from docs{1})), (select c3, c4 from grants{1}) T where middle = T.c3 {2}) group by c1".format(conf, user_id, whr_conf)
# query2 = "select c1, c3 {0} from (select c1, textwindow2s(c2,10,1,5) from (select * from docs{1})), (select c3 from grants{1}) T where middle = T.c3 {2}".format(conf, user_id, whr_conf)
results = [r for r in cursor.execute(query2)]
data['funding_info'] = [{"code": r[1]} for r in results]
@ -575,7 +607,7 @@ class importingControllerHandler(BaseHandler):
user_id = self.get_secure_cookie('madgikmining')
if user_id is None:
return
csv_file_name = "/tmp/p{0}.csv".format(user_id)
csv_file_name = "/tmp/p{0}.tsv".format(user_id)
csv_file = open(csv_file_name, 'w')
body_split = [l.strip() for l in StringIO.StringIO(self.request.body).readlines()]

View File

@ -11,7 +11,8 @@ h4 {
.file-upload-wrapper,
.list-wrapper,
.filters-wrapper {
.filters-wrapper,
.acknowledgement-wrapper {
margin-right: 30px;
margin-bottom: 30px;
background-color: #f5f5f5;
@ -30,7 +31,8 @@ h4 {
width: 400px;
}
.filters-wrapper {
.filters-wrapper,
.acknowledgement-wrapper {
max-width: 850px;
min-width: 500px;
}

View File

@ -161,6 +161,25 @@
<input type="checkbox" name="keywords" id="keywords-filter" checked>
<label for="keywords-filter">Keywords</label>
</div>
<div class="acknowledgement-wrapper">
<h2>Select the <b>configuration</b> for the suggested statements.</h2>
<h4>Choose how many word-pairs each suggested statement will be split into. (Use 0 to not split the suggested statements).</h4>
<input type="number" name="word-split" min="0" max="10" id="word-split" placeholder="Word pairs" value="0"/>
<h4>Remove stop words from the suggested statements.</h4>
<input type="checkbox" name="ackn-stopwords" id="ackn-stop-words-filter" checked>
<label for="ackn-stop-words-filter">Stop Words</label>
<h4>Normalize suggested statements to lower case.</h4>
<input type="checkbox" name="ackn-lowercase" id="ackn-lowercase-filter" checked>
<label for="ackn-lowercase-filter">Lowercase</label>
<h4>Split the suggested statements into words on intra-word delimiters (all non alpha-numeric characters).</h4>
<input type="checkbox" name="ackn-keywords" id="ackn-keywords-filter" checked>
<label for="ackn-keywords-filter">Keywords</label>
</div>
<!--end filters-wrapper-->
<input id="docs-file-uploaded" type="checkbox" name="docsfileuploaded" hidden>