From c13b86c0317e7b280f407a975ad9c9d536dd285b Mon Sep 17 00:00:00 2001 From: tasosgig Date: Wed, 28 Nov 2018 18:55:22 +0200 Subject: [PATCH] Regex support --- .../functionslocal/row/textacknowledgments.py | 328 ++++++++++++++++++ .../configuration/configuration.component.ts | 3 + .../resultspreview.component.css | 6 + .../resultspreview.component.html | 4 +- .../resultspreview.component.ts | 1 + .../settings/settings.component.css | 6 + .../settings/settings.component.html | 27 +- .../settings/settings.component.ts | 7 + .../app/configuration/settings/settings.ts | 1 + .../src/app/contents/contents.component.html | 2 +- .../src/app/contents/contents.component.ts | 8 +- .../manageprofiles.component.ts | 5 + .../src/app/manageprofiles/profile-data.ts | 1 + .../src/app/util.ts | 4 +- .../src/environments/environment.prod.ts | 3 +- .../src/environments/environment.ts | 3 +- .../madoap/src/madserverv3.py | 144 ++++---- .../madoap/src/static/database.db | Bin 65536 -> 65536 bytes 18 files changed, 478 insertions(+), 75 deletions(-) create mode 100755 interactive-mining-3rdparty-madis/madis/src/functionslocal/row/textacknowledgments.py diff --git a/interactive-mining-3rdparty-madis/madis/src/functionslocal/row/textacknowledgments.py b/interactive-mining-3rdparty-madis/madis/src/functionslocal/row/textacknowledgments.py new file mode 100755 index 0000000..2a1aae6 --- /dev/null +++ b/interactive-mining-3rdparty-madis/madis/src/functionslocal/row/textacknowledgments.py @@ -0,0 +1,328 @@ +# coding: utf-8 +import setpath +import re +from lib import porter2 as porter +import functions +import unicodedata +import itertools + +# Increase regular expression cache +try: + re._MAXCACHE = 1000 +except: + pass + +# Every regular expression containing \W \w \D \d \b \S \s needs to be compiled +# like below. 
If you want to embed the UNICODE directive inside the +# regular expression use: +# (?u) like re.sub(ur'(?u)[\W\d]', ' ', o) +delete_numbers_and_non_letters=re.compile(ur'[\W]',re.UNICODE) +delete_non_letters=re.compile(ur'[\W]',re.UNICODE) +delete_word_all=re.compile(ur'\w+\sall',re.UNICODE) +delete_word_all_and_or=re.compile(ur'\w+\sall\s(?:and|or)',re.UNICODE) +text_tokens = re.compile(ur'([\d.]+\b|\w+|\$[\d.]+)', re.UNICODE) +strip_remove_newlines=re.compile(u'(?:\\s+$|^\\s+|(?<=[^\\s\\d\\w.;,!?])\n+)', re.UNICODE) +reduce_spaces=re.compile(ur'\s+', re.UNICODE) +cqlterms=('title', 'subject', 'person', 'enter', 'creator', 'isbn') +replchars = re.compile(r'[\n\r]') + + +def textacknowledgments(txt,span = 10,maxlen = 3,pattern = r'(?:support)|(?:thank)|(?:in part)|(?:research)|(?:\bwork\b)|(?:\bgrants?\b)|(?:project)|(?:science)|(?:fund)|(?:nation)|(?:author)|(?:foundation)|(?:\bprogram\b)|(?:\bhelp\b)|(?:paper)|(?:technolog)|(?:partial)|(?:acknowledg)|(?:provid)|(?:grate)|(?:\bcenter\b)|(?:study)|(?:discuss)|(?:particip)|(?:ministry)|(?:contribut)|(?:european)|(?:number)|(?:valuabl)|(?:education)|(?:council)|(?:award)|(?:contract)|(?:institut)' ): + """ + .. function:: textacknowledgments(text, span = 10, maxlen = 5, pattern = (\b|_)(1|2)\d{3,3}(\b|_)) + + Returns the "Reference" section of documents. To find it, it searches for parts of the document that + have a high density of pattern matches. + + .. parameters:: txt,maxlen,pattern + txt: input text. + span: the size of the string in words that the txt is splited + maxlen: the size of the scrolling window over the text in which the density is calculated. + pattern: regular expression that is matched against the lines of the text. By default the pattern matches + year occurences so as to extract sections that look like references. 
+ + + Examples: + + >>> sql("select textacknowledgments('')") + textacknowledgments('') + ------------------ + + """ + + exp = re.sub('\r\n','\n',txt) + exp = reduce_spaces.sub(' ', strip_remove_newlines.sub('', exp)) + + if exp.count(' ') < span * 10: + return exp + + acknowledgments = [] + origwords = exp.split(' ') + words = exp.lower() + words = words.split(' ') + stemed = [] + # for k in words: + # if len(k) > 0: + # stemed.append(porter.stem(k)) + spanedorigtext = [' '.join(origwords[i:i+span]) for i in range(0, len(origwords), span)] + spanedstemtext = [' '.join(words[i:i+span]) for i in range(0, len(words), span)] + reversedtext = iter(spanedstemtext) + results = [] + densities = [] + + for i in xrange(maxlen/2): + results.append(0) + for i in reversedtext: + count = sum(1 for m in re.finditer(pattern, i)) + if count: + results.append(count) + else: + results.append(0) + + for i in xrange(maxlen/2): + results.append(0) + + #print len(spanedorigtext), len(spanedstemtext), len(results), len(results)-maxlen/2 - maxlen/2 + + out = 0 + temp = 0 + for i in xrange(maxlen/2,len(results)-maxlen/2): + densities.append(sum(results[i-maxlen/2:i-maxlen/2+maxlen])*1.0/maxlen) + + # for cnt, i in enumerate(spanedorigtext): + # print i, results[maxlen/2+cnt], densities[cnt] + + threshold = 1 + + paragraphsum = [] + paragraphs = [] + prev = -10 + current = 0 + maxsum = 0 + maxi = 0 + for line in spanedorigtext: + if densities[current] > threshold: + # new paragraph first visit + if (prev+1) != current: + paragraphsum.append(0) + paragraphs.append([]) + paragraphsum[-1] += results[maxlen/2+current] + paragraphs[-1].append(line) + prev = current + current += 1 + + for cnt, paragraph in enumerate(paragraphs): + if paragraphsum[cnt] > maxsum: + maxsum = paragraphsum[cnt] + maxi = cnt + # print '\n'.join(paragraph), paragraphsum[cnt], '\n' + # print '!!!!!!!!', maxsum, maxi + + paragraphsum.append(0) + paragraphs.append([]) + if paragraphsum[maxi] > 2: + return 
'\n'.join(paragraphs[maxi]) + #return ('\n'.join(paragraphs[maxi]))+" "+str(paragraphsum[maxi]) + else: + return '' + +textacknowledgments.registered=True + + +def textacknowledgmentsstem(txt,span = 10,maxlen = 3,pattern = r'(?:support)|(?:thank)|(?:research)|(?:\bwork\b)|(?:\bgrant\b)|(?:project)|(?:scienc)|(?:\bfund\b)|(?:nation)|(?:author)|(?:foundat)|(?:\bprogram\b)|(?:\bhelp\b)|(?:univers)|(?:paper)|(?:technolog)|(?:partial)|(?:comment)|(?:develop)|(?:acknowledg)|(?:review)|(?:provid)|(?:grate)|(?:\bcenter\b)|(?:studi)|(?:discuss)|(?:particip)|(?:ministri)|(?:contribut)|(?:european)|(?:system)|(?:comput)|(?:number)|(?:valuabl)|(?:educ)|(?:council)|(?:award)|(?:contract)|(?:inform)|(?:institut)' ): + """ + .. function:: textacknowledgmentsstem(text, span = 10, maxlen = 5, pattern = (\b|_)(1|2)\d{3,3}(\b|_)) + + Returns the "Reference" section of documents. To find it, it searches for parts of the document that + have a high density of pattern matches. + + .. parameters:: txt,maxlen,pattern + txt: input text. + span: the size of the string in words that the txt is splited + maxlen: the size of the scrolling window over the text in which the density is calculated. + pattern: regular expression that is matched against the lines of the text. By default the pattern matches + year occurences so as to extract sections that look like references. 
+ + + Examples: + + >>> sql("select textacknowledgmentsstem('')") + textacknowledgmentsstem('') + ------------------ + + """ + + exp = re.sub('\r\n','\n',txt) + exp = reduce_spaces.sub(' ', strip_remove_newlines.sub('', exp)) + + if exp.count(' ') < span * 10: + return exp + + acknowledgments = [] + origwords = exp.split(' ') + words = exp.lower() + words = words.split(' ') + stemed = [] + for k in words: + if len(k) > 0: + try: + stemed.append(porter.stem(k)) + except Exception: + stemed.append(k) + spanedorigtext = [' '.join(origwords[i:i+span]) for i in range(0, len(origwords), span)] + spanedstemtext = [' '.join(stemed[i:i+span]) for i in range(0, len(stemed), span)] + reversedtext = iter(spanedstemtext) + results = [] + densities = [] + + for i in xrange(maxlen/2): + results.append(0) + for i in reversedtext: + count = sum(1 for m in re.finditer(pattern, i)) + if count: + results.append(count) + else: + results.append(0) + + for i in xrange(maxlen/2): + results.append(0) + + for i in xrange(maxlen/2,len(results)-maxlen/2): + densities.append(sum(results[i-maxlen/2:i-maxlen/2+maxlen])*1.0/maxlen) + + threshold = 1 + + current = 0 + for i in spanedorigtext: + if len(i)>10: + if densities[current] > threshold: + acknowledgments.append(i) + current+=1 + return '\n'.join(acknowledgments) + +textacknowledgmentsstem.registered=True + +# without tara: pattern=r'(?:\bthank)|(?:\barticl)|(?:\bpmc\b)|(?:\bsupport)|(?:\bsampl)|(?:\bexpedit)|(?:\bfoundat)|(?:\bresearch)|(?:\bhelp)|(?:\binstitut)|(?:\bmarin)|(?:\bnation)|(?:\backnowledg)|(?:\bcomment)|(?:\bcontribut)|(?:\bfund)|(?:\bgrate)|(?:\bprovid)|(?:\bproject)|(?:\bpossibl)|(?:\bscienc)|(?:author)|(?:grant)|(?:fellowship)|(?:program)|(?:programm)|(?:suggest)|(?:taraexpedit)|(?:université)|(?:valuabl)|(?:without)|(?:pmc articles)|(?:oceans expedition)|(?:oceans consortium)|(?:anonymous reviewers)|(?:article contribution)|(?:environment foundation)|(?:people sponsors)|(?:projects? 
poseidon)|(?:wish thank)|(?:commitment following)|(?:continuous support)|(?:data analysis)|(?:exist without)|(?:tara girus)|(?:keen thank)|(?:oceans taraexpeditions)|(?:possible thanks)|(?:sponsors made)|(?:technical assistance)|(?:thank commitment)|(?:without continuous)' +# with tara: pattern=r'(?:\bthank)|(?:\btara\b)|(?:\barticl)|(?:\bocean\b)|(?:\bpmc\b)|(?:\bsupport)|(?:\bsampl)|(?:\bexpedit)|(?:\bfoundat)|(?:\bresearch)|(?:\bhelp)|(?:\binstitut)|(?:\bmarin)|(?:\bnation)|(?:\backnowledg)|(?:\bcomment)|(?:\bcontribut)|(?:\bfund)|(?:\bgrate)|(?:\bprovid)|(?:\bproject)|(?:\bpossibl)|(?:\bscienc)|(?:author)|(?:grant)|(?:fellowship)|(?:program)|(?:programm)|(?:suggest)|(?:taraexpedit)|(?:université)|(?:valuabl)|(?:without)|(?:pmc articles)|(?:tara oceans)|(?:oceans expedition)|(?:oceans consortium)|(?:anonymous reviewers)|(?:article contribution)|(?:environment foundation)|(?:people sponsors)|(?:projects? poseidon)|(?:wish thank)|(?:commitment following)|(?:continuous support)|(?:data analysis)|(?:exist without)|(?:tara girus)|(?:tara schooner)|(?:keen thank)|(?:oceans taraexpeditions)|(?:possible thanks)|(?:sponsors made)|(?:technical assistance)|(?:thank commitment)|(?:without continuous)' +# with tara v1: pattern=r'(?:\bthank)|(?:\bpmc\b)|(?:\bsupport)|(?:\bsampl)|(?:\bfoundat)|(?:\bresearch)|(?:\bhelp)|(?:\binstitut)|(?:\bnation)|(?:\backnowledg)|(?:\bcomment)|(?:\bcontribut)|(?:\bfund)|(?:\bgrate)|(?:\bprovid)|(?:\bproject)|(?:\bpossibl)|(?:\bscienc)|(?:author)|(?:grant)|(?:fellowship)|(?:program)|(?:suggest)|(?:université)|(?:valuabl)|(?:without)|(?:pmc articles)|(?:oceans consortium)|(?:anonymous reviewers)|(?:article contribution)|(?:environment foundation)|(?:people sponsors)|(?:projects? 
poseidon)|(?:wish thank)|(?:commitment following)|(?:continuous support)|(?:data analysis)|(?:exist without)|(?:keen thank)|(?:possible thanks)|(?:sponsors made)|(?:technical assistance)|(?:thank commitment)|(?:without continuous)' +def textacknowledgmentstara(txt, span=20, maxlen=7, + pattern=r'(?:crew)|(?:research)|(?:acknowledgements)|(?:acknowledge)|(?:acknowledg)|(?:assistance)|(?:commitment of)|(?:comments)|(?:particular(?:ly)?)|(?:fellowships?)|(?:authors?)|(?:program(?:s|mmes?)?)|(?:projects?)|(?:institutes?)|(?:sponsors)|(?:\bthanks?\b)|(?:possible)|(?:\bgrant(?:ed|s)?)|(?:\bsampl(?:e[sd]?|ing))|(?:\bsupport(?:ing)?\b)|(?:foundation)|(?:expedition)|(?:anr-)|(?:\bthis work\b)|(?:\bfunded by\b)|(?:\bthis study\b)|(?:following (?:people|individuals|institutions?|organizations?|sponsors))|(?:contribution (?:no|number))|(?:\bwish thanks?\b)|(?:\b23 institutes\b)|(?:\bgrateful(?: in| to| for|ly))|(?:supported (?:by|in))|(?:continuous support)|(?:exist without)|(?:following people)|(?:without continuous support)|(?:part of the)'): + """ + .. function:: textacknowledgments(text, span = 10, maxlen = 5, pattern = (\b|_)(1|2)\d{3,3}(\b|_)) + + Returns the "Reference" section of documents. To find it, it searches for parts of the document that + have a high density of pattern matches. + + .. parameters:: txt,maxlen,pattern + txt: input text. + span: the size of the string in words that the txt is splited + maxlen: the size of the scrolling window over the text in which the density is calculated. + pattern: regular expression that is matched against the lines of the text. By default the pattern matches + year occurences so as to extract sections that look like references. 
+ + + Examples: + + >>> sql("select textacknowledgments('')") + textacknowledgments('') + ------------------ + + """ + + # clean text from \r\n + exp = re.sub('\r\n', '\n', txt) + # dedublicate spaces + exp = reduce_spaces.sub(' ', strip_remove_newlines.sub('', exp)) + + # if text is small, return it + if exp.count(' ') < span * 10: + return exp + + acknowledgments = [] + origwords = exp.split(' ') + words = exp.lower() + words = words.split(' ') + stemed = [] + # for k in words: + # if len(k) > 0: + # stemed.append(porter.stem(k)) + spanedorigtext = [' '.join(origwords[i:i + span]) for i in range(0, len(origwords), span)] + spanedstemtext = [' '.join(words[i:i + span]) for i in range(0, len(words), span)] + reversedtext = iter(spanedstemtext) + results = [] + densities = [] + + for i in xrange(maxlen / 2): + results.append(0) + for i in reversedtext: + count = sum(1 for m in re.finditer(pattern, i)) + if count: + results.append(count) + else: + results.append(0) + + for i in xrange(maxlen / 2): + results.append(0) + + # print len(spanedorigtext), len(spanedstemtext), len(results), len(results)-maxlen/2 - maxlen/2 + + out = 0 + temp = 0 + for i in xrange(maxlen / 2, len(results) - maxlen / 2): + densities.append(sum(results[i - maxlen / 2:i - maxlen / 2 + maxlen]) * 1.0 / maxlen) + + # for cnt, i in enumerate(spanedorigtext): + # print results[maxlen/2+cnt], densities[cnt], i + + threshold = 1 + + paragraphsum = [] + paragraphs = [] + prev = -10 + current = 0 + maxsum = 0 + maxi = 0 + for line in spanedorigtext: + if densities[current] > threshold: + # new paragraph first visit + if (prev + 1) != current: + paragraphsum.append(0) + paragraphs.append([]) + paragraphsum[-1] += results[maxlen / 2 + current] + paragraphs[-1].append(line) + prev = current + current += 1 + + for cnt, paragraph in enumerate(paragraphs): + if paragraphsum[cnt] > maxsum: + maxsum = paragraphsum[cnt] + maxi = cnt + # print '\n'.join(paragraph), paragraphsum[cnt], '\n' + # print '!!!!!!!!', 
maxsum, maxi + + paragraphsum.append(0) + paragraphs.append([]) + if paragraphsum[maxi] > 2: + return '\n'.join(paragraphs[maxi]) + # return ('\n'.join(paragraphs[maxi]))+" "+str(paragraphsum[maxi]) + else: + return '' + + +textacknowledgmentstara.registered = True + + +if not ('.' in __name__): + """ + This is needed to be able to test the function, put it at the end of every + new function you create + """ + import sys + import setpath + from functions import * + testfunction() + if __name__ == "__main__": + reload(sys) + sys.setdefaultencoding('utf-8') + import doctest + doctest.testmod() diff --git a/interactive-mining-angular-frontend/src/app/configuration/configuration.component.ts b/interactive-mining-angular-frontend/src/app/configuration/configuration.component.ts index 4a333e5..0e1ab5b 100755 --- a/interactive-mining-angular-frontend/src/app/configuration/configuration.component.ts +++ b/interactive-mining-angular-frontend/src/app/configuration/configuration.component.ts @@ -49,6 +49,9 @@ export class ConfigurationComponent implements OnInit, AfterViewInit { if (!localStorage.getItem('stemming') || localStorage.getItem('stemming') === 'undefined') { localStorage.setItem('stemming', '0'); } + if (!localStorage.getItem('documentarea') || localStorage.getItem('documentarea') === 'undefined') { + localStorage.setItem('documentarea', 'full'); + } } promptToLeave(nextUrl: string): boolean { diff --git a/interactive-mining-angular-frontend/src/app/configuration/resultspreview/resultspreview.component.css b/interactive-mining-angular-frontend/src/app/configuration/resultspreview/resultspreview.component.css index e69de29..4ba1a81 100755 --- a/interactive-mining-angular-frontend/src/app/configuration/resultspreview/resultspreview.component.css +++ b/interactive-mining-angular-frontend/src/app/configuration/resultspreview/resultspreview.component.css @@ -0,0 +1,6 @@ +.uk-accordion-title::after { + background-image: 
url(data:image/svg+xml;charset=utf-8,%3Csvg%20width%3D%2220%22%20height%3D%2220%22%20viewBox%3D%220%200%2020%2020%22%20xmlns%3D%22http%3A%2F%2Fwww.w3.org%2F2000%2Fsvg%22%3E%20%3Cpolyline%20fill%3D%22none%22%20stroke%3D%22%23000%22%20stroke-width%3D%221.03%22%20points%3D%2216%207%2010%2013%204%207%22%3E%3C%2Fpolyline%3E%3C%2Fsvg%3E); +} +.uk-open>.uk-accordion-title::after { + background-image: url(data:image/svg+xml;charset=utf-8,%3Csvg%20width%3D%2220%22%20height%3D%2220%22%20viewBox%3D%220%200%2020%2020%22%20xmlns%3D%22http%3A%2F%2Fwww.w3.org%2F2000%2Fsvg%22%3E%20%3Cpolyline%20fill%3D%22none%22%20stroke%3D%22%23000%22%20stroke-width%3D%221.03%22%20points%3D%224%2013%2010%207%2016%2013%22%3E%3C%2Fpolyline%3E%3C%2Fsvg%3E); +} diff --git a/interactive-mining-angular-frontend/src/app/configuration/resultspreview/resultspreview.component.html b/interactive-mining-angular-frontend/src/app/configuration/resultspreview/resultspreview.component.html index c98b835..553f38a 100755 --- a/interactive-mining-angular-frontend/src/app/configuration/resultspreview/resultspreview.component.html +++ b/interactive-mining-angular-frontend/src/app/configuration/resultspreview/resultspreview.component.html @@ -57,10 +57,10 @@
  • -

    {{result.docTitle}}

    +

    document: {{result.docTitle}}

    -
    Match {{match.matchcounter}}: {{match.match}}
    +
    match #{{match.matchcounter}}: {{match.match}}

    {{match.extraprev}} {{match.extranext}}

    diff --git a/interactive-mining-angular-frontend/src/app/configuration/resultspreview/resultspreview.component.ts b/interactive-mining-angular-frontend/src/app/configuration/resultspreview/resultspreview.component.ts index 2b10936..9e7ae69 100755 --- a/interactive-mining-angular-frontend/src/app/configuration/resultspreview/resultspreview.component.ts +++ b/interactive-mining-angular-frontend/src/app/configuration/resultspreview/resultspreview.component.ts @@ -87,6 +87,7 @@ export class ResultspreviewComponent implements OnInit { stopwords: Number.parseInt(localStorage.getItem('stopwords')), lowercase: Number.parseInt(localStorage.getItem('lowercase')), stemming: Number.parseInt(localStorage.getItem('stemming')), + documentarea: localStorage.getItem('documentarea'), }; } diff --git a/interactive-mining-angular-frontend/src/app/configuration/settings/settings.component.css b/interactive-mining-angular-frontend/src/app/configuration/settings/settings.component.css index e69de29..4ba1a81 100755 --- a/interactive-mining-angular-frontend/src/app/configuration/settings/settings.component.css +++ b/interactive-mining-angular-frontend/src/app/configuration/settings/settings.component.css @@ -0,0 +1,6 @@ +.uk-accordion-title::after { + background-image: url(data:image/svg+xml;charset=utf-8,%3Csvg%20width%3D%2220%22%20height%3D%2220%22%20viewBox%3D%220%200%2020%2020%22%20xmlns%3D%22http%3A%2F%2Fwww.w3.org%2F2000%2Fsvg%22%3E%20%3Cpolyline%20fill%3D%22none%22%20stroke%3D%22%23000%22%20stroke-width%3D%221.03%22%20points%3D%2216%207%2010%2013%204%207%22%3E%3C%2Fpolyline%3E%3C%2Fsvg%3E); +} +.uk-open>.uk-accordion-title::after { + background-image: 
url(data:image/svg+xml;charset=utf-8,%3Csvg%20width%3D%2220%22%20height%3D%2220%22%20viewBox%3D%220%200%2020%2020%22%20xmlns%3D%22http%3A%2F%2Fwww.w3.org%2F2000%2Fsvg%22%3E%20%3Cpolyline%20fill%3D%22none%22%20stroke%3D%22%23000%22%20stroke-width%3D%221.03%22%20points%3D%224%2013%2010%207%2016%2013%22%3E%3C%2Fpolyline%3E%3C%2Fsvg%3E); +} diff --git a/interactive-mining-angular-frontend/src/app/configuration/settings/settings.component.html b/interactive-mining-angular-frontend/src/app/configuration/settings/settings.component.html index d1909d0..366a07f 100755 --- a/interactive-mining-angular-frontend/src/app/configuration/settings/settings.component.html +++ b/interactive-mining-angular-frontend/src/app/configuration/settings/settings.component.html @@ -27,7 +27,7 @@ {{positivePhrasesArray.length}} phrase{{positivePhrasesArray.length===1?'':'s'}}
    -

    Add phrases that are very likely to be near a match. You can use different weights to divide between important and less important phrases.

    +

    Add phrases that are very likely to be near a match. You can use different weights to divide between important and less important phrases. A phrase can be a valid regular expression.

    @@ -71,7 +71,7 @@ {{negativePhrasesArray.length}} phrase{{negativePhrasesArray.length===1?'':'s'}}
    -

    Add negative phrases. If these phrases are found around the match, this is possibly a false possitive. You can use different weights to assign importance.

    +

    Add negative phrases. If these phrases are found around the match, this is possibly a false positive. You can use different weights to assign importance. A phrase can be a valid regular expression.

    @@ -135,6 +135,29 @@
  • +
  • +
    + Document section + acknowledgment + citations +
    +
    +

    Select the part of the document that will be processed.

    +
    +
    +
    + +
    +
    + +
    +
    + +
    +
    +
    +
    +
  • Mining area size diff --git a/interactive-mining-angular-frontend/src/app/configuration/settings/settings.component.ts b/interactive-mining-angular-frontend/src/app/configuration/settings/settings.component.ts index 5658fff..6766931 100755 --- a/interactive-mining-angular-frontend/src/app/configuration/settings/settings.component.ts +++ b/interactive-mining-angular-frontend/src/app/configuration/settings/settings.component.ts @@ -68,6 +68,7 @@ export class SettingsComponent implements OnInit { stopwords: Number.parseInt(localStorage.getItem('stopwords')), lowercase: Number.parseInt(localStorage.getItem('lowercase')), stemming: Number.parseInt(localStorage.getItem('stemming')), + documentarea: localStorage.getItem('documentarea'), }; // show positive phrases this.positivePhrasesArray.length = 0; @@ -225,6 +226,11 @@ export class SettingsComponent implements OnInit { this.settings.stemming = value ? 1 : 0; } + documentAreaChange(value: string): void { + localStorage.setItem('documentarea', value); + this.settings.documentarea = value; + } + getSettingsFromLocalStorage(): Settings { return this.settings = { docname: localStorage.getItem('docname'), @@ -239,6 +245,7 @@ export class SettingsComponent implements OnInit { stopwords: Number.parseInt(localStorage.getItem('stopwords')), lowercase: Number.parseInt(localStorage.getItem('lowercase')), stemming: Number.parseInt(localStorage.getItem('stemming')), + documentarea: localStorage.getItem('documentarea') }; } diff --git a/interactive-mining-angular-frontend/src/app/configuration/settings/settings.ts b/interactive-mining-angular-frontend/src/app/configuration/settings/settings.ts index 6f18162..babf185 100755 --- a/interactive-mining-angular-frontend/src/app/configuration/settings/settings.ts +++ b/interactive-mining-angular-frontend/src/app/configuration/settings/settings.ts @@ -11,4 +11,5 @@ export interface Settings { stopwords: number; lowercase: number; stemming: number; + documentarea: string; } diff --git 
a/interactive-mining-angular-frontend/src/app/contents/contents.component.html b/interactive-mining-angular-frontend/src/app/contents/contents.component.html index 82f3a95..e8d9aa5 100755 --- a/interactive-mining-angular-frontend/src/app/contents/contents.component.html +++ b/interactive-mining-angular-frontend/src/app/contents/contents.component.html @@ -2,7 +2,7 @@
    -

    Use the table or the upload form to add your concepts

    +

    Use the table or the upload form to add your concepts

    diff --git a/interactive-mining-angular-frontend/src/app/contents/contents.component.ts b/interactive-mining-angular-frontend/src/app/contents/contents.component.ts index 81e134b..7b3ae02 100755 --- a/interactive-mining-angular-frontend/src/app/contents/contents.component.ts +++ b/interactive-mining-angular-frontend/src/app/contents/contents.component.ts @@ -1,6 +1,6 @@ import {Component, OnInit, ViewChildren} from '@angular/core'; import UIkit from 'uikit'; -import {ActivatedRoute, Router} from '@angular/router'; +import {ActivatedRoute, NavigationEnd, Router} from '@angular/router'; import {Content} from './content'; import {ContentsService} from './contents.service'; @@ -21,6 +21,12 @@ export class ContentComponent implements OnInit { ngOnInit() { this.getContent(); + this.router.events.subscribe((evt) => { + if (!(evt instanceof NavigationEnd)) { + return; + } + window.scrollTo(0, 0); + }); } getContent(): void { diff --git a/interactive-mining-angular-frontend/src/app/manageprofiles/manageprofiles.component.ts b/interactive-mining-angular-frontend/src/app/manageprofiles/manageprofiles.component.ts index 53dd469..cd95d5f 100755 --- a/interactive-mining-angular-frontend/src/app/manageprofiles/manageprofiles.component.ts +++ b/interactive-mining-angular-frontend/src/app/manageprofiles/manageprofiles.component.ts @@ -86,6 +86,7 @@ export class ManageprofilesComponent implements OnInit { localStorage.setItem('stopwords', res.stopwords); localStorage.setItem('lowercase', res.lowercase); localStorage.setItem('stemming', res.stemming); + localStorage.setItem('documentarea', res.documentarea); this.router.navigate(['../upload-content'], {relativeTo: this.route, queryParamsHandling: 'preserve'}); }); } @@ -133,6 +134,7 @@ export class ManageprofilesComponent implements OnInit { localStorage.setItem('stopwords', res.stopwords); localStorage.setItem('lowercase', res.lowercase); localStorage.setItem('stemming', res.stemming); + localStorage.setItem('documentarea', 
res.documentarea); this.router.navigate(['../upload-content'], {relativeTo: this.route, queryParamsHandling: 'preserve'}); }); } @@ -180,6 +182,7 @@ export class ManageprofilesComponent implements OnInit { localStorage.setItem('stopwords', res.stopwords); localStorage.setItem('lowercase', res.lowercase); localStorage.setItem('stemming', res.stemming); + localStorage.setItem('documentarea', res.documentarea); this.router.navigate(['../upload-content'], {relativeTo: this.route, queryParamsHandling: 'preserve'}); }); } @@ -210,6 +213,7 @@ export class ManageprofilesComponent implements OnInit { localStorage.setItem('stopwords', res.stopwords); localStorage.setItem('lowercase', res.lowercase); localStorage.setItem('stemming', res.stemming); + localStorage.setItem('documentarea', res.documentarea); this.router.navigate(['../upload-content'], {relativeTo: this.route, queryParamsHandling: 'preserve'}); }); } @@ -232,6 +236,7 @@ export class ManageprofilesComponent implements OnInit { localStorage.removeItem('stopwords'); localStorage.removeItem('lowercase'); localStorage.removeItem('stemming'); + localStorage.removeItem('documentarea'); } } diff --git a/interactive-mining-angular-frontend/src/app/manageprofiles/profile-data.ts b/interactive-mining-angular-frontend/src/app/manageprofiles/profile-data.ts index 0979ce4..3c11b63 100755 --- a/interactive-mining-angular-frontend/src/app/manageprofiles/profile-data.ts +++ b/interactive-mining-angular-frontend/src/app/manageprofiles/profile-data.ts @@ -12,4 +12,5 @@ export interface ProfileData { stopwords: string; lowercase: string; stemming: string; + documentarea: string; } diff --git a/interactive-mining-angular-frontend/src/app/util.ts b/interactive-mining-angular-frontend/src/app/util.ts index 4c2f4dc..ad462ec 100755 --- a/interactive-mining-angular-frontend/src/app/util.ts +++ b/interactive-mining-angular-frontend/src/app/util.ts @@ -3,6 +3,7 @@ import {HttpErrorResponse} from '@angular/common/http'; import { saveAs } from 
'file-saver/FileSaver'; import { Response } from '@angular/http'; import {ErrorObservable} from 'rxjs/observable/ErrorObservable'; +import { environment } from '../environments/environment'; import {Observable} from 'rxjs'; export class Util { @@ -28,7 +29,8 @@ export class Util { } public getBackendServerAddress(): string { - return localStorage.getItem('mining_backend_address'); + // return localStorage.getItem('mining_backend_address'); + return environment.miningbackendserveraddress; } public getIsCommunityManager(): string { diff --git a/interactive-mining-angular-frontend/src/environments/environment.prod.ts b/interactive-mining-angular-frontend/src/environments/environment.prod.ts index 3612073..a21d54d 100755 --- a/interactive-mining-angular-frontend/src/environments/environment.prod.ts +++ b/interactive-mining-angular-frontend/src/environments/environment.prod.ts @@ -1,3 +1,4 @@ export const environment = { - production: true + production: true, + miningbackendserveraddress: 'https://beta.services.openaire.eu/interactive-mining' }; diff --git a/interactive-mining-angular-frontend/src/environments/environment.ts b/interactive-mining-angular-frontend/src/environments/environment.ts index b7f639a..0171044 100755 --- a/interactive-mining-angular-frontend/src/environments/environment.ts +++ b/interactive-mining-angular-frontend/src/environments/environment.ts @@ -4,5 +4,6 @@ // The list of which env maps to which file can be found in `.angular-cli.json`. 
export const environment = { - production: false + production: false, + miningbackendserveraddress: 'http://localhost:8080' }; diff --git a/interactive-mining-backend/madoap/src/madserverv3.py b/interactive-mining-backend/madoap/src/madserverv3.py index 1c83cd5..2aa00d7 100755 --- a/interactive-mining-backend/madoap/src/madserverv3.py +++ b/interactive-mining-backend/madoap/src/madserverv3.py @@ -387,7 +387,7 @@ class InitialClientHandshakeHandler(BaseHandler): self.set_status(400) self.write("Missing arguement community id.") return - community_id = self.request.arguments['communityId'][0] + community_id = self.request.arguments['communityId'][0][:128] import sys sys.path.append(msettings.MADIS_PATH) import madis @@ -398,17 +398,17 @@ class InitialClientHandshakeHandler(BaseHandler): # get the database cursor cursor=madis.functions.Connection(database_file_name).cursor() # Create database table - cursor.execute("drop table if exists community", parse=False) - cursor.execute("create table community(id)", parse=False) - cursor.execute('INSERT INTO community VALUES("{0}")'.format(community_id), parse=False) - cursor.execute("drop table if exists database", parse=False) - cursor.execute("create table database(id,name,datecreated,status,matches,docname,docsnumber)", parse=False) + cursor.execute('''DROP TABLE IF EXISTS community''', parse=False) + cursor.execute('''CREATE TABLE community(id)''', parse=False) + cursor.execute('''INSERT INTO community VALUES(?)''', (community_id,), parse=False) + cursor.execute('''DROP TABLE IF EXISTS database''', parse=False) + cursor.execute('''CREATE TABLE database(id,name,datecreated,status,matches,docname,docsnumber)''', parse=False) cursor.close() else: cursor=madis.functions.Connection(database_file_name).cursor() - cursor.execute("drop table if exists community", parse=False) - cursor.execute("create table community(id)", parse=False) - cursor.execute('INSERT INTO community VALUES("{0}")'.format(community_id), parse=False) + 
cursor.execute('''DROP TABLE IF EXISTS community''', parse=False) + cursor.execute('''CREATE TABLE community(id)''', parse=False) + cursor.execute('''INSERT INTO community VALUES(?)''', (community_id,), parse=False) cursor.close() else: self.set_status(400) @@ -444,7 +444,8 @@ class GetUsersProfilesHandler(BaseHandler): self.write("Must be an admin") return # list users - users = [re.search('OAMiningProfilesDatabase_([w0-9]+).+', f).group(1) for f in os.listdir('./users_files') if re.match(r'OAMiningProfilesDatabase_[w0-9]+\.db', f)] + users = [re.search('OAMiningProfilesDatabase_([\\w0-9]+).+', f).group(1) for f in os.listdir('./users_files') if re.match(r'OAMiningProfilesDatabase_[\w0-9]+\.db', f)] + print users # for every user, read its database to find his profiles import sys sys.path.append(msettings.MADIS_PATH) @@ -462,11 +463,11 @@ class GetUsersProfilesHandler(BaseHandler): cursor=madis.functions.Connection(database_file_name).cursor() try: # get community id - community_id = [r for r in cursor.execute("SELECT id FROM community")][0] + community_id = [r for r in cursor.execute('''SELECT id FROM community''')][0] except Exception as ints: print ints community_id = 'Unkown '+user - for r in cursor.execute("SELECT id,name,datecreated,status,matches,docname FROM database order by rowid desc"): + for r in cursor.execute('''SELECT id,name,datecreated,status,matches,docname FROM database order by rowid desc'''): users_profiles.append({"user":community_id,"userId":user,"profileId":r[0], "profile": r[1], "datecreated": r[2], "status": r[3], "matches": r[4], "docname": r[5]}) data['profiles'] = users_profiles self.write(json.dumps(data)) @@ -502,8 +503,8 @@ class UpdateProfileStatusHandler(BaseHandler): import sys sys.path.append(msettings.MADIS_PATH) import madis - user = request_arguments['user'] - profile_id = request_arguments['id'] + user = request_arguments['user'][:128] + profile_id = request_arguments['id'][:128] database_file_name = 
"users_files/OAMiningProfilesDatabase_{0}.db".format(user) if not os.path.isfile(database_file_name): self.set_status(400) @@ -512,7 +513,7 @@ class UpdateProfileStatusHandler(BaseHandler): cursor=madis.functions.Connection(database_file_name).cursor() # Write new Profile status to users database status = request_arguments['status'] - cursor.execute('UPDATE database set status="{1}" where id="{0}"'.format(profile_id,status), parse=False) + cursor.execute('''UPDATE database set status=? where id=?''', (profile_id,status,), parse=False) cursor.close() self.write(json.dumps({})) self.finish() @@ -543,7 +544,7 @@ class GetUserProfilesHandler(BaseHandler): self.set_status(400) self.write("Missing user's id parameter") return - user_id = self.request.arguments['user'][0] + user_id = self.request.arguments['user'][0][:128] # extract data from database import sys sys.path.append(msettings.MADIS_PATH) @@ -559,7 +560,7 @@ class GetUserProfilesHandler(BaseHandler): # data to be sent data = {} user_profiles = [] - for r in cursor.execute("SELECT id,name,datecreated,status,matches,docname FROM database order by rowid desc"): + for r in cursor.execute('''SELECT id,name,datecreated,status,matches,docname FROM database order by rowid desc'''): user_profiles.append({"id":r[0], "name": r[1], "datecreated": r[2], "status": r[3], "matches": r[4], "docname": r[5]}) data['profiles'] = user_profiles cursor.close() @@ -593,13 +594,13 @@ class LoadUserProfileHandler(BaseHandler): self.set_status(400) self.write("Missing user's id argument") return - user_id = request_arguments['user'] + user_id = request_arguments['user'][:128] # get data if 'id' not in request_arguments or request_arguments['id'] == '': self.set_status(400) self.write("Missing profiles id argument") return - profile_id = request_arguments['id'] + profile_id = request_arguments['id'][:128] # delete profile from database import sys sys.path.append(msettings.MADIS_PATH) @@ -609,7 +610,7 @@ class 
LoadUserProfileHandler(BaseHandler): # get the database cursor cursor=madis.functions.Connection(database_file_name).cursor() # check if this profile exists - profile_data = [r for r in cursor.execute('select docname,docsnumber from database where id="{0}"'.format(profile_id))] + profile_data = [r for r in cursor.execute('''SELECT docname,docsnumber FROM database WHERE id=?''', (profile_id,))] if len(profile_data) == 0: self.set_status(400) self.write("There is no profile with this name") @@ -658,13 +659,13 @@ class DeleteUserProfileHandler(BaseHandler): self.set_status(400) self.write("Missing user's id argument") return - user_id = request_arguments['user'] + user_id = request_arguments['user'][:128] # get data if 'id' not in request_arguments or request_arguments['id'] == '': self.set_status(400) self.write("Missing profiles id argument") return - profile_id = request_arguments['id'] + profile_id = request_arguments['id'][:128] # delete profile from database import sys sys.path.append(msettings.MADIS_PATH) @@ -674,7 +675,7 @@ class DeleteUserProfileHandler(BaseHandler): # get the database cursor cursor=madis.functions.Connection(database_file_name).cursor() # data to be sent - cursor.execute('delete from database where id="{0}"'.format(profile_id), parse=False) + cursor.execute('''DELETE FROM database WHERE id=?''',(profile_id,), parse=False) cursor.close() # delete profile from disk file_name = "users_files/OAMiningProfile_%s_%s.oamp" % (user_id,profile_id) @@ -745,7 +746,7 @@ class CreateNewProfileHandler(BaseHandler): self.set_status(400) self.write("Missing user's id parameter") return - user_id = self.request.arguments['user'][0] + user_id = self.request.arguments['user'][0][:128] deleteAllUserFiles(user_id) self.write(json.dumps({})) self.finish() @@ -777,13 +778,13 @@ class LoadExampleProfileHandler(BaseHandler): self.set_status(400) self.write("Missing user's id parameter") return - user_id = request_arguments['user'] + user_id = 
request_arguments['user'][:128] # get data if 'name' not in request_arguments or request_arguments['name'] == '': self.set_status(400) self.write("Missing example profiles name parameter") return - example_name = request_arguments['name'] + example_name = request_arguments['name'][:128] # reset everything deleteAllUserFiles(user_id) data = {} @@ -825,9 +826,9 @@ class UploadProfileHandler(BaseHandler): self.set_status(400) self.write("Missing user's id parameter") return - user_id = self.request.arguments['user'][0] + user_id = self.request.arguments['user'][0][:128] # get file info and body from post data - fileinfo = self.request.files['upload'][0] + fileinfo = self.request.files['upload'][0][:128] fname = fileinfo['filename'] extn = os.path.splitext(fname)[1] # must be .pdf or .json @@ -869,7 +870,7 @@ class AlreadyConceptsHandler(BaseHandler): self.set_status(400) self.write("Missing user's id parameter") return - user_id = self.request.arguments['user'][0] + user_id = self.request.arguments['user'][0][:128] data = {} data['data'] = {} file_name = "users_files/p%s.tsv" % (user_id) @@ -917,9 +918,9 @@ class UploadContentFileHandler(BaseHandler): self.set_status(400) self.write("Missing user's id parameter") return - user_id = self.request.arguments['user'][0] + user_id = self.request.arguments['user'][0][:128] # get file info and body from post data - fileinfo = self.request.files['upload'][0] + fileinfo = self.request.files['upload'][0][:128] fname = fileinfo['filename'] extn = os.path.splitext(fname)[1] # must be .pdf or .json @@ -975,7 +976,7 @@ class UpdateConceptsHandler(BaseHandler): self.set_status(400) self.write("Missing user's id argument") return - user_id = request_arguments['user'] + user_id = request_arguments['user'][:128] # get data concepts = json.loads(json.loads(self.request.body)['concepts']) # write data to physical file @@ -1026,7 +1027,7 @@ class GetDocSamplesHandler(BaseHandler): self.set_status(400) self.write("Missing user's id 
parameter") return - user_id = self.request.arguments['user'][0] + user_id = self.request.arguments['user'][0][:128] data = {} doc_samples = [] doc_samples.append({'name': 'Egi', 'documents': 104}) @@ -1066,8 +1067,8 @@ class UploadDocumentsHandler(BaseHandler): self.set_status(400) self.write("Missing user's id parameter") return - user_id = self.request.arguments['user'][0] - fileinfo = self.request.files['upload'][0] + user_id = self.request.arguments['user'][0][:128] + fileinfo = self.request.files['upload'][0][:128] fname = fileinfo['filename'] extn = os.path.splitext(fname)[1] # data to be sent @@ -1151,12 +1152,12 @@ class ChooseDocSampleHandler(BaseHandler): self.set_status(400) self.write("Missing user's id argument") return - user_id = request_arguments['user'] + user_id = request_arguments['user'][:128] if 'docsample' not in request_arguments or request_arguments['docsample'] == '': self.set_status(400) self.write("A doc sample name must be provided") return - doc_sample = request_arguments['docsample'] + doc_sample = request_arguments['docsample'][:128] sample_file_name = "" if doc_sample == "Egi": sample_file_name = "static/egi_sample.tsv" @@ -1218,7 +1219,7 @@ class AlreadyDocumentsHandler(BaseHandler): self.set_status(400) self.write("Missing user's id parameter") return - user_id = self.request.arguments['user'][0] + user_id = self.request.arguments['user'][0][:128] data = {} if msettings.RESET_FIELDS == 1: data['data'] = -1 @@ -1258,7 +1259,7 @@ class RunMiningHandler(BaseHandler): self.set_status(400) self.write("Missing user's id argument") return - user_id = request_arguments['user'] + user_id = request_arguments['user'][:128] mining_parameters = request_arguments['parameters'] # get the database cursor cursor=msettings.Connection.cursor() @@ -1270,7 +1271,7 @@ class RunMiningHandler(BaseHandler): contextprev = 10 contextnext = 5 # Automatically find middle size from grant codes white spaces - querygrantsize = "select max(p1) from (select 
regexpcountwords('\s',stripchars(p1)) as p1 from (setschema 'p1,p2' file 'users_files/p{0}.tsv' dialect:tsv))".format(user_id) + querygrantsize = '''SELECT max(p1) FROM (SELECT regexpcountwords('\s',stripchars(p1)) AS p1 FROM (setschema 'p1,p2' file 'users_files/p{0}.tsv' dialect:tsv))'''.format(user_id) contextmiddle = [r for r in cursor.execute(querygrantsize)][0][0]+1 if 'contextprev' in mining_parameters and mining_parameters['contextprev'] != '': contextprev = int(mining_parameters['contextprev']) @@ -1357,8 +1358,16 @@ class RunMiningHandler(BaseHandler): whr_conf = 'and conf>=0' print conf + # docs proccess if numberOfDocsUploaded(user_id) != 0: - doc_filters = "comprspaces(regexpr('[\n|\r]',d2,' '))" + document_source = 'd2' + if 'documentarea' in mining_parameters and mining_parameters['documentarea'] != '': + print mining_parameters['documentarea'] + if mining_parameters['documentarea'] == 'acknowledgment': + document_source = 'textacknowledgments('+document_source+')' + elif mining_parameters['documentarea'] == 'citations': + document_source = 'textreferences('+document_source+')' + doc_filters = "comprspaces(regexpr('[\n|\r]',"+document_source+",' '))" grant_filters = "stripchars(comprspaces(regexpr(\"\\'\", p1,'')))" ackn_filters = "comprspaces(regexpr(\"\\'\", p2,''))" if 'punctuation' in mining_parameters and mining_parameters['punctuation'] == 1: @@ -1380,9 +1389,9 @@ class RunMiningHandler(BaseHandler): list(cursor.execute("drop table if exists grantstemp"+user_id, parse=False)) query_pre_grants = "create temp table grantstemp{0} as select {1} as gt1, case when p2 is null then null else {2} end as gt2 from (setschema 'p1,p2' file 'users_files/p{0}.tsv' dialect:tsv)".format(user_id, grant_filters, ackn_filters) cursor.execute(query_pre_grants) - query00get = "select * from grantstemp{0}".format(user_id) - results00get = [r for r in cursor.execute(query00get)] - print results00get + # query00get = "select * from grantstemp{0}".format(user_id) + # 
results00get = [r for r in cursor.execute(query00get)] + # print results00get list(cursor.execute("drop table if exists docs"+user_id, parse=False)) query1 = "create temp table docs{0} as select d1, {1} as d2 from (setschema 'd1,d2' select jsonpath(c1, '$.id', '$.text') from (file 'users_files/docs{0}.json'))".format(user_id, doc_filters) cursor.execute(query1) @@ -1391,6 +1400,7 @@ class RunMiningHandler(BaseHandler): self.write("You have to provide atleast 1 document...") return + # grants proccess list(cursor.execute("drop table if exists grants"+user_id, parse=False)) # string concatenation workaround because of the special characters conflicts if 'wordssplitnum' in mining_parameters and mining_parameters['wordssplitnum'] != '': @@ -1465,7 +1475,7 @@ class PrepareSavedProfileHandler(BaseHandler): self.set_status(400) self.write("Missing user's id argument") return - user_id = request_arguments['user'] + user_id = request_arguments['user'][:128] profile_parameters = request_arguments['parameters'] import sys sys.path.append(msettings.MADIS_PATH) @@ -1475,21 +1485,21 @@ class PrepareSavedProfileHandler(BaseHandler): profile_file_name = "users_files/OAMiningProfile_{0}.oamp".format(user_id) cursor=madis.functions.Connection(profile_file_name).cursor() # Create poswords table - cursor.execute("drop table if exists poswords", parse=False) - cursor.execute("create table poswords(c1,c2)", parse=False) + cursor.execute('''DROP TABLE IF EXISTS poswords''', parse=False) + cursor.execute('''CREATE TABLE poswords(c1,c2)''', parse=False) # Create negwords table - cursor.execute("drop table if exists negwords", parse=False) - cursor.execute("create table negwords(c1,c2)", parse=False) + cursor.execute('''DROP TABLE IF EXISTS negwords''', parse=False) + cursor.execute('''CREATE TABLE negwords(c1,c2)''', parse=False) # Create filters table - cursor.execute("drop table if exists filters", parse=False) - cursor.execute("create table filters(c1,c2)", parse=False) + 
cursor.execute('''DROP TABLE IF EXISTS filters''', parse=False) + cursor.execute('''CREATE TABLE filters(c1,c2)''', parse=False) # Create grants table - cursor.execute("drop table if exists grants", parse=False) - cursor.execute("create table grants(c1,c2)", parse=False) + cursor.execute('''DROP TABLE IF EXISTS grants''', parse=False) + cursor.execute('''CREATE TABLE grants(c1,c2)''', parse=False) if 'poswords' in profile_parameters and profile_parameters['poswords'] != '{}': # construct math string for positive words matching calculation with weights pos_words = json.loads(profile_parameters['poswords']) - cursor.executemany("insert into poswords(c1,c2) values(?,?)", + cursor.executemany('''INSERT INTO poswords(c1,c2) VALUES(?,?)''', ( (key, value,) for key, value in pos_words.iteritems() ) @@ -1497,7 +1507,7 @@ class PrepareSavedProfileHandler(BaseHandler): if 'negwords' in profile_parameters and profile_parameters['negwords'] != '{}': # construct math string for negative words matching calculation with weights neg_words = json.loads(profile_parameters['negwords']) - cursor.executemany("insert into negwords(c1,c2) values(?,?)", + cursor.executemany('''INSERT INTO negwords(c1,c2) VALUES(?,?)''', ( (key, value,) for key, value in neg_words.iteritems() ) @@ -1517,13 +1527,16 @@ class PrepareSavedProfileHandler(BaseHandler): filters['punctuation'] = profile_parameters['punctuation'] if 'stemming' in profile_parameters and profile_parameters['stemming'] != '': filters['stemming'] = profile_parameters['stemming'] - cursor.executemany("insert into filters(c1,c2) values(?,?)", + if 'documentarea' in profile_parameters and profile_parameters['documentarea'] != '': + filters['documentarea'] = profile_parameters['documentarea'] + cursor.executemany('''INSERT INTO filters(c1,c2) VALUES(?,?)''', ( (key, value,) for key, value in filters.iteritems() ) ) if numberOfGrantsUploaded(user_id, request_arguments['concepts']) != 0: - cursor.execute("insert into grants select 
stripchars(c1) as c1, stripchars(c2) as c2 from (file 'users_files/p{0}.tsv')".format(user_id)) + # cursor.execute('''VAR 'currprofile' VALUES(?)''', ('users_files/p{0}.tsv'.format(user_id),)) + cursor.execute('''INSERT INTO grants SELECT stripchars(c1) as c1, stripchars(c2) as c2 FROM (file 'users_files/p{0}.tsv')'''.format(user_id)) cursor.close() data = {} @@ -1559,11 +1572,11 @@ class SaveProfileToDatabaseHandler(BaseHandler): self.set_status(400) self.write("Missing user's id argument") return - user_id = request_arguments['user'] + user_id = request_arguments['user'][:128] # get data - profile_id = request_arguments['id'] - profile_name = request_arguments['name'] - doc_name = request_arguments['docname'] + profile_id = request_arguments['id'][:128] + profile_name = request_arguments['name'][:128] + doc_name = request_arguments['docname'][:128] docs_number = request_arguments['docsnumber'] # copy profile file to a unique user profile file profile_file_name = "users_files/OAMiningProfile_{0}.oamp".format(user_id) @@ -1590,10 +1603,9 @@ class SaveProfileToDatabaseHandler(BaseHandler): cursor=madis.functions.Connection(database_file_name).cursor() user_profiles = [] if old_profile: - query = 'UPDATE database set datecreated="{2}", status="{3}", matches="{4}", docname="{5}", docsnumber="{6}" where id="{0}"'.format(profile_id,profile_name,datetime.date.today().strftime("%B %d %Y"),"Ready","8/8",doc_name,docs_number) + cursor.execute('''UPDATE database SET datecreated=?, status=?, matches=?, docname=?, docsnumber=? 
WHERE id=?''', (datetime.date.today().strftime("%B %d %Y"),"Ready","8/8",doc_name,docs_number,profile_id), parse=False) else: - query = 'INSERT INTO database VALUES("{0}","{1}","{2}","{3}","{4}","{5}","{6}")'.format(profile_id,profile_name,datetime.date.today().strftime("%B %d %Y"),"Saved","8/8",doc_name,docs_number) - cursor.execute(query, parse=False) + cursor.execute('''INSERT INTO database VALUES(?,?,?,?,?,?,?)''', (profile_id,profile_name,datetime.date.today().strftime("%B %d %Y"),"Saved","8/8",doc_name,docs_number,), parse=False) cursor.close() self.write(json.dumps({})) self.finish() @@ -1625,8 +1637,8 @@ class DownloadProfileHandler(BaseHandler): self.set_status(400) self.write("Missing user's id argument") return - user_id = request_arguments['user'] - profile_id = request_arguments['id'] + user_id = request_arguments['user'][:128] + profile_id = request_arguments['id'][:128] unique_profile_file_name = "users_files/OAMiningProfile_{0}_{1}.oamp".format(user_id,profile_id) buf_size = 4096 self.set_header('Content-Type', 'application/octet-stream') diff --git a/interactive-mining-backend/madoap/src/static/database.db b/interactive-mining-backend/madoap/src/static/database.db index c23b22eb71252f8dbdb9be63b6066af6d8097a95..efb3ed1d861a3fd6e27d1080f84f895a999e1c94 100755 GIT binary patch delta 19 acmZo@U}