Final Deliverable
This commit is contained in:
parent
efa56a2bb2
commit
009cb1e368
|
@ -50,8 +50,8 @@
|
|||
<div id="cm-run-test-section">
|
||||
<div class="uk-display-inline">
|
||||
<button id="run-mining-btn" class="uk-button cm-run-test-button" [disabled]="documentsLoaded<1" (click)="runMining()">Run rules test</button>
|
||||
<span *ngIf="matches_number>0" id="results-number" class="cm-coloured-text uk-margin-left uk-text-bottom">{{matches_number}} matche{{matches_number===1?'':'s'}} found</span>
|
||||
<span *ngIf="prev_matches_number>0" id="results-number-previous" class="cm-text-muted uk-text-bottom">, {{prev_matches_number}} matche{{prev_matches_number===1?'':'s'}} found previously</span>
|
||||
<span *ngIf="matches_number !== ''" id="results-number" class="cm-coloured-text uk-margin-left uk-text-bottom">{{matches_number}} matche{{matches_number===1?'':'s'}} found</span>
|
||||
<span *ngIf="prev_matches_number !== ''" id="results-number-previous" class="cm-text-muted uk-text-bottom">, {{prev_matches_number}} matche{{prev_matches_number===1?'':'s'}} found previously</span>
|
||||
</div>
|
||||
</div>
|
||||
<div id="results-section" class="cm-results-rows">
|
||||
|
|
|
@ -152,7 +152,6 @@ export class ResultspreviewComponent implements OnInit {
|
|||
animation: "uk-animation-slide-top",
|
||||
bottom: ".cm-results-section"
|
||||
});
|
||||
console.log(res.matches.length);
|
||||
this.resultsArray.length = 0;
|
||||
let matchcounter = 0;
|
||||
for (let title in res.matches) {
|
||||
|
@ -169,17 +168,17 @@ export class ResultspreviewComponent implements OnInit {
|
|||
match.extranext = values.extranext;
|
||||
let context = values.prev + ' ' + values.middle + ' ' + values.next;
|
||||
// highlight positive words
|
||||
for (let posword of JSON.parse(localStorage.getItem('poswords'))) {
|
||||
for (let posword in JSON.parse(localStorage.getItem('poswords'))) {
|
||||
const search_regexp = new RegExp(posword, 'g');
|
||||
context = context.replace(search_regexp, '<span class="positive">' + posword + '</span>');
|
||||
}
|
||||
// highlight acknowledgment keywords
|
||||
for (let ackn of values.acknmatch) {
|
||||
const search_regexp = new RegExp(ackn, 'g');
|
||||
const search_regexp = new RegExp(ackn.replace(/[\-\[\]\/\{\}\(\)\*\+\?\.\\\^\$\|]/g, '\\$&'), 'g');
|
||||
context = context.replace(search_regexp, '<span class="positive">' + ackn + '</span>');
|
||||
}
|
||||
// highlight negative words
|
||||
for (let negword of JSON.parse(localStorage.getItem('negwords'))) {
|
||||
for (let negword in JSON.parse(localStorage.getItem('negwords'))) {
|
||||
const search_regexp = new RegExp(negword, 'g');
|
||||
context = context.replace(search_regexp, '<span class="negative">' + negword + '</span>');
|
||||
}
|
||||
|
@ -189,10 +188,10 @@ export class ResultspreviewComponent implements OnInit {
|
|||
matchesArray.push(match);
|
||||
}
|
||||
this.resultsArray.push(resultClass);
|
||||
this.prev_matches_number = this.matches_number;
|
||||
this.matches_number = matchcounter + '';
|
||||
}
|
||||
}
|
||||
this.prev_matches_number = this.matches_number;
|
||||
this.matches_number = matchcounter + '';
|
||||
});
|
||||
}
|
||||
}
|
||||
|
|
|
@ -111,30 +111,6 @@
|
|||
</div>
|
||||
</div>
|
||||
</li>
|
||||
<li>
|
||||
<div class="uk-accordion-title">
|
||||
<span class="uk-text-bold uk-text-uppercase">Mining area size</span>
|
||||
<span class="uk-text-small uk-margin-small-left">before: {{settings.contextprev}}</span>
|
||||
<span class="uk-text-small uk-margin-small-left">after: {{settings.contextnext}}</span>
|
||||
</div>
|
||||
<div class="uk-accordion-content">
|
||||
<p class="uk-text-small">You may edit the length of the text area that the algorithm uses to decide if a match is a true positive.<br>This is mainly for advanced users, so you ‘ll rarely need to change these values.</p>
|
||||
<form class="">
|
||||
<div class="cm-match-area left">
|
||||
<label class="uk-form-label" for="context-prev-words">Number of words before the match</label>
|
||||
<div class="uk-form-controls">
|
||||
<input class="uk-input" type="number" name="context-prev-words" min="0" max="20" id="context-prev-words" placeholder="Before match, words" value="10" [value]="settings.contextprev" (change)="contextprevChange($event.target.value)"/>
|
||||
</div>
|
||||
</div>
|
||||
<div class="cm-match-area right cm-margin-medium-bottom">
|
||||
<label class="uk-form-label" for="context-next-words">Number of words after the match</label>
|
||||
<div class="uk-form-controls">
|
||||
<input class="uk-input" type="number" name="context-next-words" min="0" max="20" id="context-next-words" placeholder="After match, words" value="5" [value]="settings.contextnext" (change)="contextnextChange($event.target.value)"/>
|
||||
</div>
|
||||
</div>
|
||||
</form>
|
||||
</div>
|
||||
</li>
|
||||
<li>
|
||||
<div class="uk-accordion-title">
|
||||
<span class="uk-text-bold uk-text-uppercase">Text preprocessing</span>
|
||||
|
@ -159,6 +135,30 @@
|
|||
</form>
|
||||
</div>
|
||||
</li>
|
||||
<li>
|
||||
<div class="uk-accordion-title">
|
||||
<span class="uk-text-bold uk-text-uppercase">Mining area size</span>
|
||||
<span *ngIf="settings.contextprev !== 10" class="uk-text-small uk-margin-small-left">before: {{settings.contextprev}}</span>
|
||||
<span *ngIf="settings.contextnext !== 20" class="uk-text-small uk-margin-small-left">after: {{settings.contextnext}}</span>
|
||||
</div>
|
||||
<div class="uk-accordion-content">
|
||||
<p class="uk-text-small">You may edit the length of the text area that the algorithm uses to decide if a match is a true positive.<br><span class="uk-text-danger">This is mainly for advanced users, so you ‘ll rarely need to change these values.</span></p>
|
||||
<form class="">
|
||||
<div class="cm-match-area left">
|
||||
<label class="uk-form-label" for="context-prev-words">Number of words before the match</label>
|
||||
<div class="uk-form-controls">
|
||||
<input class="uk-input" type="number" name="context-prev-words" min="0" max="20" id="context-prev-words" placeholder="Before match, words" value="10" [value]="settings.contextprev" (change)="contextprevChange($event.target.value)"/>
|
||||
</div>
|
||||
</div>
|
||||
<div class="cm-match-area right cm-margin-medium-bottom">
|
||||
<label class="uk-form-label" for="context-next-words">Number of words after the match</label>
|
||||
<div class="uk-form-controls">
|
||||
<input class="uk-input" type="number" name="context-next-words" min="0" max="20" id="context-next-words" placeholder="After match, words" value="5" [value]="settings.contextnext" (change)="contextnextChange($event.target.value)"/>
|
||||
</div>
|
||||
</div>
|
||||
</form>
|
||||
</div>
|
||||
</li>
|
||||
</ul>
|
||||
</div>
|
||||
</div>
|
||||
|
|
|
@ -21,9 +21,9 @@ export class SettingsComponent implements OnInit {
|
|||
max: 20
|
||||
},
|
||||
pips: {
|
||||
mode: 'count',
|
||||
mode: 'positions',
|
||||
values: [0, 45, 100],
|
||||
density: 5,
|
||||
values: 5,
|
||||
stepped: true
|
||||
}
|
||||
};
|
||||
|
|
|
@ -113,7 +113,7 @@
|
|||
color: var(--cm-theme-1);
|
||||
box-shadow: 0 6px 50px rgba(0,0,0,0.08);
|
||||
}
|
||||
.cm-button-primary {
|
||||
.cm-button-primary:not([disabled]) {
|
||||
background-color: var(--cm-theme-6);
|
||||
color: var(--cm-theme-12);
|
||||
}
|
||||
|
@ -132,12 +132,15 @@
|
|||
background-color: var(--cm-theme-7);
|
||||
color: var(--cm-theme-12);
|
||||
}
|
||||
.cm-button-examples:hover {
|
||||
background-color: #85c8e2;
|
||||
}
|
||||
.cm-run-test-button:not([disabled]) {
|
||||
background-color: var(--cm-theme-1);
|
||||
background-color: #8B9FA6;
|
||||
color: var(--cm-theme-12);
|
||||
}
|
||||
.cm-run-test-button:hover:not([disabled]) {
|
||||
background-color: #222;
|
||||
background-color: #667880;
|
||||
color: var(--cm-theme-12);
|
||||
box-shadow: 0 6px 50px rgba(0,0,0,0.08);
|
||||
}
|
||||
|
@ -893,7 +896,7 @@ html:not([dir=rtl]) .noUi-horizontal .noUi-handle {
|
|||
/* Custom Results Section
|
||||
========================================================================== */
|
||||
.cm-results-section {
|
||||
background-color: #f5f5f5;
|
||||
background-color: #fafafa;
|
||||
min-height: 686px;
|
||||
position: relative;
|
||||
border: 1px dashed var(--cm-theme-4);
|
||||
|
@ -1008,10 +1011,11 @@ header.uk-sticky-fixed .cm-results-count-section {
|
|||
cursor: default;
|
||||
position: relative;
|
||||
padding: 0px 35px 0px 25px;
|
||||
color: var(--cm-theme-1);
|
||||
background-color: var(--cm-theme-12);
|
||||
color: var(--cm-theme-1) !important;;
|
||||
background-color: transparent !important;
|
||||
border: 2px solid var(--cm-theme-7);
|
||||
margin: -2px 0px 0px 0px;
|
||||
border-radius: 2px;
|
||||
}
|
||||
.cm-doc-selected span {
|
||||
position: absolute;
|
||||
|
|
|
@ -592,10 +592,10 @@ class GetExampleProfilesHandler(BaseHandler):
|
|||
try:
|
||||
data = {}
|
||||
example_profiles = []
|
||||
example_profiles.append({'name': 'Egi', 'contents': 25, 'documents': 104})
|
||||
example_profiles.append({'name': 'Fbi', 'contents': 66, 'documents': 1023})
|
||||
example_profiles.append({'name': 'NSF', 'contents': 263, 'documents': 140})
|
||||
example_profiles.append({'name': 'Swiss', 'contents': 4, 'documents': 502})
|
||||
example_profiles.append({'name': 'Communities', 'contents': 25, 'documents': 104})
|
||||
example_profiles.append({'name': 'AOF', 'contents': 66, 'documents': 1023})
|
||||
example_profiles.append({'name': 'RCUK', 'contents': 263, 'documents': 140})
|
||||
example_profiles.append({'name': 'TARA', 'contents': 4, 'documents': 502})
|
||||
data['profiles'] = example_profiles
|
||||
self.write(json.dumps(data))
|
||||
self.finish()
|
||||
|
@ -899,21 +899,12 @@ class GetDocSamplesHandler(BaseHandler):
|
|||
data = {}
|
||||
doc_samples = []
|
||||
doc_samples.append({'name': 'Egi', 'documents': 104})
|
||||
doc_samples.append({'name': 'Fbi', 'documents': 1023})
|
||||
doc_samples.append({'name': 'NSF', 'documents': 140})
|
||||
doc_samples.append({'name': 'Swiss', 'documents': 502})
|
||||
doc_samples.append({'name': 'Egi', 'documents': 104})
|
||||
doc_samples.append({'name': 'Fbi', 'documents': 1023})
|
||||
doc_samples.append({'name': 'NSF', 'documents': 140})
|
||||
doc_samples.append({'name': 'Swiss', 'documents': 502})
|
||||
doc_samples.append({'name': 'Egi', 'documents': 104})
|
||||
doc_samples.append({'name': 'Fbi', 'documents': 1023})
|
||||
doc_samples.append({'name': 'NSF', 'documents': 140})
|
||||
doc_samples.append({'name': 'Swiss', 'documents': 502})
|
||||
doc_samples.append({'name': 'Egi', 'documents': 104})
|
||||
doc_samples.append({'name': 'Fbi', 'documents': 1023})
|
||||
doc_samples.append({'name': 'NSF', 'documents': 140})
|
||||
doc_samples.append({'name': 'Swiss', 'documents': 502})
|
||||
doc_samples.append({'name': 'AOF', 'documents': 1023})
|
||||
doc_samples.append({'name': 'SNSF', 'documents': 140})
|
||||
doc_samples.append({'name': 'ARIADNE', 'documents': 502})
|
||||
doc_samples.append({'name': 'RCUK', 'documents': 104})
|
||||
doc_samples.append({'name': 'TARA', 'documents': 1023})
|
||||
doc_samples.append({'name': 'NIH', 'documents': 140})
|
||||
data['documents'] = doc_samples
|
||||
self.write(json.dumps(data))
|
||||
self.finish()
|
||||
|
@ -1191,7 +1182,6 @@ class RunMiningHandler(BaseHandler):
|
|||
for cnt in xrange(contextnext+2,extracontextnext+contextnext+1):
|
||||
j2sextranext += ",next"+str(cnt)
|
||||
j2sextranext += ")"
|
||||
print j2sextraprev, j2sprev, j2snext, j2sextranext, j2scontext
|
||||
|
||||
# create positive and negative words weighted regex text
|
||||
pos_set = neg_set = conf = whr_conf = ''
|
||||
|
@ -1201,9 +1191,9 @@ class RunMiningHandler(BaseHandler):
|
|||
pos_words = json.loads(mining_parameters['poswords'])
|
||||
for key, value in pos_words.iteritems():
|
||||
# ONLY FOR EGI
|
||||
if 'lowercase' in mining_parameters and mining_parameters['lowercase'] == "1":
|
||||
if 'lowercase' in mining_parameters and mining_parameters['lowercase'] == 1:
|
||||
key = key.decode('utf-8').lower()
|
||||
if 'stemming' in mining_parameters and mining_parameters['stemming'] == "1":
|
||||
if 'stemming' in mining_parameters and mining_parameters['stemming'] == 1:
|
||||
key = 'stem('+key+')'
|
||||
pos_set += r'regexpcountuniquematches("%s",%s)*%s + ' % (key,j2scontext,value)
|
||||
# ORIGINAL
|
||||
|
@ -1216,9 +1206,9 @@ class RunMiningHandler(BaseHandler):
|
|||
neg_words = json.loads(mining_parameters['negwords'])
|
||||
for key, value in neg_words.iteritems():
|
||||
# ONLY FOR EGI
|
||||
if 'lowercase' in mining_parameters and mining_parameters['lowercase'] == "1":
|
||||
if 'lowercase' in mining_parameters and mining_parameters['lowercase'] == 1:
|
||||
key = key.decode('utf-8').lower()
|
||||
if 'stemming' in mining_parameters and mining_parameters['stemming'] == "1":
|
||||
if 'stemming' in mining_parameters and mining_parameters['stemming'] == 1:
|
||||
key = 'stem('+key+')'
|
||||
neg_set += r'regexpcountuniquematches("%s",%s)*%s + ' % (key,j2scontext,value)
|
||||
# ORIGINAL
|
||||
|
@ -1234,25 +1224,29 @@ class RunMiningHandler(BaseHandler):
|
|||
if conf != '':
|
||||
conf += ' as conf'
|
||||
whr_conf = 'and conf>=0'
|
||||
print conf
|
||||
|
||||
if numberOfDocsUploaded(user_id) != 0:
|
||||
doc_filters = "comprspaces(regexpr('[\n|\r]',d2,' '))"
|
||||
ackn_filters = "comprspaces(regexpr(\"\\'\", p2,''))"
|
||||
if 'punctuation' in mining_parameters and mining_parameters['punctuation'] == "1":
|
||||
if 'punctuation' in mining_parameters and mining_parameters['punctuation'] == 1:
|
||||
doc_filters = 'keywords('+doc_filters+')'
|
||||
ackn_filters = 'keywords('+ackn_filters+')'
|
||||
if 'lowercase' in mining_parameters and mining_parameters['lowercase'] == "1":
|
||||
if 'lowercase' in mining_parameters and mining_parameters['lowercase'] == 1:
|
||||
doc_filters = 'lower('+doc_filters+')'
|
||||
ackn_filters = 'lower('+ackn_filters+')'
|
||||
if 'stopwords' in mining_parameters and mining_parameters['stopwords'] == "1":
|
||||
if 'stopwords' in mining_parameters and mining_parameters['stopwords'] == 1:
|
||||
doc_filters = 'filterstopwords('+doc_filters+')'
|
||||
ackn_filters = 'filterstopwords('+ackn_filters+')'
|
||||
if 'stemming' in mining_parameters and mining_parameters['stemming'] == "1":
|
||||
if 'stemming' in mining_parameters and mining_parameters['stemming'] == 1:
|
||||
doc_filters = 'stem('+doc_filters+')'
|
||||
ackn_filters = 'stem('+ackn_filters+')'
|
||||
list(cursor.execute("drop table if exists grantstemp"+user_id, parse=False))
|
||||
query_pre_grants = "create temp table grantstemp{0} as select stripchars(p1) as gt1, case when p2 is null then null else {1} end as gt2 from (setschema 'p1,p2' file 'users_files/p{0}.tsv' dialect:tsv)".format(user_id, ackn_filters)
|
||||
cursor.execute(query_pre_grants)
|
||||
query00get = "select * from grantstemp{0}".format(user_id)
|
||||
results00get = [r for r in cursor.execute(query00get)]
|
||||
print results00get
|
||||
list(cursor.execute("drop table if exists docs"+user_id, parse=False))
|
||||
query1 = "create temp table docs{0} as select d1, {1} as d2 from (setschema 'd1,d2' select jsonpath(c1, '$.id', '$.text') from (file 'users_files/docs{0}.json'))".format(user_id, doc_filters)
|
||||
cursor.execute(query1)
|
||||
|
@ -1265,12 +1259,10 @@ class RunMiningHandler(BaseHandler):
|
|||
# string concatenation workaround because of the special characters conflicts
|
||||
if 'wordssplitnum' in mining_parameters and mining_parameters['wordssplitnum'] != '':
|
||||
words_split = int(mining_parameters['wordssplitnum'])
|
||||
# TODO must correct this!!!
|
||||
words_split = words_split + 1
|
||||
gt2 = 'gt2'
|
||||
if 'lowercase' in mining_parameters and mining_parameters['lowercase'] == "1":
|
||||
gt2 = 'comprspaces(gt2)'
|
||||
if 'lowercase' in mining_parameters and mining_parameters['lowercase'] == 1:
|
||||
gt2 = 'lower('+gt2+')'
|
||||
if 'stemming' in mining_parameters and mining_parameters['stemming'] == "1":
|
||||
if 'stemming' in mining_parameters and mining_parameters['stemming'] == 1:
|
||||
gt2 = 'stem('+gt2+')'
|
||||
# ONLY FOR EGI
|
||||
if 0 < words_split and words_split <= 20:
|
||||
|
@ -1284,14 +1276,14 @@ class RunMiningHandler(BaseHandler):
|
|||
# acknowledgment_split = r'"dummy" as prev, regexpr("([\-\[\]\/\{\}\(\)\*\+\?\.\\\^\$\|])", gt2, "\\\1") as middle, "dummy" as next'
|
||||
|
||||
# query0 = r"create temp table grants"+user_id+r' as select gt1 as g1, jmergeregexp(jgroup("(?<=[\s\b])"||middle||"(?=[\s\b])")) as g2 from '+r"(setschema 'gt1,prev,middle,next' select gt1, "+acknowledgment_split+r' from grantstemp'+user_id+r' where (gt1 or gt1!="") and gt2 not null) group by gt1 union all select distinct gt1 as g1, "(?!.*)" as g2 from grantstemp'+user_id+r" where (gt1 or gt1!='') and gt2 is null"
|
||||
query0 = r"create temp table grants"+user_id+r' as select gt1 as g1, jmergeregexp(jgroup(middle)) as g2 from '+r"(setschema 'gt1,prev,middle,next' select gt1, "+acknowledgment_split+r' from grantstemp'+user_id+r' where (gt1 or gt1!="") and gt2 != "") group by gt1 union all select distinct gt1 as g1, "(?!.*)" as g2 from grantstemp'+user_id+r" where (gt1 or gt1!='') and gt2 = ''"
|
||||
query0 = r"create temp table grants"+user_id+r' as select gt1 as g1, jmergeregexp(jgroup(middle)) as g2 from '+r"(setschema 'gt1,prev,middle,next' select gt1, "+acknowledgment_split+r' from grantstemp'+user_id+r' where (gt1 or gt1!="") and gt2 != "") group by gt1 union all select distinct gt1 as g1, "(.+)" as g2 from grantstemp'+user_id+r" where (gt1 or gt1!='') and gt2 = '' union all select distinct gt1 as g1, jmergeregexp(gt2) as g2 from grantstemp"+user_id+r" where (gt1 or gt1!='') and (gt2 or gt2!='') and regexpcountwords(' ', "+gt2+r")<"+str(words_split)+r""
|
||||
cursor.execute(query0)
|
||||
query0get = "select * from grants{0}".format(user_id)
|
||||
results0get = [r for r in cursor.execute(query0get)]
|
||||
print results0get
|
||||
|
||||
# FOR EGI ONLY
|
||||
query2 = r'select distinct d1, g1, extraprev, prev, middle, next, extranext, acknmatch, max(confidence) as confidence from (select d1, g1, regexpcountuniquematches(g2, '+j2scontext+r') as confidence, stripchars('+j2sextraprev+r') as extraprev, stripchars('+j2sprev+r') as prev, middle, stripchars('+j2snext+r') as next, stripchars('+j2sextranext+r') as extranext, '+j2scontext+r' as context, regexprfindall(g2, '+j2scontext+r') as acknmatch '+conf+r' from (select d1, textwindow(d2,'+str(extracontextprev+contextprev)+r','+str(extracontextnext+contextnext)+r','+str(contextmiddle)+r') from docs'+user_id+r'), (select g1, g2 from grants'+user_id+r') T where regexprmatches("(\b|\d|\W)"||T.g1||"(\b|\d|\W)",middle) '+whr_conf+r') group by d1'
|
||||
query2 = r'select distinct d1, r1, extraprev, prev, middle, next, extranext, case when g2="(.+)" then "[ ]" else acknmatch end as acknmatch, max(confidence) as confidence from (select d1, regexpr("(?:\b|\d|\W)("||T.g1||")(?:\b|\d|\W)",middle) as r1, g1, g2, regexpcountuniquematches(g2, '+j2scontext+r') as confidence, stripchars('+j2sextraprev+r') as extraprev, stripchars('+j2sprev+r') as prev, middle, stripchars('+j2snext+r') as next, stripchars('+j2sextranext+r') as extranext, '+j2scontext+r' as context, regexprfindall(g2, '+j2scontext+r') as acknmatch '+conf+r' from (select d1, textwindow(d2,'+str(extracontextprev+contextprev)+r','+str(extracontextnext+contextnext)+r','+str(contextmiddle)+r') from docs'+user_id+r'), (select g1, g2 from grants'+user_id+r') T where r1 not null and acknmatch!="[]" '+whr_conf+r') group by d1'
|
||||
# ORIGINAL
|
||||
# query2 = "select d1, g1, context, acknmatch, max(confidence) as confidence from (select d1, g1, regexpcountuniquematches(g2, j2s(prev,middle,next)) as confidence, j2s(prev,middle,next) as context, regexprfindall(g2, j2s(prev,middle,next)) as acknmatch {0} from (select d1, textwindow2s(d2,20,{3},20) from docs{1}), (select g1, g2 from grants{1}) T where regexprmatches(T.g1,middle) {2}) group by d1".format(conf, user_id, whr_conf, contextmiddle)
|
||||
|
||||
|
|
Binary file not shown.
Loading…
Reference in New Issue