Minor changes, CLARIN example

This commit is contained in:
sosguns2002 2018-04-12 13:48:02 +03:00
parent 009cb1e368
commit 50b61e5bc6
11 changed files with 58 additions and 27 deletions

View File

@ -4,7 +4,8 @@
"ngPackage": {
"lib": {
"entryFile": "public_api.ts",
"comments" : "none"
"comments" : "none",
"externals": {"ng2-nouislider": "ng2-nouislider"}
},
"dest": "../../dist/interactive-mining-lib"
}

View File

@ -32,7 +32,7 @@ export class ConfigurationComponent implements OnInit, AfterViewInit {
localStorage.setItem('contextprev', '10');
}
if (!localStorage.getItem('contextnext') || localStorage.getItem('contextnext') === 'undefined') {
localStorage.setItem('contextnext', '5');
localStorage.setItem('contextnext', '20');
}
if (!localStorage.getItem('wordssplitnum') || localStorage.getItem('wordssplitnum') === 'undefined') {
localStorage.setItem('wordssplitnum', '1');

View File

@ -40,7 +40,7 @@
<input #docupload type="file" (change)="fileChangeUpload($event);docupload.value=''" accept=".txt,.pdf">
<span class="uk-link">Upload your documents</span>
</div>
<span class="cm-tooltip" uk-icon="icon: info" title="Upload either <b>1 single PDF</b> or <b>1 single TXT</b> file or a valid <b>JSON file with two keys (id, text)</b> JSON files are necessary when you want to test the algorithm in <b>many publications.</b>" uk-tooltip="pos: right"></span>
<span class="cm-tooltip" uk-icon="icon: info" title="<span class='cm-label cm-label-docs'>PDF</span><span class='cm-label cm-label-docs'>TXT</span><span class='cm-label cm-label-docs'>JSON</span><span class='uk-text uk-text-small cm-coloured-text-meta'>file type<br>maximum 1MB</span><br><br>Upload either 1 <b>single PDF</b> or 1 <b>single TXT</b> file or a valid <b>JSON file with two keys (id, text)</b><br>JSON files are necessary when you want to test the algorithm in <b>many publications.</b>" uk-tooltip="pos: right"></span>
</div>
</div>

View File

@ -10,7 +10,7 @@
<nouislider [config]="sliderConfig" [(ngModel)]="settings.wordssplitnum" (ngModelChange)="onSliderChange($event)"></nouislider>
</div>
<div class="uk-width-auto uk-grid-item-match">
<span>Hight<br>precision</span>
<span>High<br>precision</span>
</div>
</div>
</div>
@ -71,7 +71,7 @@
<span *ngIf="negativePhrasesArray.length > 0" class="uk-text-small uk-margin-small-left">{{negativePhrasesArray.length}} phrase{{negativePhrasesArray.length===1?'':'s'}}</span>
</div>
<div class="uk-accordion-content">
<p class="uk-text-small">Add negative phrases that reduce the possibility for a match to be a match. You can use different weights to divide between important and less important phrases.</p>
<p class="uk-text-small">Add negative phrases. If these phrases are found around the match, this is possibly a false positive. You can use different weights to assign importance.</p>
<div class="word-container">
<div class="cm-phrases-container">
<header>
@ -121,7 +121,7 @@
<p class="uk-text-small">Select among the following text preprocessing steps.</p>
<form class="uk-form-stacked">
<div class="uk-margin">
<label class="uk-form-label" for="stop-words-filter"><input id="stop-words-filter" class="uk-checkbox" type="checkbox" [checked]="settings.stopwords===1" (change)="stopwordsCheckBoxChange($event.target.checked)"> Stopword removal</label>
<label class="uk-form-label" for="stop-words-filter"><input id="stop-words-filter" class="uk-checkbox" type="checkbox" [checked]="settings.stopwords===1" (change)="stopwordsCheckBoxChange($event.target.checked)"> Stopword removal <span class="cm-tooltip" uk-icon="icon: info" title="<b>Remove</b> common words (e.g., <b>articles, prepositions</b> etc.)" uk-tooltip="pos: right"></span></label>
</div>
<div class="uk-margin">
<label class="uk-form-label" for="punctuation-filter"><input id="punctuation-filter" class="uk-checkbox" type="checkbox" [checked]="settings.punctuation===1" (change)="punctuationCheckBoxChange($event.target.checked)"> Punctuation removal</label>
@ -130,7 +130,7 @@
<label class="uk-form-label" for="lowercase-filter"><input id="lowercase-filter" class="uk-checkbox" type="checkbox" [checked]="settings.lowercase===1" (change)="lowercaseCheckBoxChange($event.target.checked)"> Convert to lower case</label>
</div>
<div class="uk-margin">
<label class="uk-form-label" for="stemming-filter"><input id="stemming-filter" class="uk-checkbox" type="checkbox" [checked]="settings.stemming===1" (change)="stemmingCheckBoxChange($event.target.checked)"> Word stemming <span class="cm-tooltip" uk-icon="icon: info" title="Stemming is a process of text normalisation, in which the <b>variant forms of a word are reduced to a common form</b>, for <b>example</b>:<br>connection, connections, connective, connected, connecting<br><b>are reduced to connect</b>." uk-tooltip="pos: right"></span></label>
<label class="uk-form-label" for="stemming-filter"><input id="stemming-filter" class="uk-checkbox" type="checkbox" [checked]="settings.stemming===1" (change)="stemmingCheckBoxChange($event.target.checked)"> Word stemming <span class="cm-tooltip" uk-icon="icon: info" title="Stemming is a process of text normalisation, in which the <b>variant forms of a word are reduced to a common form</b>, for <b>example</b>:<br>connection, connections, connective, connected, connecting<br><b>are reduced to connect</b>" uk-tooltip="pos: right"></span></label>
</div>
</form>
</div>
@ -142,7 +142,7 @@
<span *ngIf="settings.contextnext !== 20" class="uk-text-small uk-margin-small-left">after: {{settings.contextnext}}</span>
</div>
<div class="uk-accordion-content">
<p class="uk-text-small">You may edit the length of the text area that the algorithm uses to decide if a match is a true positive.<br><span class="uk-text-danger">This is mainly for advanced users, so you ll rarely need to change these values.</span></p>
<p class="uk-text-small">You may edit the length of the text area that the algorithm uses to decide if a match is a true positive.<br><span class="uk-text-danger">For advanced users only</span></p>
<form class="">
<div class="cm-match-area left">
<label class="uk-form-label" for="context-prev-words">Number of words before the match</label>

View File

@ -2,7 +2,7 @@
<div class="uk-section uk-section-default" style="padding-top: 20px;">
<div class="uk-container uk-container-expand">
<div class="uk-grid-collapse uk-child-width-expand uk-text-middle" uk-grid>
<p class="uk-margin-small-left">Use the table or the upload form to add your concepts <span class="cm-tooltip" uk-icon="icon: info" title="Provide <b>two columns</b> of data. The first column is a keywords that must be found in a publications fulltext. Such <b>keyword</b> may be a concept name, the name of an initiative of a community etc.<br>The second column provide a context. The <b>context</b> may be an acknowledgement statement or other phrase that is expected to be found when the concept in the first column is acknowledged." uk-tooltip="pos: right"></span></p>
<p class="uk-margin-small-left">Use the table or the upload form to add your concepts <span class="cm-tooltip" uk-icon="icon: info" title="Provide <b>two columns</b> of data. The first column is a list of keywords that must be found in a publication's full text. Such a <b>keyword</b> may be a concept name, the name of an initiative of a community etc.<br>The second column provides a context. The <b>context</b> may be an acknowledgement statement or other phrase that is expected to be found when the concept in the first column is acknowledged." uk-tooltip="pos: right"></span></p>
<div class="uk-text-right">
<button class="uk-button cm-button-primary" [disabled]="!isAnyContent()" (click)="saveAndContinue()">Continue</button>
</div>
@ -56,7 +56,6 @@
<div>
<span class="cm-label">TSV</span>
<span class="uk-text uk-text-small cm-coloured-text-meta">file type, maximum 50kB</span>
<span class="cm-tooltip" title="If you want the tooltip to appear with a little delay, just add the delay option to the uk-tooltip attribute with your value in milliseconds." uk-tooltip="pos: bottom" uk-icon="icon: info"></span>
</div>
</div>
<div uk-form-custom>

View File

@ -56,7 +56,7 @@ export class ManageprofilesService {
}
loadExampleProfile(name: string): Observable<ProfileData> {
return this.http.get<ProfileData>(this.backendServerAddress + this.loadExampleProfileUrl + `?user=${this.userId}`)
return this.http.post<ProfileData>(this.backendServerAddress + this.loadExampleProfileUrl, {user: this.userId, name: name})
.catch(this.util.handleError);
}

View File

@ -9,7 +9,8 @@
background-color: #FFF !important;
}
@import "~nouislider/distribute/nouislider.min.css";
/*NoUiSlider CSS*/
/*! nouislider - 11.0.3 - 2018-01-21 14:04:07 */.noUi-target,.noUi-target *{-webkit-touch-callout:none;-webkit-tap-highlight-color:transparent;-webkit-user-select:none;-ms-touch-action:none;touch-action:none;-ms-user-select:none;-moz-user-select:none;user-select:none;-moz-box-sizing:border-box;box-sizing:border-box}.noUi-target{position:relative;direction:ltr}.noUi-base,.noUi-connects{width:100%;height:100%;position:relative;z-index:1}.noUi-connects{overflow:hidden;z-index:0}.noUi-connect,.noUi-origin{will-change:transform;position:absolute;z-index:1;top:0;left:0;height:100%;width:100%;-webkit-transform-origin:0 0;transform-origin:0 0}html:not([dir=rtl]) .noUi-horizontal .noUi-origin{left:auto;right:0}.noUi-vertical .noUi-origin{width:0}.noUi-horizontal .noUi-origin{height:0}.noUi-handle{position:absolute}.noUi-state-tap .noUi-connect,.noUi-state-tap .noUi-origin{-webkit-transition:transform .3s;transition:transform .3s}.noUi-state-drag *{cursor:inherit!important}.noUi-horizontal{height:18px}.noUi-horizontal .noUi-handle{width:34px;height:28px;left:-17px;top:-6px}.noUi-vertical{width:18px}.noUi-vertical .noUi-handle{width:28px;height:34px;left:-6px;top:-17px}html:not([dir=rtl]) .noUi-horizontal .noUi-handle{right:-17px;left:auto}.noUi-target{background:#FAFAFA;border-radius:4px;border:1px solid #D3D3D3;box-shadow:inset 0 1px 1px #F0F0F0,0 3px 6px -5px #BBB}.noUi-connects{border-radius:3px}.noUi-connect{background:#3FB8AF}.noUi-draggable{cursor:ew-resize}.noUi-vertical .noUi-draggable{cursor:ns-resize}.noUi-handle{border:1px solid #D9D9D9;border-radius:3px;background:#FFF;cursor:default;box-shadow:inset 0 0 1px #FFF,inset 0 1px 7px #EBEBEB,0 3px 6px -3px #BBB}.noUi-active{box-shadow:inset 0 0 1px #FFF,inset 0 1px 7px #DDD,0 3px 6px -3px #BBB}.noUi-handle:after,.noUi-handle:before{content:"";display:block;position:absolute;height:14px;width:1px;background:#E8E7E6;left:14px;top:6px}.noUi-handle:after{left:17px}.noUi-vertical .noUi-handle:after,.noUi-vertical 
.noUi-handle:before{width:14px;height:1px;left:6px;top:14px}.noUi-vertical .noUi-handle:after{top:17px}[disabled] .noUi-connect{background:#B8B8B8}[disabled] .noUi-handle,[disabled].noUi-handle,[disabled].noUi-target{cursor:not-allowed}.noUi-pips,.noUi-pips *{-moz-box-sizing:border-box;box-sizing:border-box}.noUi-pips{position:absolute;color:#999}.noUi-value{position:absolute;white-space:nowrap;text-align:center}.noUi-value-sub{color:#ccc;font-size:10px}.noUi-marker{position:absolute;background:#CCC}.noUi-marker-large,.noUi-marker-sub{background:#AAA}.noUi-pips-horizontal{padding:10px 0;height:80px;top:100%;left:0;width:100%}.noUi-value-horizontal{-webkit-transform:translate(-50%,50%);transform:translate(-50%,50%)}.noUi-rtl .noUi-value-horizontal{-webkit-transform:translate(50%,50%);transform:translate(50%,50%)}.noUi-marker-horizontal.noUi-marker{margin-left:-1px;width:2px;height:5px}.noUi-marker-horizontal.noUi-marker-sub{height:10px}.noUi-marker-horizontal.noUi-marker-large{height:15px}.noUi-pips-vertical{padding:0 10px;height:100%;top:0;left:100%}.noUi-value-vertical{-webkit-transform:translate(0,-50%);transform:translate(0,-50%,0);padding-left:25px}.noUi-rtl .noUi-value-vertical{-webkit-transform:translate(0,50%);transform:translate(0,50%)}.noUi-marker-vertical.noUi-marker{width:5px;height:2px;margin-top:-1px}.noUi-marker-vertical.noUi-marker-sub{width:10px}.noUi-marker-vertical.noUi-marker-large{width:15px}.noUi-tooltip{display:block;position:absolute;border:1px solid #D9D9D9;border-radius:3px;background:#fff;color:#000;padding:5px;text-align:center;white-space:nowrap}.noUi-horizontal .noUi-tooltip{-webkit-transform:translate(-50%,0);transform:translate(-50%,0);left:50%;bottom:120%}.noUi-vertical .noUi-tooltip{-webkit-transform:translate(0,-50%);transform:translate(0,-50%);top:50%;right:120%}
/* COLOR PALETTE */
:root {
@ -1057,6 +1058,9 @@ header.uk-sticky-fixed .cm-results-count-section {
border-radius: 2px;
margin-right: 5px;
}
.cm-label-docs {
background: var(--cm-theme-7);
}
/* Custom text
========================================================================== */
.cm-text-muted {

View File

@ -158,8 +158,8 @@ def loadProfileDocs(user_id, profile_id):
if os.path.isfile(unique_profile_docs_file_name):
copyfile(unique_profile_docs_file_name, docs_file_name)
def loadExampleDocs(user_id):
sample_file = open("static/exampleDocs.txt", 'r')
def loadExampleDocs(docsLocation, user_id):
sample_file = open(docsLocation, 'r')
# write data to physical file
cname = "users_files/docs{0}.json".format(user_id)
fh = open(cname, 'w')
@ -169,7 +169,7 @@ def loadExampleDocs(user_id):
break
fh.write(copy_buffer)
fh.close()
lines_num = sum(1 for line in open(cname))
return sum(1 for line in open(cname))
def loadExampleProfile(user_id):
return loadProfile("static/exampleProfile.oamp", user_id)
@ -592,6 +592,7 @@ class GetExampleProfilesHandler(BaseHandler):
try:
data = {}
example_profiles = []
example_profiles.append({'name': 'Clarin', 'contents': 11, 'documents': 7})
example_profiles.append({'name': 'Communities', 'contents': 25, 'documents': 104})
example_profiles.append({'name': 'AOF', 'contents': 66, 'documents': 1023})
example_profiles.append({'name': 'RCUK', 'contents': 263, 'documents': 140})
@ -643,28 +644,40 @@ class LoadExampleProfileHandler(BaseHandler):
def set_default_headers(self):
self.set_header("Access-Control-Allow-Origin", "*")
self.set_header("Access-Control-Allow-Headers", "Origin, X-Requested-With, Content-Type, Accept")
self.set_header('Access-Control-Allow-Methods', 'GET, OPTIONS')
self.set_header('Access-Control-Allow-Methods', 'POST, OPTIONS')
self.set_header('Access-Control-Allow-Credentials', 'true')
self.set_header('Content-Type', 'application/json')
def options(self):
# no body
self.set_status(204)
self.finish()
def get(self):
def post(self):
try:
# get user id from arguments. Must have
if 'user' not in self.request.arguments or self.request.arguments['user'][0] == '':
# get user id from body. Must have
request_arguments = json.loads(self.request.body)
if 'user' not in request_arguments or request_arguments['user'] == '':
self.set_status(400)
self.write("Missing user's id parameter")
return
user_id = self.request.arguments['user'][0]
user_id = request_arguments['user']
# get data
if 'name' not in request_arguments or request_arguments['name'] == '':
self.set_status(400)
self.write("Missing example profiles name parameter")
return
example_name = request_arguments['name']
# reset everything
deleteAllUserFiles(user_id)
# load example data
loadExampleDocs(user_id)
data = loadExampleProfile(user_id)
data['docname'] = 'Example'
data['docsnumber'] = '26'
data = {}
if example_name == 'Clarin':
data = loadProfile("static/example{0}Profile.oamp".format(example_name), user_id)
data['docname'] = example_name
data['docsnumber'] = loadExampleDocs("static/example{0}Docs.json".format(example_name), user_id)
else:
# load example data
data = loadExampleProfile(user_id)
data['docname'] = 'Example'
data['docsnumber'] = loadExampleDocs("static/exampleDocs.txt", user_id)
self.write(json.dumps(data))
self.finish()
except Exception as ints:
@ -899,7 +912,7 @@ class GetDocSamplesHandler(BaseHandler):
data = {}
doc_samples = []
doc_samples.append({'name': 'Egi', 'documents': 104})
doc_samples.append({'name': 'AOF', 'documents': 1023})
doc_samples.append({'name': 'Clarin', 'documents': 1023})
doc_samples.append({'name': 'SNSF', 'documents': 140})
doc_samples.append({'name': 'ARIADNE', 'documents': 502})
doc_samples.append({'name': 'RCUK', 'documents': 104})
@ -1029,6 +1042,8 @@ class ChooseDocSampleHandler(BaseHandler):
sample_file_name = ""
if doc_sample == "Egi":
sample_file_name = "static/egi_sample.tsv"
elif doc_sample == "Clarin":
sample_file_name = "static/clarin_docs.json"
elif doc_sample == "Rcuk":
sample_file_name = "static/rcuk_sample.tsv"
elif doc_sample == "Arxiv":
@ -1228,21 +1243,26 @@ class RunMiningHandler(BaseHandler):
if numberOfDocsUploaded(user_id) != 0:
doc_filters = "comprspaces(regexpr('[\n|\r]',d2,' '))"
grant_filters = "stripchars(comprspaces(regexpr(\"\\'\", p1,'')))"
ackn_filters = "comprspaces(regexpr(\"\\'\", p2,''))"
if 'punctuation' in mining_parameters and mining_parameters['punctuation'] == 1:
doc_filters = 'keywords('+doc_filters+')'
grant_filters = 'keywords('+grant_filters+')'
ackn_filters = 'keywords('+ackn_filters+')'
if 'lowercase' in mining_parameters and mining_parameters['lowercase'] == 1:
doc_filters = 'lower('+doc_filters+')'
grant_filters = 'lower('+grant_filters+')'
ackn_filters = 'lower('+ackn_filters+')'
if 'stopwords' in mining_parameters and mining_parameters['stopwords'] == 1:
doc_filters = 'filterstopwords('+doc_filters+')'
grant_filters = 'filterstopwords('+grant_filters+')'
ackn_filters = 'filterstopwords('+ackn_filters+')'
if 'stemming' in mining_parameters and mining_parameters['stemming'] == 1:
doc_filters = 'stem('+doc_filters+')'
grant_filters = 'stem('+grant_filters+')'
ackn_filters = 'stem('+ackn_filters+')'
list(cursor.execute("drop table if exists grantstemp"+user_id, parse=False))
query_pre_grants = "create temp table grantstemp{0} as select stripchars(p1) as gt1, case when p2 is null then null else {1} end as gt2 from (setschema 'p1,p2' file 'users_files/p{0}.tsv' dialect:tsv)".format(user_id, ackn_filters)
query_pre_grants = "create temp table grantstemp{0} as select {1} as gt1, case when p2 is null then null else {2} end as gt2 from (setschema 'p1,p2' file 'users_files/p{0}.tsv' dialect:tsv)".format(user_id, grant_filters, ackn_filters)
cursor.execute(query_pre_grants)
query00get = "select * from grantstemp{0}".format(user_id)
results00get = [r for r in cursor.execute(query00get)]

File diff suppressed because one or more lines are too long