2018-02-28 12:30:50 +01:00
#!/usr/bin/env python
#
# Copyright 2009 Facebook
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
import os
import re
import os . path
import tornado . httpserver
import tornado . autoreload
import tornado . ioloop
import tornado . options
import tornado . web
import tornado . escape as escape
import settings as msettings
import madapps
import datetime
import random
import csv
from shutil import copyfile
import itertools
import email . utils
import json
import StringIO
from tornado . options import define , options , logging
from tornado_addons import ozhandler
import copy
from collections import OrderedDict
# Register the port command-line option with tornado.options; the value must be
# an int and defaults to msettings.WEB_SERVER_PORT.
define ( " port " , default = msettings . WEB_SERVER_PORT , help = " run on the given port " , type = int )
class Application(tornado.web.Application):
    """Tornado application that maps every service URL to its request handler."""

    def __init__(self):
        """Assemble the route table and the settings, then initialise Tornado."""
        # Templates and static assets are resolved relative to this module.
        here = os.path.dirname(__file__)
        routes = [
            (r" /version ", VersionHandler),
            (r" /initialhandshake ", InitialClientHandshakeHandler),
            (r" /getusersprofiles ", GetUsersProfilesHandler),
            (r" /updateprofilestatus ", UpdateProfileStatusHandler),
            (r" /getuserprofiles ", GetUserProfilesHandler),
            (r" /loaduserprofile ", LoadUserProfileHandler),
            (r" /deleteuserprofile ", DeleteUserProfileHandler),
            (r" /createnewprofile ", CreateNewProfileHandler),
            (r" /getexampleprofiles ", GetExampleProfilesHandler),
            (r" /loadexampleprofile ", LoadExampleProfileHandler),
            (r" /uploadprofile ", UploadProfileHandler),
            (r" /alreadyconcept ", AlreadyConceptsHandler),
            (r" /uploadcontentfile ", UploadContentFileHandler),
            (r" /updateconcept ", UpdateConceptsHandler),
            (r" /getdocsamples ", GetDocSamplesHandler),
            (r" /uploaddocuments ", UploadDocumentsHandler),
            (r" /choosedocsample ", ChooseDocSampleHandler),
            (r" /alreadydocuments ", AlreadyDocumentsHandler),
            (r" /runmining ", RunMiningHandler),
            (r" /preparesavedprofile ", PrepareSavedProfileHandler),
            (r" /saveprofile ", SaveProfileToDatabaseHandler),
            (r" /downloadprofile ", DownloadProfileHandler),
            (r" /notifyforprofile ", NotifyHandler),
        ]
        app_settings = dict(
            template_path=os.path.join(here, " templates "),
            static_path=os.path.join(here, " static "),
            xsrf_cookies=False,
            cookie_secret=msettings.SECRET_KEY,
            login_url=" /auth/login ",
            debug=msettings.DEBUG,
        )
        tornado.web.Application.__init__(self, routes, **app_settings)
def auth_callback(request, realm, username, password):
    """Check basic-auth credentials against the configured username/password.

    On success the request is tagged with ``user_id = 1`` and True is returned;
    otherwise False is returned and the request is left untouched.
    ``realm`` is accepted but not used.
    """
    credentials_ok = (username == msettings.USERNAME
                      and password == msettings.PASSWORD)
    if not credentials_ok:
        return False
    request.user_id = 1
    return True
def getNewUserId():
    """Return a new user identifier built from the clock and a random offset.

    The numeric part is the current microsecond plus a random multiple of
    100000 (multiplier between 1 and 100 inclusive).
    """
    micros = datetime.datetime.now().microsecond
    offset = random.randrange(1, 100 + 1) * 100000
    return ' user {0} '.format(micros + offset)
def getNewProfileId():
    """Return a new profile identifier built from the clock and a random offset.

    The numeric part is the current microsecond plus a random multiple of
    100000 (multiplier between 1 and 100 inclusive).
    """
    micros = datetime.datetime.now().microsecond
    offset = random.randrange(1, 100 + 1) * 100000
    return ' profile {0} '.format(micros + offset)
def numberOfGrantsUploaded(user_id, cookie_set):
    """Return the line count of the user's grants file if it matches the cookie.

    Parameters:
        user_id: identifier used to build the grants file name.
        cookie_set: string the caller expects the line count to equal.

    Returns:
        The number of lines in the file when both arguments are truthy, the
        file exists and ``str(line_count) == cookie_set``; otherwise 0.
    """
    if not (cookie_set and user_id):
        return 0
    file_name = " users_files/p %s .tsv " % (user_id)
    if not os.path.isfile(file_name):
        return 0
    # Use a context manager so the handle is closed instead of being leaked.
    with open(file_name) as grants_file:
        num_lines = sum(1 for _ in grants_file)
    if str(num_lines) == cookie_set:
        return num_lines
    return 0
def numberOfDocsUploaded(user_id):
    """Return the number of lines in the user's uploaded documents file.

    Parameters:
        user_id: identifier used to build the documents file name.

    Returns:
        The line count of the file, or 0 when ``user_id`` is falsy or the
        file does not exist.
    """
    if not user_id:
        return 0
    file_name = " users_files/docs %s .json " % (user_id)
    if not os.path.isfile(file_name):
        return 0
    # Use a context manager so the handle is closed instead of being leaked.
    with open(file_name) as docs_file:
        return sum(1 for _ in docs_file)
def loadProfile(profileLocation, user_id):
    """Extract the contents of a profile database into a plain dictionary.

    Parameters:
        profileLocation: path of the profile database opened through madis.
        user_id: identifier used to name the exported grants file.

    Returns:
        A dict holding, when the matching tables exist: the number of exported
        grants, the positive words, the negative words, and one top-level
        entry per row of the filters table.
    """
    import sys
    sys.path.append(msettings.MADIS_PATH)
    import madis

    # get the profile database cursor
    cursor = madis.functions.Connection(profileLocation).cursor()

    def has_rows(query):
        # True when the query yields at least one row.
        return bool([r for r in cursor.execute(query)])

    # data to be sent
    data = {}
    # try/finally guarantees the cursor is released even when a query fails.
    try:
        # Write to csv file the grants ids
        if has_rows(" SELECT name FROM sqlite_master WHERE type= ' table ' AND name= ' grants ' "):
            cursor.execute(" output ' users_files/p {0} .tsv ' select c1,c2 from grants ".format(user_id))
            # Get the number of grants uploaded
            file_name = " users_files/p %s .tsv " % (user_id)
            if os.path.isfile(file_name):
                # Context manager closes the handle after counting.
                with open(file_name) as grants_file:
                    data[' concepts '] = sum(1 for _ in grants_file)
        # write to json the poswords
        if has_rows(" SELECT name FROM sqlite_master WHERE type= ' table ' AND name= ' poswords ' "):
            rows = [r for r in cursor.execute(" select c1, c2 from poswords ")]
            data[' poswords '] = {value: key for value, key in rows}
        # write to json the negwords
        if has_rows(" SELECT name FROM sqlite_master WHERE type= ' table ' AND name= ' negwords ' "):
            rows = [r for r in cursor.execute(" select c1, c2 from negwords ")]
            data[' negwords '] = {value: key for value, key in rows}
        # write to json the filters: each row becomes a top-level entry
        if has_rows(" SELECT name FROM sqlite_master WHERE type= ' table ' AND name= ' filters ' "):
            rows = [r for r in cursor.execute(" select c1, c2 from filters ")]
            for value, key in rows:
                data[value] = key
    finally:
        cursor.close()
    return data
def deleteAllUserFiles(user_id):
    """Remove the user's working grants and documents files when they exist.

    Does nothing when ``user_id`` is falsy.
    """
    if not user_id:
        return
    working_files = (
        " users_files/p %s .tsv " % (user_id),
        " users_files/docs %s .json " % (user_id),
    )
    for working_file in working_files:
        if os.path.isfile(working_file):
            os.remove(working_file)
def loadProfileDocs(user_id, profile_id):
    """Copy a profile's own documents file over the user's general docs file.

    Nothing happens when the profile-specific file does not exist.
    """
    profile_docs = " users_files/OAMiningDocs_ {0} _ {1} .json ".format(user_id, profile_id)
    user_docs = " users_files/docs {0} .json ".format(user_id)
    if not os.path.isfile(profile_docs):
        return
    copyfile(profile_docs, user_docs)
2018-02-28 12:30:50 +01:00
2018-04-12 12:48:02 +02:00
def loadExampleDocs(docsLocation, user_id):
    """Copy an example documents file into the user's docs file.

    Parameters:
        docsLocation: path of the example documents file to read.
        user_id: identifier used to build the destination file name.

    Returns:
        The number of lines in the destination file after the copy.

    Raises:
        Whatever ``open`` raises when either file cannot be opened.
    """
    # write data to physical file
    cname = " users_files/docs {0} .json ".format(user_id)
    # Context managers close both handles even if the copy fails half-way;
    # the source is opened first, so a missing sample never truncates cname.
    with open(docsLocation, ' r ') as sample_file:
        with open(cname, ' w ') as fh:
            while 1:
                copy_buffer = sample_file.read(1048576)
                if not copy_buffer:
                    break
                fh.write(copy_buffer)
    with open(cname) as copied:
        return sum(1 for _ in copied)
2018-02-28 12:30:50 +01:00
def loadExampleProfile(user_id):
    """Load the bundled example profile for ``user_id`` via ``loadProfile``."""
    example_location = " static/exampleProfile.oamp "
    return loadProfile(example_location, user_id)
class BaseHandler(ozhandler.DjangoErrorMixin, ozhandler.BasicAuthMixin, tornado.web.RequestHandler):
    """Shared handler base: optional basic auth, IP whitelisting, query streaming."""

    def __init__(self, *args):
        """Wrap ``get`` with the auth and whitelist checks enabled in msettings."""
        tornado.web.RequestHandler.__init__(self, *args)
        if msettings.USERNAME != ' ':
            self.hiddenauthget = self.get
            self.get = self.authget
        if msettings.RESTRICT_IPS:
            self.hiddenget = self.get
            self.get = self.checkwhitelistget

    def authget(self, *args):
        """Run the wrapped ``get``, asking for basic auth unless ``passwordless``.

        Returns False when authentication fails, otherwise None.
        """
        # getattr covers handlers that do not define the attribute. The former
        # try/bare-except also swallowed errors raised by the wrapped handler
        # and then ran that handler a second time after authentication.
        if getattr(self, 'passwordless', False):
            self.hiddenauthget(*args)
            return
        if not self.get_authenticated_user(auth_callback, ' stats '):
            return False
        self.hiddenauthget(*args)

    def checkwhitelistget(self, *args):
        """Run the wrapped ``get`` only for whitelisted client IPs (403 otherwise)."""
        if self.request.remote_ip not in msettings.RESTRICT_IP_WHITELIST:
            raise tornado.web.HTTPError(403)
        return self.hiddenget(*args)

    def get_current_user(self):
        """Return the fixed user name used for every request."""
        return ' anonymous '

    def executequery(self, query, bindings=None):
        """Execute ``query`` and stream the result to the client as JSON.

        The response is an object with a ``cols`` description followed by the
        rows; rows after the first one are written by ``executequeryrow``.
        On failure the error text is sent as the response body, and the
        exception is re-raised when msettings.DEBUG is set.
        """
        def latinnum(x):
            # Build a letter-based column id from the column index.
            x = int(x)
            lx = " "
            while x > 25:
                lx += chr(ord(' A ') + x // 25)
                x %= 25
            lx += chr(ord(' A ') + x)
            return lx

        json_options = dict(separators=(' , ', ' : '), sort_keys=True, ensure_ascii=False)
        query = query.rstrip(' ; \n \ s ')
        # Saved before the try so every error path can restore it.
        origvars = msettings.madis.functions.variables
        try:
            c = msettings.Connection.cursor().execute(query, localbindings=bindings)
        except Exception as e:
            # No cursor was bound here, so there is nothing to close.
            msettings.madis.functions.variables = origvars
            self.finish(str(e))
            if msettings.DEBUG:
                raise e
            return
        # Schema from query's description
        try:
            schema = c.getdescription()
        except Exception:
            c.close()
            msettings.madis.functions.variables = origvars
            self.finish()
            return
        colnames = []
        coltypes = []
        for cname, ctype in schema:
            if ctype is not None:
                if ctype.lower()[0:3] in (' int ', ' rea ', ' flo ', ' dou ', ' num '):
                    ctype = ' number '
                else:
                    ctype = ' string '
            colnames.append(cname)
            coltypes.append(ctype)
        try:
            firstrow = next(c)
        except StopIteration:
            c.close()
            msettings.madis.functions.variables = origvars
            self.finish()
            return
        except Exception as e:
            c.close()
            msettings.madis.functions.variables = origvars
            self.finish(str(e))
            return
        # Merge with guessed schema from query's first row
        for i, ctype in enumerate(coltypes):
            if ctype is None:
                if type(firstrow[i]) in (int, float):
                    coltypes[i] = ' number '
                else:
                    coltypes[i] = ' string '
        # Write response's header
        response = {" cols ": []}
        for num, (name, ctype) in enumerate(zip(colnames, coltypes)):
            response[" cols "] += [{" id ": latinnum(num), " label ": name, " type ": ctype}]
        # Drop the closing brace so the rows array can be appended to the object.
        self.write(json.dumps(response, **json_options)[0:-1] + ' , " rows " :[ ')
        # Write first line
        self.write(json.dumps({" c ": [{" v ": x} for x in firstrow]}, **json_options))
        self.executequeryrow(c, msettings.madis.functions.variables)
        msettings.madis.functions.variables = origvars

    def executequeryrow(self, cursor, vars):
        """Write the next chunk of rows and reschedule itself until exhausted.

        Rows are buffered until the buffer reaches 30000 characters, then
        flushed with a callback that continues from the same cursor. When the
        cursor is exhausted the JSON document is closed and the request ends.
        """
        try:
            try:
                origvars = msettings.madis.functions.variables
                msettings.madis.functions.variables = vars
                chunk = StringIO.StringIO()
                while chunk.len < 30000:
                    chunk.write(' , ' + json.dumps({" c ": [{" v ": x} for x in cursor.next()]}, separators=(' , ', ' : '), sort_keys=True, ensure_ascii=False))
                self.write(chunk.getvalue())
                self.flush(callback=lambda: self.executequeryrow(cursor, msettings.madis.functions.variables))
            except StopIteration:
                cursor.close()
                self.finish(chunk.getvalue() + ' ]} ')
            finally:
                msettings.madis.functions.variables = origvars
        except IOError:
            msettings.madis.functions.variables = origvars
            cursor.close()

    def serveimage(self, path, mime_type=None):
        """Write a file from the static directory as the response body.

        Parameters:
            path: location relative to the ``static_path`` setting.
            mime_type: Content-Type to send; defaults to ``image/`` plus the
                last three characters of ``path``.

        Answers 304 with no body when the request's If-Modified-Since date is
        not older than the file's modification time.
        """
        import time
        if os.path.sep != " / ":
            path = path.replace(" / ", os.path.sep)
        abspath = os.path.abspath(os.path.join(self.settings[' static_path '], path))
        if mime_type is None:
            mime_type = ' image/ ' + path[-3:]
        self.set_header(" Content-Type ", mime_type)
        # Check the If-Modified-Since, and don't send the result if the
        # content has not been modified
        ims_value = self.request.headers.get(" If-Modified-Since ")
        if ims_value is not None:
            date_tuple = email.utils.parsedate(ims_value)
            # parsedate returns None for an unparseable header; serve the file then.
            if date_tuple is not None:
                if_since = datetime.datetime.fromtimestamp(time.mktime(date_tuple))
                # `modified` and `time` were previously undefined here. The
                # mtime is truncated to whole seconds to match header resolution.
                modified = datetime.datetime.fromtimestamp(int(os.stat(abspath).st_mtime))
                if if_since >= modified:
                    self.set_status(304)
                    return
        image_file = open(abspath, " rb ")
        try:
            self.write(image_file.read())
        finally:
            image_file.close()
2018-03-13 16:02:55 +01:00
class VersionHandler(BaseHandler):
    """Report the service version as JSON."""

    passwordless = True

    def set_default_headers(self):
        """Attach the CORS and JSON content-type headers to every response."""
        default_headers = (
            (" Access-Control-Allow-Origin ", " * "),
            (" Access-Control-Allow-Headers ", " Origin, X-Requested-With, Content-Type, Accept "),
            (' Access-Control-Allow-Methods ', ' GET, OPTIONS '),
            (' Access-Control-Allow-Credentials ', ' true '),
            (' Content-Type ', ' application/json '),
        )
        for header_name, header_value in default_headers:
            self.set_header(header_name, header_value)

    def options(self):
        """Answer CORS preflight requests with 204 and no body."""
        # no body
        self.set_status(204)
        self.finish()

    def get(self):
        """Send the version number; any failure is reported as a 400."""
        try:
            version_info = {' version ': 0.5}
            self.write(version_info)
            self.finish()
        except Exception as ints:
            self.set_status(400)
            self.write(" A server error occurred, please contact administrator! ")
            self.finish()
            print(ints)
            return
2018-02-28 12:30:50 +01:00
2018-03-10 18:32:45 +01:00
class InitialClientHandshakeHandler(BaseHandler):
    """Create or refresh the per-user profiles database for a client session."""

    passwordless = True

    def set_default_headers(self):
        """Attach the CORS and JSON content-type headers to every response."""
        self.set_header(" Access-Control-Allow-Origin ", " * ")
        self.set_header(" Access-Control-Allow-Headers ", " Origin, X-Requested-With, Content-Type, Accept ")
        self.set_header(' Access-Control-Allow-Methods ', ' GET, OPTIONS ')
        self.set_header(' Access-Control-Allow-Credentials ', ' true ')
        self.set_header(' Content-Type ', ' application/json ')

    def options(self):
        """Answer CORS preflight requests with 204 and no body."""
        # no body
        self.set_status(204)
        self.finish()

    def get(self):
        """Record the community id in the user's database, creating it if needed.

        Requires the ``user`` and ``communityId`` query arguments; answers 400
        when either is missing or on any server-side error.
        """
        try:
            arguments = self.request.arguments
            if ' user ' not in arguments or arguments[' user '][0] == ' ':
                self.set_status(400)
                self.write(" Missing cookie containing user ' s id... ")
                return
            # Truncate exactly like the other handlers do; otherwise an id
            # longer than 128 characters creates a database they never find.
            user_id = arguments[' user '][0][:128]
            database_file_name = " users_files/OAMiningProfilesDatabase_ {0} .db ".format(user_id)
            if ' communityId ' not in arguments or arguments[' communityId '][0] == ' ':
                self.set_status(400)
                self.write(" Missing arguement community id. ")
                return
            community_id = arguments[' communityId '][0][:128]
            import sys
            sys.path.append(msettings.MADIS_PATH)
            import madis
            is_new_database = not os.path.isfile(database_file_name)
            if is_new_database and not os.path.exists(" users_files "):
                os.makedirs(" users_files ")
            # get the database cursor
            cursor = madis.functions.Connection(database_file_name).cursor()
            try:
                # The community table is rebuilt on every handshake.
                cursor.execute(''' DROP TABLE IF EXISTS community ''', parse=False)
                cursor.execute(''' CREATE TABLE community(id) ''', parse=False)
                cursor.execute(''' INSERT INTO community VALUES(?) ''', (community_id,), parse=False)
                if is_new_database:
                    # create a database where the user stores his profiles info
                    cursor.execute(''' DROP TABLE IF EXISTS database ''', parse=False)
                    cursor.execute(''' CREATE TABLE database(id,name,datecreated,status,matches,docname,docsnumber,notified) ''', parse=False)
            finally:
                # Release the cursor even when one of the statements fails.
                cursor.close()
            self.write({})
            self.finish()
        except Exception as ints:
            self.set_status(400)
            self.write(" A server error occurred, please contact administrator! ")
            self.finish()
            print(ints)
            return
2018-11-02 18:31:11 +01:00
class GetUsersProfilesHandler(BaseHandler):
    """List the stored profiles of every user found in the users_files folder."""

    passwordless = True

    def set_default_headers(self):
        """Attach the CORS and JSON content-type headers to every response."""
        self.set_header(" Access-Control-Allow-Origin ", " * ")
        self.set_header(" Access-Control-Allow-Headers ", " Origin, X-Requested-With, Content-Type, Accept ")
        self.set_header(' Access-Control-Allow-Methods ', ' GET, OPTIONS ')
        self.set_header(' Access-Control-Allow-Credentials ', ' true ')
        self.set_header(' Content-Type ', ' application/json ')

    def options(self):
        """Answer CORS preflight requests with 204 and no body."""
        # no body
        self.set_status(204)
        self.finish()

    def get(self):
        """Send every user's profiles as JSON under the ``profiles`` key.

        Requires the ``isinadministrators`` query argument; answers 400 when it
        is missing or wrong, when a user database is missing, or on any error.
        """
        try:
            # Check if the user has the admin parameter
            arguments = self.request.arguments
            if ' isinadministrators ' not in arguments or arguments[' isinadministrators '][0] != ' true ':
                self.set_status(400)
                self.write(" Must be an admin ")
                return
            # list users
            users = [re.search(' OAMiningProfilesDatabase_([ \\ w0-9]+).+ ', f).group(1) for f in os.listdir(' ./users_files ') if re.match(r' OAMiningProfilesDatabase_[ \ w0-9]+ \ .db ', f)]
            print(users)
            # for every user, read its database to find his profiles
            import sys
            sys.path.append(msettings.MADIS_PATH)
            import madis
            # data to be sent
            data = {}
            users_profiles = []
            for user in users:
                database_file_name = " users_files/OAMiningProfilesDatabase_ {0} .db ".format(user)
                if not os.path.isfile(database_file_name):
                    self.set_status(400)
                    self.write(" Missing user \' s database ")
                    return
                # get the database cursor
                cursor = madis.functions.Connection(database_file_name).cursor()
                try:
                    try:
                        # Adds the column to databases that predate it.
                        cursor.execute(''' ALTER TABLE database ADD COLUMN notified INTEGER DEFAULT 0 ''')
                    except Exception:
                        # The statement fails once the column exists; that
                        # must not abort the listing for every user.
                        pass
                    try:
                        # get community id
                        community_id = [r for r in cursor.execute(''' SELECT id FROM community ''')][0]
                    except Exception as ints:
                        print(ints)
                        community_id = ' Unkown ' + user
                    for r in cursor.execute(''' SELECT id,name,datecreated,status,matches,docname,notified FROM database order by rowid desc '''):
                        users_profiles.append({" user ": community_id, " userId ": user, " profileId ": r[0], " profile ": r[1], " datecreated ": r[2], " status ": r[3], " matches ": r[4], " docname ": r[5], " notified ": r[6]})
                finally:
                    # One cursor is opened per user; release each of them.
                    cursor.close()
            data[' profiles '] = users_profiles
            self.write(json.dumps(data))
            self.finish()
        except Exception as ints:
            self.set_status(400)
            self.write(" A server error occurred, please contact administrator! ")
            self.finish()
            print(ints)
            return
class UpdateProfileStatusHandler(BaseHandler):
    """Change the stored status of one profile of one user."""

    passwordless = True

    def set_default_headers(self):
        """Attach the CORS and JSON content-type headers to every response."""
        default_headers = (
            (" Access-Control-Allow-Origin ", " * "),
            (" Access-Control-Allow-Headers ", " Origin, X-Requested-With, Content-Type, Accept "),
            (' Access-Control-Allow-Methods ', ' POST, OPTIONS '),
            (' Access-Control-Allow-Credentials ', ' true '),
            (' Content-Type ', ' application/json '),
        )
        for header_name, header_value in default_headers:
            self.set_header(header_name, header_value)

    def options(self):
        """Answer CORS preflight requests with 204 and no body."""
        # no body
        self.set_status(204)
        self.finish()

    def post(self):
        """Update the profile status from the JSON body (user, id, status).

        Answers 400 when the admin flag is absent or wrong, when the user's
        database is missing, or on any error.
        """
        try:
            # The JSON body carries every argument.
            request_arguments = json.loads(self.request.body)
            is_admin = (' isinadministrators ' in request_arguments
                        and request_arguments[' isinadministrators '] == ' true ')
            if not is_admin:
                self.set_status(400)
                self.write(" Must be an admin ")
                return
            import sys
            sys.path.append(msettings.MADIS_PATH)
            import madis
            user = request_arguments[' user '][:128]
            profile_id = request_arguments[' id '][:128]
            database_file_name = " users_files/OAMiningProfilesDatabase_ {0} .db ".format(user)
            if not os.path.isfile(database_file_name):
                self.set_status(400)
                self.write(" Missing user \' s database ")
                return
            cursor = madis.functions.Connection(database_file_name).cursor()
            # Write new Profile status to users database
            new_status = request_arguments[' status ']
            cursor.execute(''' UPDATE database set status=? where id=? ''', (new_status, profile_id,), parse=False)
            cursor.close()
            self.write(json.dumps({}))
            self.finish()
        except Exception as ints:
            self.set_status(400)
            self.write(" A server error occurred, please contact administrator! ")
            self.finish()
            print(ints)
            return
2018-02-28 12:30:50 +01:00
class GetUserProfilesHandler(BaseHandler):
    """List the stored profiles of a single user."""

    passwordless = True

    def set_default_headers(self):
        """Attach the CORS and JSON content-type headers to every response."""
        default_headers = (
            (" Access-Control-Allow-Origin ", " * "),
            (" Access-Control-Allow-Headers ", " Origin, X-Requested-With, Content-Type, Accept "),
            (' Access-Control-Allow-Methods ', ' GET, OPTIONS '),
            (' Access-Control-Allow-Credentials ', ' true '),
            (' Content-Type ', ' application/json '),
        )
        for header_name, header_value in default_headers:
            self.set_header(header_name, header_value)

    def options(self):
        """Answer CORS preflight requests with 204 and no body."""
        # no body
        self.set_status(204)
        self.finish()

    def get(self):
        """Send the user's profiles, newest row first, under the ``profiles`` key.

        Requires the ``user`` query argument; answers 400 when it is missing,
        when the user's database does not exist, or on any error.
        """
        try:
            # get user id from arguments. Must have
            arguments = self.request.arguments
            if ' user ' not in arguments or arguments[' user '][0] == ' ':
                self.set_status(400)
                self.write(" Missing user ' s id parameter ")
                return
            user_id = arguments[' user '][0][:128]
            # extract data from database
            import sys
            sys.path.append(msettings.MADIS_PATH)
            import madis
            # database file name
            database_file_name = " users_files/OAMiningProfilesDatabase_ {0} .db ".format(user_id)
            if not os.path.isfile(database_file_name):
                self.set_status(400)
                self.write(" Missing user \' s database ")
                return
            # get the database cursor
            cursor = madis.functions.Connection(database_file_name).cursor()
            # One dictionary per stored profile row.
            user_profiles = [
                {" id ": row[0], " name ": row[1], " datecreated ": row[2], " status ": row[3], " matches ": row[4], " docname ": row[5], " notified ": row[6]}
                for row in cursor.execute(''' SELECT id,name,datecreated,status,matches,docname,notified FROM database order by rowid desc ''')
            ]
            # data to be sent
            data = {' profiles ': user_profiles}
            cursor.close()
            self.write(json.dumps(data))
            self.finish()
        except Exception as ints:
            self.set_status(400)
            self.write(" A server error occurred, please contact administrator! ")
            self.finish()
            print(ints)
            return
class LoadUserProfileHandler(BaseHandler):
    """Load one stored profile of a user into the user's working files."""

    passwordless = True

    def set_default_headers(self):
        """Attach the CORS and JSON content-type headers to every response."""
        default_headers = (
            (" Access-Control-Allow-Origin ", " * "),
            (" Access-Control-Allow-Headers ", " Origin, X-Requested-With, Content-Type, Accept "),
            (' Access-Control-Allow-Methods ', ' POST, OPTIONS '),
            (' Access-Control-Allow-Credentials ', ' true '),
            (' Content-Type ', ' application/json '),
        )
        for header_name, header_value in default_headers:
            self.set_header(header_name, header_value)

    def options(self):
        """Answer CORS preflight requests with 204 and no body."""
        # no body
        self.set_status(204)
        self.finish()

    def post(self):
        """Reset the user's working files and send the chosen profile's data.

        The JSON body must carry ``user`` and ``id``. Answers 400 when either
        is missing, when the profile is unknown in the database or on disk, or
        on any error.
        """
        try:
            # get user id from body. Must have
            request_arguments = json.loads(self.request.body)
            if ' user ' not in request_arguments or request_arguments[' user '] == ' ':
                self.set_status(400)
                self.write(" Missing user ' s id argument ")
                return
            user_id = request_arguments[' user '][:128]
            # get data
            if ' id ' not in request_arguments or request_arguments[' id '] == ' ':
                self.set_status(400)
                self.write(" Missing profiles id argument ")
                return
            profile_id = request_arguments[' id '][:128]
            # look the profile up in the user's database
            import sys
            sys.path.append(msettings.MADIS_PATH)
            import madis
            # database file name
            database_file_name = " users_files/OAMiningProfilesDatabase_ {0} .db ".format(user_id)
            # get the database cursor
            cursor = madis.functions.Connection(database_file_name).cursor()
            # check if this profile exists
            profile_rows = [r for r in cursor.execute(''' SELECT docname,docsnumber FROM database WHERE id=? ''', (profile_id,))]
            # The rows are materialised, so the cursor is no longer needed.
            cursor.close()
            if not profile_rows:
                self.set_status(400)
                self.write(" There is no profile with this name ")
                return
            # check if profile file exists on the disk
            file_name = " users_files/OAMiningProfile_ %s _ %s .oamp " % (user_id, profile_id)
            if not os.path.isfile(file_name):
                self.set_status(400)
                self.write(" There is no profile file with this name ")
                return
            # reset everything
            deleteAllUserFiles(user_id)
            loadProfileDocs(user_id, profile_id)
            data = loadProfile(file_name, user_id)
            docname, docsnumber = profile_rows[0][0], profile_rows[0][1]
            data[' docname '] = docname
            data[' docsnumber '] = docsnumber
            self.write(json.dumps(data))
            self.finish()
        except Exception as ints:
            self.set_status(400)
            self.write(" A server error occurred, please contact administrator! ")
            self.finish()
            print(ints)
            return
class DeleteUserProfileHandler(BaseHandler):
    """Delete one stored profile of a user from the database and from disk."""

    passwordless = True

    def set_default_headers(self):
        """Attach the CORS and JSON content-type headers to every response."""
        default_headers = (
            (" Access-Control-Allow-Origin ", " * "),
            (" Access-Control-Allow-Headers ", " Origin, X-Requested-With, Content-Type, Accept "),
            (' Access-Control-Allow-Methods ', ' POST, OPTIONS '),
            (' Access-Control-Allow-Credentials ', ' true '),
            (' Content-Type ', ' application/json '),
        )
        for header_name, header_value in default_headers:
            self.set_header(header_name, header_value)

    def options(self):
        """Answer CORS preflight requests with 204 and no body."""
        # no body
        self.set_status(204)
        self.finish()

    def post(self):
        """Remove the profile row, its profile file and its documents file.

        The JSON body must carry ``user`` and ``id``; answers 400 when either
        is missing or on any error.
        """
        try:
            # get user id from body. Must have
            request_arguments = json.loads(self.request.body)
            if ' user ' not in request_arguments or request_arguments[' user '] == ' ':
                self.set_status(400)
                self.write(" Missing user ' s id argument ")
                return
            user_id = request_arguments[' user '][:128]
            # get data
            if ' id ' not in request_arguments or request_arguments[' id '] == ' ':
                self.set_status(400)
                self.write(" Missing profiles id argument ")
                return
            profile_id = request_arguments[' id '][:128]
            # delete profile from database
            import sys
            sys.path.append(msettings.MADIS_PATH)
            import madis
            # database file name
            database_file_name = " users_files/OAMiningProfilesDatabase_ {0} .db ".format(user_id)
            # get the database cursor
            cursor = madis.functions.Connection(database_file_name).cursor()
            cursor.execute(''' DELETE FROM database WHERE id=? ''', (profile_id,), parse=False)
            cursor.close()
            # delete the profile file, then the profile docs, from disk
            profile_file = " users_files/OAMiningProfile_ %s _ %s .oamp " % (user_id, profile_id)
            docs_file = " users_files/OAMiningDocs_ {0} _ {1} .json ".format(user_id, profile_id)
            for stale_file in (profile_file, docs_file):
                if os.path.isfile(stale_file):
                    os.remove(stale_file)
            self.write(json.dumps({}))
            self.finish()
        except Exception as ints:
            self.set_status(400)
            self.write(" A server error occurred, please contact administrator! ")
            self.finish()
            print(ints)
            return
class GetExampleProfilesHandler(BaseHandler):
    """Send the fixed list of example profiles."""

    passwordless = True

    def set_default_headers(self):
        """Attach the CORS and JSON content-type headers to every response."""
        default_headers = (
            (" Access-Control-Allow-Origin ", " * "),
            (" Access-Control-Allow-Headers ", " Origin, X-Requested-With, Content-Type, Accept "),
            (' Access-Control-Allow-Methods ', ' GET, OPTIONS '),
            (' Access-Control-Allow-Credentials ', ' true '),
            (' Content-Type ', ' application/json '),
        )
        for header_name, header_value in default_headers:
            self.set_header(header_name, header_value)

    def options(self):
        """Answer CORS preflight requests with 204 and no body."""
        # no body
        self.set_status(204)
        self.finish()

    def get(self):
        """Send the example profiles as JSON under the ``profiles`` key."""
        try:
            # Each entry: display name plus its contents and documents counts.
            example_profiles = [
                {' name ': ' Clarin ', ' contents ': 4, ' documents ': 9},
                {' name ': ' Communities ', ' contents ': 25, ' documents ': 104},
                {' name ': ' AOF ', ' contents ': 66, ' documents ': 1023},
                {' name ': ' RCUK ', ' contents ': 263, ' documents ': 140},
                {' name ': ' TARA ', ' contents ': 4, ' documents ': 502},
            ]
            data = {' profiles ': example_profiles}
            self.write(json.dumps(data))
            self.finish()
        except Exception as ints:
            self.set_status(400)
            self.write(" A server error occurred, please contact administrator! ")
            self.finish()
            print(ints)
            return
class CreateNewProfileHandler(BaseHandler):
    """Start a fresh profile by clearing the user's working files."""

    passwordless = True

    def set_default_headers(self):
        """Attach the CORS and JSON content-type headers to every response."""
        default_headers = (
            (" Access-Control-Allow-Origin ", " * "),
            (" Access-Control-Allow-Headers ", " Origin, X-Requested-With, Content-Type, Accept "),
            (' Access-Control-Allow-Methods ', ' GET, OPTIONS '),
            (' Access-Control-Allow-Credentials ', ' true '),
            (' Content-Type ', ' application/json '),
        )
        for header_name, header_value in default_headers:
            self.set_header(header_name, header_value)

    def options(self):
        """Answer CORS preflight requests with 204 and no body."""
        # no body
        self.set_status(204)
        self.finish()

    def get(self):
        """Delete the user's working files and send an empty JSON object.

        Requires the ``user`` query argument; answers 400 when it is missing
        or on any error.
        """
        try:
            # get user id from arguments. Must have
            arguments = self.request.arguments
            if ' user ' not in arguments or arguments[' user '][0] == ' ':
                self.set_status(400)
                self.write(" Missing user ' s id parameter ")
                return
            user_id = arguments[' user '][0][:128]
            deleteAllUserFiles(user_id)
            self.write(json.dumps({}))
            self.finish()
        except Exception as ints:
            self.set_status(400)
            self.write(" A server error occurred, please contact administrator! ")
            self.finish()
            print(ints)
            return
class LoadExampleProfileHandler(BaseHandler):
    """Replace a user's workspace with one of the bundled example profiles.

    POST expects a JSON body carrying the user id and the example name.
    """

    # NOTE(review): presumably read by BaseHandler to skip authentication - confirm.
    passwordless = True

    def set_default_headers(self):
        """Set the CORS and content-type headers used by this handler."""
        default_headers = (
            (" Access-Control-Allow-Origin ", " * "),
            (" Access-Control-Allow-Headers ", " Origin, X-Requested-With, Content-Type, Accept "),
            (' Access-Control-Allow-Methods ', ' POST, OPTIONS '),
            (' Access-Control-Allow-Credentials ', ' true '),
            (' Content-Type ', ' application/json '),
        )
        for header_name, header_value in default_headers:
            self.set_header(header_name, header_value)

    def options(self):
        """Answer an OPTIONS request with status 204 and no body."""
        self.set_status(204)
        self.finish()

    def post(self):
        """Load the requested example profile and its documents for the user.

        The user's files are removed first. The response is the loaded
        profile data extended with the document-set name and the value
        returned by ``loadExampleDocs``. Status 400 is returned when the user
        id or the example name fails its presence check, or on any exception.
        """
        try:
            payload = json.loads(self.request.body)
            # The user id is mandatory.
            if not (' user ' in payload and payload[' user '] != ' '):
                self.set_status(400)
                self.write(" Missing user ' s id parameter ")
                return
            user_id = payload[' user '][:128]
            # So is the name of the example profile.
            if not (' name ' in payload and payload[' name '] != ' '):
                self.set_status(400)
                self.write(" Missing example profiles name parameter ")
                return
            example_name = payload[' name '][:128]
            # Start from a clean slate.
            deleteAllUserFiles(user_id)
            if example_name != ' Clarin ':
                # Every other name falls back to the default example data.
                data = loadExampleProfile(user_id)
                data[' docname '] = ' Example '
                docs_path = " static/exampleDocs.txt "
            else:
                data = loadProfile(" static/example {0} Profile.oamp ".format(example_name), user_id)
                data[' docname '] = example_name
                docs_path = " static/example {0} Docs.json ".format(example_name)
            data[' docsnumber '] = loadExampleDocs(docs_path, user_id)
            self.write(json.dumps(data))
            self.finish()
        except Exception as ints:
            # Any failure is reported to the client as a generic 400.
            self.set_status(400)
            self.write(" A server error occurred, please contact administrator! ")
            self.finish()
            print(ints)
            return
class UploadProfileHandler(BaseHandler):
    """Accept an uploaded profile file, store it and load it for the user."""

    # NOTE(review): presumably read by BaseHandler to skip authentication - confirm.
    passwordless = True

    def set_default_headers(self):
        """Set the CORS and content-type headers used by this handler."""
        self.set_header(" Access-Control-Allow-Origin ", " * ")
        self.set_header(" Access-Control-Allow-Headers ", " Origin, X-Requested-With, Content-Type, Accept ")
        self.set_header(' Access-Control-Allow-Methods ', ' POST, OPTIONS ')
        self.set_header(' Access-Control-Allow-Credentials ', ' true ')
        self.set_header(' Content-Type ', ' application/json ')

    def options(self):
        """Answer an OPTIONS request with status 204 and no body."""
        self.set_status(204)
        self.finish()

    def post(self):
        """Write the uploaded profile to disk and return its loaded contents.

        The user id comes from the request arguments and the file from the
        upload field of the multipart body. A file with the wrong extension
        is answered with a JSON message (status unchanged); a missing user id
        or any exception is answered with status 400.
        """
        try:
            # get user id from arguments. Must have
            if ' user ' not in self.request.arguments or self.request.arguments[' user '][0] == ' ':
                self.set_status(400)
                self.write(" Missing user ' s id parameter ")
                return
            user_id = self.request.arguments[' user '][0][:128]
            # get file info and body from post data.
            # The uploaded-file record is dict-like, so it must not be sliced:
            # slicing a dict raises TypeError and made every upload fail.
            fileinfo = self.request.files[' upload '][0]
            fname = fileinfo[' filename ']
            extn = os.path.splitext(fname)[1]
            # only profile files with the expected extension are accepted
            if extn != " .oamp ":
                self.write(json.dumps({' respond ': " <b style= \" color: red \" >File must be .oamp compatible profile</b> "}))
                return
            # write data to physical file
            # NOTE(review): user_id comes from the request and is placed in a
            # file path unchecked - consider validating it against traversal.
            cname = " users_files/profile {0} .oamp ".format(user_id)
            # the context manager closes the file even when the write fails
            with open(cname, ' w ') as fh:
                fh.write(fileinfo[' body '])
            data = loadProfile(cname, user_id)
            self.write(json.dumps(data))
            self.finish()
        except Exception as ints:
            # Any failure is reported to the client as a generic 400.
            self.set_status(400)
            self.write(" A server error occurred, please contact administrator! ")
            self.finish()
            print(ints)
            return
class AlreadyConceptsHandler(BaseHandler):
    """Return the concepts already stored for a user, if the client is in sync."""

    # NOTE(review): presumably read by BaseHandler to skip authentication - confirm.
    passwordless = True

    def set_default_headers(self):
        """Set the CORS and content-type headers used by this handler."""
        self.set_header(" Access-Control-Allow-Origin ", " * ")
        self.set_header(" Access-Control-Allow-Headers ", " Origin, X-Requested-With, Content-Type, Accept ")
        self.set_header(' Access-Control-Allow-Methods ', ' GET, OPTIONS ')
        self.set_header(' Access-Control-Allow-Credentials ', ' true ')
        self.set_header(' Content-Type ', ' application/json ')

    def options(self):
        """Answer an OPTIONS request with status 204 and no body."""
        self.set_status(204)
        self.finish()

    def get(self):
        """Read the user's concepts file and return its codes.

        The codes are only returned when the concepts argument sent by the
        client equals the number of counted lines in the stored file;
        otherwise the data entry of the response stays an empty object.
        A missing user id or any exception is answered with status 400.
        """
        try:
            # get user id from arguments. Must have
            if ' user ' not in self.request.arguments or self.request.arguments[' user '][0] == ' ':
                self.set_status(400)
                self.write(" Missing user ' s id parameter ")
                return
            user_id = self.request.arguments[' user '][0][:128]
            data = {}
            data[' data '] = {}
            file_name = " users_files/p %s .tsv " % (user_id)
            if os.path.isfile(file_name):
                codes = {}
                num_lines = 0
                # open through a context manager so the handle is always closed
                with open(file_name) as concepts_file:
                    for line in concepts_file:
                        columns = re.split(r' \ t+ ', line.rstrip(' \t \n \r '))
                        if len(columns) and columns[0] == ' ':
                            # lines without a code are skipped and not counted
                            continue
                        elif len(columns) > 1:
                            codes[columns[0]] = columns[1]
                        elif len(columns) == 1:
                            codes[columns[0]] = ' '
                        num_lines += 1
                # hand the codes back only when the client's concept count
                # matches what is stored on disk
                if ' concepts ' in self.request.arguments and self.request.arguments[' concepts '][0] == str(num_lines):
                    data[' data '] = codes
            self.write(json.dumps(data))
            self.finish()
        except Exception as ints:
            # Any failure is reported to the client as a generic 400.
            self.set_status(400)
            self.write(" A server error occurred, please contact administrator! ")
            self.finish()
            print(ints)
            return
class UploadContentFileHandler(BaseHandler):
    """Parse an uploaded tab-separated concepts file and return its codes."""

    # NOTE(review): presumably read by BaseHandler to skip authentication - confirm.
    passwordless = True

    def set_default_headers(self):
        """Set the CORS and content-type headers used by this handler."""
        self.set_header(" Access-Control-Allow-Origin ", " * ")
        self.set_header(" Access-Control-Allow-Headers ", " Origin, X-Requested-With, Content-Type, Accept ")
        self.set_header(' Access-Control-Allow-Methods ', ' POST, OPTIONS ')
        self.set_header(' Access-Control-Allow-Credentials ', ' true ')
        self.set_header(' Content-Type ', ' application/json ')

    def options(self):
        """Answer an OPTIONS request with status 204 and no body."""
        self.set_status(204)
        self.finish()

    def post(self):
        """Split the uploaded file into codes and send them back as JSON.

        Each line is split on the tab pattern; the first column becomes the
        key and the second column, when present, its value. A file with a
        single line yields an error message instead of the codes. A missing
        user id, a wrong file extension or any exception is answered with
        status 400.
        """
        try:
            # get user id from arguments. Must have
            if ' user ' not in self.request.arguments or self.request.arguments[' user '][0] == ' ':
                self.set_status(400)
                self.write(" Missing user ' s id parameter ")
                return
            # NOTE(review): validated but not used any further in this handler.
            user_id = self.request.arguments[' user '][0][:128]
            # get file info and body from post data.
            # The uploaded-file record is dict-like, so it must not be sliced:
            # slicing a dict raises TypeError and made every upload fail.
            fileinfo = self.request.files[' upload '][0]
            fname = fileinfo[' filename ']
            extn = os.path.splitext(fname)[1]
            # only tab-separated or plain-text uploads are accepted
            if extn != " .tsv " and extn != " .txt ":
                self.set_status(400)
                self.write(" File must be .tsv or .txt... ")
                return
            codes = {}
            lines = fileinfo[' body '].splitlines()
            for line in lines:
                columns = re.split(r' \ t+ ', line.rstrip(' \t \n \r '))
                if len(columns) and columns[0] == ' ':
                    continue
                elif len(columns) > 1:
                    codes[columns[0]] = columns[1]
                elif len(columns) == 1:
                    codes[columns[0]] = ' '
            # data to be sent
            data = {}
            if len(lines) == 1:
                data[' error '] = " File <b> {0} </b> uploaded.<br><b>1 Code</b> loaded! <i>Please make sure that you separate each code with newline!</i> ".format(fname)
            else:
                data[' data '] = codes
                data[' respond '] = " <b> {0} Codes</b> loaded successfully! ".format(len(lines))
                # NOTE(review): placed in the success branch next to the
                # message, as in UpdateConceptsHandler - confirm the nesting.
                data[' concepts '] = str(len(lines))
            self.write(json.dumps(data))
            self.finish()
        except Exception as ints:
            # Any failure is reported to the client as a generic 400.
            self.set_status(400)
            self.write(" A server error occurred, please contact administrator! ")
            self.finish()
            print(ints)
            return
class UpdateConceptsHandler(BaseHandler):
    """Persist the concepts sent by the client into the user's concepts file."""

    # NOTE(review): presumably read by BaseHandler to skip authentication - confirm.
    passwordless = True

    def set_default_headers(self):
        """Set the CORS and content-type headers used by this handler."""
        self.set_header(" Access-Control-Allow-Origin ", " * ")
        self.set_header(" Access-Control-Allow-Headers ", " Origin, X-Requested-With, Content-Type, Accept ")
        self.set_header(' Access-Control-Allow-Methods ', ' POST, OPTIONS ')
        self.set_header(' Access-Control-Allow-Credentials ', ' true ')
        self.set_header(' Content-Type ', ' application/json ')

    def options(self):
        """Answer an OPTIONS request with status 204 and no body."""
        self.set_status(204)
        self.finish()

    def post(self):
        """Write the submitted concepts to disk and report how many were saved.

        The JSON body carries the user id and a JSON-encoded mapping of
        concepts. Entries whose key fails the blank check are skipped. A
        missing user id, an empty result or any exception is answered with
        status 400.
        """
        try:
            # get user id from body. Must have
            request_arguments = json.loads(self.request.body)
            if ' user ' not in request_arguments or request_arguments[' user '] == ' ':
                self.set_status(400)
                self.write(" Missing user ' s id argument ")
                return
            user_id = request_arguments[' user '][:128]
            # get data; the body is already decoded, so it is not parsed again
            concepts = json.loads(request_arguments[' concepts '])
            # write data to physical file
            cname = " users_files/p {0} .tsv ".format(user_id)
            concepts_len = 0
            # the context manager closes the file even when a write fails
            with open(cname, ' w ') as fh:
                for key, value in concepts.iteritems():
                    if key == ' ':
                        continue
                    concepts_len += 1
                    fh.write(" {0} \t {1} \n ".format(key, value))
            # data to be sent
            data = {}
            if concepts_len == 0:
                # note: the file above has already been rewritten at this point
                self.set_status(400)
                self.write(" You have to provide at least one concept to continue! ")
                return
            else:
                data[' respond '] = " <b> {0} Codes</b> loaded successfully! ".format(concepts_len)
                data[' concepts '] = str(concepts_len)
            self.write(json.dumps(data))
            self.finish()
        except Exception as ints:
            # Any failure is reported to the client as a generic 400.
            self.set_status(400)
            self.write(" A server error occurred, please contact administrator! ")
            self.finish()
            print(ints)
            return
class GetDocSamplesHandler(BaseHandler):
    """Serve the fixed list of document samples offered to the client."""

    # NOTE(review): presumably read by BaseHandler to skip authentication - confirm.
    passwordless = True

    def set_default_headers(self):
        """Set the CORS and content-type headers used by this handler."""
        default_headers = (
            (" Access-Control-Allow-Origin ", " * "),
            (" Access-Control-Allow-Headers ", " Origin, X-Requested-With, Content-Type, Accept "),
            (' Access-Control-Allow-Methods ', ' GET, OPTIONS '),
            (' Access-Control-Allow-Credentials ', ' true '),
            (' Content-Type ', ' application/json '),
        )
        for header_name, header_value in default_headers:
            self.set_header(header_name, header_value)

    def options(self):
        """Answer an OPTIONS request with status 204 and no body."""
        self.set_status(204)
        self.finish()

    def get(self):
        """Return the hard-coded sample names with their document counts.

        The user argument must pass the presence check, otherwise status 400
        is returned; any exception is answered with status 400 as well.
        """
        try:
            query_args = self.request.arguments
            # The user id is mandatory.
            if not (' user ' in query_args and query_args[' user '][0] != ' '):
                self.set_status(400)
                self.write(" Missing user ' s id parameter ")
                return
            # Extracted like in the sibling handlers; not used any further here.
            user_id = query_args[' user '][0][:128]
            doc_samples = [
                {' name ': ' Egi ', ' documents ': 104},
                {' name ': ' Clarin ', ' documents ': 7},
                {' name ': ' Wellcome Trust ', ' documents ': 250},
                {' name ': ' ARIADNE ', ' documents ': 502},
                {' name ': ' RCUK ', ' documents ': 104},
                {' name ': ' TARA ', ' documents ': 1023},
                {' name ': ' NIH ', ' documents ': 140},
            ]
            data = {' documents ': doc_samples}
            self.write(json.dumps(data))
            self.finish()
        except Exception as ints:
            # Any failure is reported to the client as a generic 400.
            self.set_status(400)
            self.write(" A server error occurred, please contact administrator! ")
            self.finish()
            print(ints)
            return
class UploadDocumentsHandler(BaseHandler):
    """Store an uploaded documents file for a user in JSON-lines form."""

    # NOTE(review): presumably read by BaseHandler to skip authentication - confirm.
    passwordless = True

    def set_default_headers(self):
        """Set the CORS and content-type headers used by this handler."""
        self.set_header(" Access-Control-Allow-Origin ", " * ")
        self.set_header(" Access-Control-Allow-Headers ", " Origin, X-Requested-With, Content-Type, Accept ")
        self.set_header(' Access-Control-Allow-Methods ', ' POST, OPTIONS ')
        self.set_header(' Access-Control-Allow-Credentials ', ' true ')
        self.set_header(' Content-Type ', ' application/json ')

    def options(self):
        """Answer an OPTIONS request with status 204 and no body."""
        self.set_status(204)
        self.finish()

    def post(self):
        """Save the upload, convert or validate it, and report the line count.

        A PDF upload is converted with the external ``pdftotext`` command and
        wrapped into a single JSON document; a text or JSON upload must hold
        one JSON value per line and is renamed to the user's docs file.
        A missing user id, an unsupported extension, a failed conversion,
        invalid JSON or any exception is answered with status 400.
        """
        try:
            # get user id from arguments. Must have
            if ' user ' not in self.request.arguments or self.request.arguments[' user '][0] == ' ':
                self.set_status(400)
                self.write(" Missing user ' s id parameter ")
                return
            user_id = self.request.arguments[' user '][0][:128]
            # The uploaded-file record is dict-like, so it must not be sliced:
            # slicing a dict raises TypeError and made every upload fail.
            fileinfo = self.request.files[' upload '][0]
            fname = fileinfo[' filename ']
            extn = os.path.splitext(fname)[1]
            # data to be sent
            data = {}
            # must be .pdf, .txt or .json
            if extn != " .pdf " and extn != " .txt " and extn != " .json ":
                self.set_status(400)
                self.write(" File must be .pdf, .json or .txt ")
                return
            # write data to physical file
            # NOTE(review): user_id comes from the request and is placed in a
            # file path unchecked - consider validating it against traversal.
            cname = " users_files/docs {0} {1} ".format(user_id, extn)
            with open(cname, ' w ') as fh:
                fh.write(fileinfo[' body '])
            # Convert pdf to txt and then to json format
            if extn == " .pdf ":
                import subprocess as sub
                p = sub.Popen([' pdftotext ', ' -enc ', ' UTF-8 ', cname], stdout=sub.PIPE, stderr=sub.PIPE)
                output, errors = p.communicate()
                # anything written to stderr is treated as a failed conversion
                if errors:
                    self.set_status(400)
                    self.write(" An error occurred when trying to convert .pdf to .txt... ")
                    return
                os.remove(cname)
                cname = " users_files/docs {0} .txt ".format(user_id)
                with open(cname, ' r ') as fin:
                    docData = fin.read().replace(' \n ', ' ')
                if len(docData) == 0:
                    self.set_status(400)
                    self.write(" An error occurred when trying to convert .pdf to text... ")
                    return
                with open(" users_files/docs {0} .json ".format(user_id), " wb ") as fout:
                    json.dump({" text ": docData, " id ": os.path.splitext(fname)[0]}, fout)
                os.remove(cname)
            # else check if txt is in correct json format
            elif extn == " .txt " or extn == " .json ":
                try:
                    # every line must parse as JSON; the parsed values are
                    # not needed. The file is closed before it is renamed
                    # or removed.
                    with open(cname, ' r ') as uploaded:
                        for line in uploaded:
                            json.loads(line)
                    os.rename(cname, " users_files/docs {0} .json ".format(user_id))
                except ValueError as e:
                    self.set_status(400)
                    self.write(" File is not in a valid json format... ")
                    os.remove(cname)
                    print(e)
                    return
            file_name = " users_files/docs %s .json " % (user_id)
            if os.path.isfile(file_name):
                with open(file_name) as stored:
                    lines = sum(1 for line in stored)
                data[' respond '] = " <b> {0} Documents</b> loaded successfully! ".format(lines)
                data[' data '] = lines
            self.write(json.dumps(data))
            self.finish()
        except Exception as ints:
            # Any failure is reported to the client as a generic 400.
            self.set_status(400)
            self.write(" A server error occurred, please contact administrator! ")
            self.finish()
            print(ints)
            return
class ChooseDocSampleHandler(BaseHandler):
    """Copy one of the bundled document samples into the user's docs file."""

    # NOTE(review): presumably read by BaseHandler to skip authentication - confirm.
    passwordless = True

    def set_default_headers(self):
        """Set the CORS and content-type headers used by this handler."""
        self.set_header(" Access-Control-Allow-Origin ", " * ")
        self.set_header(" Access-Control-Allow-Headers ", " Origin, X-Requested-With, Content-Type, Accept ")
        self.set_header(' Access-Control-Allow-Methods ', ' POST, OPTIONS ')
        self.set_header(' Access-Control-Allow-Credentials ', ' true ')
        self.set_header(' Content-Type ', ' application/json ')

    def options(self):
        """Answer an OPTIONS request with status 204 and no body."""
        self.set_status(204)
        self.finish()

    def post(self):
        """Install the requested sample and report how many lines it holds.

        The JSON body carries the user id and the sample name. A missing
        user id or sample name, an unknown sample, an empty sample file or
        any exception is answered with status 400.
        """
        try:
            # get user id from body. Must have
            request_arguments = json.loads(self.request.body)
            if ' user ' not in request_arguments or request_arguments[' user '] == ' ':
                self.set_status(400)
                self.write(" Missing user ' s id argument ")
                return
            user_id = request_arguments[' user '][:128]
            if ' docsample ' not in request_arguments or request_arguments[' docsample '] == ' ':
                self.set_status(400)
                self.write(" A doc sample name must be provided ")
                return
            doc_sample = request_arguments[' docsample '][:128]
            sample_file_name = " "
            if doc_sample == " Egi ":
                sample_file_name = " static/egi_sample.tsv "
            elif doc_sample == " Clarin ":
                sample_file_name = " static/exampleClarinDocs.json "
            elif doc_sample == " Wellcome Trust ":
                sample_file_name = " static/exampleWTDocs.json "
            else:
                self.set_status(400)
                self.write(" No Doc sample with this name ")
                return
            # write data to physical file; copyfile opens and closes both
            # files itself, so no handle is left open
            cname = " users_files/docs {0} .json ".format(user_id)
            copyfile(sample_file_name, cname)
            with open(cname) as copied:
                lines_num = sum(1 for line in copied)
            # data to be sent
            data = {}
            if lines_num == 0:
                self.set_status(400)
                self.write(" File must contain atleast one document... ")
                return
            else:
                data[' respond '] = " <b> {0} Documents</b> loaded successfully! ".format(lines_num)
                data[' data '] = lines_num
            self.write(json.dumps(data))
            self.finish()
        except Exception as ints:
            # Any failure is reported to the client as a generic 400.
            self.set_status(400)
            self.write(" A server error occurred, please contact administrator! ")
            self.finish()
            print(ints)
            return
# GET handler that tells the client how many documents are stored for a user.
# The JSON response carries a ' data ' entry: -1 or 0 depending on
# msettings.RESET_FIELDS, or the number of lines of the user's docs file when
# that file exists.
class AlreadyDocumentsHandler ( BaseHandler ) :
# NOTE(review): presumably read by BaseHandler to skip authentication - confirm.
passwordless = True
# Sets the CORS and content-type headers used by this handler.
def set_default_headers ( self ) :
2018-03-10 18:32:45 +01:00
self . set_header ( " Access-Control-Allow-Origin " , " * " )
2018-02-28 12:30:50 +01:00
self . set_header ( " Access-Control-Allow-Headers " , " Origin, X-Requested-With, Content-Type, Accept " )
self . set_header ( ' Access-Control-Allow-Methods ' , ' GET, OPTIONS ' )
self . set_header ( ' Access-Control-Allow-Credentials ' , ' true ' )
self . set_header ( ' Content-Type ' , ' application/json ' )
# Answers an OPTIONS request with status 204.
def options ( self ) :
# no body
self . set_status ( 204 )
self . finish ( )
# Reports the stored document count; a missing user id or any exception is
# answered with status 400.
def get ( self ) :
try :
2018-03-10 18:32:45 +01:00
# get user id from arguments. Must have
if ' user ' not in self . request . arguments or self . request . arguments [ ' user ' ] [ 0 ] == ' ' :
2018-02-28 12:30:50 +01:00
self . set_status ( 400 )
2018-03-10 18:32:45 +01:00
self . write ( " Missing user ' s id parameter " )
2018-02-28 12:30:50 +01:00
return
2018-11-28 17:55:22 +01:00
# only the first 128 characters of the user id are used
user_id = self . request . arguments [ ' user ' ] [ 0 ] [ : 128 ]
2018-02-28 12:30:50 +01:00
data = { }
# -1 is sent while the module-level RESET_FIELDS flag is 1, 0 otherwise
if msettings . RESET_FIELDS == 1 :
data [ ' data ' ] = - 1
else :
data [ ' data ' ] = 0
2018-03-13 16:02:55 +01:00
# NOTE(review): indentation is not visible here, so it is not certain whether
# the file check and the RESET_FIELDS reset below are nested under the
# branches above - confirm against the original file.
file_name = " users_files/docs %s .json " % ( user_id )
2018-02-28 12:30:50 +01:00
if os . path . isfile ( file_name ) :
# one document per line: the count is the number of lines in the file
# NOTE(review): the file object is never closed explicitly.
data [ ' data ' ] = sum ( 1 for line in open ( file_name ) )
# the flag lives on the settings module, so it is shared by all requests
msettings . RESET_FIELDS = 0
self . write ( json . dumps ( data ) )
self . finish ( )
except Exception as ints :
# any failure is reported to the client as a generic 400
self . set_status ( 400 )
self . write ( " A server error occurred, please contact administrator! " )
self . finish ( )
print ints
return
# POST handler that runs the mining step for a user.
# It reads the user id and the mining parameters from the JSON request body,
# assembles SQL text from them, executes it on a cursor obtained from
# msettings.Connection and writes the grouped matches back as JSON.
# NOTE(review): indentation is not visible in this block, so the nesting of the
# if/for bodies described in the comments below should be confirmed.
class RunMiningHandler ( BaseHandler ) :
# NOTE(review): presumably read by BaseHandler to skip authentication - confirm.
passwordless = True
# Sets the CORS and content-type headers used by this handler.
def set_default_headers ( self ) :
2018-03-10 18:32:45 +01:00
self . set_header ( " Access-Control-Allow-Origin " , " * " )
2018-02-28 12:30:50 +01:00
self . set_header ( " Access-Control-Allow-Headers " , " Origin, X-Requested-With, Content-Type, Accept " )
self . set_header ( ' Access-Control-Allow-Methods ' , ' POST, OPTIONS ' )
self . set_header ( ' Access-Control-Allow-Credentials ' , ' true ' )
self . set_header ( ' Content-Type ' , ' application/json ' )
# Answers an OPTIONS request with status 204.
def options ( self ) :
# no body
self . set_status ( 204 )
self . finish ( )
# Builds and runs the mining queries; a missing user id, out-of-range context
# sizes, no uploaded documents or any exception are answered with status 400.
def post ( self ) :
try :
2018-03-10 18:32:45 +01:00
# get user id from body. Must have
request_arguments = json . loads ( self . request . body )
if ' user ' not in request_arguments or request_arguments [ ' user ' ] == ' ' :
2018-02-28 12:30:50 +01:00
self . set_status ( 400 )
2018-03-10 18:32:45 +01:00
self . write ( " Missing user ' s id argument " )
2018-02-28 12:30:50 +01:00
return
2018-11-28 17:55:22 +01:00
# only the first 128 characters of the user id are used
user_id = request_arguments [ ' user ' ] [ : 128 ]
2018-03-10 18:32:45 +01:00
# a body without a parameters entry raises here and ends in the generic 400
mining_parameters = request_arguments [ ' parameters ' ]
2018-02-28 12:30:50 +01:00
# get the database cursor
cursor = msettings . Connection . cursor ( )
# data to be sent
data = { }
# set the textwindow size
# defaults; contextprev and contextnext may be overridden by the request below
extracontextprev = 10
extracontextnext = 10
contextprev = 10
contextnext = 5
# Automatically find middle size from grant codes white spaces
2018-11-28 17:55:22 +01:00
# NOTE(review): user_id is interpolated into the query text here and in the
# queries below - it comes straight from the request.
querygrantsize = ''' SELECT max(p1) FROM (SELECT regexpcountwords( ' \ s ' ,stripchars(p1)) AS p1 FROM (setschema ' p1,p2 ' file ' users_files/p {0} .tsv ' dialect:tsv)) ''' . format ( user_id )
2018-02-28 12:30:50 +01:00
# first column of the first result row, plus one
contextmiddle = [ r for r in cursor . execute ( querygrantsize ) ] [ 0 ] [ 0 ] + 1
2018-03-10 18:32:45 +01:00
# client-supplied context sizes replace the defaults; values outside 0..50 are rejected
if ' contextprev ' in mining_parameters and mining_parameters [ ' contextprev ' ] != ' ' :
contextprev = int ( mining_parameters [ ' contextprev ' ] )
2018-04-18 17:00:21 +02:00
if contextprev < 0 or contextprev > 50 :
2018-02-28 12:30:50 +01:00
self . set_status ( 400 )
self . write ( " Context size must be in its limits... " )
return
2018-03-10 18:32:45 +01:00
if ' contextnext ' in mining_parameters and mining_parameters [ ' contextnext ' ] != ' ' :
contextnext = int ( mining_parameters [ ' contextnext ' ] )
2018-04-18 17:00:21 +02:00
if contextnext < 0 or contextnext > 50 :
2018-02-28 12:30:50 +01:00
self . set_status ( 400 )
self . write ( " Context size must be in its limits... " )
return
# Assemble the j2s(...) expression texts that name the prev*/next* columns:
# j2sextraprev lists prev1 .. prev<extracontextprev>
j2sextraprev = " j2s(prev1 "
for cnt in xrange ( 2 , extracontextprev + 1 ) :
j2sextraprev + = " ,prev " + str ( cnt )
j2sextraprev + = " ) "
# j2sprev lists the contextprev columns that follow; j2scontext collects the
# prev columns, middle and the next columns
j2sprev = " "
j2scontext = " ( "
if contextprev :
j2scontext = " j2s(prev " + str ( extracontextprev + 1 )
j2sprev = " j2s(prev " + str ( extracontextprev + 1 )
for cnt in xrange ( extracontextprev + 2 , extracontextprev + contextprev + 1 ) :
j2sprev + = " ,prev " + str ( cnt )
j2scontext + = " ,prev " + str ( cnt )
j2sprev + = " ) "
j2scontext + = " , "
else :
j2scontext = " j2s( "
# j2snext lists next1 .. next<contextnext>
j2snext = " j2s(next1 "
j2scontext + = " middle "
if contextnext :
j2scontext + = " ,next1 "
for cnt in xrange ( 2 , contextnext + 1 ) :
j2snext + = " ,next " + str ( cnt )
j2scontext + = " ,next " + str ( cnt )
j2snext + = " ) "
j2scontext + = " ) "
# j2sextranext lists the extracontextnext columns after the next window
j2sextranext = " j2s(next " + str ( contextnext + 1 )
for cnt in xrange ( contextnext + 2 , extracontextnext + contextnext + 1 ) :
j2sextranext + = " ,next " + str ( cnt )
j2sextranext + = " ) "
# create positive and negative words weighted regex text
pos_set = neg_set = conf = whr_conf = ' '
2018-03-10 18:32:45 +01:00
if ' poswords ' in mining_parameters and mining_parameters [ ' poswords ' ] != ' {} ' :
2018-02-28 12:30:50 +01:00
data [ ' poswords ' ] = [ ]
# construct math string for positive words matching calculation with weights
2018-03-10 18:32:45 +01:00
# the words arrive as a JSON-encoded mapping of word to weight
pos_words = json . loads ( mining_parameters [ ' poswords ' ] )
2018-02-28 12:30:50 +01:00
for key , value in pos_words . iteritems ( ) :
# ONLY FOR EGI
2018-03-26 12:55:47 +02:00
if ' lowercase ' in mining_parameters and mining_parameters [ ' lowercase ' ] == 1 :
2018-03-23 15:02:35 +01:00
key = key . decode ( ' utf-8 ' ) . lower ( )
2018-03-26 12:55:47 +02:00
if ' stemming ' in mining_parameters and mining_parameters [ ' stemming ' ] == 1 :
2018-03-23 15:02:35 +01:00
key = ' stem( ' + key + ' ) '
# NOTE(review): key and value come from the request and are placed in the query text unescaped
pos_set + = r ' regexpcountuniquematches( " %s " , %s )* %s + ' % ( key , j2scontext , value )
2018-02-28 12:30:50 +01:00
# ORIGINAL
# pos_set += r'regexpcountuniquematches("(?:\b)%s(?:\b)",j2s(prev,middle,next))*%s + ' % (key,value)
data [ ' poswords ' ] . append ( key )
# terminates the chain of "term +" pieces built above
pos_set + = " 0 "
2018-03-10 18:32:45 +01:00
if ' negwords ' in mining_parameters and mining_parameters [ ' negwords ' ] != ' {} ' :
2018-02-28 12:30:50 +01:00
data [ ' negwords ' ] = [ ]
# construct math string for negative words matching calculation with weights
2018-03-10 18:32:45 +01:00
neg_words = json . loads ( mining_parameters [ ' negwords ' ] )
2018-02-28 12:30:50 +01:00
for key , value in neg_words . iteritems ( ) :
# ONLY FOR EGI
2018-03-26 12:55:47 +02:00
if ' lowercase ' in mining_parameters and mining_parameters [ ' lowercase ' ] == 1 :
2018-03-23 15:02:35 +01:00
key = key . decode ( ' utf-8 ' ) . lower ( )
2018-03-26 12:55:47 +02:00
if ' stemming ' in mining_parameters and mining_parameters [ ' stemming ' ] == 1 :
2018-03-23 15:02:35 +01:00
key = ' stem( ' + key + ' ) '
neg_set + = r ' regexpcountuniquematches( " %s " , %s )* %s + ' % ( key , j2scontext , value )
2018-02-28 12:30:50 +01:00
# ORIGINAL
# neg_set += r'regexpcountuniquematches("(?:\b)%s(?:\b)",j2s(prev,middle,next))*%s - ' % (key,value)
data [ ' negwords ' ] . append ( key )
neg_set + = " 0 "
# the confidence column is the positive score minus the negative score,
# using whichever of the two expressions was built
if pos_set != ' ' and neg_set != ' ' :
2018-04-18 17:00:21 +02:00
conf = " , ( {0} - ( {1} )) " . format ( pos_set , neg_set )
2018-02-28 12:30:50 +01:00
elif pos_set != ' ' :
conf = " , {0} " . format ( pos_set )
elif neg_set != ' ' :
2018-04-18 17:00:21 +02:00
conf = " , -( {0} ) " . format ( neg_set )
2018-02-28 12:30:50 +01:00
# when a confidence expression exists, it is named and used as a filter
if conf != ' ' :
conf + = ' as conf '
whr_conf = ' and conf>=0 '
2018-03-26 12:55:47 +02:00
print conf
2018-02-28 12:30:50 +01:00
2018-11-28 17:55:22 +01:00
# docs process
2018-02-28 12:30:50 +01:00
if numberOfDocsUploaded ( user_id ) != 0 :
2018-11-28 17:55:22 +01:00
# the document text column, optionally narrowed to one area of the document
document_source = ' d2 '
if ' documentarea ' in mining_parameters and mining_parameters [ ' documentarea ' ] != ' ' :
print mining_parameters [ ' documentarea ' ]
if mining_parameters [ ' documentarea ' ] == ' acknowledgment ' :
document_source = ' textacknowledgments( ' + document_source + ' ) '
elif mining_parameters [ ' documentarea ' ] == ' citations ' :
document_source = ' textreferences( ' + document_source + ' ) '
# base filter expressions for the documents, the grant codes (p1) and the
# acknowledgment statements (p2); the options below wrap them further
doc_filters = " comprspaces(regexpr( ' [ \n | \r ] ' , " + document_source + " , ' ' )) "
2018-04-12 12:48:02 +02:00
grant_filters = " stripchars(comprspaces(regexpr( \" \\ ' \" , p1, ' ' ))) "
2018-02-28 12:30:50 +01:00
ackn_filters = " comprspaces(regexpr( \" \\ ' \" , p2, ' ' )) "
2018-03-26 12:55:47 +02:00
if ' punctuation ' in mining_parameters and mining_parameters [ ' punctuation ' ] == 1 :
2018-02-28 12:30:50 +01:00
doc_filters = ' keywords( ' + doc_filters + ' ) '
2018-04-12 12:48:02 +02:00
grant_filters = ' keywords( ' + grant_filters + ' ) '
2018-02-28 12:30:50 +01:00
ackn_filters = ' keywords( ' + ackn_filters + ' ) '
2018-03-26 12:55:47 +02:00
if ' lowercase ' in mining_parameters and mining_parameters [ ' lowercase ' ] == 1 :
2018-03-22 22:32:33 +01:00
doc_filters = ' lower( ' + doc_filters + ' ) '
2018-04-12 12:48:02 +02:00
grant_filters = ' lower( ' + grant_filters + ' ) '
2018-03-22 22:32:33 +01:00
ackn_filters = ' lower( ' + ackn_filters + ' ) '
2018-03-26 12:55:47 +02:00
if ' stopwords ' in mining_parameters and mining_parameters [ ' stopwords ' ] == 1 :
2018-02-28 12:30:50 +01:00
doc_filters = ' filterstopwords( ' + doc_filters + ' ) '
2018-04-12 12:48:02 +02:00
grant_filters = ' filterstopwords( ' + grant_filters + ' ) '
2018-02-28 12:30:50 +01:00
ackn_filters = ' filterstopwords( ' + ackn_filters + ' ) '
2018-03-26 12:55:47 +02:00
if ' stemming ' in mining_parameters and mining_parameters [ ' stemming ' ] == 1 :
2018-03-23 15:02:35 +01:00
doc_filters = ' stem( ' + doc_filters + ' ) '
2018-04-12 12:48:02 +02:00
grant_filters = ' stem( ' + grant_filters + ' ) '
2018-03-23 15:02:35 +01:00
ackn_filters = ' stem( ' + ackn_filters + ' ) '
2018-02-28 12:30:50 +01:00
# rebuild the per-user temp table holding the filtered concepts file
list ( cursor . execute ( " drop table if exists grantstemp " + user_id , parse = False ) )
2018-04-12 12:48:02 +02:00
query_pre_grants = " create temp table grantstemp {0} as select {1} as gt1, case when p2 is null then null else {2} end as gt2 from (setschema ' p1,p2 ' file ' users_files/p {0} .tsv ' dialect:tsv) " . format ( user_id , grant_filters , ackn_filters )
2018-02-28 12:30:50 +01:00
cursor . execute ( query_pre_grants )
2018-11-28 17:55:22 +01:00
# query00get = "select * from grantstemp{0}".format(user_id)
# results00get = [r for r in cursor.execute(query00get)]
# print results00get
2018-02-28 12:30:50 +01:00
# rebuild the per-user temp table holding the filtered documents
list ( cursor . execute ( " drop table if exists docs " + user_id , parse = False ) )
2018-03-13 16:02:55 +01:00
query1 = " create temp table docs {0} as select d1, {1} as d2 from (setschema ' d1,d2 ' select jsonpath(c1, ' $.id ' , ' $.text ' ) from (file ' users_files/docs {0} .json ' )) " . format ( user_id , doc_filters )
2018-02-28 12:30:50 +01:00
cursor . execute ( query1 )
else :
self . set_status ( 400 )
self . write ( " You have to provide atleast 1 document... " )
return
2018-11-28 17:55:22 +01:00
# grants process
2018-02-28 12:30:50 +01:00
list ( cursor . execute ( " drop table if exists grants " + user_id , parse = False ) )
# string concatenation workaround because of the special characters conflicts
2018-03-10 18:32:45 +01:00
# NOTE(review): words_split is assigned only under this condition but is read
# further down - confirm that a value is always present when it is used.
if ' wordssplitnum ' in mining_parameters and mining_parameters [ ' wordssplitnum ' ] != ' ' :
words_split = int ( mining_parameters [ ' wordssplitnum ' ] )
2018-03-26 12:55:47 +02:00
gt2 = ' comprspaces(gt2) '
if ' lowercase ' in mining_parameters and mining_parameters [ ' lowercase ' ] == 1 :
2018-03-22 22:32:33 +01:00
gt2 = ' lower( ' + gt2 + ' ) '
2018-03-26 12:55:47 +02:00
if ' stemming ' in mining_parameters and mining_parameters [ ' stemming ' ] == 1 :
2018-03-23 15:02:35 +01:00
gt2 = ' stem( ' + gt2 + ' ) '
2018-02-28 12:30:50 +01:00
# ONLY FOR EGI
2018-03-22 22:32:33 +01:00
# a split size of 1..20 windows the acknowledgment text; any other value
# keeps it whole with placeholder prev/next columns
if 0 < words_split and words_split < = 20 :
acknowledgment_split = r ' textwindow2s( ' + gt2 + ' ,0, ' + str ( words_split ) + r ' ,0) '
2018-02-28 12:30:50 +01:00
else :
2018-03-22 22:32:33 +01:00
acknowledgment_split = r ' " dummy " as prev, ' + gt2 + ' as middle, " dummy " as next '
2018-02-28 12:30:50 +01:00
# ORIGINAL
2018-03-22 22:32:33 +01:00
# if 0 < words_split and words_split <= 20:
2018-02-28 12:30:50 +01:00
# acknowledgment_split = r'textwindow2s(regexpr("([\-\[\]\/\{\}\(\)\*\+\?\.\\\^\$\|])", gt2, "\\\1"),0,'+str(words_split)+r',0)'
# else:
# acknowledgment_split = r'"dummy" as prev, regexpr("([\-\[\]\/\{\}\(\)\*\+\?\.\\\^\$\|])", gt2, "\\\1") as middle, "dummy" as next'
# query0 = r"create temp table grants"+user_id+r' as select gt1 as g1, jmergeregexp(jgroup("(?<=[\s\b])"||middle||"(?=[\s\b])")) as g2 from '+r"(setschema 'gt1,prev,middle,next' select gt1, "+acknowledgment_split+r' from grantstemp'+user_id+r' where (gt1 or gt1!="") and gt2 not null) group by gt1 union all select distinct gt1 as g1, "(?!.*)" as g2 from grantstemp'+user_id+r" where (gt1 or gt1!='') and gt2 is null"
2018-03-26 12:55:47 +02:00
# builds the per-user grants table (g1 = code, g2 = regular expression) as a
# union of three selects over grantstemp
query0 = r " create temp table grants " + user_id + r ' as select gt1 as g1, jmergeregexp(jgroup(middle)) as g2 from ' + r " (setschema ' gt1,prev,middle,next ' select gt1, " + acknowledgment_split + r ' from grantstemp ' + user_id + r ' where (gt1 or gt1!= " " ) and gt2 != " " ) group by gt1 union all select distinct gt1 as g1, " (.+) " as g2 from grantstemp ' + user_id + r " where (gt1 or gt1!= ' ' ) and gt2 = ' ' union all select distinct gt1 as g1, jmergeregexp(gt2) as g2 from grantstemp " + user_id + r " where (gt1 or gt1!= ' ' ) and (gt2 or gt2!= ' ' ) and regexpcountwords( ' ' , " + gt2 + r " )< " + str ( words_split ) + r " "
2018-02-28 12:30:50 +01:00
cursor . execute ( query0 )
# debug output: dump the freshly built grants table to stdout
query0get = " select * from grants {0} " . format ( user_id )
results0get = [ r for r in cursor . execute ( query0get ) ]
print results0get
# FOR EGI ONLY
2018-03-26 12:55:47 +02:00
# the matching query: joins text windows of the documents with the grants
# table and selects nine columns per match, grouped by document id (d1)
query2 = r ' select distinct d1, r1, extraprev, prev, middle, next, extranext, case when g2= " (.+) " then " [ ] " else acknmatch end as acknmatch, max(confidence) as confidence from (select d1, regexpr( " (?: \ b| \ d| \ W)( " ||T.g1|| " )(?: \ b| \ d| \ W) " ,middle) as r1, g1, g2, regexpcountuniquematches(g2, ' + j2scontext + r ' ) as confidence, stripchars( ' + j2sextraprev + r ' ) as extraprev, stripchars( ' + j2sprev + r ' ) as prev, middle, stripchars( ' + j2snext + r ' ) as next, stripchars( ' + j2sextranext + r ' ) as extranext, ' + j2scontext + r ' as context, regexprfindall(g2, ' + j2scontext + r ' ) as acknmatch ' + conf + r ' from (select d1, textwindow(d2, ' + str ( extracontextprev + contextprev ) + r ' , ' + str ( extracontextnext + contextnext ) + r ' , ' + str ( contextmiddle ) + r ' ) from docs ' + user_id + r ' ), (select g1, g2 from grants ' + user_id + r ' ) T where r1 not null and acknmatch!= " [] " ' + whr_conf + r ' ) group by d1 '
2018-02-28 12:30:50 +01:00
# ORIGINAL
# query2 = "select d1, g1, context, acknmatch, max(confidence) as confidence from (select d1, g1, regexpcountuniquematches(g2, j2s(prev,middle,next)) as confidence, j2s(prev,middle,next) as context, regexprfindall(g2, j2s(prev,middle,next)) as acknmatch {0} from (select d1, textwindow2s(d2,20,{3},20) from docs{1}), (select g1, g2 from grants{1}) T where regexprmatches(T.g1,middle) {2}) group by d1".format(conf, user_id, whr_conf, contextmiddle)
# OLD ONE
# query2 = "select c1, c3 {0} from (select c1, textwindow2s(c2,10,1,5) from (select * from docs{1})), (select c3 from grants{1}) T where middle = T.c3 {2}".format(conf, user_id, whr_conf)
results = [ r for r in cursor . execute ( query2 ) ]
print results
# group the result rows by their first column (the document id); r[7] is
# decoded from JSON before it is sent
doctitles = { }
for r in results :
if r [ 0 ] not in doctitles :
doctitles [ r [ 0 ] ] = [ ]
doctitles [ r [ 0 ] ] . append ( { " match " : r [ 1 ] , " extraprev " : r [ 2 ] , " prev " : r [ 3 ] , " middle " : r [ 4 ] , " next " : r [ 5 ] , " extranext " : r [ 6 ] , " acknmatch " : json . loads ( r [ 7 ] ) , " confidence " : r [ 8 ] } )
data [ ' matches ' ] = doctitles
data [ ' respond ' ] = " Matching results updated! "
self . write ( json . dumps ( data ) )
self . flush ( )
self . finish ( )
except Exception as ints :
# any failure is reported to the client as a generic 400
self . set_status ( 400 )
self . write ( " A server error occurred, please contact administrator! " )
self . finish ( )
print ints
return
class PrepareSavedProfileHandler(BaseHandler):
    """Rebuild the tables of a user's working profile file from a JSON request.

    POST expects a JSON object carrying the user id, the profile parameters
    and the concepts entry. The handler opens the user's profile file through
    madis, recreates the poswords/negwords/filters/grants tables, fills them
    from the request and answers with a small JSON object on success.
    Any failure is reported as HTTP 400 with a generic message.
    """

    # Flag read outside this class (BaseHandler is defined elsewhere);
    # presumably disables the password check - TODO confirm.
    passwordless = True

    # Profile parameters copied into the filters table when they are present
    # and differ from the ' ' placeholder value.
    _FILTER_KEYS = (
        ' contextprev ',
        ' contextnext ',
        ' lowercase ',
        ' wordssplitnum ',
        ' stopwords ',
        ' punctuation ',
        ' stemming ',
        ' documentarea ',
    )

    def set_default_headers(self):
        """Attach the CORS headers and the JSON content type to every response."""
        self.set_header(" Access-Control-Allow-Origin ", " * ")
        self.set_header(" Access-Control-Allow-Headers ", " Origin, X-Requested-With, Content-Type, Accept ")
        self.set_header(' Access-Control-Allow-Methods ', ' POST, OPTIONS ')
        self.set_header(' Access-Control-Allow-Credentials ', ' true ')
        self.set_header(' Content-Type ', ' application/json ')

    def options(self):
        """Answer an OPTIONS request with 204 and no body."""
        self.set_status(204)
        self.finish()

    @staticmethod
    def _recreate_table(cursor, table):
        """Drop ``table`` if it exists and create it again with columns c1, c2."""
        cursor.execute(''' DROP TABLE IF EXISTS {0} '''.format(table), parse=False)
        cursor.execute(''' CREATE TABLE {0}(c1,c2) '''.format(table), parse=False)

    @staticmethod
    def _insert_pairs(cursor, table, mapping):
        """Insert every (key, value) item of ``mapping`` as a row of ``table``."""
        cursor.executemany(
            ''' INSERT INTO {0}(c1,c2) VALUES(?,?) '''.format(table),
            ((key, value,) for key, value in mapping.iteritems())
        )

    def post(self):
        """Recreate and populate the profile tables from the request body.

        Writes 400 with an explanatory message when the user id is missing,
        400 with a generic message when anything raises, and a JSON object
        otherwise.
        """
        try:
            # The user id must be present in the body.
            request_arguments = json.loads(self.request.body)
            if ' user ' not in request_arguments or request_arguments[' user '] == ' ':
                self.set_status(400)
                self.write(" Missing user ' s id argument ")
                return
            user_id = request_arguments[' user '][:128]
            profile_parameters = request_arguments[' parameters ']

            import sys
            # Register the madis location only once: appending on every
            # request would make sys.path grow without bound.
            if msettings.MADIS_PATH not in sys.path:
                sys.path.append(msettings.MADIS_PATH)
            import madis

            # NOTE(review): user_id comes straight from the request body and is
            # only truncated; it is interpolated into a file path here and into
            # SQL text below. Validate it against the expected id format.
            profile_file_name = " users_files/OAMiningProfile_ {0} .oamp ".format(user_id)
            cursor = madis.functions.Connection(profile_file_name).cursor()
            try:
                # Start from empty tables.
                for table in ('poswords', 'negwords', 'filters', 'grants'):
                    self._recreate_table(cursor, table)

                # Positive / negative words arrive as JSON-encoded objects
                # mapping each word to its weight.
                for table, key in (('poswords', ' poswords '), ('negwords', ' negwords ')):
                    if key in profile_parameters and profile_parameters[key] != ' {} ':
                        self._insert_pairs(cursor, table, json.loads(profile_parameters[key]))

                # Keep only the filter parameters that were actually supplied.
                filters = {}
                for key in self._FILTER_KEYS:
                    if key in profile_parameters and profile_parameters[key] != ' ':
                        filters[key] = profile_parameters[key]
                self._insert_pairs(cursor, 'filters', filters)

                # Load the uploaded grants file only when it holds concepts.
                if numberOfGrantsUploaded(user_id, request_arguments[' concepts ']) != 0:
                    cursor.execute(''' INSERT INTO grants SELECT stripchars(c1) as c1, stripchars(c2) as c2 FROM (file ' users_files/p {0} .tsv ' ) '''.format(user_id))
            finally:
                # Release the cursor even when one of the statements fails.
                cursor.close()

            data = {' data ': 1}
            self.write(json.dumps(data))
            self.flush()
            self.finish()
        except Exception as err:
            self.set_status(400)
            self.write(" A server error occurred, please contact administrator! ")
            self.finish()
            print(err)
            return
class SaveProfileToDatabaseHandler(BaseHandler):
    """Snapshot the user's working profile and record it in the profiles database.

    POST expects a JSON object with the user id, the profile id and name,
    the documents file name and the number of documents. The working profile
    file (and, when documents were supplied, the documents file) is copied to
    a per-profile file, then the row describing the profile is updated or
    inserted. Any failure is reported as HTTP 400 with a generic message.
    """

    # Flag read outside this class (BaseHandler is defined elsewhere);
    # presumably disables the password check - TODO confirm.
    passwordless = True

    def set_default_headers(self):
        """Attach the CORS headers and the JSON content type to every response."""
        self.set_header(" Access-Control-Allow-Origin ", " * ")
        self.set_header(" Access-Control-Allow-Headers ", " Origin, X-Requested-With, Content-Type, Accept ")
        self.set_header(' Access-Control-Allow-Methods ', ' POST, OPTIONS ')
        self.set_header(' Access-Control-Allow-Credentials ', ' true ')
        self.set_header(' Content-Type ', ' application/json ')

    def options(self):
        """Answer an OPTIONS request with 204 and no body."""
        self.set_status(204)
        self.finish()

    def post(self):
        """Copy the profile files and write the profile row.

        A request whose profile id equals the ' ' placeholder is treated as a
        new profile: a fresh id is obtained from getNewProfileId() and a row
        is inserted. Otherwise the existing row is updated.
        """
        try:
            # The user id must be present in the body.
            request_arguments = json.loads(self.request.body)
            if ' user ' not in request_arguments or request_arguments[' user '] == ' ':
                self.set_status(400)
                self.write(" Missing user ' s id argument ")
                return
            user_id = request_arguments[' user '][:128]
            profile_id = request_arguments[' id '][:128]
            profile_name = request_arguments[' name '][:128]
            doc_name = request_arguments[' docname '][:128]
            docs_number = request_arguments[' docsnumber ']

            # NOTE(review): user_id and profile_id come from the request body
            # and are only truncated before being placed in file paths;
            # validate them against the expected id format.
            profile_file_name = " users_files/OAMiningProfile_ {0} .oamp ".format(user_id)

            # A profile without an id is new and needs a freshly generated one.
            old_profile = profile_id != ' '
            if not old_profile:
                profile_id = getNewProfileId()

            # Copy the working profile to its per-profile file.
            unique_profile_file_name = " users_files/OAMiningProfile_ {0} _ {1} .oamp ".format(user_id, profile_id)
            copyfile(profile_file_name, unique_profile_file_name)

            # Copy the documents as well when some were supplied.
            if doc_name != ' ' and docs_number != 0:
                docs_file_name = " users_files/docs {0} .json ".format(user_id)
                unique_docs_file_name = " users_files/OAMiningDocs_ {0} _ {1} .json ".format(user_id, profile_id)
                copyfile(docs_file_name, unique_docs_file_name)

            import sys
            # Register the madis location only once: appending on every
            # request would make sys.path grow without bound.
            if msettings.MADIS_PATH not in sys.path:
                sys.path.append(msettings.MADIS_PATH)
            import madis

            database_file_name = " users_files/OAMiningProfilesDatabase_ {0} .db ".format(user_id)
            cursor = madis.functions.Connection(database_file_name).cursor()
            try:
                created = datetime.date.today().strftime(" % B %d % Y ")
                if old_profile:
                    cursor.execute(
                        ''' UPDATE database SET datecreated=?, status=?, matches=?, docname=?, docsnumber=?, notified=? WHERE id=? ''',
                        (created, " Processing ", " 8/8 ", doc_name, docs_number, 0, profile_id),
                        parse=False
                    )
                else:
                    cursor.execute(
                        ''' INSERT INTO database VALUES(?,?,?,?,?,?,?,?) ''',
                        (profile_id, profile_name, created, " Processing ", " 8/8 ", doc_name, docs_number, 0,),
                        parse=False
                    )
            finally:
                # Release the cursor even when the statement fails.
                cursor.close()

            self.write(json.dumps({}))
            self.finish()
        except Exception as err:
            self.set_status(400)
            self.write(" A server error occurred, please contact administrator! ")
            self.finish()
            print(err)
            return
class DownloadProfileHandler(BaseHandler):
    """Send a saved profile file back to the client as an attachment.

    POST expects a JSON object with the user id and the profile id; the
    matching per-profile file is streamed in the response body. Any failure
    is reported as HTTP 400 with a generic message.
    """

    # Flag read outside this class (BaseHandler is defined elsewhere);
    # presumably disables the password check - TODO confirm.
    passwordless = True

    def set_default_headers(self):
        """Attach the CORS headers and the default content type to every response."""
        self.set_header(" Access-Control-Allow-Origin ", " * ")
        self.set_header(" Access-Control-Allow-Headers ", " Origin, X-Requested-With, Content-Type, Accept ")
        self.set_header(' Access-Control-Allow-Methods ', ' POST, OPTIONS ')
        self.set_header(' Access-Control-Allow-Credentials ', ' true ')
        self.set_header(' Content-Type ', ' application/oamp ')

    def options(self):
        """Answer an OPTIONS request with 204 and no body."""
        self.set_status(204)
        self.finish()

    def post(self):
        """Stream the requested profile file to the client."""
        try:
            # The user id must be present in the body.
            request_arguments = json.loads(self.request.body)
            if ' user ' not in request_arguments or request_arguments[' user '] == ' ':
                self.set_status(400)
                self.write(" Missing user ' s id argument ")
                return
            user_id = request_arguments[' user '][:128]
            profile_id = request_arguments[' id '][:128]

            # NOTE(review): both ids come from the request body and are only
            # truncated before being placed in the path of the file that is
            # read back to the caller; validate them against the expected id
            # format.
            unique_profile_file_name = " users_files/OAMiningProfile_ {0} _ {1} .oamp ".format(user_id, profile_id)
            buf_size = 4096

            # Open the file BEFORE any header is flushed: once headers are on
            # the wire the status can no longer be turned into a 400, so a
            # missing profile must be detected first. Binary mode keeps the
            # bytes of the profile file untouched on every platform.
            with open(unique_profile_file_name, 'rb') as f:
                self.set_header(' Content-Type ', ' application/octet-stream ')
                self.set_header(' Content-Disposition ', ' attachment; filename= ' + " OAMiningProfile_ {0} _ {1} .oamp ".format(user_id, profile_id))
                self.flush()
                # Read fixed-size chunks until read() returns an empty string.
                for chunk in iter(lambda: f.read(buf_size), b''):
                    self.write(chunk)
            self.finish()
        except Exception as err:
            self.set_status(400)
            self.write(" A server error occurred, please contact administrator! ")
            self.finish()
            print(err)
            return
2019-05-29 16:02:54 +02:00
class NotifyHandler(BaseHandler):
    """E-mail a profile-update notice and mark the profile as notified.

    POST expects a JSON object with the user id, the community and the
    profile id. A message is sent through the SMTP server configured in
    msettings, then the profile row in the user's profiles database gets
    notified=1. Any failure is reported as HTTP 400 with a generic message.
    """

    # Flag read outside this class (BaseHandler is defined elsewhere);
    # presumably disables the password check - TODO confirm.
    passwordless = True

    def set_default_headers(self):
        """Attach the CORS headers and the default content type to every response."""
        self.set_header(" Access-Control-Allow-Origin ", " * ")
        self.set_header(" Access-Control-Allow-Headers ", " Origin, X-Requested-With, Content-Type, Accept ")
        self.set_header(' Access-Control-Allow-Methods ', ' POST, OPTIONS ')
        self.set_header(' Access-Control-Allow-Credentials ', ' true ')
        # NOTE(review): post() answers with a JSON document while this header
        # announces application/oamp - confirm which one clients expect.
        self.set_header(' Content-Type ', ' application/oamp ')

    def options(self):
        """Answer an OPTIONS request with 204 and no body."""
        self.set_status(204)
        self.finish()

    def post(self):
        """Send the notification e-mail, then flag the profile as notified."""
        try:
            # The user id must be present in the body.
            request_arguments = json.loads(self.request.body)
            if ' user ' not in request_arguments or request_arguments[' user '] == ' ':
                self.set_status(400)
                self.write(" Missing user ' s id argument ")
                return
            community = request_arguments[' community '][:128]
            user_id = request_arguments[' user '][:128]
            profile_id = request_arguments[' id '][:128]

            import smtplib
            # NOTE(review): community and profile_id come from the request
            # body and end up on the Subject line; a line break inside them
            # would let the caller add mail headers. Consider rejecting or
            # stripping CR/LF.
            subject = ' New Profile update of Community: {} on profile: {} '.format(community, profile_id)
            text = ' Hello our great mining team experts of OpenAIRE, \n \n A new profile update of Community {} \n on profile named: {} '.format(community, profile_id)
            message = ' Subject: {} \n \n {} '.format(subject, text)

            # Send the message through the configured SMTP server.
            smtp = smtplib.SMTP(msettings.SMTP_HOST, msettings.SMTP_PORT)
            try:
                smtp.ehlo()
                smtp.starttls()
                smtp.ehlo()
                smtp.login(msettings.SMTP_USERNAME, msettings.SMTP_PASSWORD)
                smtp.sendmail(msettings.SMTP_FROM, msettings.SMTP_TO, message)
                smtp.quit()
            finally:
                # close() is a no-op after a successful quit(); it releases the
                # socket when one of the steps above raised.
                smtp.close()

            import sys
            # Register the madis location only once: appending on every
            # request would make sys.path grow without bound.
            if msettings.MADIS_PATH not in sys.path:
                sys.path.append(msettings.MADIS_PATH)
            import madis

            database_file_name = " users_files/OAMiningProfilesDatabase_ {0} .db ".format(user_id)
            cursor = madis.functions.Connection(database_file_name).cursor()
            try:
                cursor.execute(''' UPDATE database SET notified=1 WHERE id=? ''', (profile_id,), parse=False)
            finally:
                # Release the cursor even when the statement fails.
                cursor.close()

            self.write(json.dumps({}))
            self.finish()
        except Exception as err:
            self.set_status(400)
            self.write(" A server error occurred, please contact administrator! ")
            self.finish()
            print(err)
            return
2018-02-28 12:30:50 +01:00
def main():
    """Parse the command-line options and serve the application until stopped.

    When msettings.DEBUG is false the listening sockets are bound first, the
    process is forked via fork_processes(0) and each resulting process serves
    the shared sockets. When it is true a single-process server is bound and
    started on the port. In both cases the call blocks inside the IOLoop.
    """
    # The file's import block does not import these two submodules
    # explicitly; import them here so the non-debug branch does not depend on
    # another tornado module having loaded them as a side effect.
    import tornado.netutil
    import tornado.process

    tornado.options.parse_command_line()
    if msettings.DEBUG:
        # Debug case: one process, bound directly by the HTTP server.
        http_server = tornado.httpserver.HTTPServer(Application())
        http_server.bind(options.port)
        http_server.start(1)
    else:
        # Bind before forking so every forked process shares the sockets.
        sockets = tornado.netutil.bind_sockets(options.port)
        tornado.process.fork_processes(0)
        # TLS is not enabled here; an ssl_options dict with "certfile" and
        # "keyfile" entries can be passed to HTTPServer to turn it on.
        server = tornado.httpserver.HTTPServer(Application())
        server.add_sockets(sockets)
    tornado.ioloop.IOLoop.instance().start()
# Script entry point: call main() when the module-name check below succeeds.
if __name__ == " __main__ " :
main ( )