updates -openorgs
This commit is contained in:
parent
415b45e3ca
commit
ba98a16bcb
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
@ -9,19 +9,49 @@ dix_org = load_json('dictionaries/dix_acad.json')
|
||||||
dix_mult = load_json('dictionaries/dix_mult.json')
|
dix_mult = load_json('dictionaries/dix_mult.json')
|
||||||
dix_city = load_json('dictionaries/dix_city.json')
|
dix_city = load_json('dictionaries/dix_city.json')
|
||||||
dix_country = load_json('dictionaries/dix_country.json')
|
dix_country = load_json('dictionaries/dix_country.json')
|
||||||
|
dix_org_oaire = load_json('dictionaries/dix_acad_oaire.json')
|
||||||
|
dix_mult_oaire = load_json('dictionaries/dix_mult_oaire.json')
|
||||||
|
dix_country_oaire = load_json('dictionaries/dix_country_oaire.json')
|
||||||
|
dix_status = load_json('dictionaries/dix_status.json')
|
||||||
|
|
||||||
|
|
||||||
def affro(raw_aff_string):
|
def find_ror(input, simU, simG):
|
||||||
try:
|
result = Aff_Ids(input, dix_org, dix_mult, dix_city, dix_country, simU, simG)
|
||||||
result = Aff_Ids(create_df_algorithm(raw_aff_string), dix_org, dix_mult, dix_city, dix_country, 0.65, 0.82)
|
result_oaire = Aff_Ids(input, dix_org_oaire, dix_mult_oaire, dix_country_oaire, dix_country_oaire, simU, simG)
|
||||||
if len(result)>0:
|
|
||||||
# result_dict = [json.dumps({'Origin': 'AffRo', 'PID':x[2], 'Confidence':x[1], 'Status':x[3]}) for x in result]
|
results_upd = []
|
||||||
result_dict = [{'Provenance': 'AffRo', 'PID':x[2], 'Value':x[3], 'Confidence':x[1], 'Status':x[4]} for x in result]
|
|
||||||
|
for r in result:
|
||||||
|
|
||||||
|
if dix_status[r[2]][0] == 'active':
|
||||||
|
results_upd.append([r[1], 'ROR', r[2], 'active'])
|
||||||
|
else:
|
||||||
|
if dix_status[r[2]][1] == '':
|
||||||
|
results_upd.append([r[1], 'ROR', r[2], dix_status[r[2]][0]])
|
||||||
|
else:
|
||||||
|
|
||||||
|
results_upd.append([r[1], 'ROR', r[2], dix_status[r[2]][0]])
|
||||||
|
|
||||||
|
results_upd.append([r[1], 'ROR', dix_status[r[2]][1], 'active'])
|
||||||
|
|
||||||
|
for r in result_oaire:
|
||||||
|
results_upd.append([r[1],'OpenOrgs', r[2], None])
|
||||||
|
|
||||||
|
if len(results_upd)>0:
|
||||||
|
result_dict = [{'Provenance': 'AffRo', 'PID':'ROR', 'Value':x[2], 'Confidence':x[0], 'Status':x[3]} if x[1] == 'ROR' else {'Provenance': 'AffRo', 'PID':'OpenOrgs', 'Value':x[2], 'Confidence':x[0], 'Status': 'active'} for x in results_upd]
|
||||||
|
|
||||||
else:
|
else:
|
||||||
result_dict = []
|
result_dict = []
|
||||||
|
|
||||||
|
|
||||||
return result_dict
|
return result_dict
|
||||||
|
|
||||||
|
|
||||||
|
def affro(raw_aff_string):
|
||||||
|
try:
|
||||||
|
result = find_ror(create_df_algorithm(raw_aff_string), 0.65, 0.82)
|
||||||
|
|
||||||
|
return result
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
# Return some indication of an error, or log the row
|
# Return some indication of an error, or log the row
|
||||||
print(f"Error: {str(e)}")
|
print(f"Error: {str(e)}")
|
||||||
|
|
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
|
@ -181,44 +181,45 @@ protected_phrases1 = [
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
replacements = {'saint' : 'st',
|
replacements = {'czechoslovak':'czech',
|
||||||
|
'saint' : 'st',
|
||||||
'aghia' : 'agia',
|
'aghia' : 'agia',
|
||||||
'universitatsklinikum' : 'universi hospital',
|
'universitatsklinikum' : 'universi hospital',
|
||||||
'universitetshospital' : 'universi hospital',
|
'universitetshospital' : 'universi hospital',
|
||||||
'universitatskinderklinik' : 'universi childrens hospital',
|
'universitatskinderklinik' : 'universi childrens hospital',
|
||||||
'universitatskliniken': 'universi hospital',
|
'universitatskliniken' : 'universi hospital',
|
||||||
'Universitätsklinik': 'universi hospital',
|
'Universitätsklinik' : 'universi hospital',
|
||||||
'universitatsmedizin': 'universi medicine',
|
'universitatsmedizin' : 'universi medicine',
|
||||||
'universitatsbibliothek' : 'universi library',
|
'universitatsbibliothek' : 'universi library',
|
||||||
'nat.':'national',
|
'nat.' : 'national',
|
||||||
'uni versity':'university',
|
'uni versity' : 'university',
|
||||||
'unive rsity': 'university',
|
'unive rsity' : 'university',
|
||||||
'univ ersity': 'university',
|
'univ ersity' : 'university',
|
||||||
'inst ':'institute ',
|
'inst ' : 'institute ',
|
||||||
'adv ':'advanced ',
|
'adv ' : 'advanced ',
|
||||||
'univ ':'university ',
|
'univ ' : 'university ',
|
||||||
'stud ': 'studies ',
|
'stud ' : 'studies ',
|
||||||
'inst.':'institute',
|
'inst.' : 'institute',
|
||||||
'adv.':'advanced',
|
'adv.' : 'advanced',
|
||||||
'univ.':'university',
|
'univ.' : 'university',
|
||||||
'stud.': 'studies',
|
'stud.' : 'studies',
|
||||||
'univercity':'university',
|
'univercity' : 'university',
|
||||||
'univerisity':'university',
|
'univerisity' : 'university',
|
||||||
'universtiy':'university',
|
'universtiy' : 'university',
|
||||||
'univeristy':'university',
|
'univeristy' : 'university',
|
||||||
'universirty':'university',
|
'universirty' : 'university',
|
||||||
'universiti':'university',
|
'universiti' : 'university',
|
||||||
'universitiy':'university',
|
'universitiy' : 'university',
|
||||||
'universty' :'university',
|
'universty' : 'university',
|
||||||
'techniche' : 'technological',
|
'techniche' : 'technological',
|
||||||
'univ col': 'university colege',
|
'univ col' : 'university colege',
|
||||||
'univ. col.': 'university colege',
|
'univ. col.' : 'university colege',
|
||||||
'univ. coll.': 'university colege',
|
'univ. coll.' : 'university colege',
|
||||||
'col.':'colege',
|
'col.' : 'colege',
|
||||||
'hipokration' : 'hipocration',
|
'hipokration' : 'hipocration',
|
||||||
'belfield, dublin': 'dublin',
|
'belfield, dublin' : 'dublin',
|
||||||
'balsbridge, dublin': 'dublin', #ballsbridge
|
'balsbridge, dublin' : 'dublin', #ballsbridge
|
||||||
'earlsfort terrace, dublin': 'dublin',
|
'earlsfort terrace, dublin' : 'dublin',
|
||||||
'bon secours hospital, cork' : 'bon secours hospital cork',
|
'bon secours hospital, cork' : 'bon secours hospital cork',
|
||||||
'bon secours hospital, dublin' : 'bon secours hospital dublin',
|
'bon secours hospital, dublin' : 'bon secours hospital dublin',
|
||||||
'bon secours hospital, galway' : 'bon secours hospital galway',
|
'bon secours hospital, galway' : 'bon secours hospital galway',
|
||||||
|
@ -231,7 +232,7 @@ replacements = {'saint' : 'st',
|
||||||
'royal holoway, university london' : 'royal holoway universi london', #holloway
|
'royal holoway, university london' : 'royal holoway universi london', #holloway
|
||||||
'city, university london' : 'city universi london',
|
'city, university london' : 'city universi london',
|
||||||
'city university, london' : 'city universi london',
|
'city university, london' : 'city universi london',
|
||||||
'aeginition':'eginition',
|
'aeginition' : 'eginition',
|
||||||
'national technical university, athens' : 'national technical university athens'
|
'national technical university, athens' : 'national technical university athens'
|
||||||
# 'harvard medical school' : 'harvard university'
|
# 'harvard medical school' : 'harvard university'
|
||||||
|
|
||||||
|
|
|
@ -9,8 +9,7 @@ from sklearn.metrics.pairwise import cosine_similarity
|
||||||
from functions_cluster import *
|
from functions_cluster import *
|
||||||
from create_input_cluster import *
|
from create_input_cluster import *
|
||||||
|
|
||||||
with open('dix_status.json', 'rb') as f:
|
|
||||||
dix_status = json.load(f)
|
|
||||||
|
|
||||||
specific = [k for k in categ_dicts if categ_dicts[k] == 'Specific']
|
specific = [k for k in categ_dicts if categ_dicts[k] == 'Specific']
|
||||||
|
|
||||||
|
@ -334,19 +333,19 @@ def Aff_Ids(input, dix_org, dix_mult, dix_city_ror, dix_country_ror, simU, simG)
|
||||||
|
|
||||||
|
|
||||||
results = [[x[0],x[1], ids[i]] for i,x in enumerate(best)]
|
results = [[x[0],x[1], ids[i]] for i,x in enumerate(best)]
|
||||||
results_upd = []
|
# results_upd = []
|
||||||
|
|
||||||
for r in results:
|
# for r in results:
|
||||||
if 'ror.org' in r[2]:
|
# if 'ror.org' in r[2]:
|
||||||
if dix_status[r[2]][0] == 'active':
|
# if dix_status[r[2]][0] == 'active':
|
||||||
results_upd.append([r[0],r[1], 'ROR', r[2], 'active'])
|
# results_upd.append([r[0],r[1], 'ROR', r[2], 'active'])
|
||||||
else:
|
# else:
|
||||||
if dix_status[r[2]][1] == '':
|
# if dix_status[r[2]][1] == '':
|
||||||
results_upd.append([r[0],r[1], 'ROR', r[2], dix_status[r[2]][0]])
|
# results_upd.append([r[0],r[1], 'ROR', r[2], dix_status[r[2]][0]])
|
||||||
else:
|
# else:
|
||||||
|
|
||||||
results_upd.append([r[0],r[1], 'ROR', r[2], dix_status[r[2]][0]])
|
# results_upd.append([r[0],r[1], 'ROR', r[2], dix_status[r[2]][0]])
|
||||||
|
|
||||||
results_upd.append([r[0],r[1], 'ROR', dix_status[r[2]][1], 'active'])
|
# results_upd.append([r[0],r[1], 'ROR', dix_status[r[2]][1], 'active'])
|
||||||
|
|
||||||
return results_upd
|
return results
|
Loading…
Reference in New Issue