updates -openorgs

This commit is contained in:
mkallipo 2024-11-21 12:39:26 +01:00
parent 415b45e3ca
commit ba98a16bcb
13 changed files with 92 additions and 59 deletions

View File

@ -9,19 +9,49 @@ dix_org = load_json('dictionaries/dix_acad.json')
dix_mult = load_json('dictionaries/dix_mult.json') dix_mult = load_json('dictionaries/dix_mult.json')
dix_city = load_json('dictionaries/dix_city.json') dix_city = load_json('dictionaries/dix_city.json')
dix_country = load_json('dictionaries/dix_country.json') dix_country = load_json('dictionaries/dix_country.json')
# Dictionaries used for the second Aff_Ids pass in find_ror, whose matches are labelled 'OpenOrgs'.
dix_org_oaire = load_json('dictionaries/dix_acad_oaire.json')
dix_mult_oaire = load_json('dictionaries/dix_mult_oaire.json')
dix_country_oaire = load_json('dictionaries/dix_country_oaire.json')
# Looked up in find_ror by the identifier of each ROR match: element 0 is read as the
# status string and element 1 as a replacement identifier ('' when there is none).
dix_status = load_json('dictionaries/dix_status.json')
def find_ror(input, simU, simG):
    """Match an affiliation input against the ROR and the OpenOrgs dictionaries.

    Aff_Ids is called twice with the same input and thresholds: once with the
    ROR dictionaries and once with the OpenOrgs ("oaire") dictionaries.

    Args:
        input: Forwarded unchanged to Aff_Ids.
        simU: Forwarded unchanged to Aff_Ids.
        simG: Forwarded unchanged to Aff_Ids.

    Returns:
        A list of dicts with the keys 'Provenance', 'PID', 'Value',
        'Confidence' and 'Status'; ROR matches come first, followed by the
        OpenOrgs matches. The list is empty when neither call matched.
    """
    ror_matches = Aff_Ids(input, dix_org, dix_mult, dix_city, dix_country, simU, simG)
    # NOTE(review): dix_country_oaire fills both the city and the country
    # argument of Aff_Ids here -- confirm this is intended.
    openorgs_matches = Aff_Ids(input, dix_org_oaire, dix_mult_oaire, dix_country_oaire, dix_country_oaire, simU, simG)

    def _entry(pid, value, confidence, status):
        # Single place that fixes the key set and key order of an output record.
        return {'Provenance': 'AffRo', 'PID': pid, 'Value': value, 'Confidence': confidence, 'Status': status}

    matches = []
    for match in ror_matches:
        confidence, ror_id = match[1], match[2]
        status = dix_status[ror_id][0]
        # The matched identifier is always reported with its own status.
        matches.append(_entry('ROR', ror_id, confidence, status))
        if status != 'active':
            # A non-active record may name a replacement identifier, which is
            # reported as an additional active entry with the same confidence.
            successor = dix_status[ror_id][1]
            if successor != '':
                matches.append(_entry('ROR', successor, confidence, 'active'))

    # OpenOrgs matches carry no status lookup; they are always reported as active.
    matches.extend(_entry('OpenOrgs', match[2], match[1], 'active') for match in openorgs_matches)
    return matches
def affro(raw_aff_string): def affro(raw_aff_string):
try: try:
result = Aff_Ids(create_df_algorithm(raw_aff_string), dix_org, dix_mult, dix_city, dix_country, 0.65, 0.82) result = find_ror(create_df_algorithm(raw_aff_string), 0.65, 0.82)
if len(result)>0:
# result_dict = [json.dumps({'Origin': 'AffRo', 'PID':x[2], 'Confidence':x[1], 'Status':x[3]}) for x in result] return result
result_dict = [{'Provenance': 'AffRo', 'PID':x[2], 'Value':x[3], 'Confidence':x[1], 'Status':x[4]} for x in result]
else:
result_dict = []
return result_dict
except Exception as e: except Exception as e:
# Return some indication of an error, or log the row # Return some indication of an error, or log the row
print(f"Error: {str(e)}") print(f"Error: {str(e)}")

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@ -181,44 +181,45 @@ protected_phrases1 = [
replacements = {'saint' : 'st', replacements = {'czechoslovak':'czech',
'saint' : 'st',
'aghia' : 'agia', 'aghia' : 'agia',
'universitatsklinikum' : 'universi hospital', 'universitatsklinikum' : 'universi hospital',
'universitetshospital' : 'universi hospital', 'universitetshospital' : 'universi hospital',
'universitatskinderklinik' : 'universi childrens hospital', 'universitatskinderklinik' : 'universi childrens hospital',
'universitatskliniken': 'universi hospital', 'universitatskliniken' : 'universi hospital',
'Universitätsklinik': 'universi hospital', 'Universitätsklinik' : 'universi hospital',
'universitatsmedizin': 'universi medicine', 'universitatsmedizin' : 'universi medicine',
'universitatsbibliothek' : 'universi library', 'universitatsbibliothek' : 'universi library',
'nat.':'national', 'nat.' : 'national',
'uni versity':'university', 'uni versity' : 'university',
'unive rsity': 'university', 'unive rsity' : 'university',
'univ ersity': 'university', 'univ ersity' : 'university',
'inst ':'institute ', 'inst ' : 'institute ',
'adv ':'advanced ', 'adv ' : 'advanced ',
'univ ':'university ', 'univ ' : 'university ',
'stud ': 'studies ', 'stud ' : 'studies ',
'inst.':'institute', 'inst.' : 'institute',
'adv.':'advanced', 'adv.' : 'advanced',
'univ.':'university', 'univ.' : 'university',
'stud.': 'studies', 'stud.' : 'studies',
'univercity':'university', 'univercity' : 'university',
'univerisity':'university', 'univerisity' : 'university',
'universtiy':'university', 'universtiy' : 'university',
'univeristy':'university', 'univeristy' : 'university',
'universirty':'university', 'universirty' : 'university',
'universiti':'university', 'universiti' : 'university',
'universitiy':'university', 'universitiy' : 'university',
'universty' :'university', 'universty' : 'university',
'techniche' : 'technological', 'techniche' : 'technological',
'univ col': 'university colege', 'univ col' : 'university colege',
'univ. col.': 'university colege', 'univ. col.' : 'university colege',
'univ. coll.': 'university colege', 'univ. coll.' : 'university colege',
'col.':'colege', 'col.' : 'colege',
'hipokration' : 'hipocration', 'hipokration' : 'hipocration',
'belfield, dublin': 'dublin', 'belfield, dublin' : 'dublin',
'balsbridge, dublin': 'dublin', #ballsbridge 'balsbridge, dublin' : 'dublin', #ballsbridge
'earlsfort terrace, dublin': 'dublin', 'earlsfort terrace, dublin' : 'dublin',
'bon secours hospital, cork' : 'bon secours hospital cork', 'bon secours hospital, cork' : 'bon secours hospital cork',
'bon secours hospital, dublin' : 'bon secours hospital dublin', 'bon secours hospital, dublin' : 'bon secours hospital dublin',
'bon secours hospital, galway' : 'bon secours hospital galway', 'bon secours hospital, galway' : 'bon secours hospital galway',
@ -231,7 +232,7 @@ replacements = {'saint' : 'st',
'royal holoway, university london' : 'royal holoway universi london', #holloway 'royal holoway, university london' : 'royal holoway universi london', #holloway
'city, university london' : 'city universi london', 'city, university london' : 'city universi london',
'city university, london' : 'city universi london', 'city university, london' : 'city universi london',
'aeginition':'eginition', 'aeginition' : 'eginition',
'national technical university, athens' : 'national technical university athens' 'national technical university, athens' : 'national technical university athens'
# 'harvard medical school' : 'harvard university' # 'harvard medical school' : 'harvard university'

View File

@ -9,8 +9,7 @@ from sklearn.metrics.pairwise import cosine_similarity
from functions_cluster import * from functions_cluster import *
from create_input_cluster import * from create_input_cluster import *
with open('dix_status.json', 'rb') as f:
dix_status = json.load(f)
specific = [k for k in categ_dicts if categ_dicts[k] == 'Specific'] specific = [k for k in categ_dicts if categ_dicts[k] == 'Specific']
@ -334,19 +333,19 @@ def Aff_Ids(input, dix_org, dix_mult, dix_city_ror, dix_country_ror, simU, simG)
results = [[x[0],x[1], ids[i]] for i,x in enumerate(best)] results = [[x[0],x[1], ids[i]] for i,x in enumerate(best)]
results_upd = [] # results_upd = []
for r in results: # for r in results:
if 'ror.org' in r[2]: # if 'ror.org' in r[2]:
if dix_status[r[2]][0] == 'active': # if dix_status[r[2]][0] == 'active':
results_upd.append([r[0],r[1], 'ROR', r[2], 'active']) # results_upd.append([r[0],r[1], 'ROR', r[2], 'active'])
else: # else:
if dix_status[r[2]][1] == '': # if dix_status[r[2]][1] == '':
results_upd.append([r[0],r[1], 'ROR', r[2], dix_status[r[2]][0]]) # results_upd.append([r[0],r[1], 'ROR', r[2], dix_status[r[2]][0]])
else: # else:
results_upd.append([r[0],r[1], 'ROR', r[2], dix_status[r[2]][0]]) # results_upd.append([r[0],r[1], 'ROR', r[2], dix_status[r[2]][0]])
results_upd.append([r[0],r[1], 'ROR', dix_status[r[2]][1], 'active']) # results_upd.append([r[0],r[1], 'ROR', dix_status[r[2]][1], 'active'])
return results_upd return results