updates -openorgs

This commit is contained in:
mkallipo 2024-11-21 12:39:26 +01:00
parent 415b45e3ca
commit ba98a16bcb
13 changed files with 92 additions and 59 deletions

View File

@ -9,19 +9,49 @@ dix_org = load_json('dictionaries/dix_acad.json')
# Lookup dictionaries that find_ror hands to Aff_Ids for the ROR match.
dix_mult = load_json('dictionaries/dix_mult.json')
dix_city = load_json('dictionaries/dix_city.json')
dix_country = load_json('dictionaries/dix_country.json')
# "oaire" counterparts: find_ror hands these to Aff_Ids for a second pass
# whose hits are reported with PID 'OpenOrgs'.
dix_org_oaire = load_json('dictionaries/dix_acad_oaire.json')
dix_mult_oaire = load_json('dictionaries/dix_mult_oaire.json')
dix_country_oaire = load_json('dictionaries/dix_country_oaire.json')
# Indexed by the id of a ROR match. find_ror compares entry[0] with 'active'
# and, when entry[1] is non-empty, reports entry[1] as an additional active id
# (presumably the successor record -- confirm against the dictionary builder).
dix_status = load_json('dictionaries/dix_status.json')
def find_ror(input, simU, simG):
    """Match an affiliation against the ROR and OpenOrgs dictionaries.

    Aff_Ids is run twice on the same input: once with the ROR dictionaries
    and once with the ``*_oaire`` dictionaries. Every hit becomes one output
    record; a ROR hit whose status is not 'active' and that has a non-empty
    second field in ``dix_status`` yields a second record for that id,
    marked 'active'.

    Args:
        input: Passed unchanged to Aff_Ids (the caller in this file supplies
            the result of create_df_algorithm).
        simU: Forwarded to Aff_Ids (presumably a similarity threshold --
            confirm in Aff_Ids).
        simG: Forwarded to Aff_Ids (presumably a similarity threshold --
            confirm in Aff_Ids).

    Returns:
        A list of dicts with keys 'Provenance', 'PID', 'Value', 'Confidence'
        and 'Status'; ROR records come first, then OpenOrgs records. The
        list is empty when neither pass produced a hit.

    Raises:
        KeyError: If a ROR hit's id is missing from ``dix_status``.
    """
    ror_hits = Aff_Ids(input, dix_org, dix_mult, dix_city, dix_country,
                       simU, simG)
    # NOTE(review): the country dictionary is passed for both the city and
    # the country slot here -- presumably because no OpenOrgs city
    # dictionary exists; confirm this is intended.
    openorgs_hits = Aff_Ids(input, dix_org_oaire, dix_mult_oaire,
                            dix_country_oaire, dix_country_oaire, simU, simG)

    records = []

    for hit in ror_hits:
        entry = dix_status[hit[2]]
        status = entry[0]
        # The matched id is always reported with its own status.
        records.append({'Provenance': 'AffRo', 'PID': 'ROR', 'Value': hit[2],
                        'Confidence': hit[1], 'Status': status})
        # Only non-active records are checked for a replacement id.
        if status != 'active' and entry[1] != '':
            records.append({'Provenance': 'AffRo', 'PID': 'ROR',
                            'Value': entry[1], 'Confidence': hit[1],
                            'Status': 'active'})

    # OpenOrgs hits carry no status information and are reported as active.
    for hit in openorgs_hits:
        records.append({'Provenance': 'AffRo', 'PID': 'OpenOrgs',
                        'Value': hit[2], 'Confidence': hit[1],
                        'Status': 'active'})

    return records
def affro(raw_aff_string):
    """Return organisation-id matches for a raw affiliation string.

    The string is prepared with create_df_algorithm and matched by find_ror
    using the fixed values 0.65 (simU) and 0.82 (simG).

    Args:
        raw_aff_string: The affiliation text to resolve.

    Returns:
        The list of result dicts produced by find_ror (possibly empty), or
        None when any exception was raised while processing the string.
    """
    try:
        return find_ror(create_df_algorithm(raw_aff_string), 0.65, 0.82)
    except Exception as e:
        # Best-effort entry point: report the problem and signal failure
        # with None instead of propagating the exception to the caller.
        print(f"Error: {str(e)}")
        return None

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@ -181,44 +181,45 @@ protected_phrases1 = [
replacements = {'saint' : 'st',
replacements = {'czechoslovak':'czech',
'saint' : 'st',
'aghia' : 'agia',
'universitatsklinikum' : 'universi hospital',
'universitetshospital' : 'universi hospital',
'universitatskinderklinik' : 'universi childrens hospital',
'universitatskliniken': 'universi hospital',
'Universitätsklinik': 'universi hospital',
'universitatsmedizin': 'universi medicine',
'universitatskliniken' : 'universi hospital',
'Universitätsklinik' : 'universi hospital',
'universitatsmedizin' : 'universi medicine',
'universitatsbibliothek' : 'universi library',
'nat.':'national',
'uni versity':'university',
'unive rsity': 'university',
'univ ersity': 'university',
'inst ':'institute ',
'adv ':'advanced ',
'univ ':'university ',
'stud ': 'studies ',
'inst.':'institute',
'adv.':'advanced',
'univ.':'university',
'stud.': 'studies',
'univercity':'university',
'univerisity':'university',
'universtiy':'university',
'univeristy':'university',
'universirty':'university',
'universiti':'university',
'universitiy':'university',
'universty' :'university',
'nat.' : 'national',
'uni versity' : 'university',
'unive rsity' : 'university',
'univ ersity' : 'university',
'inst ' : 'institute ',
'adv ' : 'advanced ',
'univ ' : 'university ',
'stud ' : 'studies ',
'inst.' : 'institute',
'adv.' : 'advanced',
'univ.' : 'university',
'stud.' : 'studies',
'univercity' : 'university',
'univerisity' : 'university',
'universtiy' : 'university',
'univeristy' : 'university',
'universirty' : 'university',
'universiti' : 'university',
'universitiy' : 'university',
'universty' : 'university',
'techniche' : 'technological',
'univ col': 'university colege',
'univ. col.': 'university colege',
'univ. coll.': 'university colege',
'col.':'colege',
'univ col' : 'university colege',
'univ. col.' : 'university colege',
'univ. coll.' : 'university colege',
'col.' : 'colege',
'hipokration' : 'hipocration',
'belfield, dublin': 'dublin',
'balsbridge, dublin': 'dublin', #ballsbridge
'earlsfort terrace, dublin': 'dublin',
'belfield, dublin' : 'dublin',
'balsbridge, dublin' : 'dublin', #ballsbridge
'earlsfort terrace, dublin' : 'dublin',
'bon secours hospital, cork' : 'bon secours hospital cork',
'bon secours hospital, dublin' : 'bon secours hospital dublin',
'bon secours hospital, galway' : 'bon secours hospital galway',
@ -231,7 +232,7 @@ replacements = {'saint' : 'st',
'royal holoway, university london' : 'royal holoway universi london', #holloway
'city, university london' : 'city universi london',
'city university, london' : 'city universi london',
'aeginition':'eginition',
'aeginition' : 'eginition',
'national technical university, athens' : 'national technical university athens'
# 'harvard medical school' : 'harvard university'

View File

@ -9,8 +9,7 @@ from sklearn.metrics.pairwise import cosine_similarity
from functions_cluster import *
from create_input_cluster import *
with open('dix_status.json', 'rb') as f:
dix_status = json.load(f)
# Keys of categ_dicts whose value is exactly 'Specific', in iteration order.
specific = list(filter(lambda name: categ_dicts[name] == 'Specific', categ_dicts))
@ -334,19 +333,19 @@ def Aff_Ids(input, dix_org, dix_mult, dix_city_ror, dix_country_ror, simU, simG)
results = [[x[0],x[1], ids[i]] for i,x in enumerate(best)]
results_upd = []
# results_upd = []
for r in results:
if 'ror.org' in r[2]:
if dix_status[r[2]][0] == 'active':
results_upd.append([r[0],r[1], 'ROR', r[2], 'active'])
else:
if dix_status[r[2]][1] == '':
results_upd.append([r[0],r[1], 'ROR', r[2], dix_status[r[2]][0]])
else:
# for r in results:
# if 'ror.org' in r[2]:
# if dix_status[r[2]][0] == 'active':
# results_upd.append([r[0],r[1], 'ROR', r[2], 'active'])
# else:
# if dix_status[r[2]][1] == '':
# results_upd.append([r[0],r[1], 'ROR', r[2], dix_status[r[2]][0]])
# else:
results_upd.append([r[0],r[1], 'ROR', r[2], dix_status[r[2]][0]])
# results_upd.append([r[0],r[1], 'ROR', r[2], dix_status[r[2]][0]])
results_upd.append([r[0],r[1], 'ROR', dix_status[r[2]][1], 'active'])
# results_upd.append([r[0],r[1], 'ROR', dix_status[r[2]][1], 'active'])
return results_upd
return results