updates -openorgs
This commit is contained in:
parent
415b45e3ca
commit
ba98a16bcb
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
@ -9,19 +9,49 @@ dix_org = load_json('dictionaries/dix_acad.json')
|
|||
dix_mult = load_json('dictionaries/dix_mult.json')
|
||||
dix_city = load_json('dictionaries/dix_city.json')
|
||||
dix_country = load_json('dictionaries/dix_country.json')
|
||||
dix_org_oaire = load_json('dictionaries/dix_acad_oaire.json')
|
||||
dix_mult_oaire = load_json('dictionaries/dix_mult_oaire.json')
|
||||
dix_country_oaire = load_json('dictionaries/dix_country_oaire.json')
|
||||
dix_status = load_json('dictionaries/dix_status.json')
|
||||
|
||||
|
||||
def find_ror(input, simU, simG):
    """Collect ROR and OpenOrgs matches for an affiliation input.

    ``Aff_Ids`` is run twice with the same input and thresholds: first with
    the ROR dictionaries, then with the OpenOrgs (``*_oaire``) dictionaries.
    Every ROR match is reported with the status stored in ``dix_status``.
    When that status is not ``'active'`` and ``dix_status`` holds a non-empty
    second entry for the identifier, that entry is reported as an additional
    ``'active'`` ROR match (presumably the successor record -- confirm
    against how ``dix_status`` is built).

    Parameters:
        input: forwarded unchanged to ``Aff_Ids``.
        simU: forwarded unchanged to ``Aff_Ids`` (presumably a similarity
            threshold -- confirm against ``Aff_Ids``).
        simG: forwarded unchanged to ``Aff_Ids`` (same caveat as ``simU``).

    Returns:
        A list of dicts with the keys ``Provenance``, ``PID``, ``Value``,
        ``Confidence`` and ``Status``; ROR matches come first, OpenOrgs
        matches follow. The list is empty when neither lookup matched.

    Raises:
        KeyError: if a ROR identifier returned by ``Aff_Ids`` is missing
            from ``dix_status``.
    """

    def _entry(pid, value, confidence, status):
        # Key order mirrors the layout the callers already receive.
        return {
            'Provenance': 'AffRo',
            'PID': pid,
            'Value': value,
            'Confidence': confidence,
            'Status': status,
        }

    ror_hits = Aff_Ids(input, dix_org, dix_mult, dix_city, dix_country, simU, simG)
    # The OpenOrgs lookup passes the country dictionary in the city slot too.
    openorgs_hits = Aff_Ids(
        input,
        dix_org_oaire,
        dix_mult_oaire,
        dix_country_oaire,
        dix_country_oaire,
        simU,
        simG,
    )

    matches = []

    for hit in ror_hits:
        # Index 1 is used as the confidence, index 2 as the identifier.
        confidence, ror_id = hit[1], hit[2]
        status = dix_status[ror_id][0]
        matches.append(_entry('ROR', ror_id, confidence, status))

        if status != 'active':
            replacement = dix_status[ror_id][1]
            if replacement != '':
                # Same confidence as the original hit, reported as active.
                matches.append(_entry('ROR', replacement, confidence, 'active'))

    # OpenOrgs matches have no status lookup; they are always 'active'.
    matches.extend(
        _entry('OpenOrgs', hit[2], hit[1], 'active') for hit in openorgs_hits
    )

    return matches
|
||||
|
||||
|
||||
def affro(raw_aff_string):
|
||||
try:
|
||||
result = Aff_Ids(create_df_algorithm(raw_aff_string), dix_org, dix_mult, dix_city, dix_country, 0.65, 0.82)
|
||||
if len(result)>0:
|
||||
# result_dict = [json.dumps({'Origin': 'AffRo', 'PID':x[2], 'Confidence':x[1], 'Status':x[3]}) for x in result]
|
||||
result_dict = [{'Provenance': 'AffRo', 'PID':x[2], 'Value':x[3], 'Confidence':x[1], 'Status':x[4]} for x in result]
|
||||
|
||||
else:
|
||||
result_dict = []
|
||||
|
||||
return result_dict
|
||||
result = find_ror(create_df_algorithm(raw_aff_string), 0.65, 0.82)
|
||||
|
||||
return result
|
||||
except Exception as e:
|
||||
# Return some indication of an error, or log the row
|
||||
print(f"Error: {str(e)}")
|
||||
|
|
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
|
@ -181,44 +181,45 @@ protected_phrases1 = [
|
|||
|
||||
|
||||
|
||||
replacements = {'saint' : 'st',
|
||||
replacements = {'czechoslovak':'czech',
|
||||
'saint' : 'st',
|
||||
'aghia' : 'agia',
|
||||
'universitatsklinikum' : 'universi hospital',
|
||||
'universitetshospital' : 'universi hospital',
|
||||
'universitatskinderklinik' : 'universi childrens hospital',
|
||||
'universitatskliniken': 'universi hospital',
|
||||
'Universitätsklinik': 'universi hospital',
|
||||
'universitatsmedizin': 'universi medicine',
|
||||
'universitatskliniken' : 'universi hospital',
|
||||
'Universitätsklinik' : 'universi hospital',
|
||||
'universitatsmedizin' : 'universi medicine',
|
||||
'universitatsbibliothek' : 'universi library',
|
||||
'nat.':'national',
|
||||
'uni versity':'university',
|
||||
'unive rsity': 'university',
|
||||
'univ ersity': 'university',
|
||||
'inst ':'institute ',
|
||||
'adv ':'advanced ',
|
||||
'univ ':'university ',
|
||||
'stud ': 'studies ',
|
||||
'inst.':'institute',
|
||||
'adv.':'advanced',
|
||||
'univ.':'university',
|
||||
'stud.': 'studies',
|
||||
'univercity':'university',
|
||||
'univerisity':'university',
|
||||
'universtiy':'university',
|
||||
'univeristy':'university',
|
||||
'universirty':'university',
|
||||
'universiti':'university',
|
||||
'universitiy':'university',
|
||||
'universty' :'university',
|
||||
'nat.' : 'national',
|
||||
'uni versity' : 'university',
|
||||
'unive rsity' : 'university',
|
||||
'univ ersity' : 'university',
|
||||
'inst ' : 'institute ',
|
||||
'adv ' : 'advanced ',
|
||||
'univ ' : 'university ',
|
||||
'stud ' : 'studies ',
|
||||
'inst.' : 'institute',
|
||||
'adv.' : 'advanced',
|
||||
'univ.' : 'university',
|
||||
'stud.' : 'studies',
|
||||
'univercity' : 'university',
|
||||
'univerisity' : 'university',
|
||||
'universtiy' : 'university',
|
||||
'univeristy' : 'university',
|
||||
'universirty' : 'university',
|
||||
'universiti' : 'university',
|
||||
'universitiy' : 'university',
|
||||
'universty' : 'university',
|
||||
'techniche' : 'technological',
|
||||
'univ col': 'university colege',
|
||||
'univ. col.': 'university colege',
|
||||
'univ. coll.': 'university colege',
|
||||
'col.':'colege',
|
||||
'univ col' : 'university colege',
|
||||
'univ. col.' : 'university colege',
|
||||
'univ. coll.' : 'university colege',
|
||||
'col.' : 'colege',
|
||||
'hipokration' : 'hipocration',
|
||||
'belfield, dublin': 'dublin',
|
||||
'balsbridge, dublin': 'dublin', #ballsbridge
|
||||
'earlsfort terrace, dublin': 'dublin',
|
||||
'belfield, dublin' : 'dublin',
|
||||
'balsbridge, dublin' : 'dublin', #ballsbridge
|
||||
'earlsfort terrace, dublin' : 'dublin',
|
||||
'bon secours hospital, cork' : 'bon secours hospital cork',
|
||||
'bon secours hospital, dublin' : 'bon secours hospital dublin',
|
||||
'bon secours hospital, galway' : 'bon secours hospital galway',
|
||||
|
@ -231,7 +232,7 @@ replacements = {'saint' : 'st',
|
|||
'royal holoway, university london' : 'royal holoway universi london', #holloway
|
||||
'city, university london' : 'city universi london',
|
||||
'city university, london' : 'city universi london',
|
||||
'aeginition':'eginition',
|
||||
'aeginition' : 'eginition',
|
||||
'national technical university, athens' : 'national technical university athens'
|
||||
# 'harvard medical school' : 'harvard university'
|
||||
|
||||
|
|
|
@ -9,8 +9,7 @@ from sklearn.metrics.pairwise import cosine_similarity
|
|||
from functions_cluster import *
|
||||
from create_input_cluster import *
|
||||
|
||||
with open('dix_status.json', 'rb') as f:
|
||||
dix_status = json.load(f)
|
||||
|
||||
|
||||
specific = [k for k in categ_dicts if categ_dicts[k] == 'Specific']
|
||||
|
||||
|
@ -334,19 +333,19 @@ def Aff_Ids(input, dix_org, dix_mult, dix_city_ror, dix_country_ror, simU, simG)
|
|||
|
||||
|
||||
results = [[x[0],x[1], ids[i]] for i,x in enumerate(best)]
|
||||
results_upd = []
|
||||
# results_upd = []
|
||||
|
||||
for r in results:
|
||||
if 'ror.org' in r[2]:
|
||||
if dix_status[r[2]][0] == 'active':
|
||||
results_upd.append([r[0],r[1], 'ROR', r[2], 'active'])
|
||||
else:
|
||||
if dix_status[r[2]][1] == '':
|
||||
results_upd.append([r[0],r[1], 'ROR', r[2], dix_status[r[2]][0]])
|
||||
else:
|
||||
# for r in results:
|
||||
# if 'ror.org' in r[2]:
|
||||
# if dix_status[r[2]][0] == 'active':
|
||||
# results_upd.append([r[0],r[1], 'ROR', r[2], 'active'])
|
||||
# else:
|
||||
# if dix_status[r[2]][1] == '':
|
||||
# results_upd.append([r[0],r[1], 'ROR', r[2], dix_status[r[2]][0]])
|
||||
# else:
|
||||
|
||||
results_upd.append([r[0],r[1], 'ROR', r[2], dix_status[r[2]][0]])
|
||||
# results_upd.append([r[0],r[1], 'ROR', r[2], dix_status[r[2]][0]])
|
||||
|
||||
results_upd.append([r[0],r[1], 'ROR', dix_status[r[2]][1], 'active'])
|
||||
# results_upd.append([r[0],r[1], 'ROR', dix_status[r[2]][1], 'active'])
|
||||
|
||||
return results_upd
|
||||
return results
|
Loading…
Reference in New Issue