Merge pull request 'Redirection of non-active ROR IDs' (#1) from main into openaire-workflow-ready
Reviewed-on: #1
This commit is contained in:
commit
b5d32cb730
|
@ -15,7 +15,7 @@ def affro(raw_aff_string):
|
||||||
try:
|
try:
|
||||||
result = Aff_Ids(create_df_algorithm(raw_aff_string), dix_org, dix_mult, dix_city, dix_country, 0.65, 0.82)
|
result = Aff_Ids(create_df_algorithm(raw_aff_string), dix_org, dix_mult, dix_city, dix_country, 0.65, 0.82)
|
||||||
if len(result)>0:
|
if len(result)>0:
|
||||||
result_dict = [json.dumps({'Origin': 'affRo', 'RORid':x[2], 'Confidence':x[1]}) for x in result]
|
result_dict = [json.dumps({'Origin': 'affRo', 'RORid':x[2], 'Confidence':x[1], 'Status':x[3]}) for x in result]
|
||||||
else:
|
else:
|
||||||
result_dict = []
|
result_dict = []
|
||||||
|
|
||||||
|
@ -25,7 +25,6 @@ def affro(raw_aff_string):
|
||||||
print(f"Error: {str(e)}")
|
print(f"Error: {str(e)}")
|
||||||
print(raw_aff_string)
|
print(raw_aff_string)
|
||||||
pass
|
pass
|
||||||
#raw_aff = 'university of california, los angeles, university of athens, university of california, san diego, university of athens, greece'
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
|
File diff suppressed because one or more lines are too long
|
@ -9,6 +9,9 @@ from sklearn.metrics.pairwise import cosine_similarity
|
||||||
from functions_cluster import *
|
from functions_cluster import *
|
||||||
from create_input_cluster import *
|
from create_input_cluster import *
|
||||||
|
|
||||||
|
with open('/Users/myrto/Documents/openAIRE/3. ror/dictionaries/dix_status.json', 'rb') as f:
|
||||||
|
dix_status = json.load(f)
|
||||||
|
|
||||||
def best_sim_score(light_raw, candidate_num, pairs_list, m, simU, simG):
|
def best_sim_score(light_raw, candidate_num, pairs_list, m, simU, simG):
|
||||||
"""
|
"""
|
||||||
Finds the best match between a 'key word' and several legal names from the OpenAIRE database.
|
Finds the best match between a 'key word' and several legal names from the OpenAIRE database.
|
||||||
|
@ -259,39 +262,37 @@ def Aff_Ids(input, dix_org, dix_mult, dix_city_ror, dix_country_ror, simU, simG)
|
||||||
# id_list = []
|
# id_list = []
|
||||||
if dix_mult[x] != 'unique':
|
if dix_mult[x] != 'unique':
|
||||||
if x in list(dix_city_ror.keys()):
|
if x in list(dix_city_ror.keys()):
|
||||||
match_found0 = False
|
|
||||||
match_found = False
|
match_found = False
|
||||||
|
|
||||||
for city in dix_city_ror[x]:
|
for city in dix_city_ror[x]:
|
||||||
if city[0] in light_aff:
|
if city[0] in light_aff:
|
||||||
if city[0] not in x:
|
if city[0] not in x:
|
||||||
ids[i] = city[1]
|
ids[i] = city[1]
|
||||||
|
|
||||||
match_found0 = True
|
|
||||||
match_found = True
|
match_found = True
|
||||||
break
|
break
|
||||||
|
else:
|
||||||
|
if light_aff.count(city[0]) >1:
|
||||||
|
ids[i] = city[1]
|
||||||
|
match_found = True
|
||||||
|
break
|
||||||
|
|
||||||
if not match_found:
|
if not match_found:
|
||||||
for city in dix_city_ror[x]:
|
for city in dix_city_ror[x]:
|
||||||
if city[0] in light_aff and city[0] not in x:
|
if city[0] in light_aff and city[0] not in x:
|
||||||
ids[i] = city[1]
|
ids[i] = city[1]
|
||||||
match_found0 = True
|
|
||||||
print('ok')
|
|
||||||
break
|
break
|
||||||
|
|
||||||
if not match_found:
|
if not match_found:
|
||||||
match_found2 = False
|
|
||||||
match_found3 = False
|
match_found3 = False
|
||||||
|
|
||||||
for country in dix_country_ror[x]:
|
for country in dix_country_ror[x]:
|
||||||
if country[0] == 'united states' and (country[0] in light_aff or 'usa' in light_aff):
|
if country[0] == 'united states' and (country[0] in light_aff or 'usa' in light_aff):
|
||||||
ids[i] = country[1]
|
ids[i] = country[1]
|
||||||
match_found2 = True
|
|
||||||
match_found3 = True
|
match_found3 = True
|
||||||
break
|
break
|
||||||
|
|
||||||
if country[0] == 'united kingdom' and (country[0] in light_aff or 'uk' in light_aff):
|
if country[0] == 'united kingdom' and (country[0] in light_aff or 'uk' in light_aff):
|
||||||
ids[i] = country[1]
|
ids[i] = country[1]
|
||||||
match_found2 = True
|
|
||||||
match_found3 = True
|
match_found3 = True
|
||||||
break
|
break
|
||||||
|
|
||||||
|
@ -299,7 +300,6 @@ def Aff_Ids(input, dix_org, dix_mult, dix_city_ror, dix_country_ror, simU, simG)
|
||||||
|
|
||||||
if country[0] not in x:
|
if country[0] not in x:
|
||||||
ids[i] = country[1]
|
ids[i] = country[1]
|
||||||
match_found2 = True
|
|
||||||
match_found3 = True
|
match_found3 = True
|
||||||
break
|
break
|
||||||
|
|
||||||
|
@ -307,13 +307,25 @@ def Aff_Ids(input, dix_org, dix_mult, dix_city_ror, dix_country_ror, simU, simG)
|
||||||
for country in dix_country_ror[x]:
|
for country in dix_country_ror[x]:
|
||||||
if country[0] in light_aff and country[0] in x:
|
if country[0] in light_aff and country[0] in x:
|
||||||
ids[i] = country[1]
|
ids[i] = country[1]
|
||||||
match_found2 = True
|
|
||||||
break
|
break
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
results = [[x[0],x[1], ids[i]] for i,x in enumerate(best)]
|
results = [[x[0],x[1], ids[i]] for i,x in enumerate(best)]
|
||||||
|
results_upd = []
|
||||||
|
|
||||||
return results #[[result[to_check[i]] for i in ready] + [to_check[2]], best[0]]
|
for r in results:
|
||||||
|
if dix_status[r[2]][0] == 'active':
|
||||||
|
r.append('active')
|
||||||
|
results_upd.append(r)
|
||||||
|
else:
|
||||||
|
if dix_status[r[2]][1] == '':
|
||||||
|
r.append(dix_status[r[2]][0])
|
||||||
|
results_upd.append(r)
|
||||||
|
else:
|
||||||
|
r.append(dix_status[r[2]][0])
|
||||||
|
results_upd.append(r)
|
||||||
|
results_upd.append([r[0],r[1], dix_status[r[2]][1], 'active'])
|
||||||
|
|
||||||
|
|
||||||
|
return results_upd
|
Loading…
Reference in New Issue