Merge pull request 'Redirection of non-active ROR IDs' (#1) from main into openaire-workflow-ready
Reviewed-on: #1
This commit is contained in:
commit
b5d32cb730
|
@ -15,7 +15,7 @@ def affro(raw_aff_string):
|
|||
try:
|
||||
result = Aff_Ids(create_df_algorithm(raw_aff_string), dix_org, dix_mult, dix_city, dix_country, 0.65, 0.82)
|
||||
if len(result)>0:
|
||||
result_dict = [json.dumps({'Origin': 'affRo', 'RORid':x[2], 'Confidence':x[1]}) for x in result]
|
||||
result_dict = [json.dumps({'Origin': 'affRo', 'RORid':x[2], 'Confidence':x[1], 'Status':x[3]}) for x in result]
|
||||
else:
|
||||
result_dict = []
|
||||
|
||||
|
@ -25,7 +25,6 @@ def affro(raw_aff_string):
|
|||
print(f"Error: {str(e)}")
|
||||
print(raw_aff_string)
|
||||
pass
|
||||
#raw_aff = 'university of california, los angeles, university of athens, university of california, san diego, university of athens, greece'
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
|
File diff suppressed because one or more lines are too long
|
@ -9,6 +9,9 @@ from sklearn.metrics.pairwise import cosine_similarity
|
|||
from functions_cluster import *
|
||||
from create_input_cluster import *
|
||||
|
||||
with open('/Users/myrto/Documents/openAIRE/3. ror/dictionaries/dix_status.json', 'rb') as f:
|
||||
dix_status = json.load(f)
|
||||
|
||||
def best_sim_score(light_raw, candidate_num, pairs_list, m, simU, simG):
|
||||
"""
|
||||
Finds the best match between a 'key word' and several legal names from the OpenAIRE database.
|
||||
|
@ -259,39 +262,37 @@ def Aff_Ids(input, dix_org, dix_mult, dix_city_ror, dix_country_ror, simU, simG)
|
|||
# id_list = []
|
||||
if dix_mult[x] != 'unique':
|
||||
if x in list(dix_city_ror.keys()):
|
||||
match_found0 = False
|
||||
match_found = False
|
||||
|
||||
for city in dix_city_ror[x]:
|
||||
if city[0] in light_aff:
|
||||
if city[0] not in x:
|
||||
ids[i] = city[1]
|
||||
|
||||
match_found0 = True
|
||||
match_found = True
|
||||
break
|
||||
else:
|
||||
if light_aff.count(city[0]) >1:
|
||||
ids[i] = city[1]
|
||||
match_found = True
|
||||
break
|
||||
|
||||
if not match_found:
|
||||
for city in dix_city_ror[x]:
|
||||
if city[0] in light_aff and city[0] not in x:
|
||||
ids[i] = city[1]
|
||||
match_found0 = True
|
||||
print('ok')
|
||||
break
|
||||
|
||||
if not match_found:
|
||||
match_found2 = False
|
||||
match_found3 = False
|
||||
|
||||
for country in dix_country_ror[x]:
|
||||
if country[0] == 'united states' and (country[0] in light_aff or 'usa' in light_aff):
|
||||
ids[i] = country[1]
|
||||
match_found2 = True
|
||||
match_found3 = True
|
||||
break
|
||||
|
||||
if country[0] == 'united kingdom' and (country[0] in light_aff or 'uk' in light_aff):
|
||||
ids[i] = country[1]
|
||||
match_found2 = True
|
||||
match_found3 = True
|
||||
break
|
||||
|
||||
|
@ -299,7 +300,6 @@ def Aff_Ids(input, dix_org, dix_mult, dix_city_ror, dix_country_ror, simU, simG)
|
|||
|
||||
if country[0] not in x:
|
||||
ids[i] = country[1]
|
||||
match_found2 = True
|
||||
match_found3 = True
|
||||
break
|
||||
|
||||
|
@ -307,13 +307,25 @@ def Aff_Ids(input, dix_org, dix_mult, dix_city_ror, dix_country_ror, simU, simG)
|
|||
for country in dix_country_ror[x]:
|
||||
if country[0] in light_aff and country[0] in x:
|
||||
ids[i] = country[1]
|
||||
match_found2 = True
|
||||
break
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
results = [[x[0],x[1], ids[i]] for i,x in enumerate(best)]
|
||||
results_upd = []
|
||||
|
||||
return results #[[result[to_check[i]] for i in ready] + [to_check[2]], best[0]]
|
||||
for r in results:
|
||||
if dix_status[r[2]][0] == 'active':
|
||||
r.append('active')
|
||||
results_upd.append(r)
|
||||
else:
|
||||
if dix_status[r[2]][1] == '':
|
||||
r.append(dix_status[r[2]][0])
|
||||
results_upd.append(r)
|
||||
else:
|
||||
r.append(dix_status[r[2]][0])
|
||||
results_upd.append(r)
|
||||
results_upd.append([r[0],r[1], dix_status[r[2]][1], 'active'])
|
||||
|
||||
|
||||
return results_upd
|
Loading…
Reference in New Issue