affRo/affro_cluster.py

72 lines
2.3 KiB
Python
Raw Normal View History

2024-09-05 12:23:32 +02:00
import sys
##import functions
from functions_cluster import *
from matching_cluster import *
from create_input_cluster import *
import json
# Lookup dictionaries passed to the first Aff_Ids call in find_ror; matches
# from that call are labelled 'ROR'.
dix_org, dix_mult, dix_city, dix_country = (
    load_json(path)
    for path in (
        'dictionaries/dix_acad.json',
        'dictionaries/dix_mult.json',
        'dictionaries/dix_city.json',
        'dictionaries/dix_country.json',
    )
)

# Lookup dictionaries passed to the second Aff_Ids call in find_ror; matches
# from that call are labelled 'OpenOrgs'.
dix_org_oaire, dix_mult_oaire, dix_country_oaire = (
    load_json(path)
    for path in (
        'dictionaries/dix_acad_oaire.json',
        'dictionaries/dix_mult_oaire.json',
        'dictionaries/dix_country_oaire.json',
    )
)

# Indexed by the matched ROR value in find_ror: element 0 is compared with
# 'active', element 1 is emitted as an extra 'active' value when non-empty
# (presumably the successor record -- confirm against the dictionary build).
dix_status = load_json('dictionaries/dix_status.json')
2024-09-05 12:23:32 +02:00
2024-11-21 12:39:26 +01:00
def find_ror(input, simU, simG):
    """Match an affiliation input against the ROR and OpenOrgs dictionaries.

    Calls ``Aff_Ids`` twice on ``input`` -- once with the ROR dictionaries and
    once with the ``*_oaire`` dictionaries -- and flattens both result sets
    into a single list of dicts.

    Args:
        input: Affiliation data handed to ``Aff_Ids`` unchanged.
        simU: Forwarded to ``Aff_Ids`` (presumably a similarity threshold).
        simG: Forwarded to ``Aff_Ids`` (presumably a similarity threshold).

    Returns:
        A list of dicts with the keys ``Provenance``, ``PID``, ``Value``,
        ``Confidence`` and ``Status``. ROR entries come first, followed by
        OpenOrgs entries; the list is empty when nothing matched.

    Raises:
        KeyError: If a matched ROR value has no entry in ``dix_status``.
    """
    ror_matches = Aff_Ids(input, dix_org, dix_mult, dix_city, dix_country, simU, simG)
    # NOTE(review): dix_country_oaire fills both the slot that receives
    # dix_city in the call above and the country slot -- confirm this is
    # intended.
    oaire_matches = Aff_Ids(input, dix_org_oaire, dix_mult_oaire, dix_country_oaire, dix_country_oaire, simU, simG)

    def _entry(pid, value, confidence, status):
        # Single place that fixes the output schema and key order.
        return {'Provenance': 'AffRo', 'PID': pid, 'Value': value, 'Confidence': confidence, 'Status': status}

    result_dict = []
    for match in ror_matches:
        confidence, ror_id = match[1], match[2]
        status_info = dix_status[ror_id]
        status = status_info[0]
        # The matched record is always reported with its own status.
        result_dict.append(_entry('ROR', ror_id, confidence, status))
        # A non-active record with a non-empty second element additionally
        # yields that element as an 'active' value with the same confidence.
        if status != 'active' and status_info[1] != '':
            result_dict.append(_entry('ROR', status_info[1], confidence, 'active'))

    # OpenOrgs matches are always reported as 'active'.
    for match in oaire_matches:
        result_dict.append(_entry('OpenOrgs', match[2], match[1], 'active'))

    return result_dict
def affro(raw_aff_string):
    """Resolve a raw affiliation string to organisation identifiers.

    Builds the algorithm input with ``create_df_algorithm`` and passes it to
    ``find_ror`` with the fixed values 0.65 and 0.82.

    Args:
        raw_aff_string: The raw affiliation text to resolve.

    Returns:
        The list returned by ``find_ror``, or ``None`` when any exception was
        raised while processing the string.
    """
    try:
        algorithm_input = create_df_algorithm(raw_aff_string)
        return find_ror(algorithm_input, 0.65, 0.82)
    except Exception as e:
        # Best-effort: report the failure and the offending input on stdout
        # instead of propagating, so one bad row does not abort the caller.
        print(f"Error: {str(e)}")
        print(raw_aff_string)
        return None
if __name__ == "__main__":
    # Command-line entry point: resolve one raw affiliation string and print
    # the result. Exactly one argument is accepted; the two numeric values
    # passed to find_ror are fixed inside affro(), so the usage text must not
    # advertise extra float arguments.
    if len(sys.argv) != 2:
        # Usage errors go to stderr so they never mix with result output.
        print(f"Usage: python {sys.argv[0]} <string>", file=sys.stderr)
        sys.exit(1)
    print(affro(sys.argv[1]))