"""Match a raw affiliation string to ROR and OpenOrgs identifiers.

The module loads its lookup dictionaries once at import time and exposes
``find_ror`` (matching on prepared input) and ``affro`` (best-effort
wrapper that takes the raw affiliation string).  When executed as a script
it matches the single command-line argument and prints the result.
"""

import json
import sys

# Project-local helpers. ``load_json``, ``Aff_Ids`` and ``create_df_algorithm``
# are not defined in this file, so they are presumably provided by these
# star imports -- verify against the modules before narrowing them.
from functions_cluster import *
from matching_cluster import *
from create_input_cluster import *

# Dictionaries passed to the ROR matching call.
dix_org = load_json('dictionaries/dix_acad.json')
dix_mult = load_json('dictionaries/dix_mult.json')
dix_city = load_json('dictionaries/dix_city.json')
dix_country = load_json('dictionaries/dix_country.json')

# Dictionaries passed to the OpenOrgs matching call.
dix_org_oaire = load_json('dictionaries/dix_acad_oaire.json')
dix_mult_oaire = load_json('dictionaries/dix_mult_oaire.json')
dix_country_oaire = load_json('dictionaries/dix_country_oaire.json')

# Indexed by the matched ROR value: element 0 is compared against 'active',
# element 1 is emitted as a replacement identifier when it is not ''.
dix_status = load_json('dictionaries/dix_status.json')


def _match_record(pid, value, confidence, status):
    """Build one output record in the key order emitted by ``find_ror``."""
    return {
        'Provenance': 'AffRo',
        'PID': pid,
        'Value': value,
        'Confidence': confidence,
        'Status': status,
    }


def find_ror(input, simU, simG):
    """Return ROR and OpenOrgs matches for a prepared affiliation input.

    Args:
        input: Value forwarded unchanged to ``Aff_Ids`` (``affro`` passes the
            output of ``create_df_algorithm``).  The name shadows the builtin
            but is kept so existing keyword callers keep working.
        simU: First similarity threshold forwarded to ``Aff_Ids``.
        simG: Second similarity threshold forwarded to ``Aff_Ids``.

    Returns:
        A list of dicts with the keys ``Provenance``, ``PID``, ``Value``,
        ``Confidence`` and ``Status``: all ROR records first, then all
        OpenOrgs records.  The list is empty when nothing matched.

    Raises:
        KeyError: If a matched ROR value has no entry in ``dix_status``.
    """
    ror_matches = Aff_Ids(
        input, dix_org, dix_mult, dix_city, dix_country, simU, simG
    )
    # NOTE(review): the country dictionary is passed in both the city and the
    # country position; no OpenOrgs city dictionary is loaded in this file --
    # confirm that this is intended.
    openorgs_matches = Aff_Ids(
        input,
        dix_org_oaire,
        dix_mult_oaire,
        dix_country_oaire,
        dix_country_oaire,
        simU,
        simG,
    )

    records = []
    for match in ror_matches:
        confidence, ror_id = match[1], match[2]
        status_entry = dix_status[ror_id]
        status = status_entry[0]
        # The matched identifier is always reported with its own status.
        records.append(_match_record('ROR', ror_id, confidence, status))
        # A non-active identifier with a recorded replacement additionally
        # yields the replacement, reported as active with the same confidence.
        if status != 'active' and status_entry[1] != '':
            records.append(
                _match_record('ROR', status_entry[1], confidence, 'active')
            )

    # OpenOrgs matches are always reported as active.
    for match in openorgs_matches:
        records.append(_match_record('OpenOrgs', match[2], match[1], 'active'))

    return records


def affro(raw_aff_string, simU=0.65, simG=0.82):
    """Match a raw affiliation string, never raising on failure.

    Args:
        raw_aff_string: The affiliation text to match.
        simU: Threshold forwarded to ``find_ror``; defaults to the value that
            was previously hard-coded.
        simG: Threshold forwarded to ``find_ror``; defaults to the value that
            was previously hard-coded.

    Returns:
        The list produced by ``find_ror``, or ``None`` when any exception
        occurred; in that case the error and the offending input are printed.
    """
    try:
        return find_ror(create_df_algorithm(raw_aff_string), simU, simG)
    except Exception as e:
        # Deliberate best-effort boundary: one bad row is reported and
        # skipped instead of aborting the caller.
        print(f"Error: {str(e)}")
        print(raw_aff_string)
        return None


if __name__ == "__main__":
    if len(sys.argv) != 2:
        print("Usage: python affro_spark.py <affiliation_string>")
        sys.exit(1)

    string_arg = sys.argv[1]
    print(affro(string_arg))