42 lines
1.3 KiB
Python
42 lines
1.3 KiB
Python
import sys
|
|
# Import project helper functions
|
|
from functions_cluster import *
|
|
from matching_cluster import *
|
|
from create_input_cluster import *
|
|
import json
|
|
|
|
# Lookup dictionaries handed to Aff_Ids() by affro(). They are loaded once,
# when this module is imported, in the order listed below.
# NOTE(review): the paths are relative, so they are presumably resolved
# against the current working directory -- confirm in load_json.
(
    dix_org,      # loaded from dix_acad.json (file name differs from the variable name)
    dix_mult,
    dix_city,
    dix_country,
) = (
    load_json(dictionary_path)
    for dictionary_path in (
        'dictionaries/dix_acad.json',
        'dictionaries/dix_mult.json',
        'dictionaries/dix_city.json',
        'dictionaries/dix_country.json',
    )
)
|
|
|
|
|
|
def affro(raw_aff_string):
    """Match a raw affiliation string and return the matches as dictionaries.

    The input is passed through ``create_df_algorithm`` and the outcome is
    handed to ``Aff_Ids`` together with the module-level dictionaries
    (``dix_org``, ``dix_mult``, ``dix_city``, ``dix_country``) and the two
    fixed numeric arguments ``0.65`` and ``0.82``.
    NOTE(review): those two numbers look like similarity thresholds; their
    exact meaning is defined by ``Aff_Ids`` -- confirm there.

    Args:
        raw_aff_string: Value forwarded unchanged to ``create_df_algorithm``.
            The command-line entry point passes a string.

    Returns:
        * A list with one dict per entry returned by ``Aff_Ids``. Each dict
          has the keys ``Provenance`` (always ``'AffRo'``), ``PID``,
          ``Value``, ``Confidence`` and ``Status``.
        * An empty list when ``Aff_Ids`` returns nothing.
        * ``None`` when any step raises. The error message and the input
          are then written to standard error; the exception is not
          propagated.
    """
    try:
        result = Aff_Ids(
            create_df_algorithm(raw_aff_string),
            dix_org,
            dix_mult,
            dix_city,
            dix_country,
            0.65,
            0.82,
        )

        # Use len() rather than truthiness: the container type returned by
        # Aff_Ids is not visible here, and some containers reject bool().
        if len(result) == 0:
            return []

        # Each entry is indexed positionally; index 0 is not used.
        return [
            {
                'Provenance': 'AffRo',
                'PID': x[2],
                'Value': x[3],
                'Confidence': x[1],
                'Status': x[4],
            }
            for x in result
        ]
    except Exception as e:
        # Best-effort by design: one bad input must not abort the caller.
        # Diagnostics go to stderr so they never mix with results that the
        # command-line entry point prints on stdout.
        print(f"Error: {str(e)}", file=sys.stderr)
        print(raw_aff_string, file=sys.stderr)
        return None
|
|
|
|
|
|
if __name__ == "__main__":
    # Command-line entry point. sys.argv must hold the script name plus
    # exactly one user argument: the raw affiliation string.
    if len(sys.argv) != 2:
        # Only one argument is accepted, so the usage text names only one.
        # Usage errors go to stderr, the conventional stream for diagnostics.
        print("Usage: python affro_spark.py <string>", file=sys.stderr)
        sys.exit(1)

    string_arg = sys.argv[1]

    # Print whatever affro() returns for the given string.
    print(affro(string_arg))
|