# affRo/affro_cluster.py

import sys
##import functions
from functions_cluster import *
from matching_cluster import *
from create_input_cluster import *
import json

# Load the four lookup dictionaries once, at import time; affro() passes them
# to Aff_Ids on every call. Note that dix_org is read from dix_acad.json.
# The paths are relative -- presumably resolved against the working
# directory by load_json (confirm against its implementation).
dix_org, dix_mult, dix_city, dix_country = (
    load_json(dictionary_path)
    for dictionary_path in (
        'dictionaries/dix_acad.json',
        'dictionaries/dix_mult.json',
        'dictionaries/dix_city.json',
        'dictionaries/dix_country.json',
    )
)


def affro(raw_aff_string):
    """Match a raw affiliation string and return the matches as JSON strings.

    The string is converted with ``create_df_algorithm`` and handed to
    ``Aff_Ids`` together with the module-level dictionaries and the two
    numeric arguments 0.65 and 0.82 (presumably similarity thresholds --
    confirm against ``Aff_Ids``).

    Args:
        raw_aff_string: The affiliation text to match.

    Returns:
        A list with one JSON-encoded string per entry returned by
        ``Aff_Ids``. Each encoded object has the keys ``Origin`` (always
        ``'affRo'``), ``RORid`` (element 2 of the entry), ``Confidence``
        (element 1) and ``Status`` (element 3). The list is empty when
        ``Aff_Ids`` returns no entries.

        ``None`` is returned when any step raises; the error message and the
        offending input are written to stderr.
    """
    try:
        matches = Aff_Ids(
            create_df_algorithm(raw_aff_string),
            dix_org, dix_mult, dix_city, dix_country,
            0.65, 0.82,
        )
        # An empty result naturally produces an empty list, so no separate
        # length check is needed.
        return [
            json.dumps({
                'Origin': 'affRo',
                'RORid': match[2],
                'Confidence': match[1],
                'Status': match[3],
            })
            for match in matches
        ]
    except Exception as e:
        # Deliberate best-effort: one bad input must not abort the caller.
        # Diagnostics go to stderr so they never mix with results that the
        # caller prints to stdout.
        print(f"Error: {str(e)}", file=sys.stderr)
        print(raw_aff_string, file=sys.stderr)
        return None

if __name__ == "__main__":
    # Command-line entry point: expects exactly one argument, the raw
    # affiliation string, and prints whatever affro() returns for it.
    if len(sys.argv) != 2:
        # The usage text names the script actually being run and the single
        # argument the check above accepts.
        print(f"Usage: python {sys.argv[0]} <string>", file=sys.stderr)
        sys.exit(1)

    print(affro(sys.argv[1]))