"""Build the Maven project, upload its jars to the gateway and run spark2-submit.

The script takes one positional argument: the path of a ``key=value`` file.
``user_name`` and ``reference_class`` drive the launcher itself; every other
key is forwarded to the Spark application as ``-key value``.
"""
import os
import argparse
import glob
from pathlib import Path
import re
import subprocess

regex = r"<\w*>(.*)<\/\w*>"

# Gateway the jars are copied to, and the remote working directory
# (relative to the remote user's home) that is wiped and recreated on each run.
REMOTE_HOST = "iis-cdh5-test-gw.ocean.icm.edu.pl"
REMOTE_DIR = "sandro_nb"

# Keys consumed by this launcher; they are required and never forwarded
# to the Spark application.
_LAUNCHER_KEYS = ("user_name", "reference_class")

# Child elements of <dependency> that are collected from pom.xml.
_DEPENDENCY_TAGS = ("groupId", "artifactId", "version")


def extract_argument(path):
    """Parse a ``key=value`` file into a dict.

    Lines starting with ``#`` are skipped. A line is kept only when splitting
    it on ``=`` yields exactly two parts, so lines without ``=`` or with more
    than one ``=`` are ignored. Keys and values are stripped of whitespace.
    """
    arguments = {}
    with open(path) as f:
        for line in f:
            if line.startswith("#"):
                continue
            parts = line.strip().split("=")
            if len(parts) == 2:
                arguments[parts[0].strip()] = parts[1].strip()
    return arguments


def get_jar_path():
    """Return the first ``target/*.jar`` reported by glob, or None if there is none."""
    return next(iter(glob.glob("target/*.jar")), None)


def extract_value(line):
    """Return the text between ``<tag>`` and ``</tag>`` on *line*, or None if absent."""
    match = re.search(regex, line, re.MULTILINE)
    if match is None:
        return None
    return match.group(1)


def extract_dependencies():
    """Collect the ``<dependency>`` entries of ``pom.xml`` in the current directory.

    The file is scanned line by line (it is not parsed as XML), so each tag is
    expected on its own line. Returns a list of dicts holding whichever of
    ``groupId``, ``artifactId`` and ``version`` were found for each dependency.

    Raises:
        FileNotFoundError: if ``pom.xml`` does not exist.
    """
    dependencies = []
    current = None  # dict being filled while inside a <dependency> element
    in_exclusions = False
    with open("pom.xml") as pom:
        for line in pom:
            if current is None:
                if "<dependency>" in line:
                    current = {}
                    in_exclusions = False
                continue
            if "<exclusions>" in line:
                in_exclusions = True
            elif "</exclusions>" in line:
                in_exclusions = False
            elif in_exclusions:
                # groupId/artifactId of an exclusion must not overwrite the
                # coordinates of the dependency that declares it.
                continue
            elif "</dependency>" in line:
                dependencies.append(current)
                current = None
            else:
                for tag in _DEPENDENCY_TAGS:
                    if f"<{tag}>" in line:
                        current[tag] = extract_value(line)
                        break
    return dependencies


def extracting_class_args(args):
    """Render *args* as ``-key value`` pairs joined by spaces.

    The launcher's own keys (``user_name``, ``reference_class``) are left out.
    """
    rendered = []
    for key, value in args.items():
        if key in _LAUNCHER_KEYS:
            continue
        rendered.append(f"-{key}")
        rendered.append(value)
    return " ".join(rendered)


def to_path(dependency):
    """Locate the jar of *dependency* in the local ``~/.m2/repository``.

    Returns ``(jar_file_name, jar_path)`` when the jar exists on disk and
    ``(None, None)`` otherwise, including when the dependency lacks a
    groupId, artifactId or version.
    """
    group_id = dependency.get("groupId")
    artifact_id = dependency.get("artifactId")
    version = dependency.get("version")
    if not (group_id and artifact_id and version):
        return None, None

    jar_name = "{}-{}.jar".format(artifact_id, version)
    jar_path = os.path.join(
        Path.home(),
        ".m2/repository/",
        group_id.replace(".", "/"),
        artifact_id,
        version,
        jar_name,
    )
    if os.path.exists(jar_path):
        return jar_name, jar_path
    return None, None


def main():
    """Build with Maven, copy the jars to the gateway and launch spark2-submit.

    Raises:
        Exception: if the argument file does not exist or no jar was built.
        KeyError: if the argument file lacks ``user_name`` or ``reference_class``.
    """
    parser = argparse.ArgumentParser(
        description="This scripts help you to publish, execute your check script in scala ")
    parser.add_argument("path")
    python_args = parser.parse_args()
    if not os.path.exists(python_args.path):
        raise Exception(f"path not found {python_args.path}")

    script_argument = extract_argument(python_args.path)
    # Fail before building or touching the remote directory if the
    # configuration cannot possibly work.
    missing = [key for key in _LAUNCHER_KEYS if key not in script_argument]
    if missing:
        raise KeyError(
            f"missing required key(s) in {python_args.path}: {', '.join(missing)}")

    deps = extract_dependencies()
    remote = f"{script_argument['user_name']}@{REMOTE_HOST}"

    # NOTE(review): values are interpolated into shell command lines unquoted,
    # and the exit status of each command is ignored.
    print("Cleaning Compile application ")
    os.system('mvn clean compile package')
    main_jar_path = get_jar_path()
    if main_jar_path is None:
        raise Exception("Unable to find the jar")

    print("copy on your root folder")
    os.system(f"ssh {remote} rm -rf {REMOTE_DIR}")
    os.system(f"ssh {remote} mkdir {REMOTE_DIR}")
    os.system(f"scp {main_jar_path} {remote}:{REMOTE_DIR}/")

    jars = []
    for item in deps:
        jar_name, jar_path = to_path(item)
        if jar_path:
            print(f"Copying dependencies {jar_path} to lib deps")
            os.system(f"scp {jar_path} {remote}:{REMOTE_DIR}/")
            jars.append(jar_name)

    jar_section = ""
    if jars:
        j_name = ",".join(f"{REMOTE_DIR}/{jar}" for jar in jars)
        jar_section = f"--jars {j_name}"

    name = os.path.basename(main_jar_path)
    class_args = extracting_class_args(script_argument)
    command = (
        f"spark2-submit --master yarn {jar_section} --executor-memory 4G "
        f"--class {script_argument['reference_class']} "
        f"--conf \"spark.sql.shuffle.partitions=10000\" "
        f"{REMOTE_DIR}/{name} {class_args}"
    )
    print(f"executing command {command}")
    os.system(f"ssh {remote} {command} ")


if __name__ == "__main__":
    main()