DHP-Explorer/execute_notebook.py

128 lines
4.4 KiB
Python

import os
import argparse
import glob
from pathlib import Path
import re
import subprocess
# Pattern used by extract_value(): captures (group 1) the text between a
# simple opening tag and a closing tag on the same line, e.g.
# "<version>1.0</version>" -> "1.0". Tag names are matched with \w*, so an
# opening tag carrying attributes does not match; the capture is greedy.
regex = r"<\w*>(.*)<\/\w*>"
def extract_argument(path):
    """Read ``key=value`` settings from the file at ``path``.

    Blank lines, comment lines (first non-blank character is ``#``) and
    lines without an ``=`` are ignored. Only the first ``=`` separates the
    key from the value, so values may themselves contain ``=``. Keys and
    values are stripped of surrounding whitespace; a later occurrence of a
    key overrides an earlier one.

    Args:
        path: Path of the settings file.

    Returns:
        dict mapping each key to its (string) value.
    """
    arguments = {}
    with open(path) as f:
        for line in f:
            entry = line.strip()
            # Strip before testing for "#" so indented comments are skipped too.
            if not entry or entry.startswith("#"):
                continue
            key, separator, value = entry.partition("=")
            if separator:
                arguments[key.strip()] = value.strip()
    return arguments
def get_jar_path():
    """Return the first path matching ``target/*.jar``, or None if none exists.

    The pattern is relative to the current working directory; when several
    jars match, whichever one ``glob.glob`` lists first is returned.
    """
    candidates = glob.glob("target/*.jar")
    if not candidates:
        return None
    return candidates[0]
def extract_value(line):
    """Return the text enclosed by the first ``<tag>...</tag>`` pair in ``line``.

    Matching is done with the module-level ``regex`` pattern, whose single
    capture group holds the enclosed text. Returns None when ``line``
    contains no such pair.
    """
    found = re.search(regex, line, re.MULTILINE)
    if found is None:
        return None
    return found.group(1)
def extract_dependencies(pom_path="pom.xml"):
    """Collect the Maven dependencies flagged with ``<!-- JAR NEED -->``.

    The POM is scanned line by line rather than parsed as XML. After a line
    containing the marker, the ``groupId``, ``artifactId`` and ``version``
    lines that follow are recorded until a line containing ``</dependency>``
    closes the entry. Lines inside an ``<exclusions>`` block are skipped so
    that the coordinates of an excluded artifact never overwrite those of
    the dependency itself.

    Args:
        pom_path: Path of the POM file to scan. Defaults to ``pom.xml`` in
            the current working directory.

    Returns:
        list of dicts, one per flagged dependency, holding whichever of the
        keys ``groupId``, ``artifactId`` and ``version`` were found. A value
        is None when its line could not be parsed by ``extract_value``.
    """
    dep_to_add = []
    current_deps = None  # None while we are outside a flagged dependency
    in_exclusions = False
    with open(pom_path) as f:
        for line in f:
            if "<!-- JAR NEED -->" in line:
                current_deps = {}
                in_exclusions = False
                continue
            if current_deps is None:
                continue

            # Skip everything nested in <exclusions>...</exclusions>.
            if "<exclusions>" in line:
                in_exclusions = True
            if in_exclusions:
                if "</exclusions>" in line:
                    in_exclusions = False
                continue

            if "groupId" in line:
                current_deps["groupId"] = extract_value(line)
            elif "artifactId" in line:
                current_deps["artifactId"] = extract_value(line)
            elif "version" in line:
                version = extract_value(line)
                current_deps["version"] = version
                print("version", version)
            elif "</dependency>" in line:
                dep_to_add.append(current_deps)
                current_deps = None
    return dep_to_add
def extracting_class_args(args):
    """Render the job arguments as a single ``-key value ...`` string.

    ``user_name`` and ``reference_class`` are left out; every other entry
    of ``args`` contributes ``-<key>`` followed by its value, in iteration
    order, with single spaces between the tokens.
    """
    excluded = ("user_name", "reference_class")
    tokens = [
        token
        for key in args
        if key not in excluded
        for token in (f"-{key}", args[key])
    ]
    return " ".join(tokens)
def to_path(dependency):
    """Locate a dependency's jar under ``~/.m2/repository``.

    Args:
        dependency: dict with the keys ``groupId``, ``artifactId`` and
            ``version``.

    Returns:
        ``(jar_file_name, jar_path)`` when
        ``<groupId as dirs>/<artifactId>/<version>/<artifactId>-<version>.jar``
        exists under ``~/.m2/repository``; ``(None, None)`` when it does
        not exist or when any of the three coordinates is missing or empty
        (so an incomplete entry is reported as "not found" instead of
        raising).
    """
    group_id = dependency.get("groupId")
    artifact_id = dependency.get("artifactId")
    version = dependency.get("version")
    if not (group_id and artifact_id and version):
        return None, None

    jar_name = f"{artifact_id}-{version}.jar"
    jar_path = os.path.join(
        Path.home(),
        ".m2/repository/",
        group_id.replace(".", "/"),  # dotted group id maps to nested directories
        artifact_id,
        version,
        jar_name,
    )
    if os.path.exists(jar_path):
        return jar_name, jar_path
    return None, None
# Gateway host the jars are copied to and on which spark2-submit is run.
REMOTE_HOST = "iis-cdh5-test-gw.ocean.icm.edu.pl"
# Directory, relative to the remote login directory, recreated on every run.
REMOTE_DIR = "sandro_nb"


def _run_checked(command):
    """Run ``command`` through the local shell; raise CalledProcessError on a non-zero exit."""
    subprocess.run(command, shell=True, check=True)


def main():
    """Build the project, copy its jars to the gateway and submit the Spark job.

    Steps: parse the settings file given on the command line, run
    ``mvn clean compile package``, recreate ``REMOTE_DIR`` on the gateway,
    copy the built jar plus every flagged dependency found in the local
    Maven repository, then run ``spark2-submit`` over ssh.

    Raises:
        Exception: if the settings file does not exist, lacks ``user_name``
            or ``reference_class``, or no jar was produced under ``target/``.
        subprocess.CalledProcessError: if any mvn/ssh/scp command fails.
    """
    parser = argparse.ArgumentParser(
        description="This scripts help you to publish, execute your check script in scala ")
    parser.add_argument("path")
    python_args = parser.parse_args()
    if not os.path.exists(python_args.path):
        raise Exception(f"path not found {python_args.path}")

    script_argument = extract_argument(python_args.path)
    missing = [key for key in ("user_name", "reference_class") if key not in script_argument]
    if missing:
        raise Exception(
            f"missing required argument(s) in {python_args.path}: {', '.join(missing)}")
    remote = f"{script_argument['user_name']}@{REMOTE_HOST}"

    # Read the POM only after the arguments are validated, so --help and a
    # wrong settings path do not fail on a missing pom.xml.
    deps = extract_dependencies()

    print("Cleaning Compile application ")
    _run_checked('mvn clean compile package')
    main_jar_path = get_jar_path()
    if main_jar_path is None:
        raise Exception("Unable to find the jar")

    print("copy on your root folder")
    _run_checked(f"ssh {remote} rm -rf {REMOTE_DIR}")
    _run_checked(f"ssh {remote} mkdir {REMOTE_DIR}")
    _run_checked(f"scp {main_jar_path} {remote}:{REMOTE_DIR}/")

    jars = []
    for item in deps:
        name, p = to_path(item)
        if p:
            print(f"Copying dependencies {p} to lib deps")
            _run_checked(f"scp {p} {remote}:{REMOTE_DIR}/")
            jars.append(name)

    jar_section = ""
    if jars:
        j_name = ",".join(f"{REMOTE_DIR}/{jar}" for jar in jars)
        jar_section = f"--jars {j_name}"

    # Only the file name is needed: the jar was copied flat into REMOTE_DIR.
    name = os.path.basename(main_jar_path)
    class_args = extracting_class_args(script_argument)
    command = f"spark2-submit --master yarn {jar_section} --executor-memory 4G --class {script_argument['reference_class']} --conf \"spark.sql.shuffle.partitions=10000\" {REMOTE_DIR}/{name} {class_args}"
    print(f"executing command {command}")
    _run_checked(f"ssh {remote} {command}")


if __name__ == "__main__":
    main()