Skip to content

Commit

Permalink
Add support for HTML, CSS and Javascript in LocalCommandLineCodeExec…
Browse files Browse the repository at this point in the history
…utor with Mapping executor/saver microsoft#2303  (microsoft#2464)

* Add support for HTML, CSS and Javascript in LocalCommandLineCodeExecutor

* init branch

* init branch

* feat: test code execution added

* fix: test update

* fix: test

* fix: policy test

* feat: default policy

---------

Co-authored-by: Eric Zhu <ekzhu@users.noreply.github.com>
  • Loading branch information
asandez1 and ekzhu authored Apr 24, 2024
1 parent ebde196 commit 31fe75a
Show file tree
Hide file tree
Showing 3 changed files with 183 additions and 45 deletions.
87 changes: 54 additions & 33 deletions autogen/coding/local_commandline_code_executor.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from hashlib import md5
from pathlib import Path
from string import Template
from typing import Any, Callable, ClassVar, List, TypeVar, Union, cast
from typing import Any, Callable, ClassVar, Dict, List, Optional, Union

from typing_extensions import ParamSpec

Expand All @@ -28,7 +28,31 @@


class LocalCommandLineCodeExecutor(CodeExecutor):
SUPPORTED_LANGUAGES: ClassVar[List[str]] = ["bash", "shell", "sh", "pwsh", "powershell", "ps1", "python"]
SUPPORTED_LANGUAGES: ClassVar[List[str]] = [
"bash",
"shell",
"sh",
"pwsh",
"powershell",
"ps1",
"python",
"javascript",
"html",
"css",
]
DEFAULT_EXECUTION_POLICY: ClassVar[Dict[str, bool]] = {
"bash": True,
"shell": True,
"sh": True,
"pwsh": True,
"powershell": True,
"ps1": True,
"python": True,
"javascript": False,
"html": False,
"css": False,
}

FUNCTION_PROMPT_TEMPLATE: ClassVar[
str
] = """You have access to the following user defined functions. They can be accessed from the module called `$module_name` by their function names.
Expand All @@ -43,29 +67,27 @@ def __init__(
work_dir: Union[Path, str] = Path("."),
functions: List[Union[FunctionWithRequirements[Any, A], Callable[..., Any], FunctionWithRequirementsStr]] = [],
functions_module: str = "functions",
execution_policies: Optional[Dict[str, bool]] = None,
):
"""(Experimental) A code executor class that executes code through a local command line
"""(Experimental) A code executor class that executes or saves LLM generated code a local command line
environment.
**This will execute LLM generated code on the local machine.**
**This will execute or save LLM generated code on the local machine.**
Each code block is saved as a file and executed in a separate process in
the working directory, and a unique file is generated and saved in the
working directory for each code block.
The code blocks are executed in the order they are received.
Command line code is sanitized using regular expression match against a list of dangerous commands in order to prevent self-destructive
commands from being executed which may potentially affect the users environment.
Currently the only supported languages is Python and shell scripts.
For Python code, use the language "python" for the code block.
For shell scripts, use the language "bash", "shell", or "sh" for the code
block.
Each code block is saved as a file in the working directory. Depending on the execution policy,
the code may be executed in a separate process.
The code blocks are executed or saved in the order they are received.
Command line code is sanitized against a list of dangerous commands to prevent self-destructive commands from being executed,
which could potentially affect the user's environment. Supported languages include Python, shell scripts (bash, shell, sh),
PowerShell (pwsh, powershell, ps1), HTML, CSS, and JavaScript.
Execution policies determine whether each language's code blocks are executed or saved only.
Args:
timeout (int): The timeout for code execution. Default is 60.
work_dir (str): The working directory for the code execution. If None,
a default working directory will be used. The default working
directory is the current directory ".".
functions (List[Union[FunctionWithRequirements[Any, A], Callable[..., Any]]]): A list of functions that are available to the code executor. Default is an empty list.
timeout (int): The timeout for code execution, default is 60 seconds.
work_dir (Union[Path, str]): The working directory for code execution, defaults to the current directory.
functions (List[Union[FunctionWithRequirements[Any, A], Callable[..., Any], FunctionWithRequirementsStr]]): A list of callable functions available to the executor.
functions_module (str): The module name under which functions are accessible.
execution_policies (Optional[Dict[str, bool]]): A dictionary mapping languages to execution policies (True for execution, False for saving only). Defaults to class-wide DEFAULT_EXECUTION_POLICY.
"""

if timeout < 1:
Expand All @@ -91,6 +113,10 @@ def __init__(
else:
self._setup_functions_complete = True

self.execution_policies = self.DEFAULT_EXECUTION_POLICY.copy()
if execution_policies is not None:
self.execution_policies.update(execution_policies)

def format_functions_for_prompt(self, prompt_template: str = FUNCTION_PROMPT_TEMPLATE) -> str:
"""(Experimental) Format the functions for a prompt.
Expand All @@ -104,7 +130,6 @@ def format_functions_for_prompt(self, prompt_template: str = FUNCTION_PROMPT_TEM
Returns:
str: The formatted prompt.
"""

template = Template(prompt_template)
return template.substitute(
module_name=self._functions_module,
Expand Down Expand Up @@ -171,26 +196,19 @@ def _setup_functions(self) -> None:
required_packages = list(set(flattened_packages))
if len(required_packages) > 0:
logging.info("Ensuring packages are installed in executor.")

cmd = [sys.executable, "-m", "pip", "install"]
cmd.extend(required_packages)

cmd = [sys.executable, "-m", "pip", "install"] + required_packages
try:
result = subprocess.run(
cmd, cwd=self._work_dir, capture_output=True, text=True, timeout=float(self._timeout)
)
except subprocess.TimeoutExpired as e:
raise ValueError("Pip install timed out") from e

if result.returncode != 0:
raise ValueError(f"Pip install failed. {result.stdout}, {result.stderr}")

# Attempt to load the function file to check for syntax errors, imports etc.
exec_result = self._execute_code_dont_check_setup([CodeBlock(code=func_file_content, language="python")])

if exec_result.exit_code != 0:
raise ValueError(f"Functions failed to load: {exec_result.output}")

self._setup_functions_complete = True

def execute_code_blocks(self, code_blocks: List[CodeBlock]) -> CommandLineCodeResult:
Expand All @@ -201,10 +219,8 @@ def execute_code_blocks(self, code_blocks: List[CodeBlock]) -> CommandLineCodeRe
Returns:
CommandLineCodeResult: The result of the code execution."""

if not self._setup_functions_complete:
self._setup_functions()

return self._execute_code_dont_check_setup(code_blocks)

def _execute_code_dont_check_setup(self, code_blocks: List[CodeBlock]) -> CommandLineCodeResult:
Expand All @@ -229,6 +245,7 @@ def _execute_code_dont_check_setup(self, code_blocks: List[CodeBlock]) -> Comman
logs_all += "\n" + f"unknown language {lang}"
break

execute_code = self.execution_policies.get(lang, False)
try:
# Check if there is a filename comment
filename = _get_file_name_from_content(code, self._work_dir)
Expand All @@ -239,15 +256,19 @@ def _execute_code_dont_check_setup(self, code_blocks: List[CodeBlock]) -> Comman
# create a file with an automatically generated name
code_hash = md5(code.encode()).hexdigest()
filename = f"tmp_code_{code_hash}.{'py' if lang.startswith('python') else lang}"

written_file = (self._work_dir / filename).resolve()
with written_file.open("w", encoding="utf-8") as f:
f.write(code)
file_names.append(written_file)

program = sys.executable if lang.startswith("python") else _cmd(lang)
cmd = [program, str(written_file.absolute())]
if not execute_code:
# Just return a message that the file is saved.
logs_all += f"Code saved to {str(written_file)}\n"
exitcode = 0
continue

program = _cmd(lang)
cmd = [program, str(written_file.absolute())]
try:
result = subprocess.run(
cmd, cwd=self._work_dir, capture_output=True, text=True, timeout=float(self._timeout)
Expand Down
32 changes: 20 additions & 12 deletions autogen/coding/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,23 +3,31 @@
from pathlib import Path
from typing import Optional

# Patterns recognising a leading comment that carries a file name, one per
# comment syntax: HTML (<!-- -->), CSS (/* */), JavaScript (//) and
# Python/shell (#). The "filename:" prefix is optional; group 2 is the name.
filename_patterns = [
    re.compile(r"^<!-- (filename:)?(.+?) -->", re.DOTALL),
    re.compile(r"^/\* (filename:)?(.+?) \*/", re.DOTALL),
    re.compile(r"^// (filename:)?(.+?)$", re.DOTALL),
    re.compile(r"^# (filename:)?(.+?)$", re.DOTALL),
]


# Raises ValueError if the file is not in the workspace
def _get_file_name_from_content(code: str, workspace_path: Path) -> Optional[str]:
    """Extract a target file name from the first line of a code block.

    The first line (with surrounding whitespace removed) is matched against
    each entry of ``filename_patterns``; the first pattern that matches wins.
    Because the ``filename:`` prefix is optional in those patterns, any
    first-line comment in one of the supported syntaxes is taken as a name.

    Args:
        code: The full text of the code block.
        workspace_path: Directory that the resulting file must live in.

    Returns:
        The file path relative to ``workspace_path`` as a string, or ``None``
        when the first line does not match any pattern.

    Raises:
        ValueError: If the resolved path is not inside the workspace.
    """
    first_line = code.split("\n")[0].strip()
    for pattern in filename_patterns:
        matches = pattern.match(first_line)
        if matches is not None:
            filename = matches.group(2).strip()

            # Handle relative paths in the filename
            path = Path(filename)
            if not path.is_absolute():
                path = workspace_path / path
            path = path.resolve()
            # Throws an error if the file is not in the workspace
            relative = path.relative_to(workspace_path.resolve())
            return str(relative)
    return None


Expand Down
109 changes: 109 additions & 0 deletions test/coding/test_commandline_code_executor.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,34 @@
PYTHON_VARIANTS = ["python", "Python", "py"]


@pytest.mark.parametrize(
    "lang, should_execute",
    [
        ("python", False),  # Python should not execute (policy overridden to False)
        ("bash", False),  # Bash should not execute (policy overridden to False)
        ("html", False),  # HTML should not execute
        ("javascript", False),  # JavaScript should not execute
    ],
)
def test_execution_policy_enforcement(lang, should_execute):
    """Check that a language whose execution policy is False is saved, not run.

    Every language in the parametrization is given an explicit ``False``
    policy, so ``should_execute`` is ``False`` for all cases; the ``True``
    branch below is kept so an executing case can be added to the table.
    """
    with tempfile.TemporaryDirectory() as temp_dir:
        executor = LocalCommandLineCodeExecutor(
            work_dir=temp_dir,
            execution_policies={"python": False, "bash": False, "html": False, "javascript": False, "css": False},
        )
        code = "print('Hello, world!')" if lang == "python" else "echo 'Hello, world!'"
        code_block = CodeBlock(code=code, language=lang)
        result = executor.execute_code_blocks([code_block])

        if should_execute:
            assert "Hello, world!" in result.output, f"Expected execution for {lang}, but it didn't execute."
        else:
            assert "Hello, world!" not in result.output, f"Expected no execution for {lang}, but it executed."

        # Ensure files are saved regardless of execution
        assert result.code_file is not None, f"Expected code file to be saved for {lang}, but it wasn't."


@pytest.mark.parametrize("cls", classes_to_test)
def test_is_code_executor(cls) -> None:
assert isinstance(cls, CodeExecutor)
Expand Down Expand Up @@ -114,6 +142,87 @@ def _test_execute_code(py_variant, executor: CodeExecutor) -> None:
assert file_line.strip() == code_line.strip()


def test_local_commandline_code_executor_save_files() -> None:
    """Run the shared save-file checks against an executor with no policy overrides."""
    with tempfile.TemporaryDirectory() as workspace:
        default_executor = LocalCommandLineCodeExecutor(work_dir=workspace)
        _test_save_files(default_executor, save_file_only=False)


def test_local_commandline_code_executor_save_files_only() -> None:
    """Run the shared save-file checks with execution switched off for the listed languages."""
    # Using execution_policies to specify that no languages should execute
    save_only_policies = dict.fromkeys(("python", "bash", "javascript", "html", "css"), False)
    with tempfile.TemporaryDirectory() as workspace:
        save_only_executor = LocalCommandLineCodeExecutor(
            work_dir=workspace,
            execution_policies=save_only_policies,
        )
        _test_save_files(save_only_executor, save_file_only=True)


def _test_save_files(executor: CodeExecutor, save_file_only: bool) -> None:
    """Check that code blocks are saved under the file name given in their first-line comment.

    Each case is submitted as a single code block. For every case the exit
    code must be 0, a code file must be reported, and its base name must be
    the expected one. The marker text must appear in the output only for
    cases flagged as executable, and only when ``save_file_only`` is False.

    Args:
        executor: The executor under test.
        save_file_only: True when the executor was configured so that the
            Python cases are saved rather than executed.
    """
    marker = "hello world!"

    # (code, language, expected file name, expected to run when not save-only)
    cases = [
        # Executable code blocks: Python, with and without the "filename" prefix.
        ("# filename: test.py\nimport sys; print('hello world!')", "python", "test.py", True),
        ("# test.py\nimport sys; print('hello world!')", "python", "test.py", True),
        # Non-executable code blocks: JavaScript, CSS and HTML, with and without the prefix.
        ("// filename: test.js\nconsole.log('hello world!')", "javascript", "test.js", False),
        ("// test.js\nconsole.log('hello world!')", "javascript", "test.js", False),
        ("/* filename: test.css */\nh1 { color: red; }", "css", "test.css", False),
        ("/* test.css */\nh1 { color: red; }", "css", "test.css", False),
        ("<!-- filename: test.html -->\n<h1>hello world!</h1>", "html", "test.html", False),
        ("<!-- test.html -->\n<h1>hello world!</h1>", "html", "test.html", False),
    ]

    for code, language, expected_name, executable in cases:
        code_result = executor.execute_code_blocks([CodeBlock(code=code, language=language)])

        # Separate asserts so a failure names the exact condition that broke.
        assert code_result.exit_code == 0
        should_run = executable and not save_file_only
        assert (marker in code_result.output) == should_run
        assert code_result.code_file is not None
        assert os.path.basename(code_result.code_file) == expected_name


@pytest.mark.parametrize("cls", classes_to_test)
def test_commandline_code_executor_timeout(cls) -> None:
with tempfile.TemporaryDirectory() as temp_dir:
Expand Down

0 comments on commit 31fe75a

Please sign in to comment.