From a1c5ec6f3bab99ba1a460d3ac4a785ca73f2c662 Mon Sep 17 00:00:00 2001 From: Jeong YunWon Date: Sun, 4 Jan 2026 10:27:41 +0900 Subject: [PATCH 1/2] pythonrun --- crates/vm/src/vm/compile.rs | 197 +-------------------------------- crates/vm/src/vm/mod.rs | 2 + crates/vm/src/vm/python_run.rs | 186 +++++++++++++++++++++++++++++++ src/lib.rs | 77 ++++++++++--- 4 files changed, 258 insertions(+), 204 deletions(-) create mode 100644 crates/vm/src/vm/python_run.rs diff --git a/crates/vm/src/vm/compile.rs b/crates/vm/src/vm/compile.rs index 07ab4b833d2..97f0f9e97b8 100644 --- a/crates/vm/src/vm/compile.rs +++ b/crates/vm/src/vm/compile.rs @@ -1,9 +1,11 @@ +//! Python code compilation functions. +//! +//! For code execution functions, see python_run.rs + use crate::{ - AsObject, PyObjectRef, PyRef, PyResult, VirtualMachine, - builtins::{PyCode, PyDictRef}, + PyRef, VirtualMachine, + builtins::PyCode, compiler::{self, CompileError, CompileOpts}, - convert::TryFromObject, - scope::Scope, }; impl VirtualMachine { @@ -25,191 +27,4 @@ impl VirtualMachine { ) -> Result, CompileError> { compiler::compile(source, mode, &source_path, opts).map(|code| self.ctx.new_code(code)) } - - // pymain_run_file_obj - pub fn run_script(&self, scope: Scope, path: &str) -> PyResult<()> { - // when pymain_run_module? - if get_importer(path, self)?.is_some() { - self.insert_sys_path(self.new_pyobj(path))?; - let runpy = self.import("runpy", 0)?; - let run_module_as_main = runpy.get_attr("_run_module_as_main", self)?; - run_module_as_main.call((identifier!(self, __main__).to_owned(), false), self)?; - return Ok(()); - } - - // TODO: check if this is proper place - if !self.state.config.settings.safe_path { - let dir = std::path::Path::new(path) - .parent() - .unwrap() - .to_str() - .unwrap(); - self.insert_sys_path(self.new_pyobj(dir))?; - } - - self.run_any_file(scope, path) - } - - // = _PyRun_AnyFileObject - fn run_any_file(&self, scope: Scope, path: &str) -> PyResult<()> { - let path = if path.is_empty() { "???" } else { path }; - - self.run_simple_file(scope, path) - } - - // = _PyRun_SimpleFileObject - fn run_simple_file(&self, scope: Scope, path: &str) -> PyResult<()> { - // __main__ is given by scope - let sys_modules = self.sys_module.get_attr(identifier!(self, modules), self)?; - let main_module = sys_modules.get_item(identifier!(self, __main__), self)?; - let module_dict = main_module.dict().expect("main module must have __dict__"); - if !module_dict.contains_key(identifier!(self, __file__), self) { - module_dict.set_item( - identifier!(self, __file__), - self.ctx.new_str(path).into(), - self, - )?; - module_dict.set_item(identifier!(self, __cached__), self.ctx.none(), self)?; - } - - // Consider to use enum to distinguish `path` - // https://github.com/RustPython/RustPython/pull/6276#discussion_r2529849479 - - let pyc = maybe_pyc_file(path); - if pyc { - // pyc file execution - set_main_loader(&module_dict, path, "SourcelessFileLoader", self)?; - let loader = module_dict.get_item("__loader__", self)?; - let get_code = loader.get_attr("get_code", self)?; - let code_obj = get_code.call((identifier!(self, __main__).to_owned(),), self)?; - let code = code_obj - .downcast::() - .map_err(|_| self.new_runtime_error("Bad code object in .pyc file".to_owned()))?; - self.run_code_obj(code, scope)?; - } else { - if path != "" { - set_main_loader(&module_dict, path, "SourceFileLoader", self)?; - } - // TODO: replace to something equivalent to py_run_file - match std::fs::read_to_string(path) { - Ok(source) => { - let code_obj = self - .compile(&source, compiler::Mode::Exec, path.to_owned()) - .map_err(|err| self.new_syntax_error(&err, Some(&source)))?; - // trace!("Code object: {:?}", code_obj.borrow()); - self.run_code_obj(code_obj, scope)?; - } - Err(err) => { - error!("Failed reading file '{path}': {err}"); - // TODO: Need to change to ExitCode or Termination - std::process::exit(1); - } - } - } - Ok(()) - } - - // TODO: deprecate or reimplement using other primitive functions - pub fn run_code_string(&self, scope: Scope, source: &str, source_path: String) -> PyResult { - let code_obj = self - .compile(source, compiler::Mode::Exec, source_path.clone()) - .map_err(|err| self.new_syntax_error(&err, Some(source)))?; - // trace!("Code object: {:?}", code_obj.borrow()); - // Only set __file__ for real file paths, not pseudo-paths like - if !(source_path.starts_with('<') && source_path.ends_with('>')) { - scope.globals.set_item( - identifier!(self, __file__), - self.new_pyobj(source_path), - self, - )?; - } - self.run_code_obj(code_obj, scope) - } - - pub fn run_block_expr(&self, scope: Scope, source: &str) -> PyResult { - let code_obj = self - .compile(source, compiler::Mode::BlockExpr, "".to_owned()) - .map_err(|err| self.new_syntax_error(&err, Some(source)))?; - // trace!("Code object: {:?}", code_obj.borrow()); - self.run_code_obj(code_obj, scope) - } -} - -fn set_main_loader( - module_dict: &PyDictRef, - filename: &str, - loader_name: &str, - vm: &VirtualMachine, -) -> PyResult<()> { - vm.import("importlib.machinery", 0)?; - let sys_modules = vm.sys_module.get_attr(identifier!(vm, modules), vm)?; - let machinery = sys_modules.get_item("importlib.machinery", vm)?; - let loader_name = vm.ctx.new_str(loader_name); - let loader_class = machinery.get_attr(&loader_name, vm)?; - let loader = loader_class.call((identifier!(vm, __main__).to_owned(), filename), vm)?; - module_dict.set_item("__loader__", loader, vm)?; - Ok(()) -} - -/// Check whether a file is maybe a pyc file. -/// -/// Detection is performed by: -/// 1. Checking if the filename ends with ".pyc" -/// 2. If not, reading the first 2 bytes and comparing with the magic number -fn maybe_pyc_file(path: &str) -> bool { - // 1. Check if filename ends with ".pyc" - if path.ends_with(".pyc") { - return true; - } - maybe_pyc_file_with_magic(path, &crate::version::PYC_MAGIC_NUMBER_BYTES).unwrap_or(false) -} - -fn maybe_pyc_file_with_magic(path: &str, magic_number: &[u8]) -> std::io::Result { - // part of maybe_pyc_file - // For non-.pyc extension, check magic number - let path_obj = std::path::Path::new(path); - if !path_obj.is_file() { - return Ok(false); - } - - let mut file = std::fs::File::open(path)?; - let mut buf = [0u8; 2]; - - use std::io::Read; - if file.read(&mut buf)? != 2 || magic_number.len() < 2 { - return Ok(false); - } - - // Read only two bytes of the magic. If the file was opened in - // text mode, the bytes 3 and 4 of the magic (\r\n) might not - // be read as they are on disk. - Ok(buf == magic_number[..2]) -} - -fn get_importer(path: &str, vm: &VirtualMachine) -> PyResult> { - let path_importer_cache = vm.sys_module.get_attr("path_importer_cache", vm)?; - let path_importer_cache = PyDictRef::try_from_object(vm, path_importer_cache)?; - if let Some(importer) = path_importer_cache.get_item_opt(path, vm)? { - return Ok(Some(importer)); - } - let path = vm.ctx.new_str(path); - let path_hooks = vm.sys_module.get_attr("path_hooks", vm)?; - let mut importer = None; - let path_hooks: Vec = path_hooks.try_into_value(vm)?; - for path_hook in path_hooks { - match path_hook.call((path.clone(),), vm) { - Ok(imp) => { - importer = Some(imp); - break; - } - Err(e) if e.fast_isinstance(vm.ctx.exceptions.import_error) => continue, - Err(e) => return Err(e), - } - } - Ok(if let Some(imp) = importer { - let imp = path_importer_cache.get_or_insert(vm, path.into(), || imp.clone())?; - Some(imp) - } else { - None - }) } diff --git a/crates/vm/src/vm/mod.rs b/crates/vm/src/vm/mod.rs index 7b974389418..d3f18bf9f46 100644 --- a/crates/vm/src/vm/mod.rs +++ b/crates/vm/src/vm/mod.rs @@ -8,6 +8,8 @@ mod compile; mod context; mod interpreter; mod method; +#[cfg(feature = "rustpython-compiler")] +mod python_run; mod setting; pub mod thread; mod vm_new; diff --git a/crates/vm/src/vm/python_run.rs b/crates/vm/src/vm/python_run.rs new file mode 100644 index 00000000000..31e8c7be45e --- /dev/null +++ b/crates/vm/src/vm/python_run.rs @@ -0,0 +1,186 @@ +//! Python code execution functions. + +use crate::{ + PyResult, VirtualMachine, + builtins::{PyCode, PyDictRef}, + compiler::{self}, + scope::Scope, +}; + +impl VirtualMachine { + /// _PyRun_AnyFileObject (internal) + /// + /// Execute a Python file. Currently always delegates to run_simple_file + /// (interactive mode is handled separately in shell.rs). + /// + /// Note: This is an internal function. Use `run_file` for the public interface. + #[doc(hidden)] + pub fn run_any_file(&self, scope: Scope, path: &str) -> PyResult<()> { + let path = if path.is_empty() { "???" } else { path }; + self.run_simple_file(scope, path) + } + + /// _PyRun_SimpleFileObject + /// + /// Execute a Python file with __main__ module setup. + /// Sets __file__ and __cached__ before execution, removes them after. + fn run_simple_file(&self, scope: Scope, path: &str) -> PyResult<()> { + let sys_modules = self.sys_module.get_attr(identifier!(self, modules), self)?; + let main_module = sys_modules.get_item(identifier!(self, __main__), self)?; + let module_dict = main_module.dict().expect("main module must have __dict__"); + + // Track whether we set __file__ (for cleanup) + let set_file_name = !module_dict.contains_key(identifier!(self, __file__), self); + if set_file_name { + module_dict.set_item( + identifier!(self, __file__), + self.ctx.new_str(path).into(), + self, + )?; + module_dict.set_item(identifier!(self, __cached__), self.ctx.none(), self)?; + } + + let result = self.run_simple_file_inner(&module_dict, scope, path); + + self.flush_io(); + + // Cleanup __file__ and __cached__ after execution + if set_file_name { + let _ = module_dict.del_item(identifier!(self, __file__), self); + let _ = module_dict.del_item(identifier!(self, __cached__), self); + } + + result + } + + fn run_simple_file_inner( + &self, + module_dict: &PyDictRef, + scope: Scope, + path: &str, + ) -> PyResult<()> { + let pyc = maybe_pyc_file(path); + if pyc { + // pyc file execution + set_main_loader(module_dict, path, "SourcelessFileLoader", self)?; + let loader = module_dict.get_item("__loader__", self)?; + let get_code = loader.get_attr("get_code", self)?; + let code_obj = get_code.call((identifier!(self, __main__).to_owned(),), self)?; + let code = code_obj + .downcast::() + .map_err(|_| self.new_runtime_error("Bad code object in .pyc file".to_owned()))?; + self.run_code_obj(code, scope)?; + } else { + if path != "" { + set_main_loader(module_dict, path, "SourceFileLoader", self)?; + } + match std::fs::read_to_string(path) { + Ok(source) => { + let code_obj = self + .compile(&source, compiler::Mode::Exec, path.to_owned()) + .map_err(|err| self.new_syntax_error(&err, Some(&source)))?; + self.run_code_obj(code_obj, scope)?; + } + Err(err) => { + return Err(self.new_os_error(err.to_string())); + } + } + } + Ok(()) + } + + /// PyRun_SimpleString + /// + /// Execute a string of Python code in a new scope with builtins. + pub fn run_simple_string(&self, source: &str) -> PyResult { + let scope = self.new_scope_with_builtins(); + self.run_string(scope, source, "".to_owned()) + } + + /// PyRun_String + /// + /// Execute a string of Python code with explicit scope and source path. + pub fn run_string(&self, scope: Scope, source: &str, source_path: String) -> PyResult { + let code_obj = self + .compile(source, compiler::Mode::Exec, source_path) + .map_err(|err| self.new_syntax_error(&err, Some(source)))?; + self.run_code_obj(code_obj, scope) + } + + #[deprecated(note = "use run_string instead")] + pub fn run_code_string(&self, scope: Scope, source: &str, source_path: String) -> PyResult { + self.run_string(scope, source, source_path) + } + + // #[deprecated(note = "use rustpython::run_file instead; if this changes causes problems, please report an issue.")] + pub fn run_script(&self, scope: Scope, path: &str) -> PyResult<()> { + self.run_any_file(scope, path) + } + + pub fn run_block_expr(&self, scope: Scope, source: &str) -> PyResult { + let code_obj = self + .compile(source, compiler::Mode::BlockExpr, "".to_owned()) + .map_err(|err| self.new_syntax_error(&err, Some(source)))?; + self.run_code_obj(code_obj, scope) + } + + /// flush_io + /// + /// Flush stdout and stderr. Errors are silently ignored. + fn flush_io(&self) { + if let Ok(stdout) = self.sys_module.get_attr("stdout", self) { + let _ = self.call_method(&stdout, identifier!(self, flush).as_str(), ()); + } + if let Ok(stderr) = self.sys_module.get_attr("stderr", self) { + let _ = self.call_method(&stderr, identifier!(self, flush).as_str(), ()); + } + } +} + +fn set_main_loader( + module_dict: &PyDictRef, + filename: &str, + loader_name: &str, + vm: &VirtualMachine, +) -> PyResult<()> { + vm.import("importlib.machinery", 0)?; + let sys_modules = vm.sys_module.get_attr(identifier!(vm, modules), vm)?; + let machinery = sys_modules.get_item("importlib.machinery", vm)?; + let loader_name = vm.ctx.new_str(loader_name); + let loader_class = machinery.get_attr(&loader_name, vm)?; + let loader = loader_class.call((identifier!(vm, __main__).to_owned(), filename), vm)?; + module_dict.set_item("__loader__", loader, vm)?; + Ok(()) +} + +/// Check whether a file is maybe a pyc file. +/// +/// Detection is performed by: +/// 1. Checking if the filename ends with ".pyc" +/// 2. If not, reading the first 2 bytes and comparing with the magic number +fn maybe_pyc_file(path: &str) -> bool { + if path.ends_with(".pyc") { + return true; + } + maybe_pyc_file_with_magic(path, &crate::version::PYC_MAGIC_NUMBER_BYTES).unwrap_or(false) +} + +fn maybe_pyc_file_with_magic(path: &str, magic_number: &[u8]) -> std::io::Result { + let path_obj = std::path::Path::new(path); + if !path_obj.is_file() { + return Ok(false); + } + + let mut file = std::fs::File::open(path)?; + let mut buf = [0u8; 2]; + + use std::io::Read; + if file.read(&mut buf)? != 2 || magic_number.len() < 2 { + return Ok(false); + } + + // Read only two bytes of the magic. If the file was opened in + // text mode, the bytes 3 and 4 of the magic (\r\n) might not + // be read as they are on disk. + Ok(buf == magic_number[..2]) +} diff --git a/src/lib.rs b/src/lib.rs index 9dff074e20d..b8e6917bbc3 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -49,7 +49,7 @@ mod interpreter; mod settings; mod shell; -use rustpython_vm::{PyResult, VirtualMachine, scope::Scope}; +use rustpython_vm::{AsObject, PyObjectRef, PyResult, VirtualMachine, scope::Scope}; use std::env; use std::io::IsTerminal; use std::process::ExitCode; @@ -144,7 +144,7 @@ __import__("io").TextIOWrapper( .downcast() .expect("TextIOWrapper.read() should return str"); eprintln!("running get-pip.py..."); - vm.run_code_string(scope, getpip_code.as_str(), "get-pip.py".to_owned())?; + vm.run_string(scope, getpip_code.as_str(), "get-pip.py".to_owned())?; Ok(()) } @@ -162,6 +162,61 @@ fn install_pip(installer: InstallPipMode, scope: Scope, vm: &VirtualMachine) -> } } +// pymain_run_file_obj in Modules/main.c +fn run_file(vm: &VirtualMachine, scope: Scope, path: &str) -> PyResult<()> { + // Check if path is a package/directory with __main__.py + if let Some(_importer) = get_importer(path, vm)? { + vm.insert_sys_path(vm.new_pyobj(path))?; + let runpy = vm.import("runpy", 0)?; + let run_module_as_main = runpy.get_attr("_run_module_as_main", vm)?; + run_module_as_main.call((vm::identifier!(vm, __main__).to_owned(), false), vm)?; + return Ok(()); + } + + // Add script directory to sys.path[0] + if !vm.state.config.settings.safe_path { + let dir = std::path::Path::new(path) + .parent() + .unwrap() + .to_str() + .unwrap(); + vm.insert_sys_path(vm.new_pyobj(dir))?; + } + + vm.run_any_file(scope, path) +} + +fn get_importer(path: &str, vm: &VirtualMachine) -> PyResult> { + use rustpython_vm::builtins::PyDictRef; + use rustpython_vm::convert::TryFromObject; + + let path_importer_cache = vm.sys_module.get_attr("path_importer_cache", vm)?; + let path_importer_cache = PyDictRef::try_from_object(vm, path_importer_cache)?; + if let Some(importer) = path_importer_cache.get_item_opt(path, vm)? { + return Ok(Some(importer)); + } + let path_obj = vm.ctx.new_str(path); + let path_hooks = vm.sys_module.get_attr("path_hooks", vm)?; + let mut importer = None; + let path_hooks: Vec = path_hooks.try_into_value(vm)?; + for path_hook in path_hooks { + match path_hook.call((path_obj.clone(),), vm) { + Ok(imp) => { + importer = Some(imp); + break; + } + Err(e) if e.fast_isinstance(vm.ctx.exceptions.import_error) => continue, + Err(e) => return Err(e), + } + } + Ok(if let Some(imp) = importer { + let imp = path_importer_cache.get_or_insert(vm, path_obj.into(), || imp.clone())?; + Some(imp) + } else { + None + }) +} + // pymain_run_python fn run_rustpython(vm: &VirtualMachine, run_mode: RunMode) -> PyResult<()> { #[cfg(feature = "flame-it")] @@ -199,11 +254,7 @@ fn run_rustpython(vm: &VirtualMachine, run_mode: RunMode) -> PyResult<()> { // Enable faulthandler if -X faulthandler, PYTHONFAULTHANDLER or -X dev is set // _PyFaulthandler_Init() if vm.state.config.settings.faulthandler { - let _ = vm.run_code_string( - vm.new_scope_with_builtins(), - "import faulthandler; faulthandler.enable()", - "".to_owned(), - ); + let _ = vm.run_simple_string("import faulthandler; faulthandler.enable()"); } let is_repl = matches!(run_mode, RunMode::Repl); @@ -226,7 +277,7 @@ fn run_rustpython(vm: &VirtualMachine, run_mode: RunMode) -> PyResult<()> { let res = match run_mode { RunMode::Command(command) => { debug!("Running command {command}"); - vm.run_code_string(scope.clone(), &command, "".to_owned()) + vm.run_string(scope.clone(), &command, "".to_owned()) .map(drop) } RunMode::Module(module) => { @@ -235,9 +286,9 @@ fn run_rustpython(vm: &VirtualMachine, run_mode: RunMode) -> PyResult<()> { } RunMode::InstallPip(installer) => install_pip(installer, scope.clone(), vm), RunMode::Script(script_path) => { - // pymain_run_file + // pymain_run_file_obj debug!("Running script {}", &script_path); - vm.run_script(scope.clone(), &script_path) + run_file(vm, scope.clone(), &script_path) } RunMode::Repl => Ok(()), }; @@ -318,11 +369,11 @@ mod tests { vm.unwrap_pyresult((|| { let scope = setup_main_module(vm)?; // test file run - vm.run_script(scope, "extra_tests/snippets/dir_main/__main__.py")?; + vm.run_any_file(scope, "extra_tests/snippets/dir_main/__main__.py")?; let scope = setup_main_module(vm)?; - // test module run - vm.run_script(scope, "extra_tests/snippets/dir_main")?; + // test module run (directory with __main__.py) + run_file(vm, scope, "extra_tests/snippets/dir_main")?; Ok(()) })()); From 50098dec0f1edf2b5ec4289c4aa7da51e8b88e8b Mon Sep 17 00:00:00 2001 From: "Jeong, YunWon" <69878+youknowone@users.noreply.github.com> Date: Sun, 4 Jan 2026 19:06:03 +0900 Subject: [PATCH 2/2] Update src/lib.rs Co-authored-by: coderabbitai[bot] <136622811+coderabbitai[bot]@users.noreply.github.com> --- src/lib.rs | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index b8e6917bbc3..ad5894860d2 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -177,9 +177,8 @@ fn run_file(vm: &VirtualMachine, scope: Scope, path: &str) -> PyResult<()> { if !vm.state.config.settings.safe_path { let dir = std::path::Path::new(path) .parent() - .unwrap() - .to_str() - .unwrap(); + .and_then(|p| p.to_str()) + .unwrap_or(""); vm.insert_sys_path(vm.new_pyobj(dir))?; }