From 5bb1a14c19e7dcb0c2070b22c22ddc82a8250704 Mon Sep 17 00:00:00 2001 From: Keith Hall Date: Mon, 11 Aug 2025 20:45:22 +0300 Subject: [PATCH 01/52] Fix behavior of repetition on an empty match Previously it would fail the branch, but Oniguruma treats it as success if enough iterations were completed and breaks out the repetition. So the change is to match Oniguruma behavior. --- src/vm.rs | 5 +++-- tests/finding.rs | 24 ++++++++++++++++++++++++ tests/oniguruma/test_utf8_ignore.c | 12 ------------ 3 files changed, 27 insertions(+), 14 deletions(-) diff --git a/src/vm.rs b/src/vm.rs index 5de21d4..43970df 100644 --- a/src/vm.rs +++ b/src/vm.rs @@ -646,8 +646,9 @@ pub(crate) fn run( } => { let repcount = state.get(repeat); if repcount > lo && state.get(check) == ix { - // prevent zero-length match on repeat - break 'fail; + // zero-length match on repeat, then move to next instruction + pc = next; + continue; } state.save(repeat, repcount + 1); if repcount >= lo { diff --git a/tests/finding.rs b/tests/finding.rs index 1b2cd64..44d6bc5 100644 --- a/tests/finding.rs +++ b/tests/finding.rs @@ -261,6 +261,30 @@ fn find_iter_attributes() { } #[test] +fn find_iter_empty_repeat_issue70() { + fn assert_expected_matches(pattern: &str) { + let text = "a\naaa\n"; + let regex = common::regex(pattern); + + let matches: Vec<_> = regex.find_iter(text).collect(); + assert_eq!(matches.len(), 4); + + for i in 0..matches.len() { + let mat = &matches[i].as_ref().unwrap(); + match i { + 0 => assert_eq!((mat.start(), mat.end()), (0, 0)), + 1 => assert_eq!((mat.start(), mat.end()), (2, 2)), + 2 => assert_eq!((mat.start(), mat.end()), (3, 5)), + 3 => assert_eq!((mat.start(), mat.end()), (6, 6)), + i => panic!("Expected 4 results, got {}", i + 1), + } + } + } + + assert_expected_matches(r"(?m)(?:^|a)+"); + assert_expected_matches(r"(?m)(?:^|a)(?:^|a)*"); +} + fn find_iter_collect_when_backtrack_limit_hit() { use fancy_regex::RegexBuilder; let r = RegexBuilder::new("(x+x+)+(?=y)") diff --git a/tests/oniguruma/test_utf8_ignore.c b/tests/oniguruma/test_utf8_ignore.c index a320632..4652235 100644 --- a/tests/oniguruma/test_utf8_ignore.c +++ b/tests/oniguruma/test_utf8_ignore.c @@ -151,15 +151,9 @@ // Compile failed: CompileError(FeatureNotYetSupported("Subroutine Call")) x2("(?\\(([^\\(\\)]++|\\g)*+\\))", "((a))", 0, 5); - // No match found - x2("()*\\1", "", 0, 0); - // No match found x2("(?:()|())*\\1\\2", "", 0, 0); - // Expected group to exist - x3("(?:\\1a|())*", "a", 0, 0, 1); - // No match found x2("(?:()|()|()|()|()|())*\\2\\5", "", 0, 0); @@ -211,12 +205,6 @@ // Compile failed: ParseError(0, InvalidEscape("\\N")) x2("\\N", "a", 0, 1); - // No match found - x2("(?:()|())*\\1", "abc", 0, 0); - - // No match found - x2("(?:()|())*\\2", "abc", 0, 0); - // No match found x2("(?:()|()|())*\\3\\1", "abc", 0, 0); From 170094ac56dee3f2023d83ab39d4749193fb543c Mon Sep 17 00:00:00 2001 From: Keith Hall Date: Mon, 11 Aug 2025 20:45:49 +0300 Subject: [PATCH 02/52] Fix test which relied on positive trailing lookahead --- tests/finding.rs | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/tests/finding.rs b/tests/finding.rs index 44d6bc5..0e28912 100644 --- a/tests/finding.rs +++ b/tests/finding.rs @@ -285,14 +285,23 @@ fn find_iter_empty_repeat_issue70() { assert_expected_matches(r"(?m)(?:^|a)(?:^|a)*"); } +#[test] fn find_iter_collect_when_backtrack_limit_hit() { + use fancy_regex::Error; use fancy_regex::RegexBuilder; - let r = RegexBuilder::new("(x+x+)+(?=y)") + use fancy_regex::RuntimeError; + + let r = RegexBuilder::new("(x+x+)+(?>y)") .backtrack_limit(1) .build() .unwrap(); let result: Vec<_> = r.find_iter("xxxxxxxxxxy").collect(); assert_eq!(result.len(), 1); + assert!(result[0].is_err()); + match &result[0].as_ref().err() { + Some(Error::RuntimeError(RuntimeError::BacktrackLimitExceeded)) => {} + _ => panic!("Expected RuntimeError::BacktrackLimitExceeded"), + } } #[test] From adfd1206347c3945a08107633a9c6b9964f3afb5 Mon Sep 17 00:00:00 2001 From: Keith Hall Date: Tue, 12 Aug 2025 05:52:04 +0300 Subject: [PATCH 03/52] Ensure same results for wrapped regex and fancy regex --- tests/finding.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/finding.rs b/tests/finding.rs index 0e28912..a0effef 100644 --- a/tests/finding.rs +++ b/tests/finding.rs @@ -283,6 +283,8 @@ fn find_iter_empty_repeat_issue70() { assert_expected_matches(r"(?m)(?:^|a)+"); assert_expected_matches(r"(?m)(?:^|a)(?:^|a)*"); + assert_expected_matches(r"(?m)(?>^|a)+"); + assert_expected_matches(r"(?m)(?>^|a)(?:^|a)*"); } #[test] From 614ad05d037612e49f1384d065a8b2ce2fdb61ac Mon Sep 17 00:00:00 2001 From: Keith Hall Date: Wed, 13 Aug 2025 20:47:49 +0300 Subject: [PATCH 04/52] Fix behavior of non-greedy repetition on empty match --- src/vm.rs | 9 ++++---- tests/finding.rs | 58 ++++++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 61 insertions(+), 6 deletions(-) diff --git a/src/vm.rs b/src/vm.rs index 43970df..ad05a4b 100644 --- a/src/vm.rs +++ b/src/vm.rs @@ -645,7 +645,7 @@ pub(crate) fn run( check, } => { let repcount = state.get(repeat); - if repcount > lo && state.get(check) == ix { + if repcount > 0 && state.get(check) == ix { // zero-length match on repeat, then move to next instruction pc = next; continue; @@ -663,9 +663,10 @@ pub(crate) fn run( check, } => { let repcount = state.get(repeat); - if repcount > lo && state.get(check) == ix { - // prevent zero-length match on repeat - break 'fail; + if repcount > 0 && state.get(check) == ix { + // zero-length match on repeat, then move to next instruction + pc = next; + continue; } state.save(repeat, repcount + 1); if repcount >= lo { diff --git a/tests/finding.rs b/tests/finding.rs index a0effef..03df509 100644 --- a/tests/finding.rs +++ b/tests/finding.rs @@ -283,8 +283,62 @@ fn find_iter_empty_repeat_issue70() { assert_expected_matches(r"(?m)(?:^|a)+"); assert_expected_matches(r"(?m)(?:^|a)(?:^|a)*"); - assert_expected_matches(r"(?m)(?>^|a)+"); - assert_expected_matches(r"(?m)(?>^|a)(?:^|a)*"); + assert_expected_matches(r"(?m)(?>)(?:^|a)+"); + assert_expected_matches(r"(?m)(?>)(?:^|a)(?:^|a)*"); +} + +#[test] +fn find_iter_empty_repeat_non_greedy_issue70() { + fn assert_expected_matches(pattern: &str) { + let text = "a\naaa\n"; + let regex = common::regex(pattern); + + let matches: Vec<_> = regex.find_iter(text).collect(); + assert_eq!(matches.len(), 5); + + for i in 0..matches.len() { + let mat = &matches[i].as_ref().unwrap(); + match i { + 0 => assert_eq!((mat.start(), mat.end()), (0, 0)), + 1 => assert_eq!((mat.start(), mat.end()), (2, 2)), + 2 => assert_eq!((mat.start(), mat.end()), (3, 4)), + 3 => assert_eq!((mat.start(), mat.end()), (4, 5)), + 4 => assert_eq!((mat.start(), mat.end()), (6, 6)), + i => panic!("Expected 4 results, got {}", i + 1), + } + } + } + + assert_expected_matches(r"(?m)(?:^|a)+?"); + assert_expected_matches(r"(?m)(?:^|a)(?:^|a)*?"); + assert_expected_matches(r"(?m)(?>)(?:^|a)+?"); + assert_expected_matches(r"(?m)(?>)(?:^|a)(?:^|a)*?"); +} + +#[test] +fn find_iter_empty_repeat_anchored_non_greedy_issue70() { + fn assert_expected_matches(pattern: &str) { + let text = "a\naaa\n"; + let regex = common::regex(pattern); + + let matches: Vec<_> = regex.find_iter(text).collect(); + assert_eq!(matches.len(), 3); + + for i in 0..matches.len() { + let mat = &matches[i].as_ref().unwrap(); + match i { + 0 => assert_eq!((mat.start(), mat.end()), (0, 1)), + 1 => assert_eq!((mat.start(), mat.end()), (2, 5)), + 2 => assert_eq!((mat.start(), mat.end()), (6, 6)), + i => panic!("Expected 4 results, got {}", i + 1), + } + } + } + + assert_expected_matches(r"(?m)(?:^|a)+?$"); + assert_expected_matches(r"(?m)(?:^|a)(?:^|a)*?$"); + assert_expected_matches(r"(?m)(?>)(?:^|a)+?$"); + assert_expected_matches(r"(?m)(?>)(?:^|a)(?:^|a)*?$"); } #[test] From 6e40d240749b009dccaa27f7591cf69675436bb8 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sun, 17 Aug 2025 13:32:41 +0000 Subject: [PATCH 05/52] Initial plan From 7ca4cdc35e1e978a5b210fd86322a323e6dd053f Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sun, 17 Aug 2025 13:58:13 +0000 Subject: [PATCH 06/52] Complete WASM playground implementation with full functionality Co-authored-by: keith-hall <11882719+keith-hall@users.noreply.github.com> --- .github/workflows/deploy-playground.yml | 53 ++++ README.md | 11 + playground/.gitignore | 19 ++ playground/Cargo.toml | 29 ++ playground/README.md | 257 +++++++++++++++++ playground/build.sh | 31 +++ playground/serve.py | 55 ++++ playground/src/lib.rs | 255 +++++++++++++++++ playground/web/app.js | 313 +++++++++++++++++++++ playground/web/index.html | 349 ++++++++++++++++++++++++ 10 files changed, 1372 insertions(+) create mode 100644 .github/workflows/deploy-playground.yml create mode 100644 playground/.gitignore create mode 100644 playground/Cargo.toml create mode 100644 playground/README.md create mode 100755 playground/build.sh create mode 100755 playground/serve.py create mode 100644 playground/src/lib.rs create mode 100644 playground/web/app.js create mode 100644 playground/web/index.html diff --git a/.github/workflows/deploy-playground.yml b/.github/workflows/deploy-playground.yml new file mode 100644 index 0000000..d65b2dd --- /dev/null +++ b/.github/workflows/deploy-playground.yml @@ -0,0 +1,53 @@ +name: Deploy Playground to GitHub Pages + +on: + push: + branches: [ main ] + paths: [ 'playground/**' ] + workflow_dispatch: + +permissions: + contents: read + pages: write + id-token: write + +concurrency: + group: "pages" + cancel-in-progress: false + +jobs: + build-and-deploy: + environment: + name: github-pages + url: ${{ steps.deployment.outputs.page_url }} + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Setup Rust + uses: actions-rs/toolchain@v1 + with: + toolchain: stable + target: wasm32-unknown-unknown + + - name: Install wasm-pack + run: cargo install wasm-pack + + - name: Build WASM + run: | + cd playground + wasm-pack build --target web --out-dir pkg --release + cp -r pkg web/ + + - name: Setup Pages + uses: actions/configure-pages@v4 + + - name: Upload artifact + uses: actions/upload-pages-artifact@v3 + with: + path: './playground/web' + + - name: Deploy to GitHub Pages + id: deployment + uses: actions/deploy-pages@v4 \ No newline at end of file diff --git a/README.md b/README.md index dbbce4d..76d1246 100644 --- a/README.md +++ b/README.md @@ -13,6 +13,17 @@ NFA-based implementations (exemplified by [![ci](https://github.com/fancy-regex/fancy-regex/workflows/ci/badge.svg)](https://github.com/fancy-regex/fancy-regex/actions?query=workflow%3Aci) [![codecov](https://codecov.io/gh/fancy-regex/fancy-regex/branch/main/graph/badge.svg)](https://codecov.io/gh/fancy-regex/fancy-regex) +## ๐ŸŽฏ Try it Online! + +**[fancy-regex Playground](https://fancy-regex.github.io/fancy-regex/)** - Test and explore regular expressions with advanced features in your browser. + +The playground includes: +- Real-time regex testing with highlighting +- Support for all fancy-regex features (backreferences, lookaround, etc.) +- Visual parse tree and analysis output +- Configurable regex flags +- Mobile-friendly responsive design + A goal is to be as efficient as possible. For a given regex, the NFA implementation has asymptotic running time linear in the length of the input, while in the general case a backtracking implementation has diff --git a/playground/.gitignore b/playground/.gitignore new file mode 100644 index 0000000..6a34062 --- /dev/null +++ b/playground/.gitignore @@ -0,0 +1,19 @@ +# WASM build artifacts +/pkg/ +/web/pkg/ + +# Rust/Cargo +target/ +Cargo.lock + +# IDE +.vscode/ +.idea/ + +# OS +.DS_Store +Thumbs.db + +# Local development +node_modules/ +*.log \ No newline at end of file diff --git a/playground/Cargo.toml b/playground/Cargo.toml new file mode 100644 index 0000000..3eb1163 --- /dev/null +++ b/playground/Cargo.toml @@ -0,0 +1,29 @@ +[package] +name = "fancy-regex-playground" +version = "0.1.0" +edition = "2021" +description = "WASM playground for fancy-regex" + +[lib] +crate-type = ["cdylib"] + +[package.metadata.wasm-pack.profile.release] +wasm-opt = false + +[dependencies] +fancy-regex = { path = "..", default-features = false, features = ["std"] } +wasm-bindgen = "0.2" +serde = { version = "1.0", features = ["derive"] } +serde-wasm-bindgen = "0.6" +serde_json = "1.0" +js-sys = "0.3" + +[dependencies.web-sys] +version = "0.3" +features = [ + "console", +] + +[profile.release] +opt-level = "s" +lto = true \ No newline at end of file diff --git a/playground/README.md b/playground/README.md new file mode 100644 index 0000000..b503ae7 --- /dev/null +++ b/playground/README.md @@ -0,0 +1,257 @@ +# Fancy Regex Playground + +A browser-based interactive playground for testing and exploring the [fancy-regex](https://github.com/fancy-regex/fancy-regex) crate. This playground is compiled to WebAssembly (WASM) and provides a user-friendly interface for experimenting with advanced regex features like backreferences and lookaround. + +## โœจ Features + +- **Interactive Regex Testing**: Enter regex patterns and test text to see real-time results +- **Advanced Regex Support**: Full support for fancy-regex features including: + - Backreferences (`\1`, `\2`, etc.) + - Named capture groups (`(?...)` and `\k`) + - Lookahead and lookbehind assertions + - All other fancy-regex features +- **Real-time Highlighting**: Visual highlighting of matches and capture groups in the test text +- **Regex Flags**: Support for case-insensitive, multi-line, dot-matches-newline, and ignore-whitespace flags +- **Parse Tree Visualization**: View the internal parse tree structure of your regex +- **Analysis Output**: See detailed analysis information about your regex pattern +- **Error Handling**: Clear error messages for invalid patterns or runtime errors +- **Debounced Updates**: Smooth real-time updates with debouncing to prevent excessive computation +- **Responsive Design**: Works well on desktop and mobile devices + +## ๐Ÿš€ Quick Start + +### Running Locally + +1. **Prerequisites**: + - Rust (latest stable) + - `wasm-pack` - Install with: `cargo install wasm-pack` + - Python 3 (for local server) + +2. **Clone and build**: + ```bash + git clone https://github.com/fancy-regex/fancy-regex.git + cd fancy-regex/playground + + # Build the WASM module + wasm-pack build --target web --out-dir pkg + + # Copy WASM files to web directory + cp -r pkg web/ + + # Start local server + python3 serve.py + ``` + +3. **Open in browser**: Visit `http://localhost:8000` + +### Building for Production + +To build optimized WASM for production deployment: + +```bash +cd playground +wasm-pack build --target web --out-dir pkg --release +cp -r pkg web/ +``` + +## ๐Ÿ“ฆ Publishing to GitHub Pages + +### Automatic Deployment (Recommended) + +1. **Create a GitHub Actions workflow** (`.github/workflows/deploy-playground.yml`): + +```yaml +name: Deploy Playground to GitHub Pages + +on: + push: + branches: [ main ] + paths: [ 'playground/**' ] + workflow_dispatch: + +permissions: + contents: read + pages: write + id-token: write + +concurrency: + group: "pages" + cancel-in-progress: false + +jobs: + build-and-deploy: + environment: + name: github-pages + url: ${{ steps.deployment.outputs.page_url }} + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Setup Rust + uses: actions-rs/toolchain@v1 + with: + toolchain: stable + target: wasm32-unknown-unknown + + - name: Install wasm-pack + run: cargo install wasm-pack + + - name: Build WASM + run: | + cd playground + wasm-pack build --target web --out-dir pkg --release + cp -r pkg web/ + + - name: Setup Pages + uses: actions/configure-pages@v4 + + - name: Upload artifact + uses: actions/upload-pages-artifact@v3 + with: + path: './playground/web' + + - name: Deploy to GitHub Pages + id: deployment + uses: actions/deploy-pages@v4 +``` + +2. **Enable GitHub Pages**: + - Go to your repository settings + - Navigate to "Pages" section + - Set source to "GitHub Actions" + - The playground will be available at `https://yourusername.github.io/fancy-regex/` + +### Manual Deployment + +If you prefer manual deployment: + +1. **Build the playground**: + ```bash + cd playground + wasm-pack build --target web --out-dir pkg --release + cp -r pkg web/ + ``` + +2. **Create `gh-pages` branch**: + ```bash + git checkout --orphan gh-pages + git rm -rf . + cp -r playground/web/* . + git add . + git commit -m "Deploy playground to GitHub Pages" + git push origin gh-pages + ``` + +3. **Configure GitHub Pages** to use the `gh-pages` branch + +## ๐ŸŽฏ Usage Examples + +### Basic Regex Testing +- Pattern: `hello` +- Text: `hello world hello there` +- Result: Highlights both "hello" occurrences + +### Backreferences +- Pattern: `(\w+)\s+\1` +- Text: `hello hello world test test` +- Result: Matches repeated words like "hello hello" and "test test" + +### Named Capture Groups +- Pattern: `(?\w+)\s+\k` +- Text: `apple apple orange banana banana` +- Result: Matches repeated words using named backreferences + +### Lookahead Assertions +- Pattern: `\w+(?=\s+world)` +- Text: `hello world goodbye moon` +- Result: Matches "hello" (word followed by " world") + +## ๐Ÿ—๏ธ Architecture + +The playground consists of: + +- **WASM Module** (`playground/src/lib.rs`): Rust code compiled to WASM that exposes fancy-regex functionality +- **Frontend** (`playground/web/`): HTML/CSS/JavaScript interface for user interaction +- **Server Script** (`playground/serve.py`): Simple Python HTTP server for local development + +### Key WASM Functions + +- `find_matches(pattern, text, flags)`: Find all matches in text +- `find_captures(pattern, text, flags)`: Find matches with capture groups +- `parse_regex(pattern)`: Parse regex and return AST +- `analyze_regex(pattern)`: Analyze regex and return detailed information +- `is_match(pattern, text, flags)`: Test if pattern matches text + +## ๐Ÿ› ๏ธ Development + +### Project Structure +``` +playground/ +โ”œโ”€โ”€ Cargo.toml # WASM crate configuration +โ”œโ”€โ”€ src/ +โ”‚ โ””โ”€โ”€ lib.rs # WASM wrapper implementation +โ”œโ”€โ”€ web/ +โ”‚ โ”œโ”€โ”€ index.html # Main playground interface +โ”‚ โ”œโ”€โ”€ app.js # JavaScript application logic +โ”‚ โ””โ”€โ”€ pkg/ # Generated WASM files (copied from ../pkg/) +โ”œโ”€โ”€ pkg/ # Generated WASM output +โ””โ”€โ”€ serve.py # Local development server +``` + +### Adding New Features + +1. **Add WASM function** in `src/lib.rs`: + ```rust + #[wasm_bindgen] + pub fn your_function(pattern: &str) -> Result { + // Implementation + } + ``` + +2. **Update frontend** in `web/app.js`: + ```javascript + import { your_function } from '../pkg/fancy_regex_playground.js'; + + // Use the function in your UI logic + ``` + +3. **Rebuild**: + ```bash + wasm-pack build --target web --out-dir pkg + cp -r pkg web/ + ``` + +### Testing + +The playground includes error handling for: +- Invalid regex patterns +- Compilation errors +- Runtime errors during matching +- WASM module loading issues + +Test with various patterns to ensure robust error handling. + +## ๐Ÿค Contributing + +Contributions are welcome! Please: + +1. Fork the repository +2. Create a feature branch +3. Make your changes +4. Test thoroughly +5. Submit a pull request + +## ๐Ÿ“„ License + +This project follows the same license as the fancy-regex crate (MIT License). + +## ๐Ÿ™ Acknowledgments + +- Built on top of the excellent [fancy-regex](https://github.com/fancy-regex/fancy-regex) crate +- Uses [wasm-bindgen](https://github.com/rustwasm/wasm-bindgen) for Rust/JavaScript interop +- Inspired by various online regex testing tools + +--- + +**Note**: This playground demonstrates the capabilities of the fancy-regex crate. For production use, always validate and sanitize user input appropriately. \ No newline at end of file diff --git a/playground/build.sh b/playground/build.sh new file mode 100755 index 0000000..2b0f52f --- /dev/null +++ b/playground/build.sh @@ -0,0 +1,31 @@ +#!/bin/bash +set -e + +echo "๐Ÿ”ง Building Fancy Regex Playground..." + +# Check if wasm-pack is installed +if ! command -v wasm-pack &> /dev/null; then + echo "โŒ wasm-pack is not installed. Installing..." + cargo install wasm-pack +fi + +# Check if wasm32 target is installed +if ! rustup target list --installed | grep -q "wasm32-unknown-unknown"; then + echo "๐ŸŽฏ Adding wasm32-unknown-unknown target..." + rustup target add wasm32-unknown-unknown +fi + +# Build the WASM module +echo "๐Ÿš€ Building WASM module..." +wasm-pack build --target web --out-dir pkg --release + +# Copy WASM files to web directory +echo "๐Ÿ“ฆ Copying WASM files to web directory..." +cp -r pkg web/ + +echo "โœ… Build complete!" +echo "" +echo "To run locally:" +echo " python3 serve.py" +echo "" +echo "Then open: http://localhost:8000" \ No newline at end of file diff --git a/playground/serve.py b/playground/serve.py new file mode 100755 index 0000000..79e3a89 --- /dev/null +++ b/playground/serve.py @@ -0,0 +1,55 @@ +#!/usr/bin/env python3 +""" +Simple HTTP server for testing the Fancy Regex Playground locally. +""" + +import http.server +import socketserver +import os +import sys +import webbrowser +from pathlib import Path + +class MyHTTPRequestHandler(http.server.SimpleHTTPRequestHandler): + def end_headers(self): + # Add CORS headers for WASM + self.send_header('Cross-Origin-Embedder-Policy', 'require-corp') + self.send_header('Cross-Origin-Opener-Policy', 'same-origin') + super().end_headers() + + def guess_type(self, path): + mimetype = super().guess_type(path) + # Ensure WASM files are served with correct MIME type + if path.endswith('.wasm'): + return 'application/wasm' + return mimetype + +def main(): + port = 8000 + web_dir = Path(__file__).parent / "web" + + if not web_dir.exists(): + print(f"Error: Web directory not found at {web_dir}") + sys.exit(1) + + os.chdir(web_dir) + + with socketserver.TCPServer(("", port), MyHTTPRequestHandler) as httpd: + print(f"๐Ÿš€ Fancy Regex Playground server starting...") + print(f"๐Ÿ“‚ Serving from: {web_dir}") + print(f"๐ŸŒ Open: http://localhost:{port}") + print(f"โน๏ธ Press Ctrl+C to stop") + + try: + # Try to open browser automatically + webbrowser.open(f'http://localhost:{port}') + except: + pass + + try: + httpd.serve_forever() + except KeyboardInterrupt: + print("\n๐Ÿ‘‹ Server stopped") + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/playground/src/lib.rs b/playground/src/lib.rs new file mode 100644 index 0000000..39fb688 --- /dev/null +++ b/playground/src/lib.rs @@ -0,0 +1,255 @@ +use fancy_regex::{Regex, RegexBuilder}; +use serde::{Deserialize, Serialize}; +use wasm_bindgen::prelude::*; + +// Expose console.log for debugging +#[wasm_bindgen] +extern "C" { + #[wasm_bindgen(js_namespace = console)] + fn log(s: &str); +} + +// Macro for console.log +macro_rules! console_log { + ($($t:tt)*) => (log(&format_args!($($t)*).to_string())) +} + +#[derive(Serialize, Deserialize)] +pub struct Match { + pub start: usize, + pub end: usize, + pub text: String, +} + +#[derive(Serialize, Deserialize)] +pub struct CaptureGroup { + pub index: usize, + pub name: Option, + pub start: Option, + pub end: Option, + pub text: Option, +} + +#[derive(Serialize, Deserialize)] +pub struct MatchResult { + pub full_match: Option, + pub captures: Vec, +} + +#[derive(Serialize, Deserialize)] +pub struct RegexFlags { + pub case_insensitive: bool, + pub multi_line: bool, + pub dot_matches_new_line: bool, + pub ignore_whitespace: bool, + pub unicode: bool, +} + +impl Default for RegexFlags { + fn default() -> Self { + Self { + case_insensitive: false, + multi_line: false, + dot_matches_new_line: false, + ignore_whitespace: false, + unicode: true, + } + } +} + +#[wasm_bindgen] +pub fn create_regex(pattern: &str, flags: JsValue) -> Result { + let flags: RegexFlags = if flags.is_undefined() { + RegexFlags::default() + } else { + serde_wasm_bindgen::from_value(flags).map_err(|e| { + JsValue::from_str(&format!("Invalid flags: {}", e)) + })? + }; + + let mut builder = RegexBuilder::new(pattern); + + if flags.case_insensitive { + builder.case_insensitive(true); + } + if flags.multi_line { + builder.multi_line(true); + } + if flags.dot_matches_new_line { + builder.dot_matches_new_line(true); + } + if flags.ignore_whitespace { + builder.ignore_whitespace(true); + } + + match builder.build() { + Ok(_regex) => { + // Store the pattern for later use since we can't serialize the regex directly + let regex_info = serde_json::json!({ + "pattern": pattern, + "flags": flags + }); + Ok(JsValue::from_str(®ex_info.to_string())) + } + Err(e) => Err(JsValue::from_str(&format!("Regex compilation error: {}", e))), + } +} + +#[wasm_bindgen] +pub fn find_matches(pattern: &str, text: &str, flags: JsValue) -> Result { + let flags: RegexFlags = if flags.is_undefined() { + RegexFlags::default() + } else { + serde_wasm_bindgen::from_value(flags).map_err(|e| { + JsValue::from_str(&format!("Invalid flags: {}", e)) + })? + }; + + let regex = build_regex(pattern, &flags)?; + + let mut matches = Vec::new(); + for mat in regex.find_iter(text) { + match mat { + Ok(m) => { + matches.push(Match { + start: m.start(), + end: m.end(), + text: m.as_str().to_string(), + }); + } + Err(e) => return Err(JsValue::from_str(&format!("Match error: {}", e))), + } + } + + serde_wasm_bindgen::to_value(&matches).map_err(|e| { + JsValue::from_str(&format!("Serialization error: {}", e)) + }) +} + +#[wasm_bindgen] +pub fn find_captures(pattern: &str, text: &str, flags: JsValue) -> Result { + let flags: RegexFlags = if flags.is_undefined() { + RegexFlags::default() + } else { + serde_wasm_bindgen::from_value(flags).map_err(|e| { + JsValue::from_str(&format!("Invalid flags: {}", e)) + })? + }; + + let regex = build_regex(pattern, &flags)?; + + let mut all_captures = Vec::new(); + + for caps_result in regex.captures_iter(text) { + match caps_result { + Ok(caps) => { + let full_match = caps.get(0).map(|m| Match { + start: m.start(), + end: m.end(), + text: m.as_str().to_string(), + }); + + let mut captures = Vec::new(); + for i in 0..caps.len() { + let capture = if let Some(m) = caps.get(i) { + CaptureGroup { + index: i, + name: None, // Named groups aren't directly accessible by index + start: Some(m.start()), + end: Some(m.end()), + text: Some(m.as_str().to_string()), + } + } else { + CaptureGroup { + index: i, + name: None, + start: None, + end: None, + text: None, + } + }; + captures.push(capture); + } + + all_captures.push(MatchResult { + full_match, + captures, + }); + } + Err(e) => return Err(JsValue::from_str(&format!("Capture error: {}", e))), + } + } + + serde_wasm_bindgen::to_value(&all_captures).map_err(|e| { + JsValue::from_str(&format!("Serialization error: {}", e)) + }) +} + +#[wasm_bindgen] +pub fn parse_regex(pattern: &str) -> Result { + match fancy_regex::Expr::parse_tree(pattern) { + Ok(tree) => Ok(format!("{:#?}", tree)), + Err(e) => Err(JsValue::from_str(&format!("Parse error: {}", e))), + } +} + +#[wasm_bindgen] +pub fn analyze_regex(pattern: &str) -> Result { + use fancy_regex::internal::{analyze, optimize}; + + match fancy_regex::Expr::parse_tree(pattern) { + Ok(mut tree) => { + optimize(&mut tree); + match analyze(&tree, 1) { + Ok(info) => Ok(format!("{:#?}", info)), + Err(e) => Err(JsValue::from_str(&format!("Analysis error: {}", e))), + } + } + Err(e) => Err(JsValue::from_str(&format!("Parse error: {}", e))), + } +} + +#[wasm_bindgen] +pub fn is_match(pattern: &str, text: &str, flags: JsValue) -> Result { + let flags: RegexFlags = if flags.is_undefined() { + RegexFlags::default() + } else { + serde_wasm_bindgen::from_value(flags).map_err(|e| { + JsValue::from_str(&format!("Invalid flags: {}", e)) + })? + }; + + let regex = build_regex(pattern, &flags)?; + + match regex.is_match(text) { + Ok(result) => Ok(result), + Err(e) => Err(JsValue::from_str(&format!("Match error: {}", e))), + } +} + +fn build_regex(pattern: &str, flags: &RegexFlags) -> Result { + let mut builder = RegexBuilder::new(pattern); + + if flags.case_insensitive { + builder.case_insensitive(true); + } + if flags.multi_line { + builder.multi_line(true); + } + if flags.dot_matches_new_line { + builder.dot_matches_new_line(true); + } + if flags.ignore_whitespace { + builder.ignore_whitespace(true); + } + + builder.build().map_err(|e| { + JsValue::from_str(&format!("Regex compilation error: {}", e)) + }) +} + +// Initialize the module +#[wasm_bindgen(start)] +pub fn main() { + console_log!("fancy-regex WASM playground initialized"); +} \ No newline at end of file diff --git a/playground/web/app.js b/playground/web/app.js new file mode 100644 index 0000000..03faf6e --- /dev/null +++ b/playground/web/app.js @@ -0,0 +1,313 @@ +import init, { + find_matches, + find_captures, + parse_regex, + analyze_regex, + is_match +} from '../pkg/fancy_regex_playground.js'; + +class FancyRegexPlayground { + constructor() { + this.isInitialized = false; + this.debounceTimer = null; + this.elements = {}; + this.lastResults = null; + } + + async init() { + // Initialize the WASM module + await init(); + this.isInitialized = true; + + // Get DOM elements + this.elements = { + regexInput: document.getElementById('regex-input'), + textInput: document.getElementById('text-input'), + matchResults: document.getElementById('match-results'), + highlightedText: document.getElementById('highlighted-text'), + parseTreeSection: document.getElementById('parse-tree-section'), + parseTreeDisplay: document.getElementById('parse-tree-display'), + analysisSection: document.getElementById('analysis-section'), + analysisDisplay: document.getElementById('analysis-display'), + showParseTreeBtn: document.getElementById('show-parse-tree'), + showAnalysisBtn: document.getElementById('show-analysis'), + flags: { + caseInsensitive: document.getElementById('flag-case-insensitive'), + multiLine: document.getElementById('flag-multi-line'), + dotMatchesNewline: document.getElementById('flag-dot-matches-newline'), + ignoreWhitespace: document.getElementById('flag-ignore-whitespace') + } + }; + + this.setupEventListeners(); + this.loadExampleData(); + } + + setupEventListeners() { + // Debounced input handlers + this.elements.regexInput.addEventListener('input', () => this.debounceUpdate()); + this.elements.textInput.addEventListener('input', () => this.debounceUpdate()); + + // Flag change handlers + Object.values(this.elements.flags).forEach(flag => { + flag.addEventListener('change', () => this.debounceUpdate()); + }); + + // Toggle button handlers + this.elements.showParseTreeBtn.addEventListener('click', () => this.toggleParseTree()); + this.elements.showAnalysisBtn.addEventListener('click', () => this.toggleAnalysis()); + } + + debounceUpdate() { + clearTimeout(this.debounceTimer); + this.debounceTimer = setTimeout(() => this.updateResults(), 300); + } + + getFlags() { + return { + case_insensitive: this.elements.flags.caseInsensitive.checked, + multi_line: this.elements.flags.multiLine.checked, + dot_matches_new_line: this.elements.flags.dotMatchesNewline.checked, + ignore_whitespace: this.elements.flags.ignoreWhitespace.checked, + unicode: true + }; + } + + async updateResults() { + if (!this.isInitialized) return; + + const pattern = this.elements.regexInput.value.trim(); + const text = this.elements.textInput.value; + + if (!pattern) { + this.clearResults(); + return; + } + + try { + this.setLoading(true); + const flags = this.getFlags(); + + // Test if pattern is valid by checking if it matches anything + const isValid = await this.testRegexValidity(pattern, flags); + if (!isValid) return; + + // Find matches + const matches = find_matches(pattern, text, flags); + + // Find captures + const captures = find_captures(pattern, text, flags); + + this.displayResults(matches, captures, text); + this.updateParseTreeIfVisible(pattern); + this.updateAnalysisIfVisible(pattern); + + } catch (error) { + this.displayError(error.toString()); + } finally { + this.setLoading(false); + } + } + + async testRegexValidity(pattern, flags) { + try { + // Try to parse the regex first + parse_regex(pattern); + return true; + } catch (error) { + this.displayError(`Pattern error: ${error.toString()}`); + return false; + } + } + + displayResults(matches, captures, originalText) { + // Display match results + if (matches.length === 0) { + this.elements.matchResults.innerHTML = '
No matches found
'; + this.elements.highlightedText.innerHTML = '
No matches to highlight
'; + return; + } + + // Format match results + let resultHtml = '
Found ' + matches.length + ' match(es):

'; + + if (captures.length > 0) { + resultHtml += 'Captures:
'; + captures.forEach((captureSet, setIndex) => { + resultHtml += `Match ${setIndex + 1}:
`; + captureSet.captures.forEach((capture, index) => { + if (capture.text !== null) { + resultHtml += ` Group ${index}: "${this.escapeHtml(capture.text)}" (${capture.start}-${capture.end})
`; + } else { + resultHtml += ` Group ${index}: not captured
`; + } + }); + resultHtml += '
'; + }); + } else { + resultHtml += 'Simple matches:
'; + matches.forEach((match, index) => { + resultHtml += `${index + 1}: "${this.escapeHtml(match.text)}" (${match.start}-${match.end})
`; + }); + } + + this.elements.matchResults.innerHTML = resultHtml; + + // Highlight matches in text + this.highlightMatches(originalText, matches); + } + + highlightMatches(text, matches) { + if (matches.length === 0) { + this.elements.highlightedText.textContent = text; + return; + } + + // Sort matches by start position (descending) to avoid position shifts during insertion + const sortedMatches = [...matches].sort((a, b) => b.start - a.start); + + let highlightedText = text; + + sortedMatches.forEach((match, index) => { + const before = highlightedText.substring(0, match.start); + const matchText = highlightedText.substring(match.start, match.end); + const after = highlightedText.substring(match.end); + + highlightedText = before + + `${this.escapeHtml(matchText)}` + + after; + }); + + this.elements.highlightedText.innerHTML = highlightedText; + } + + toggleParseTree() { + const isVisible = !this.elements.parseTreeSection.classList.contains('hidden'); + + if (isVisible) { + this.elements.parseTreeSection.classList.add('hidden'); + this.elements.showParseTreeBtn.classList.remove('active'); + this.elements.showParseTreeBtn.textContent = 'Show Parse Tree'; + } else { + this.elements.parseTreeSection.classList.remove('hidden'); + this.elements.showParseTreeBtn.classList.add('active'); + this.elements.showParseTreeBtn.textContent = 'Hide Parse Tree'; + this.updateParseTree(); + } + } + + toggleAnalysis() { + const isVisible = !this.elements.analysisSection.classList.contains('hidden'); + + if (isVisible) { + this.elements.analysisSection.classList.add('hidden'); + this.elements.showAnalysisBtn.classList.remove('active'); + this.elements.showAnalysisBtn.textContent = 'Show Analysis'; + } else { + this.elements.analysisSection.classList.remove('hidden'); + this.elements.showAnalysisBtn.classList.add('active'); + this.elements.showAnalysisBtn.textContent = 'Hide Analysis'; + this.updateAnalysis(); + } + } + + updateParseTreeIfVisible(pattern) { + if (!this.elements.parseTreeSection.classList.contains('hidden')) { + this.updateParseTree(pattern); + } + } + + updateAnalysisIfVisible(pattern) { + if (!this.elements.analysisSection.classList.contains('hidden')) { + this.updateAnalysis(pattern); + } + } + + updateParseTree(pattern = null) { + if (pattern === null) { + pattern = this.elements.regexInput.value.trim(); + } + + if (!pattern) { + this.elements.parseTreeDisplay.textContent = 'Enter a regex pattern to see its parse tree'; + return; + } + + try { + const parseTree = parse_regex(pattern); + this.elements.parseTreeDisplay.textContent = parseTree; + } catch (error) { + this.elements.parseTreeDisplay.textContent = `Parse error: ${error.toString()}`; + } + } + + updateAnalysis(pattern = null) { + if (pattern === null) { + pattern = this.elements.regexInput.value.trim(); + } + + if (!pattern) { + this.elements.analysisDisplay.textContent = 'Enter a regex pattern to see its analysis'; + return; + } + + try { + const analysis = analyze_regex(pattern); + this.elements.analysisDisplay.textContent = analysis; + } catch (error) { + this.elements.analysisDisplay.textContent = `Analysis error: ${error.toString()}`; + } + } + + displayError(message) { + const errorHtml = `
${this.escapeHtml(message)}
`; + this.elements.matchResults.innerHTML = errorHtml; + this.elements.highlightedText.innerHTML = '
Fix the pattern error to see highlights
'; + } + + clearResults() { + this.elements.matchResults.innerHTML = '
Enter a regex pattern and test text to see results
'; + this.elements.highlightedText.innerHTML = '
Matches will be highlighted here
'; + } + + setLoading(loading) { + if (loading) { + document.body.classList.add('loading'); + } else { + document.body.classList.remove('loading'); + } + } + + escapeHtml(text) { + const div = document.createElement('div'); + div.textContent = text; + return div.innerHTML; + } + + loadExampleData() { + // Load a sample regex and text for demonstration + this.elements.regexInput.value = '(?\\w+)\\s+\\k'; + this.elements.textInput.value = `This is a test test with some some repeated words. +Another line line with more more examples. +Single words here. +And and final test test case.`; + + // Trigger initial update + setTimeout(() => this.updateResults(), 100); + } +} + +// Initialize the playground when the page loads +const playground = new FancyRegexPlayground(); +playground.init().then(() => { + console.log('Fancy Regex Playground initialized successfully!'); +}).catch(error => { + console.error('Failed to initialize playground:', error); + document.body.innerHTML = ` +
+

Failed to load Fancy Regex Playground

+

Error: ${error.message}

+

Please check the browser console for more details.

+
+ `; +}); \ No newline at end of file diff --git a/playground/web/index.html b/playground/web/index.html new file mode 100644 index 0000000..42fefbf --- /dev/null +++ b/playground/web/index.html @@ -0,0 +1,349 @@ + + + + + + Codestin Search App + + + +
+

๐ŸŽฏ Fancy Regex Playground

+

Test and explore regular expressions with advanced features like backreferences and lookaround

+
+ +
+ +
+
Input
+
+
+ + +
+ +
+ +
+
+ + +
+
+ + +
+
+ + +
+
+ + +
+
+
+ +
+ + +
+ +
+ + +
+
+
+ + +
+
Results
+
+
+
+
Match Results
+
+
Enter a regex pattern and test text to see results
+
+
+ +
+
Highlighted Text
+
+
Matches will be highlighted here
+
+
+ + + + +
+
+
+
+ + + + \ No newline at end of file From 443d8bba2c0fb046d81faba2b57ae822842b08d9 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sun, 17 Aug 2025 20:12:07 +0000 Subject: [PATCH 07/52] Fix playground Cargo.toml, update Rust functions, and enhance UI with new features Co-authored-by: keith-hall <11882719+keith-hall@users.noreply.github.com> --- playground/Cargo.toml | 4 +++- playground/src/lib.rs | 26 +++++++++++++++++++++-- playground/web/app.js | 43 ++++++++++++++++++++++++++------------- playground/web/index.html | 15 ++++++++++++++ 4 files changed, 71 insertions(+), 17 deletions(-) diff --git a/playground/Cargo.toml b/playground/Cargo.toml index 3eb1163..0a17d77 100644 --- a/playground/Cargo.toml +++ b/playground/Cargo.toml @@ -3,6 +3,8 @@ name = "fancy-regex-playground" version = "0.1.0" edition = "2021" description = "WASM playground for fancy-regex" +repository = "https://github.com/fancy-regex/fancy-regex" +license = "MIT" [lib] crate-type = ["cdylib"] @@ -11,7 +13,7 @@ crate-type = ["cdylib"] wasm-opt = false [dependencies] -fancy-regex = { path = "..", default-features = false, features = ["std"] } +fancy-regex = { path = ".." } wasm-bindgen = "0.2" serde = { version = "1.0", features = ["derive"] } serde-wasm-bindgen = "0.6" diff --git a/playground/src/lib.rs b/playground/src/lib.rs index 39fb688..23af623 100644 --- a/playground/src/lib.rs +++ b/playground/src/lib.rs @@ -186,7 +186,18 @@ pub fn find_captures(pattern: &str, text: &str, flags: JsValue) -> Result Result { +pub fn parse_regex(pattern: &str, flags: JsValue) -> Result { + let flags: RegexFlags = if flags.is_undefined() { + RegexFlags::default() + } else { + serde_wasm_bindgen::from_value(flags).map_err(|e| { + JsValue::from_str(&format!("Invalid flags: {}", e)) + })? + }; + + // Build the regex with flags to get proper parse tree representation + let regex = build_regex(pattern, &flags)?; + match fancy_regex::Expr::parse_tree(pattern) { Ok(tree) => Ok(format!("{:#?}", tree)), Err(e) => Err(JsValue::from_str(&format!("Parse error: {}", e))), @@ -194,7 +205,18 @@ pub fn parse_regex(pattern: &str) -> Result { } #[wasm_bindgen] -pub fn analyze_regex(pattern: &str) -> Result { +pub fn analyze_regex(pattern: &str, flags: JsValue) -> Result { + let flags: RegexFlags = if flags.is_undefined() { + RegexFlags::default() + } else { + serde_wasm_bindgen::from_value(flags).map_err(|e| { + JsValue::from_str(&format!("Invalid flags: {}", e)) + })? + }; + + // Build the regex with flags to ensure analysis takes flags into account + let _regex = build_regex(pattern, &flags)?; + use fancy_regex::internal::{analyze, optimize}; match fancy_regex::Expr::parse_tree(pattern) { diff --git a/playground/web/app.js b/playground/web/app.js index 03faf6e..c387386 100644 --- a/playground/web/app.js +++ b/playground/web/app.js @@ -31,11 +31,13 @@ class FancyRegexPlayground { analysisDisplay: document.getElementById('analysis-display'), showParseTreeBtn: document.getElementById('show-parse-tree'), showAnalysisBtn: document.getElementById('show-analysis'), + debounceDelayInput: document.getElementById('debounce-delay'), flags: { caseInsensitive: document.getElementById('flag-case-insensitive'), multiLine: document.getElementById('flag-multi-line'), dotMatchesNewline: document.getElementById('flag-dot-matches-newline'), - ignoreWhitespace: document.getElementById('flag-ignore-whitespace') + ignoreWhitespace: document.getElementById('flag-ignore-whitespace'), + unicode: document.getElementById('flag-unicode') } }; @@ -53,6 +55,12 @@ class FancyRegexPlayground { flag.addEventListener('change', () => this.debounceUpdate()); }); + // Debounce delay change handler + this.elements.debounceDelayInput.addEventListener('input', () => { + // No need to debounce the debounce delay change itself + this.updateResults(); + }); + // Toggle button handlers this.elements.showParseTreeBtn.addEventListener('click', () => this.toggleParseTree()); this.elements.showAnalysisBtn.addEventListener('click', () => this.toggleAnalysis()); @@ -60,7 +68,8 @@ class FancyRegexPlayground { debounceUpdate() { clearTimeout(this.debounceTimer); - this.debounceTimer = setTimeout(() => this.updateResults(), 300); + const delay = parseInt(this.elements.debounceDelayInput.value, 10) || 300; + this.debounceTimer = setTimeout(() => this.updateResults(), delay); } getFlags() { @@ -69,7 +78,7 @@ class FancyRegexPlayground { multi_line: this.elements.flags.multiLine.checked, dot_matches_new_line: this.elements.flags.dotMatchesNewline.checked, ignore_whitespace: this.elements.flags.ignoreWhitespace.checked, - unicode: true + unicode: this.elements.flags.unicode.checked }; } @@ -99,8 +108,8 @@ class FancyRegexPlayground { const captures = find_captures(pattern, text, flags); this.displayResults(matches, captures, text); - this.updateParseTreeIfVisible(pattern); - this.updateAnalysisIfVisible(pattern); + this.updateParseTreeIfVisible(pattern, flags); + this.updateAnalysisIfVisible(pattern, flags); } catch (error) { this.displayError(error.toString()); @@ -112,7 +121,7 @@ class FancyRegexPlayground { async testRegexValidity(pattern, flags) { try { // Try to parse the regex first - parse_regex(pattern); + parse_regex(pattern, flags); return true; } catch (error) { this.displayError(`Pattern error: ${error.toString()}`); @@ -211,22 +220,25 @@ class FancyRegexPlayground { } } - updateParseTreeIfVisible(pattern) { + updateParseTreeIfVisible(pattern, flags) { if (!this.elements.parseTreeSection.classList.contains('hidden')) { - this.updateParseTree(pattern); + this.updateParseTree(pattern, flags); } } - updateAnalysisIfVisible(pattern) { + updateAnalysisIfVisible(pattern, flags) { if (!this.elements.analysisSection.classList.contains('hidden')) { - this.updateAnalysis(pattern); + this.updateAnalysis(pattern, flags); } } - updateParseTree(pattern = null) { + updateParseTree(pattern = null, flags = null) { if (pattern === null) { pattern = this.elements.regexInput.value.trim(); } + if (flags === null) { + flags = this.getFlags(); + } if (!pattern) { this.elements.parseTreeDisplay.textContent = 'Enter a regex pattern to see its parse tree'; @@ -234,17 +246,20 @@ class FancyRegexPlayground { } try { - const parseTree = parse_regex(pattern); + const parseTree = parse_regex(pattern, flags); this.elements.parseTreeDisplay.textContent = parseTree; } catch (error) { this.elements.parseTreeDisplay.textContent = `Parse error: ${error.toString()}`; } } - updateAnalysis(pattern = null) { + updateAnalysis(pattern = null, flags = null) { if (pattern === null) { pattern = this.elements.regexInput.value.trim(); } + if (flags === null) { + flags = this.getFlags(); + } if (!pattern) { this.elements.analysisDisplay.textContent = 'Enter a regex pattern to see its analysis'; @@ -252,7 +267,7 @@ class FancyRegexPlayground { } try { - const analysis = analyze_regex(pattern); + const analysis = analyze_regex(pattern, flags); this.elements.analysisDisplay.textContent = analysis; } catch (error) { this.elements.analysisDisplay.textContent = `Analysis error: ${error.toString()}`; diff --git a/playground/web/index.html b/playground/web/index.html index 42fefbf..774743b 100644 --- a/playground/web/index.html +++ b/playground/web/index.html @@ -261,6 +261,12 @@

๐ŸŽฏ Fancy Regex Playground

Test and explore regular expressions with advanced features like backreferences and lookaround

+
@@ -292,9 +298,18 @@

๐ŸŽฏ Fancy Regex Playground

+
+ + +
+
+ + +
+
From 98ea7128d8df1b52940377c909329aacafe65967 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sun, 17 Aug 2025 21:04:12 +0000 Subject: [PATCH 08/52] Refactor WASM playground: extract common code, fix flag handling, move footer info, automate version Co-authored-by: keith-hall <11882719+keith-hall@users.noreply.github.com> --- playground/README.md | 6 +- playground/build.sh | 8 +++ playground/src/lib.rs | 148 ++++++++++++++++---------------------- playground/web/index.html | 13 ++-- 4 files changed, 78 insertions(+), 97 deletions(-) diff --git a/playground/README.md b/playground/README.md index b503ae7..90af927 100644 --- a/playground/README.md +++ b/playground/README.md @@ -178,9 +178,9 @@ The playground consists of: ### Key WASM Functions - `find_matches(pattern, text, flags)`: Find all matches in text -- `find_captures(pattern, text, flags)`: Find matches with capture groups -- `parse_regex(pattern)`: Parse regex and return AST -- `analyze_regex(pattern)`: Analyze regex and return detailed information +- `find_captures(pattern, text, flags)`: Find matches with capture groups +- `parse_regex(pattern, flags)`: Parse regex and return AST with flag consideration +- `analyze_regex(pattern, flags)`: Analyze regex and return detailed information with flag consideration - `is_match(pattern, text, flags)`: Test if pattern matches text ## ๐Ÿ› ๏ธ Development diff --git a/playground/build.sh b/playground/build.sh index 2b0f52f..ade493e 100755 --- a/playground/build.sh +++ b/playground/build.sh @@ -15,6 +15,14 @@ if ! rustup target list --installed | grep -q "wasm32-unknown-unknown"; then rustup target add wasm32-unknown-unknown fi +# Extract version from main Cargo.toml +VERSION=$(grep '^version = ' ../Cargo.toml | head -1 | sed 's/.*"\(.*\)".*/\1/') +echo "๐Ÿ“ Extracted version: $VERSION" + +# Update version in HTML file +echo "๐Ÿ”ง Updating version in index.html..." +sed -i.bak "s/.*<\/span>/v$VERSION<\/span>/" web/index.html + # Build the WASM module echo "๐Ÿš€ Building WASM module..." wasm-pack build --target web --out-dir pkg --release diff --git a/playground/src/lib.rs b/playground/src/lib.rs index 23af623..41210d9 100644 --- a/playground/src/lib.rs +++ b/playground/src/lib.rs @@ -57,54 +57,77 @@ impl Default for RegexFlags { } } -#[wasm_bindgen] -pub fn create_regex(pattern: &str, flags: JsValue) -> Result { - let flags: RegexFlags = if flags.is_undefined() { - RegexFlags::default() +// Helper function to deserialize flags or use default +fn get_flags(flags: JsValue) -> Result { + if flags.is_undefined() { + Ok(RegexFlags::default()) } else { serde_wasm_bindgen::from_value(flags).map_err(|e| { JsValue::from_str(&format!("Invalid flags: {}", e)) - })? - }; + }) + } +} +// Helper function to build regex with flags +fn build_regex(pattern: &str, flags: &RegexFlags) -> Result { let mut builder = RegexBuilder::new(pattern); + builder.case_insensitive(flags.case_insensitive); + builder.multi_line(flags.multi_line); + builder.dot_matches_new_line(flags.dot_matches_new_line); + builder.ignore_whitespace(flags.ignore_whitespace); + builder.unicode_mode(flags.unicode); + + builder.build().map_err(|e| { + JsValue::from_str(&format!("Regex compilation error: {}", e)) + }) +} + +// Helper function to compute regex flags for parse_tree_with_flags +fn compute_regex_flags(flags: &RegexFlags) -> u32 { + const FLAG_CASEI: u32 = 1; + const FLAG_MULTI: u32 = 1 << 1; + const FLAG_DOTNL: u32 = 1 << 2; + const FLAG_IGNORE_SPACE: u32 = 1 << 4; + const FLAG_UNICODE: u32 = 1 << 5; + + let mut result = 0; if flags.case_insensitive { - builder.case_insensitive(true); + result |= FLAG_CASEI; } if flags.multi_line { - builder.multi_line(true); + result |= FLAG_MULTI; } if flags.dot_matches_new_line { - builder.dot_matches_new_line(true); + result |= FLAG_DOTNL; } if flags.ignore_whitespace { - builder.ignore_whitespace(true); + result |= FLAG_IGNORE_SPACE; } - - match builder.build() { - Ok(_regex) => { - // Store the pattern for later use since we can't serialize the regex directly - let regex_info = serde_json::json!({ - "pattern": pattern, - "flags": flags - }); - Ok(JsValue::from_str(®ex_info.to_string())) - } - Err(e) => Err(JsValue::from_str(&format!("Regex compilation error: {}", e))), + if flags.unicode { + result |= FLAG_UNICODE; } + result } #[wasm_bindgen] -pub fn find_matches(pattern: &str, text: &str, flags: JsValue) -> Result { - let flags: RegexFlags = if flags.is_undefined() { - RegexFlags::default() - } else { - serde_wasm_bindgen::from_value(flags).map_err(|e| { - JsValue::from_str(&format!("Invalid flags: {}", e)) - })? - }; +pub fn create_regex(pattern: &str, flags: JsValue) -> Result { + let flags = get_flags(flags)?; + + // Test build the regex to validate pattern and flags + let _regex = build_regex(pattern, &flags)?; + + // Store the pattern for later use since we can't serialize the regex directly + let regex_info = serde_json::json!({ + "pattern": pattern, + "flags": flags + }); + Ok(JsValue::from_str(®ex_info.to_string())) +} +#[wasm_bindgen] +pub fn find_matches(pattern: &str, text: &str, flags: JsValue) -> Result { + let flags = get_flags(flags)?; let regex = build_regex(pattern, &flags)?; let mut matches = Vec::new(); @@ -128,14 +151,7 @@ pub fn find_matches(pattern: &str, text: &str, flags: JsValue) -> Result Result { - let flags: RegexFlags = if flags.is_undefined() { - RegexFlags::default() - } else { - serde_wasm_bindgen::from_value(flags).map_err(|e| { - JsValue::from_str(&format!("Invalid flags: {}", e)) - })? - }; - + let flags = get_flags(flags)?; let regex = build_regex(pattern, &flags)?; let mut all_captures = Vec::new(); @@ -187,18 +203,10 @@ pub fn find_captures(pattern: &str, text: &str, flags: JsValue) -> Result Result { - let flags: RegexFlags = if flags.is_undefined() { - RegexFlags::default() - } else { - serde_wasm_bindgen::from_value(flags).map_err(|e| { - JsValue::from_str(&format!("Invalid flags: {}", e)) - })? - }; - - // Build the regex with flags to get proper parse tree representation - let regex = build_regex(pattern, &flags)?; + let flags = get_flags(flags)?; + let regex_flags = compute_regex_flags(&flags); - match fancy_regex::Expr::parse_tree(pattern) { + match fancy_regex::Expr::parse_tree_with_flags(pattern, regex_flags) { Ok(tree) => Ok(format!("{:#?}", tree)), Err(e) => Err(JsValue::from_str(&format!("Parse error: {}", e))), } @@ -206,20 +214,12 @@ pub fn parse_regex(pattern: &str, flags: JsValue) -> Result { #[wasm_bindgen] pub fn analyze_regex(pattern: &str, flags: JsValue) -> Result { - let flags: RegexFlags = if flags.is_undefined() { - RegexFlags::default() - } else { - serde_wasm_bindgen::from_value(flags).map_err(|e| { - JsValue::from_str(&format!("Invalid flags: {}", e)) - })? - }; - - // Build the regex with flags to ensure analysis takes flags into account - let _regex = build_regex(pattern, &flags)?; + let flags = get_flags(flags)?; + let regex_flags = compute_regex_flags(&flags); use fancy_regex::internal::{analyze, optimize}; - match fancy_regex::Expr::parse_tree(pattern) { + match fancy_regex::Expr::parse_tree_with_flags(pattern, regex_flags) { Ok(mut tree) => { optimize(&mut tree); match analyze(&tree, 1) { @@ -233,14 +233,7 @@ pub fn analyze_regex(pattern: &str, flags: JsValue) -> Result { #[wasm_bindgen] pub fn is_match(pattern: &str, text: &str, flags: JsValue) -> Result { - let flags: RegexFlags = if flags.is_undefined() { - RegexFlags::default() - } else { - serde_wasm_bindgen::from_value(flags).map_err(|e| { - JsValue::from_str(&format!("Invalid flags: {}", e)) - })? - }; - + let flags = get_flags(flags)?; let regex = build_regex(pattern, &flags)?; match regex.is_match(text) { @@ -249,27 +242,6 @@ pub fn is_match(pattern: &str, text: &str, flags: JsValue) -> Result Result { - let mut builder = RegexBuilder::new(pattern); - - if flags.case_insensitive { - builder.case_insensitive(true); - } - if flags.multi_line { - builder.multi_line(true); - } - if flags.dot_matches_new_line { - builder.dot_matches_new_line(true); - } - if flags.ignore_whitespace { - builder.ignore_whitespace(true); - } - - builder.build().map_err(|e| { - JsValue::from_str(&format!("Regex compilation error: {}", e)) - }) -} - // Initialize the module #[wasm_bindgen(start)] pub fn main() { diff --git a/playground/web/index.html b/playground/web/index.html index 774743b..aeb8770 100644 --- a/playground/web/index.html +++ b/playground/web/index.html @@ -261,12 +261,6 @@

๐ŸŽฏ Fancy Regex Playground

Test and explore regular expressions with advanced features like backreferences and lookaround

-
@@ -359,6 +353,13 @@

๐ŸŽฏ Fancy Regex Playground

+ + \ No newline at end of file From 541acc8a00470b8ecec415fa0246aabad9dfd8fc Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 18 Aug 2025 11:17:20 +0000 Subject: [PATCH 09/52] Address PR feedback: remove unused create_regex, export flags, fix UI, improve capture names Co-authored-by: keith-hall <11882719+keith-hall@users.noreply.github.com> --- playground/src/lib.rs | 28 +++++++--------------------- playground/web/index.html | 25 +++++++++++++++++++++---- src/flags.rs | 6 ++++++ src/lib.rs | 1 + 4 files changed, 35 insertions(+), 25 deletions(-) diff --git a/playground/src/lib.rs b/playground/src/lib.rs index 41210d9..16e26a3 100644 --- a/playground/src/lib.rs +++ b/playground/src/lib.rs @@ -1,4 +1,5 @@ use fancy_regex::{Regex, RegexBuilder}; +use fancy_regex::internal::{FLAG_CASEI, FLAG_MULTI, FLAG_DOTNL, FLAG_IGNORE_SPACE, FLAG_UNICODE}; use serde::{Deserialize, Serialize}; use wasm_bindgen::prelude::*; @@ -85,12 +86,6 @@ fn build_regex(pattern: &str, flags: &RegexFlags) -> Result { // Helper function to compute regex flags for parse_tree_with_flags fn compute_regex_flags(flags: &RegexFlags) -> u32 { - const FLAG_CASEI: u32 = 1; - const FLAG_MULTI: u32 = 1 << 1; - const FLAG_DOTNL: u32 = 1 << 2; - const FLAG_IGNORE_SPACE: u32 = 1 << 4; - const FLAG_UNICODE: u32 = 1 << 5; - let mut result = 0; if flags.case_insensitive { result |= FLAG_CASEI; @@ -110,20 +105,7 @@ fn compute_regex_flags(flags: &RegexFlags) -> u32 { result } -#[wasm_bindgen] -pub fn create_regex(pattern: &str, flags: JsValue) -> Result { - let flags = get_flags(flags)?; - // Test build the regex to validate pattern and flags - let _regex = build_regex(pattern, &flags)?; - - // Store the pattern for later use since we can't serialize the regex directly - let regex_info = serde_json::json!({ - "pattern": pattern, - "flags": flags - }); - Ok(JsValue::from_str(®ex_info.to_string())) -} #[wasm_bindgen] pub fn find_matches(pattern: &str, text: &str, flags: JsValue) -> Result { @@ -154,6 +136,9 @@ pub fn find_captures(pattern: &str, text: &str, flags: JsValue) -> Result> = regex.capture_names().collect(); + let mut all_captures = Vec::new(); for caps_result in regex.captures_iter(text) { @@ -167,10 +152,11 @@ pub fn find_captures(pattern: &str, text: &str, flags: JsValue) -> Result Result @@ -292,7 +309,7 @@

๐ŸŽฏ Fancy Regex Playground

-
+ @@ -353,9 +370,9 @@

๐ŸŽฏ Fancy Regex Playground

-