Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 67cc20a

Browse files
Merge pull request #184 from theuselessai/fix/drop-gherlint
refactor: drop gherlint, use pure Python AST lint checks
2 parents 2a3b551 + 8dcd6f7 commit 67cc20a

3 files changed

Lines changed: 282 additions & 276 deletions

File tree

Lines changed: 126 additions & 119 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,14 @@
1-
"""Validate Gherkin tool component — syntax parsing and lint checks."""
1+
"""Validate Gherkin tool component — syntax parsing and structural lint checks.
2+
3+
Tier 1: Syntax validation via gherkin-official parser.
4+
Tier 2: Structural lint checks via pure Python against the parsed AST.
5+
No external lint tools or subprocess calls required.
6+
"""
27

38
from __future__ import annotations
49

5-
import base64
610
import json
711
import logging
8-
import shlex
9-
import uuid
1012

1113
from langchain_core.tools import tool
1214

@@ -19,27 +21,6 @@
1921
def validate_gherkin_factory(node):
2022
"""Return a LangChain tool that validates Gherkin .feature specs."""
2123

22-
# Resolve parent workspace and build sandbox backend (same pattern as run_command)
23-
from components.run_command import _resolve_parent_workspace
24-
from components._agent_shared import _build_backend
25-
26-
parent_extra = _resolve_parent_workspace(node)
27-
backend = None
28-
if parent_extra.get("workspace_id"):
29-
try:
30-
backend = _build_backend(parent_extra)
31-
logger.info(
32-
"validate_gherkin %s: using sandbox backend (workspace_id=%s)",
33-
node.node_id,
34-
parent_extra["workspace_id"],
35-
)
36-
except Exception:
37-
logger.warning(
38-
"validate_gherkin %s: failed to build sandbox backend, lint checks will be skipped",
39-
node.node_id,
40-
exc_info=True,
41-
)
42-
4324
@tool
4425
def validate_gherkin(gherkin_spec: str) -> str:
4526
"""Validate a Gherkin feature spec for syntax errors and lint warnings.
@@ -71,15 +52,13 @@ def validate_gherkin(gherkin_spec: str) -> str:
7152
from gherkin.parser import Parser
7253

7354
parser = Parser()
74-
parser.parse(gherkin_spec)
55+
doc = parser.parse(gherkin_spec)
7556
except Exception as e:
7657
result["valid"] = False
7758
error_info = {"message": str(e), "line": 0}
78-
# Try to extract line number from the error message
7959
err_str = str(e)
8060
if "(" in err_str and ":" in err_str:
8161
try:
82-
# gherkin-official errors often contain "(line:col)" patterns
8362
parts = err_str.split("(")
8463
for part in parts:
8564
if ":" in part and ")" in part:
@@ -92,106 +71,134 @@ def validate_gherkin(gherkin_spec: str) -> str:
9271
result["parse_errors"].append(error_info)
9372
return json.dumps(result)
9473

95-
# ── Tier 2: Lint via gherlint CLI (sandboxed) ────────────────────
96-
if backend is not None:
97-
try:
98-
# Encode content as base64 so arbitrary Gherkin can be safely
99-
# embedded in a shell command without quoting issues.
100-
encoded = base64.b64encode(gherkin_spec.encode()).decode()
101-
temp_filename = f"/tmp/_validate_gherkin_{uuid.uuid4().hex}.feature"
102-
cmd = (
103-
f"echo {shlex.quote(encoded)} | base64 -d > {shlex.quote(temp_filename)}"
104-
f" && gherlint lint {shlex.quote(temp_filename)}"
105-
f"; STATUS=$?; rm -f {shlex.quote(temp_filename)}; exit $STATUS"
106-
)
107-
resp = backend.execute(cmd, timeout=30)
108-
_parse_gherlint_output(resp.output or "", "", resp.exit_code or 0, result)
109-
except Exception:
110-
logger.warning("gherlint lint failed", exc_info=True)
111-
else:
112-
logger.debug("validate_gherkin: no sandbox backend, skipping lint checks")
74+
# ── Tier 2: Structural lint checks against parsed AST ────────────
75+
_lint_ast(doc, result)
11376

11477
return json.dumps(result)
11578

11679
return validate_gherkin
11780

11881

119-
def _parse_gherlint_output(
120-
stdout: str, stderr: str, returncode: int, result: dict
121-
) -> None:
122-
"""Parse gherlint CLI output and populate result dict."""
123-
output = (stdout + "\n" + stderr).strip()
124-
if not output:
82+
def _lint_ast(doc: dict, result: dict) -> None:
83+
"""Run structural lint checks against a parsed Gherkin AST."""
84+
feature = doc.get("feature")
85+
86+
if not feature:
87+
result["lint_errors"].append({
88+
"code": "E001",
89+
"message": "No Feature block found",
90+
"line": 0,
91+
})
92+
result["valid"] = False
12593
return
12694

127-
for line in output.splitlines():
128-
line = line.strip()
129-
if not line:
130-
continue
95+
# Check feature has a name
96+
if not feature.get("name", "").strip():
97+
result["lint_warnings"].append({
98+
"code": "W001",
99+
"message": "Feature has no name",
100+
"line": feature.get("location", {}).get("line", 0),
101+
})
102+
103+
children = feature.get("children", [])
104+
scenarios = [c for c in children if "scenario" in c]
105+
backgrounds = [c for c in children if "background" in c]
106+
107+
# Check feature has scenarios
108+
if not scenarios:
109+
result["lint_warnings"].append({
110+
"code": "W002",
111+
"message": "Feature has no scenarios",
112+
"line": feature.get("location", {}).get("line", 0),
113+
})
114+
return
115+
116+
# Check for duplicate scenario names
117+
seen_names: dict[str, int] = {}
118+
for child in scenarios:
119+
sc = child["scenario"]
120+
name = sc.get("name", "").strip()
121+
line = sc.get("location", {}).get("line", 0)
122+
if name:
123+
if name in seen_names:
124+
result["lint_warnings"].append({
125+
"code": "W003",
126+
"message": f"Duplicate scenario name: '{name}' (first at line {seen_names[name]})",
127+
"line": line,
128+
})
129+
else:
130+
seen_names[name] = line
131+
132+
# Check each scenario
133+
for child in scenarios:
134+
sc = child["scenario"]
135+
name = sc.get("name", "").strip()
136+
line = sc.get("location", {}).get("line", 0)
137+
steps = sc.get("steps", [])
138+
139+
# Unnamed scenario
140+
if not name:
141+
result["lint_warnings"].append({
142+
"code": "W004",
143+
"message": "Scenario has no name",
144+
"line": line,
145+
})
131146

132-
# gherlint output format is typically:
133-
# filename:line:col: CODE message
134-
# or just warning/error messages
135-
entry = _parse_lint_line(line)
136-
if entry is None:
147+
# Empty scenario
148+
if not steps:
149+
result["lint_warnings"].append({
150+
"code": "W005",
151+
"message": f"Scenario '{name or '(unnamed)'}' has no steps",
152+
"line": line,
153+
})
137154
continue
138155

139-
code = entry.get("code", "")
140-
# Convention: Cxxx = convention, Wxxx = warning, Exxx = error
141-
if code.startswith("E"):
142-
result["lint_errors"].append(entry)
143-
result["valid"] = False
144-
else:
145-
# W, C, and other codes are treated as warnings
146-
result["lint_warnings"].append(entry)
147-
148-
149-
def _parse_lint_line(line: str) -> dict | None:
150-
"""Parse a single gherlint output line into a structured dict.
151-
152-
Expected formats:
153-
filename.feature:10:1: C0101 Step should start with a capital letter
154-
filename.feature:5: W0301 Scenario has no Given step
155-
C0101: Step should start with a capital letter (line 10)
156-
"""
157-
# Format: path:line:col: CODE message
158-
parts = line.split(":", maxsplit=3)
159-
if len(parts) >= 4:
160-
try:
161-
line_no = int(parts[1].strip())
162-
remainder = parts[3].strip()
163-
code, _, message = remainder.partition(" ")
164-
if code and code[0].isalpha() and any(c.isdigit() for c in code):
165-
return {"code": code, "message": message.strip(), "line": line_no}
166-
except (ValueError, IndexError):
167-
pass
168-
169-
# Format: path:line: CODE message (no column)
170-
if len(parts) >= 3:
171-
try:
172-
line_no = int(parts[1].strip())
173-
remainder = parts[2].strip()
174-
code, _, message = remainder.partition(" ")
175-
if code and code[0].isalpha() and any(c.isdigit() for c in code):
176-
return {"code": code, "message": message.strip(), "line": line_no}
177-
except (ValueError, IndexError):
178-
pass
179-
180-
# Format: CODE: message (line N) or CODE message
181-
if line and line[0].isalpha():
182-
code_part = line.split()[0].rstrip(":")
183-
if any(c.isdigit() for c in code_part):
184-
message = line[len(code_part):].strip().lstrip(": ")
185-
line_no = 0
186-
# Try to extract (line N) from message
187-
if "(line" in message:
188-
try:
189-
idx = message.index("(line")
190-
num = message[idx + 5:].split(")")[0].strip()
191-
line_no = int(num)
192-
message = message[:idx].strip()
193-
except (ValueError, IndexError):
194-
pass
195-
return {"code": code_part, "message": message, "line": line_no}
156+
# Extract keyword types (Given, When, Then, And, But, *)
157+
keywords = [s.get("keyword", "").strip() for s in steps]
158+
159+
# Check for missing Given
160+
if "Given" not in keywords:
161+
result["lint_warnings"].append({
162+
"code": "C001",
163+
"message": f"Scenario '{name or '(unnamed)'}' has no Given step",
164+
"line": line,
165+
})
166+
167+
# Check for missing When
168+
if "When" not in keywords:
169+
result["lint_warnings"].append({
170+
"code": "C002",
171+
"message": f"Scenario '{name or '(unnamed)'}' has no When step",
172+
"line": line,
173+
})
174+
175+
# Check for missing Then
176+
if "Then" not in keywords:
177+
result["lint_warnings"].append({
178+
"code": "C003",
179+
"message": f"Scenario '{name or '(unnamed)'}' has no Then step",
180+
"line": line,
181+
})
182+
183+
# Check backgrounds
184+
for child in backgrounds:
185+
bg = child["background"]
186+
bg_steps = bg.get("steps", [])
187+
bg_line = bg.get("location", {}).get("line", 0)
188+
189+
if not bg_steps:
190+
result["lint_warnings"].append({
191+
"code": "W006",
192+
"message": "Background has no steps",
193+
"line": bg_line,
194+
})
196195

197-
return None
196+
# Background should only contain Given steps
197+
for step in bg_steps:
198+
kw = step.get("keyword", "").strip()
199+
if kw not in ("Given", "And", "But", "*"):
200+
result["lint_warnings"].append({
201+
"code": "C004",
202+
"message": f"Background contains non-Given step: '{kw}'",
203+
"line": step.get("location", {}).get("line", 0),
204+
})

platform/requirements.txt

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -24,10 +24,6 @@ requests>=2.31
2424
jinja2>=3.1
2525
pyotp>=2.9
2626
gherkin-official>=29.0.0
27-
# gherlint is used via subprocess (CLI), NOT as a Python import.
28-
# It requires pydantic v1 which conflicts with our stack.
29-
# Install separately: pipx install gherlint
30-
# The validate_gherkin tool gracefully handles gherlint being absent.
3127

3228
# Testing
3329
pytest>=8.0

0 commit comments

Comments
 (0)