Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit c06e5e3

Browse files
authored
Merge pull request #50 from maui314159/fix/analyzer-ticket-detection-config
fix: forward ticket_detection_config from GitAnalyzer to TicketExtractor
2 parents f624ddb + 3925b0e commit c06e5e3

6 files changed

Lines changed: 72 additions & 1 deletion

File tree

src/gitflow_analytics/cli_analysis_orchestrator.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -444,6 +444,7 @@ def analyze(
444444
llm_config=llm_config,
445445
branch_analysis_config=branch_analysis_config,
446446
exclude_merge_commits=cfg.analysis.exclude_merge_commits,
447+
ticket_detection_config=getattr(cfg.analysis, "ticket_detection", None),
447448
)
448449

449450
# ------------------------------------------------------------------

src/gitflow_analytics/cli_identity_commands.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -134,6 +134,7 @@ def identities(config: Path, weeks: int, apply: bool) -> None:
134134
llm_config=llm_config,
135135
branch_analysis_config=branch_analysis_config,
136136
exclude_merge_commits=cfg.analysis.exclude_merge_commits,
137+
ticket_detection_config=getattr(cfg.analysis, "ticket_detection", None),
137138
)
138139

139140
click.echo("Analyzing repositories for developer identities...")
@@ -355,6 +356,7 @@ def aliases_command(
355356
llm_config=llm_config,
356357
branch_analysis_config=branch_analysis_config,
357358
exclude_merge_commits=cfg.analysis.exclude_merge_commits,
359+
ticket_detection_config=getattr(cfg.analysis, "ticket_detection", None),
358360
)
359361

360362
all_commits = []

src/gitflow_analytics/core/analyzer.py

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
import logging
44
from datetime import datetime
55
from pathlib import Path
6-
from typing import Any, Optional
6+
from typing import TYPE_CHECKING, Any, Optional
77

88
from git import Repo
99

@@ -16,6 +16,9 @@
1616
from .cache import GitAnalysisCache
1717
from .progress import get_progress_service
1818

19+
if TYPE_CHECKING:
20+
from ..config.schema import TicketDetectionConfig
21+
1922
# Get logger for this module
2023
logger = logging.getLogger(__name__)
2124

@@ -40,6 +43,7 @@ def __init__(
4043
classification_config: Optional[dict[str, Any]] = None,
4144
branch_analysis_config: Optional[dict[str, Any]] = None,
4245
exclude_merge_commits: bool = False,
46+
ticket_detection_config: Optional["TicketDetectionConfig"] = None,
4347
):
4448
"""Initialize analyzer with cache and optional ML categorization and commit classification.
4549
@@ -55,6 +59,12 @@ def __init__(
5559
classification_config: Configuration for commit classification
5660
branch_analysis_config: Configuration for branch analysis optimization
5761
exclude_merge_commits: Exclude merge commits from filtered line count calculations
62+
ticket_detection_config: Optional TicketDetectionConfig forwarded to the
63+
ticket extractor so user-supplied ``analysis.ticket_detection.patterns``
64+
/ ``exclude_patterns`` / ``position`` settings are honored at analyze
65+
time. Without this, the analyzer's in-memory ticket re-extraction
66+
(the "Analyzing commits for tickets" pass) silently falls back to
67+
hard-coded defaults, producing different results from GitDataFetcher.
5868
"""
5969
self.cache = cache
6070
self.batch_size = batch_size
@@ -65,6 +75,7 @@ def __init__(
6575
ml_config=ml_categorization_config,
6676
llm_config=llm_config,
6777
cache_dir=cache.cache_dir / "ml_predictions",
78+
ticket_detection_config=ticket_detection_config,
6879
)
6980
self.branch_mapper = build_branch_mapper(branch_mapping_rules)
7081
self.exclude_paths = exclude_paths or []

src/gitflow_analytics/pipeline_report.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -240,6 +240,7 @@ def _emit(msg: str) -> None:
240240
llm_config=llm_cfg,
241241
branch_analysis_config=branch_analysis_config,
242242
exclude_merge_commits=cfg.analysis.exclude_merge_commits,
243+
ticket_detection_config=getattr(cfg.analysis, "ticket_detection", None),
243244
)
244245

245246
_emit("Loading cached PRs from database...")

src/gitflow_analytics/training/pipeline.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -219,6 +219,7 @@ def _extract_labeled_commits(
219219
self.config.analysis, "allowed_ticket_platforms", None
220220
),
221221
story_point_patterns=getattr(self.config.analysis, "story_point_patterns", None),
222+
ticket_detection_config=getattr(self.config.analysis, "ticket_detection", None),
222223
)
223224

224225
for repo_config in repositories:

tests/core/test_analyzer.py

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,61 @@ def test_init(self, temp_dir):
2828
assert analyzer.batch_size == 1000
2929
assert analyzer.exclude_paths == []
3030

31+
def test_ticket_detection_config_is_forwarded_to_extractor(self, temp_dir):
32+
"""Custom ticket_detection patterns must reach the analyzer's TicketExtractor.
33+
34+
Regression test: GitAnalyzer's in-memory ticket re-extraction (the
35+
"Analyzing commits for tickets" pass) used to silently fall back to the
36+
hard-coded default patterns because ticket_detection_config was never
37+
forwarded to build_ticket_extractor(). That caused user-supplied
38+
``analysis.ticket_detection.patterns`` (e.g. an Azure DevOps ``AB#NNNN``
39+
regex) to be ignored during analyze, while GitDataFetcher honored them
40+
at fetch time -- producing inconsistent ticket-coverage numbers between
41+
a fresh fetch and a cached re-run.
42+
"""
43+
from gitflow_analytics.config.schema import TicketDetectionConfig
44+
45+
cache = GitAnalysisCache(temp_dir / ".gitflow-cache")
46+
47+
# Custom pattern that overrides the github default (#(\d+)) and matches
48+
# only a distinctive token that the default regex cannot match.
49+
custom_pattern = r"\bABCD-(\d+)\b"
50+
td_cfg = TicketDetectionConfig(
51+
patterns={"github": custom_pattern},
52+
exclude_patterns=[],
53+
)
54+
55+
analyzer = GitAnalyzer(cache, ticket_detection_config=td_cfg)
56+
57+
# The github pattern in the analyzer's extractor must be the custom one.
58+
github_patterns = [
59+
p.pattern for p in analyzer.ticket_extractor.compiled_patterns.get("github", [])
60+
]
61+
assert github_patterns == [
62+
custom_pattern
63+
], f"Expected custom github pattern to be forwarded, got: {github_patterns}"
64+
65+
# Behavior: the custom pattern matches its token, the old default does not.
66+
msg = "Implements ABCD-42 across the codebase"
67+
ticket_ids = [t["id"] for t in analyzer.ticket_extractor.extract_from_text(msg)]
68+
assert "42" in ticket_ids
69+
70+
# And the default github "#(\\d+)" no longer applies, so a bare "#99"
71+
# in the message is NOT picked up under the custom github pattern.
72+
msg2 = "See #99 for context"
73+
ticket_ids2 = [t["id"] for t in analyzer.ticket_extractor.extract_from_text(msg2)]
74+
assert "99" not in ticket_ids2
75+
76+
def test_ticket_detection_config_default_is_backward_compatible(self, temp_dir):
77+
"""Without ticket_detection_config the analyzer behaves as before."""
78+
cache = GitAnalysisCache(temp_dir / ".gitflow-cache")
79+
analyzer = GitAnalyzer(cache)
80+
81+
# The default github pattern (#(\\d+)) should still match.
82+
msg = "Closes #1234 final cleanup"
83+
ticket_ids = [t["id"] for t in analyzer.ticket_extractor.extract_from_text(msg)]
84+
assert "1234" in ticket_ids
85+
3186
@patch("gitflow_analytics.core.analyzer.Repo")
3287
def test_analyze_repository_basic(self, mock_repo_class, temp_dir):
3388
"""Test basic repository analysis functionality."""

0 commit comments

Comments
 (0)