@@ -222,6 +222,12 @@ def _apply_manual_mappings(self, manual_mappings: list[dict[str, Any]]) -> None:
222222 )
223223 aliases = mapping .get ("aliases" , [])
224224 preferred_name = mapping .get ("name" ) # Optional display name
225+ # Bug #44: Allow operators to supply github_username manually
226+ # for developers who never commit via the GitHub web UI (so the
227+ # noreply heuristic never fires for them).
228+ mapping_github_username : str | None = mapping .get ("github_username" )
229+ if mapping_github_username :
230+ mapping_github_username = mapping_github_username .strip ().lower () or None
225231
226232 # Bug #29 fix: canonical_email is required but aliases can be empty
227233 # when the mapping only intends to set/rename the primary_name.
@@ -255,6 +261,7 @@ def _apply_manual_mappings(self, manual_mappings: list[dict[str, Any]]) -> None:
255261 canonical_id = canonical_id ,
256262 primary_name = preferred_name or canonical_email .split ("@" )[0 ],
257263 primary_email = canonical_email ,
264+ github_username = mapping_github_username ,
258265 first_seen = datetime .now (timezone .utc ),
259266 last_seen = datetime .now (timezone .utc ),
260267 total_commits = 0 ,
@@ -273,6 +280,12 @@ def _apply_manual_mappings(self, manual_mappings: list[dict[str, Any]]) -> None:
273280 )
274281 canonical_identity .primary_name = preferred_name
275282
283+ # Bug #44: Backfill github_username on an already-existing identity
284+ # if the manual mapping provides one. This is deferred until after
285+ # any merge() so it always applies to the surviving canonical row.
286+ if mapping_github_username and not canonical_identity .github_username :
287+ canonical_identity .github_username = mapping_github_username
288+
276289 # Process each alias
277290 for alias_email in aliases :
278291 alias_email = alias_email .lower ().strip ()
@@ -407,11 +420,14 @@ def resolve_developer(
407420 return canonical_id
408421
409422 # Fix 2: Detect GitHub noreply emails and resolve via username.
410- # Pattern: {numeric_id}+{username}@users.noreply.github.com
423+ # Supports both forms:
424+ # - {numeric_id}+{username}@users.noreply.github.com (with ID prefix)
425+ # - {username}@users.noreply.github.com (bare form)
411426 # Extract the username portion and try to match it against existing aliases.
412- if email .endswith ("@users.noreply.github.com" ) and "+" in email :
427+ if email .endswith ("@users.noreply.github.com" ):
413428 local_part = email .split ("@" )[0 ]
414- github_username = local_part .split ("+" , 1 )[1 ] # part after the numeric ID
429+ # Handle both forms: "id+username" and bare "username"
430+ github_username = local_part .split ("+" , 1 )[1 ] if "+" in local_part else local_part
415431 # Look for an existing alias or identity with this username as email/alias
416432 with self .get_session () as session :
417433 # Search aliases where email equals the plain username (common pattern
@@ -426,6 +442,13 @@ def resolve_developer(
426442 # Register the noreply address under the same identity so future
427443 # lookups hit the cache without another DB round-trip.
428444 self ._add_alias (username_alias .canonical_id , name , email )
445+ # Bug #44: Backfill github_username on existing identity if missing.
446+ # Without this, identities created from corporate emails before a
447+ # noreply commit ever lands never get their github_username set,
448+ # breaking resolve_by_github_username() for PR review/ticketing.
449+ self ._set_github_username_if_missing (
450+ username_alias .canonical_id , github_username
451+ )
429452 self ._cache [cache_key ] = username_alias .canonical_id
430453 logger .debug (
431454 f"Matched GitHub noreply email { email !r} to username "
@@ -441,6 +464,10 @@ def resolve_developer(
441464 )
442465 if username_identity :
443466 self ._add_alias (username_identity .canonical_id , name , email )
467+ # Bug #44: Backfill github_username on existing identity if missing.
468+ self ._set_github_username_if_missing (
469+ username_identity .canonical_id , github_username
470+ )
444471 self ._cache [cache_key ] = username_identity .canonical_id
445472 logger .debug (
446473 f"Matched GitHub noreply email { email !r} to primary identity "
@@ -615,6 +642,51 @@ def _create_identity(self, name: str, email: str, github_username: Optional[str]
615642
616643 return canonical_id
617644
def _set_github_username_if_missing(self, canonical_id: str, github_username: str) -> None:
    """Populate ``github_username`` on an existing identity when missing.

    WHY (#44): When we detect a noreply email or a manual mapping for an
    identity that already exists, its ``github_username`` is often unset
    because the identity was created earlier from a corporate email.
    Without this field populated, username-based resolution cannot bridge
    ticketing/PR-review actors to canonical IDs.

    The username is normalized (whitespace stripped, lowercased) exactly
    once, up front, so the database path and the in-memory fallback path
    store the same value. A username that is empty after normalization is
    ignored on both paths.

    The update only runs when the stored value is NULL/empty, so a
    previously-resolved username is never overwritten.

    Args:
        canonical_id: Canonical identifier of the identity to update.
        github_username: Raw GitHub username; may carry surrounding
            whitespace or mixed case.
    """
    # Normalize before branching: previously the in-memory fallback only
    # lowercased (no strip), so padded or whitespace-only usernames were
    # stored verbatim there while the database path cleaned them.
    username_normalized = github_username.strip().lower() if github_username else ""
    if not username_normalized:
        return

    if not self._database_available:
        # Fallback path: keep the in-memory identity store consistent.
        identity = self._in_memory_identities.get(canonical_id)
        if identity and not identity.get("github_username"):
            identity["github_username"] = username_normalized
        return

    # NOTE(review): no explicit commit here; assumes get_session() persists
    # changes when the context exits -- confirm against its definition.
    with self.get_session() as session:
        identity = (
            session.query(DeveloperIdentity)
            .filter(DeveloperIdentity.canonical_id == canonical_id)
            .first()
        )
        if identity and not identity.github_username:
            identity.github_username = username_normalized
            logger.debug(
                "Backfilled github_username=%r on canonical_id=%s",
                username_normalized,
                canonical_id,
            )
            # Refresh cache entry for the identity so downstream lookups
            # see the new value without waiting for a full reload.
            # NOTE(review): other code in this file stores canonical_id
            # strings in self._cache under a lookup key, so this branch
            # may never match a dict entry -- confirm the cache layout.
            cached = self._cache.get(canonical_id)
            if isinstance(cached, dict):
                cached["github_username"] = username_normalized
689+
618690 def _add_alias (self , canonical_id : str , name : str , email : str ):
619691 """Add alias for existing developer."""
620692 with self .get_session () as session :
0 commit comments