diff --git a/README.md b/README.md
index 7551ddf4..27e34571 100644
--- a/README.md
+++ b/README.md
@@ -25,6 +25,14 @@ Docs Update
 ----
 
+### 2025-09-27
+if database is in use, run the following SQL to update:
+
+```sql
+
+alter table lilac.pkglog add column builder text not null default 'local';
+```
+
 ### 2024-06-28
 if database is in use, run the following SQL to update:
 
diff --git a/config.toml.sample b/config.toml.sample
index c0c28de8..e331f1c7 100644
--- a/config.toml.sample
+++ b/config.toml.sample
@@ -40,6 +40,29 @@ logurl = "https://example.com/${pkgbase}/${datetime}.html"
 # the schema to use; by default lilac uses the schema "lilac"
 # schema = "lilac"
 max_concurrency = 1
+# whether to disable the local worker (and use remote workers only)
+# disable_local_worker = false
+
+# build packages over ssh
+[[remoteworker]]
+# this is also used to name the git remote
+name = "remotebuilder"
+# ssh host string; enabling ControlMaster in ~/.ssh/config is advised
+host = "builder.example.org"
+# the same as the local "repodir", but on the remote host
+repodir = "/path/to/gitrepo"
+max_concurrency = 2
+enabled = true
+
+# run some commands before each run
+# prerun = [
+#   "sudo update_something",
+# ]
+
+# run some commands after each run
+# postrun = [
+#   "do_something",
+# ]
 
 [nvchecker]
 # set proxy for nvchecker
diff --git a/docs/setup.rst b/docs/setup.rst
index 19fcf194..a50fda7a 100644
--- a/docs/setup.rst
+++ b/docs/setup.rst
@@ -45,6 +45,24 @@ Make sure in ``/etc/makepkg.conf`` or similar files there aren't any changes to
 
 The ``PKGBUILD`` files needs to be in a git repo. A subdirectory inside it is recommended.
 
+Configure the desired git committer name and email in ``~/.gitconfig`` or using a ``git config`` command.
+
+To avoid an issue with recent git versions, configure the following git option as well:
+
+.. code-block:: sh
+
+  git config --global maintenance.autoDetach false
+
+Otherwise you may get the following errors from time to time::
+
+  fatal: update_ref failed for ref 'HEAD': cannot lock ref 'HEAD': Unable to create '...repo/.git/HEAD.lock': File exists.
+
+  Another git process seems to be running in this repository, e.g.
+  an editor opened by 'git commit'. Please make sure all processes
+  are terminated then try again. If it still fails, a git process
+  may have crashed in this repository earlier:
+  remove the file manually to continue.
+
 Setup a passphrase-less GPG key for the build user to sign packages:
 
 ..
code-block:: sh diff --git a/lilac b/lilac index 85bc27bc..9ffb1b2a 100755 --- a/lilac +++ b/lilac @@ -8,7 +8,7 @@ import traceback import logging import time from collections import defaultdict -from typing import List, Any, DefaultDict, Tuple, Optional +from typing import List, Any, DefaultDict, Tuple, Optional, cast from collections.abc import Set from pathlib import Path import graphlib @@ -42,14 +42,14 @@ from lilac2.cmd import ( ) from lilac2 import tools from lilac2.repo import Repo -from lilac2.const import mydir, _G, PACMAN_DB_DIR +from lilac2.const import mydir, _G from lilac2.nvchecker import packages_need_update, nvtake, NvResults from lilac2.nomypy import BuildResult, BuildReason # type: ignore -from lilac2 import pkgbuild from lilac2.building import build_package, MissingDependencies from lilac2 import slogconf from lilac2 import intl -from lilac2.typing import PkgToBuild +from lilac2.workerman import WorkerManager +from lilac2.typing import PkgToBuild, Rusages try: from lilac2 import db except ImportError: @@ -57,6 +57,7 @@ except ImportError: USE = False config = tools.read_config() +TLS = threading.local() # Setting up environment variables os.environ.update(config.get('envvars', ())) @@ -74,6 +75,7 @@ logger = logging.getLogger(__name__) build_logger_old = logging.getLogger('build') build_logger = structlog.get_logger(logger_name='build') REPO = _G.repo = Repo(config) +_G.reponame = REPO.name EMPTY_COMMIT = '4b825dc642cb6eb9a060e54bf8d69288fbee4904' @@ -246,25 +248,26 @@ def start_build( logdir: Path, failed: dict[str, tuple[str, ...]], built: set[str], - max_concurrency: int, + workermans: list[WorkerManager], ) -> None: # built is used to collect built package names sorter, depmap = packages_with_depends(repo) + max_workers = sum(wm.max_concurrency for wm in workermans) try: buildsorter = BuildSorter(sorter, depmap) - futures: dict[Future, str] = {} + futures: dict[Future, PkgToBuild] = {} with ThreadPoolExecutor( - max_workers = max_concurrency, + max_workers = max_workers, initializer = setup_thread, ) as executor: while True: pkgs = try_pick_some( repo, buildsorter, failed, - running = frozenset(futures.values()), - limit = max_concurrency - len(futures), + running = frozenset(x.pkgbase for x in futures.values()), starving = not bool(futures), + workermans = workermans, ) for pkg in pkgs: if pkg.pkgbase not in nvdata: @@ -272,10 +275,12 @@ def start_build( # a package is pulled in by OnBuild logger.warning('%s not in nvdata, skipping', pkg.pkgbase) buildsorter.done(pkg.pkgbase) + wm = cast(WorkerManager, pkg.workerman) + wm.current_task_count -= 1 continue fu = executor.submit( build_it, pkg, repo, buildsorter, built, failed) - futures[fu] = pkg.pkgbase + futures[fu] = pkg if not pkgs and not futures: # no more packages and no task is running: we're done @@ -283,7 +288,9 @@ def start_build( done, pending = futures_wait(futures, return_when=FIRST_COMPLETED) for fu in done: - del futures[fu] + pkg = futures.pop(fu) + wm = cast(WorkerManager, pkg.workerman) + wm.current_task_count -= 1 fu.result() # at least one task is done, try pick new tasks @@ -296,8 +303,8 @@ def try_pick_some( buildsorter: BuildSorter, failed: dict[str, tuple[str, ...]], running: Set[str], - limit: int, starving: bool, + workermans: list[WorkerManager], ) -> list[PkgToBuild]: if not buildsorter.is_active(): return [] @@ -306,9 +313,6 @@ def try_pick_some( if not ready: return [] - cpu_ratio = tools.get_running_task_cpu_ratio() - memory_avail = tools.get_avail_memory() - ready_to_build = [pkg for 
pkg in ready if pkg not in running] if not ready_to_build: return [] @@ -316,64 +320,46 @@ def try_pick_some( if db.USE: rusages = db.get_pkgs_last_rusage(ready_to_build) else: - rusages = {} - - def sort_key(pkg): - p = buildsorter.priority_func(pkg) - cpu = (r := rusages.get(pkg)) and (r.cputime / r.elapsed) or 1.0 - return (p, cpu) - ready_to_build.sort(key=sort_key) - logger.debug('sorted ready_to_build: %r', ready_to_build) - if cpu_ratio < 1.0: - # low cpu usage, build a big package - p = buildsorter.priority_func(ready_to_build[0]) - for idx, pkg in enumerate(ready_to_build): - if buildsorter.priority_func(pkg) != p: - if idx > 2: - ready_to_build.insert(0, ready_to_build.pop(idx-1)) - break - else: - logger.info('high cpu usage (%.2f), preferring low-cpu-usage builds', cpu_ratio) + rusages = Rusages({}) ret: list[PkgToBuild] = [] - limited_by_memory = False - for pkg in ready_to_build: - if (r := rusages.get(pkg)) and r.memory > memory_avail: - logger.debug('package %s used %d memory last time, but now only %d is available', pkg, r.memory, memory_avail) - limited_by_memory = True - continue - - to_build = check_buildability(pkg, repo, buildsorter, failed) - if to_build is None: - continue - - ret.append(to_build) - if len(ret) == limit: + for wm in workermans: + if not ready_to_build: + break + to_builds = wm.try_accept_package( + ready_to_build, + rusages, + buildsorter.priority_func, + lambda pkg: check_buildability(pkg,repo, buildsorter, failed), + ) + ret.extend(to_builds) + # remove picked packages from ready_to_build + picked = {x.pkgbase for x in to_builds} + ready_to_build = [x for x in ready_to_build + if x not in picked] + + if not ret and starving: + wm = workermans[0] + def sort_key(pkg): + p = buildsorter.priority_func(pkg) + r = rusages.for_package(pkg, [wm.name]) + if r is not None: + m = r.memory + else: + m = 10 * 1024**3 + return (p, m) + ready_to_build.sort(key=sort_key) + logger.debug('sorted ready_to_build: %r', ready_to_build) + memory_avail = wm.get_resource_usage()[1] + logger.info('insufficient memory, starting only one build on %s (available: %d)', wm.name, memory_avail) + for pkg in ready_to_build: + to_build = check_buildability(pkg, repo, buildsorter, failed) + if to_build is None: + continue + to_build.workerman = wm + ret.append(to_build) break - - if r := rusages.get(pkg): - memory_avail -= r.memory - else: - memory_avail -= 10 * 1024 ** 3 - - if not ret and limited_by_memory: - if starving: - def sort_key(pkg): - p = buildsorter.priority_func(pkg) - r = (r := rusages.get(pkg)) and r.memory or 10 * 1024**3 - return (p, r) - ready_to_build.sort(key=sort_key) - logger.debug('sorted ready_to_build: %r', ready_to_build) - logger.info('insufficient memory, starting only one build (available: %d)', memory_avail) - for pkg in ready_to_build: - to_build = check_buildability(pkg, repo, buildsorter, failed) - if to_build is None: - continue - ret.append(to_build) - break - else: - logger.info('insufficient memory, not starting another concurrent build (available: %d)', memory_avail) return ret @@ -383,6 +369,7 @@ def check_buildability( buildsorter: BuildSorter, failed: dict[str, tuple[str, ...]], ) -> Optional[PkgToBuild]: + '''NOTE: caller needs to set workerman on returned value''' to_build = PkgToBuild(pkg) if pkg in failed: @@ -456,8 +443,8 @@ def check_buildability( (new, new) for _, new in db.get_update_on_build_vers(update_on_build) ] to_build = PkgToBuild(pkg, vers) - else: - logger.warning('%s not in lilacinfos.', pkg) + else: + logger.warning('%s 
not in lilacinfos.', pkg) return to_build @@ -468,6 +455,8 @@ def build_it( pkg = to_build.pkgbase logger.info('building %s', pkg) logfile = logdir / f'{pkg}.log' + wm = cast(WorkerManager, to_build.workerman) + worker_no = TLS.worker_no - wm.workers_before_me if db.USE: with db.get_session() as s: @@ -496,6 +485,7 @@ def build_it( myname = MYNAME, destdir = DESTDIR, logfile = logfile, + worker_no = worker_no, ) elapsed = r.elapsed @@ -594,10 +584,10 @@ def build_it( s.execute( '''insert into pkglog (pkgbase, nv_version, pkg_version, elapsed, result, cputime, memory, - msg, build_reasons, maintainers) values - (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)''', + msg, build_reasons, maintainers, builder) values + (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)''', (pkg, newver, version, elapsed, r.__class__.__name__, cputime, memory, - msg, reason_s, maintainers)) + msg, reason_s, maintainers, wm.name)) db.mark_pkg_as(s, pkg, 'done') db.build_updated(s) @@ -613,7 +603,6 @@ WORKER_NO_LOCK = threading.Lock() def setup_thread() -> None: global WORKER_NO - from lilac2.building import TLS with WORKER_NO_LOCK: TLS.worker_no = WORKER_NO WORKER_NO += 1 @@ -628,6 +617,30 @@ def build_nvchecker_reason( changes.append((ver_change.oldver, ver_change.newver)) return BuildReason.NvChecker(items, changes) +def get_workermans(): + from lilac2 import workerman + + ret = [] + workers_before = 0 + + if not config['lilac'].get('disable_local_worker', False): + max_concurrency = config['lilac'].get('max_concurrency', 1) + local = workerman.LocalWorkerManager(max_concurrency) + ret.append(local) + workers_before = max_concurrency + + remotes = [ + workerman.RemoteWorkerManager(remote) + for remote in config.get('remoteworker', []) + if remote.get('enabled', False) + ] + ret.extend(remotes) + + for remote in remotes: + remote.workers_before_me = workers_before + workers_before += remote.max_concurrency + return ret + def main_may_raise( D: dict[str, Any], pkgs_from_args: List[str], logdir: Path, ) -> None: @@ -640,7 +653,9 @@ def main_may_raise( logger.warning('/etc/resolv.conf is a symlink; this might not work!') pacman_conf = config['misc'].get('pacman_conf') - pkgbuild.update_data(PACMAN_DB_DIR, pacman_conf) + workermans = get_workermans() + for wm in workermans: + wm.prepare_batch(pacman_conf) if dburl := config['lilac'].get('dburl'): schema = config['lilac'].get('schema') @@ -772,9 +787,12 @@ def main_may_raise( s.execute('insert into batch (event, logdir) values (%s, %s)', ('start', logdir_name)) db.build_updated(s) - start_build(REPO, logdir, failed, update_succeeded, - config['lilac'].get('max_concurrency', 1)) + start_build(REPO, logdir, failed, update_succeeded, workermans) finally: + # fetch remote commits + for wm in workermans: + wm.finish_batch() + D['last_commit'] = git_last_commit() # handle what has been processed even on exception for k, v in failed.items(): @@ -848,7 +866,7 @@ def setup() -> Path: enable_pretty_logging('DEBUG') if 'MAKEFLAGS' not in os.environ: - cores = os.cpu_count() + cores = os.process_cpu_count() if cores is not None: os.environ['MAKEFLAGS'] = '-j{0}'.format(cores) diff --git a/lilac2/api.py b/lilac2/api.py index 83c67374..e6b81daf 100644 --- a/lilac2/api.py +++ b/lilac2/api.py @@ -39,6 +39,7 @@ UserAgent = 'lilac/0.2b (package auto-build bot, by lilydjwg)' logging.getLogger('httpcore').setLevel(logging.ERROR) +logging.getLogger('hpack').setLevel(logging.ERROR) s = httpx.Client(http2=True) s.headers['User-Agent'] = UserAgent diff --git a/lilac2/building.py b/lilac2/building.py 
index 02c07b5d..836087c6 100644 --- a/lilac2/building.py +++ b/lilac2/building.py @@ -1,17 +1,15 @@ from __future__ import annotations import os -import sys import logging import subprocess from typing import ( - Optional, Iterable, List, Set, TYPE_CHECKING, + Optional, Iterable, Set, TYPE_CHECKING, ) import tempfile from pathlib import Path import time import json -import threading import signal from contextlib import suppress @@ -22,6 +20,7 @@ from .nomypy import BuildResult # type: ignore from . import systemd from . import intl +from .workerman import WorkerManager if TYPE_CHECKING: from .repo import Repo @@ -29,7 +28,6 @@ del Repo logger = logging.getLogger(__name__) -TLS = threading.local() class MissingDependencies(Exception): def __init__(self, pkgs: Set[str]) -> None: @@ -55,6 +53,7 @@ def build_package( myname: str, destdir: Path, logfile: Path, + worker_no: int, ) -> tuple[BuildResult, Optional[str]]: '''return BuildResult and version string if successful''' start_time = time.time() @@ -67,13 +66,17 @@ def build_package( packager = '%s (on behalf of %s) <%s>' % ( myname, maintainer.name, maintainer.email) + assert to_build.workerman is not None depend_packages = resolve_depends(repo, depends) + to_build.workerman.sync_depended_packages(depend_packages) pkgdir = repo.repodir / pkgbase try: pkg_version, rusage, error = call_worker( + repo = repo, + lilacinfo = lilacinfo, pkgbase = pkgbase, pkgdir = pkgdir, - depend_packages = [str(x) for x in depend_packages], + depend_packages = depend_packages, update_info = update_info, on_build_vers = to_build.on_build_vers, bindmounts = bindmounts, @@ -82,11 +85,14 @@ def build_package( logfile = logfile, deadline = start_time + time_limit_hours * 3600, packager = packager, + worker_no = worker_no, + workerman = to_build.workerman, ) if error: raise error finally: - may_need_cleanup() + if to_build.workerman.name == 'local': + may_need_cleanup() reap_zombies() staging = lilacinfo.staging @@ -128,9 +134,10 @@ def build_package( ) return result, pkg_version -def resolve_depends(repo: Optional[Repo], depends: Iterable[Dependency]) -> List[str]: +def resolve_depends(repo: Optional[Repo], depends: Iterable[Dependency]) -> list[str]: need_build_first = set() depend_packages = [] + cwd = os.getcwd() for x in depends: p = x.resolve() @@ -140,7 +147,7 @@ def resolve_depends(repo: Optional[Repo], depends: Iterable[Dependency]) -> List continue need_build_first.add(x.pkgname) else: - depend_packages.append(str(p)) + depend_packages.append(f'../{p.relative_to(cwd)}') if need_build_first: raise MissingDependencies(need_build_first) @@ -161,7 +168,7 @@ def sign_and_copy(pkgdir: Path, dest: Path) -> None: for pkg in pkgs: subprocess.run([ 'gpg', '--pinentry-mode', 'loopback', - '--passphrase', '', '--detach-sign', '--', pkg, + '--passphrase', '', '--detach-sign', '--yes', '--', pkg, ]) for f in pkgs + [x.with_name(x.name + '.sig') for x in pkgs]: with suppress(FileExistsError): @@ -176,10 +183,12 @@ def notify_maintainers( repo.sendmail(addresses, subject, body) def call_worker( + repo: Repo, + lilacinfo: LilacInfo, pkgbase: str, pkgdir: Path, logfile: Path, - depend_packages: List[str], + depend_packages: list[str], update_info: NvResults, on_build_vers: OnBuildVers, commit_msg_template: str, @@ -187,6 +196,8 @@ def call_worker( tmpfs: list[str], deadline: float, packager: str, + worker_no: int, + workerman: WorkerManager, ) -> tuple[Optional[str], RUsage, Optional[Exception]]: ''' return: package version, resource usage, error information @@ -198,8 
+209,10 @@ def call_worker( 'commit_msg_template': commit_msg_template, 'bindmounts': bindmounts, 'tmpfs': tmpfs, - 'logfile': str(logfile), # for sending error reports - 'worker_no': TLS.worker_no, + 'worker_no': worker_no, + 'workerman': workerman.name, + 'deadline': deadline, + 'reponame': repo.name, } fd, resultpath = tempfile.mkstemp(prefix=f'{pkgbase}-', suffix='.lilac') os.close(fd) @@ -207,17 +220,12 @@ def call_worker( input_bytes = json.dumps(input).encode() logger.debug('worker input: %r', input_bytes) - cmd = [ - sys.executable, - '-Xno_debug_ranges', # save space - '-P', # don't prepend cwd to sys.path where unexpected directories may exist - '-m', 'lilac2.worker', pkgbase, - ] + cmd = workerman.get_worker_cmd(pkgbase) if systemd.available(): _call_cmd = _call_cmd_systemd else: _call_cmd = _call_cmd_subprocess - name = f'lilac-worker-{TLS.worker_no}' + name = f'lilac-worker-{workerman.name}-{worker_no}' rusage, timedout = _call_cmd( name, cmd, logfile, pkgdir, deadline, input_bytes, packager, @@ -247,11 +255,20 @@ def call_worker( elif st == 'skipped': error = SkipBuild(r['msg']) elif st == 'failed': + if report := r.get('report'): + repo.send_error_report( + lilacinfo, + subject = report['subject'], + msg = report['msg'], + logfile = logfile, + ) error = BuildFailed(r['msg']) else: error = RuntimeError('unknown status from worker', st) version = r['version'] + if ru2 := r.get('rusage'): + rusage = RUsage(*ru2) return version, rusage, error def _call_cmd_subprocess( diff --git a/lilac2/const.py b/lilac2/const.py index 8462217b..67d3a7f5 100644 --- a/lilac2/const.py +++ b/lilac2/const.py @@ -18,6 +18,5 @@ # repo: Repo # mod: LilacMod # worker: -# repo: Repo (for sending reports; not loading all lilacinfos) # mod: LilacMod # built_version: Optional[str] diff --git a/lilac2/db.py b/lilac2/db.py index 5f3b7827..b87e0442 100644 --- a/lilac2/db.py +++ b/lilac2/db.py @@ -3,11 +3,12 @@ import re import logging from functools import partial +from itertools import groupby import psycopg2 import psycopg2.pool -from .typing import UsedResource, OnBuildEntry, OnBuildVers +from .typing import UsedResource, OnBuildEntry, OnBuildVers, Rusages logger = logging.getLogger(__name__) @@ -67,21 +68,23 @@ def get_pkgs_last_success_times(pkgs: list[str]) -> list[tuple[str, datetime.dat r = s.fetchall() return r -def get_pkgs_last_rusage(pkgs: list[str]) -> dict[str, UsedResource]: +def get_pkgs_last_rusage(pkgs: list[str]) -> Rusages: if not pkgs: - return {} + return Rusages({}) with get_session() as s: s.execute(''' - select pkgbase, cputime, memory, elapsed from ( - select pkgbase, cputime, memory, elapsed, row_number() over (partition by pkgbase order by ts desc) as k + select pkgbase, builder, cputime, memory, elapsed from ( + select pkgbase, builder, cputime, memory, elapsed, row_number() over (partition by pkgbase, builder order by ts desc) as k from pkglog where pkgbase = any(%s) and result in ('successful', 'staged') ) as w where k = 1''', (pkgs,)) rs = s.fetchall() - ret = {r[0]: UsedResource(r[1], r[2], r[3]) for r in rs} + ret = {} + for pkgbase, rr in groupby(rs, lambda r: r[0]): + ret[pkgbase] = {r[1]: UsedResource(r[2], r[3], r[4]) for r in rr} - return ret + return Rusages(ret) def _get_last_two_versions(s, pkg: str) -> tuple[str, str]: s.execute( diff --git a/lilac2/lilacyaml.py b/lilac2/lilacyaml.py index a49c3ab6..ba94fdc9 100644 --- a/lilac2/lilacyaml.py +++ b/lilac2/lilacyaml.py @@ -108,7 +108,7 @@ def load_lilacinfo(dir: Path) -> LilacInfo: def expand_alias_arg(value: str) -> 
str: return value.format( pacman_db_dir = PACMAN_DB_DIR, - repo_name = _G.repo.name, + repo_name = _G.reponame, ) def parse_update_on( @@ -134,7 +134,7 @@ def parse_update_on( if alias == 'alpm-lilac': entry['source'] = 'alpm' entry.setdefault('dbpath', str(PACMAN_DB_DIR)) - entry.setdefault('repo', _G.repo.name) + entry.setdefault('repo', _G.reponame) elif alias is not None: for k, v in ALIASES[alias].items(): diff --git a/lilac2/pkgbuild.py b/lilac2/pkgbuild.py index 09f0ec20..747a5e56 100644 --- a/lilac2/pkgbuild.py +++ b/lilac2/pkgbuild.py @@ -71,8 +71,10 @@ def update_pacmandb(dbpath: Path, pacman_conf: Optional[str] = None, else: p.check_returncode() -def update_data(dbpath: Path, pacman_conf: Optional[str], +def update_data(pacman_conf: Optional[str], *, quiet: bool = False) -> None: + from .const import PACMAN_DB_DIR + dbpath = PACMAN_DB_DIR update_pacmandb(dbpath, pacman_conf, quiet=quiet) now = int(time.time()) @@ -167,3 +169,8 @@ def _get_package_version(srcinfo: List[str]) -> PkgVers: assert pkgver is not None assert pkgrel is not None return PkgVers(epoch, pkgver, pkgrel) + +if __name__ == '__main__': + import sys + conf = sys.argv[1] if len(sys.argv) == 2 else None + update_data(conf) diff --git a/lilac2/remote/__init__.py b/lilac2/remote/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/lilac2/remote/git_pull.py b/lilac2/remote/git_pull.py new file mode 100644 index 00000000..03161f23 --- /dev/null +++ b/lilac2/remote/git_pull.py @@ -0,0 +1,14 @@ +import os +import sys + +from ..cmd import git_pull_override, run_cmd + +def main(): + cmd = ['git', 'reset', '--hard', 'origin/master'] + run_cmd(cmd) + git_pull_override() + +if __name__ == '__main__': + wd = sys.argv[1] + os.chdir(wd) + main() diff --git a/lilac2/remote/runner.py b/lilac2/remote/runner.py new file mode 100644 index 00000000..5a2366f5 --- /dev/null +++ b/lilac2/remote/runner.py @@ -0,0 +1,64 @@ +import sys +import json +import subprocess +import logging +import tempfile +import os + +from .. 
import systemd +from ..vendor.nicelogger import enable_pretty_logging + +logger = logging.getLogger(__name__) + +def main() -> None: + enable_pretty_logging('DEBUG') + + input = json.load(sys.stdin) + logger.debug('[remote.runner] got input: %r', input) + + name = input.pop('name') + deadline = input.pop('deadline') + myresultpath = input.pop('result') + worker_no = input['worker_no'] + + cmd = [ + sys.executable, + '-Xno_debug_ranges', # save space + '-P', # don't prepend cwd to sys.path where unexpected directories may exist + '-m', 'lilac2.worker', + ] + sys.argv[1:] + + fd, resultpath = tempfile.mkstemp(prefix='remoterunner-', suffix='.lilac') + os.close(fd) + input['result'] = resultpath + + setenv = input.pop('setenv') + if v := os.environ.get('MAKEFLAGS'): + setenv['MAKEFLAGS'] = v + else: + cores = os.process_cpu_count() + if cores is not None: + setenv['MAKEFLAGS'] = '-j{0}'.format(cores) + + p = systemd.start_cmd( + name, + cmd, + stdin = subprocess.PIPE, + cwd = input.pop('pkgdir'), + setenv = setenv, + ) + p.stdin.write(json.dumps(input).encode()) # type: ignore + p.stdin.close() # type: ignore + + rusage, _ = systemd.poll_rusage(name, deadline, worker_no=worker_no) + p.wait() + + with open(resultpath, 'rb') as f: + r = json.load(f) + r['rusage'] = rusage + + with open(myresultpath, 'w') as f2: + json.dump(r, f2) + +if __name__ == '__main__': + main() diff --git a/lilac2/remote/worker.py b/lilac2/remote/worker.py new file mode 100644 index 00000000..8cdfb35d --- /dev/null +++ b/lilac2/remote/worker.py @@ -0,0 +1,55 @@ +from __future__ import annotations + +import logging +import json +import sys +import os + +from ..vendor.nicelogger import enable_pretty_logging + +from ..tools import kill_child_processes, read_config +from ..workerman import WorkerManager + +logger = logging.getLogger(__name__) + +def main() -> None: + enable_pretty_logging('DEBUG') + + config = read_config() + + input = json.load(sys.stdin) + logger.debug('[remote.worker] got input: %r', input) + workerman = WorkerManager.from_name(config, input.pop('workerman')) + worker_no = input['worker_no'] + # make remote process to exit 60s earlier so that we could do some cleanup + deadline = input.pop('deadline') - 60 + myresultpath = input.pop('result') + + remote_r = {'status': 'done', 'version': None} + r = {} + try: + pkgname = os.path.basename(os.getcwd()) + remote_r = workerman.run_remote(pkgname, deadline, worker_no, input) + workerman.fetch_files(pkgname) + except Exception as e: + r = { + 'status': 'failed', + 'msg': repr(e), + } + sys.stdout.flush() + except KeyboardInterrupt: + logger.info('KeyboardInterrupt received') + r = { + 'status': 'failed', + 'msg': 'KeyboardInterrupt', + } + finally: + # say goodbye to all our children + kill_child_processes() + + with open(myresultpath, 'w') as f: + remote_r.update(r) + json.dump(remote_r, f) + +if __name__ == '__main__': + main() diff --git a/lilac2/repo.py b/lilac2/repo.py index dcd9939d..65aa4351 100644 --- a/lilac2/repo.py +++ b/lilac2/repo.py @@ -20,7 +20,7 @@ from .mail import MailService from .packages import get_built_package_files from .tools import ansi_escape_re -from . import api, lilacyaml, intl +from . 
import lilacyaml, intl from .typing import LilacMod, Maintainer, LilacInfos, LilacInfo from .nomypy import BuildResult # type: ignore if TYPE_CHECKING: @@ -44,7 +44,7 @@ def __init__(self, config: dict[str, Any]) -> None: self.commit_msg_prefix = config['lilac'].get('commit_msg_prefix', '') self.repodir = Path(config['repository']['repodir']).expanduser() - self.bindmounts = self._get_bindmounts(config.get('bindmounts')) + self.bindmounts = config.get('bindmounts', []) self.tmpfs = config.get('misc', {}).get('tmpfs', []) self.ms = MailService(config) @@ -56,7 +56,7 @@ def __init__(self, config: dict[str, Any]) -> None: self.on_built_cmds = config.get('misc', {}).get('postbuild', []) - self.lilacinfos: LilacInfos = {} # to be filled by self.load_all_lilac_and_report() + self.lilacinfos: LilacInfos = {} # to be filled by self.load_managed_lilac_and_report() self.yamls: dict[str, Any] = {} self._maint_cache: dict[str, list[Maintainer]] = {} @@ -286,12 +286,6 @@ def send_error_report( msgs.append(msg1 + '\n\n' + exc.output) msg1 = l10n.format_value('packaging-error-traceback') msgs.append(msg1 + '\n\n' + tb) - elif isinstance(exc, api.AurDownloadError): - subject_real = subject or l10n.format_value('packaging-error-aur-subject') - msg1 = l10n.format_value('packaging-error-aur') - msgs.append(msg1 + '\n\n') - msg1 = l10n.format_value('packaging-error-traceback') - msgs.append(msg1 + '\n\n' + tb) elif isinstance(exc, TimeoutError): subject_real = subject or l10n.format_value('packaging-error-timeout-subject') else: @@ -312,6 +306,15 @@ def send_error_report( # strictly encoded, disallowing surrogate pairs with logfile.open(errors='replace') as f: build_output = f.read() + + if len(build_output) > 200 * 1024: + too_long = l10n.format_value('log-too-long') + build_output = ( + build_output[:100 * 1024] + + '\n\n' + too_long + '\n\n' + + build_output[-100 * 1024:] + ) + if build_output: log_header = l10n.format_value('packaging-log') with suppress(ValueError, KeyError): # invalid template or wrong key @@ -380,13 +383,3 @@ def on_built(self, pkg: str, result: BuildResult, version: Optional[str]) -> Non except Exception: logger.exception('postbuild cmd error for %r', cmd) - def _get_bindmounts( - self, bindmounts: Optional[dict[str, str]], - ) -> list[str]: - if bindmounts is None: - return [] - - items = [(os.path.expanduser(src), dst) - for src, dst in bindmounts.items()] - items.sort(reverse=True) - return [f'{src}:{dst}' for src, dst in items] diff --git a/lilac2/systemd.py b/lilac2/systemd.py index 91257d65..140394ab 100644 --- a/lilac2/systemd.py +++ b/lilac2/systemd.py @@ -13,12 +13,12 @@ _available = None _check_lock = threading.Lock() -def available() -> bool | dict[str, bool]: +def available(worker_no: Optional[int] = None) -> bool | dict[str, bool]: global _available with _check_lock: if _available is None: - _available = _check_availability() + _available = _check_availability(worker_no) logger.debug('systemd availability: %s', _available) return _available @@ -35,15 +35,21 @@ def _cgroup_cpu_usage(cgroup: str) -> int: return int(l.split()[1]) * 1000 return 0 -def _check_availability() -> bool | dict[str, bool]: +def _check_availability(worker_no: Optional[int]) -> bool | dict[str, bool]: if 'DBUS_SESSION_BUS_ADDRESS' not in os.environ: dbus = f'/run/user/{os.getuid()}/bus' if not os.path.exists(dbus): return False os.environ['DBUS_SESSION_BUS_ADDRESS'] = f'unix:path={dbus}' + + if worker_no is None: + unit_name = 'lilac-check' + else: + unit_name = f'lilac-check-{worker_no}' + p = 
subprocess.run([ 'systemd-run', '--quiet', '--user', - '--remain-after-exit', '-u', 'lilac-check', 'true', + '--remain-after-exit', '-u', unit_name, 'true', ]) if p.returncode != 0: return False @@ -55,7 +61,7 @@ def _check_availability() -> bool | dict[str, bool]: 'MemoryPeak': None, 'MainPID': None, } - _read_service_int_properties('lilac-check', ps) + _read_service_int_properties(unit_name, ps) if ps['MainPID'] != 0: time.sleep(0.01) continue @@ -66,7 +72,7 @@ def _check_availability() -> bool | dict[str, bool]: return ret finally: - subprocess.run(['systemctl', '--user', 'stop', '--quiet', 'lilac-check']) + subprocess.run(['systemctl', '--user', 'stop', '--quiet', unit_name]) def _read_service_int_properties(name: str, properties: dict[str, Optional[int]]) -> None: cmd = [ @@ -144,10 +150,18 @@ def _poll_cmd(pid: int) -> Generator[None, None, None]: logger.debug('worker exited') return yield + except KeyboardInterrupt: + # give up the service and continue + pass finally: os.close(pidfd) -def poll_rusage(name: str, deadline: float) -> tuple[RUsage, bool]: +def poll_rusage( + name: str, + deadline: float, + worker_no: Optional[int] = None, +) -> tuple[RUsage, bool]: + '''worker_no: for remote.runner and used to make check unit name unique''' timedout = False done_state = ['exited', 'failed'] @@ -171,7 +185,7 @@ def poll_rusage(name: str, deadline: float) -> tuple[RUsage, bool]: nsec = 0 mem_max = 0 - availability = available() + availability = available(worker_no) assert isinstance(availability, dict) for _ in _poll_cmd(pid): if not availability['CPUUsageNSec']: diff --git a/lilac2/tools.py b/lilac2/tools.py index f2c9b479..51b5335f 100644 --- a/lilac2/tools.py +++ b/lilac2/tools.py @@ -49,3 +49,8 @@ def get_avail_memory() -> int: if l.startswith('MemAvailable:'): return int(l.split()[1]) * 1024 return 10 * 1024 ** 3 + +if __name__ == '__main__': + cpu = get_running_task_cpu_ratio() + mem = get_avail_memory() + print(cpu, mem) diff --git a/lilac2/typing.py b/lilac2/typing.py index d6b425a8..0e49e382 100644 --- a/lilac2/typing.py +++ b/lilac2/typing.py @@ -3,12 +3,15 @@ import types from typing import ( Union, Dict, Tuple, Type, NamedTuple, Optional, - Sequence, + Sequence, TYPE_CHECKING, ) from pathlib import Path import dataclasses import datetime +if TYPE_CHECKING: + from .workerman import WorkerManager + class LilacMod(types.ModuleType): time_limit_hours: float pkgbase: str @@ -90,7 +93,29 @@ class UsedResource(NamedTuple): memory: int elapsed: int +class Rusages: + def __init__(self, data: dict[str, dict[str, UsedResource]]) -> None: + '''data: pkgbase -> builder -> UsedResource''' + self.data = data + + def for_package( + self, + pkgbase: str, + builder_hints: list[str], + ) -> Optional[UsedResource]: + if a := self.data.get(pkgbase): + for builder in builder_hints: + if b := a.get(builder): + return b + if a: + return next(iter(a.values())) + + return None + OnBuildVers = list[tuple[str, str]] -class PkgToBuild(NamedTuple): + +@dataclasses.dataclass +class PkgToBuild: pkgbase: str - on_build_vers: OnBuildVers = [] + on_build_vers: OnBuildVers = dataclasses.field(default_factory=list) + workerman: Optional[WorkerManager] = None diff --git a/lilac2/worker.py b/lilac2/worker.py index e1170f83..cd6bebfb 100644 --- a/lilac2/worker.py +++ b/lilac2/worker.py @@ -3,13 +3,14 @@ import os import logging import subprocess -from typing import Optional, List, Generator, Union +from typing import Optional, Generator, Any from types import SimpleNamespace import contextlib import json import sys 
from pathlib import Path import platform +import traceback import pyalpm @@ -17,7 +18,7 @@ from .vendor.myutils import file_lock from . import pkgbuild -from .typing import LilacMod, LilacInfo, Cmd, OnBuildVers +from .typing import LilacMod, Cmd, OnBuildVers from .cmd import run_cmd, UNTRUSTED_PREFIX from .api import ( vcs_update, get_pkgver_and_pkgrel, update_pkgrel, @@ -26,10 +27,8 @@ from .nvchecker import NvResults from .tools import kill_child_processes from .lilacpy import load_lilac -from .lilacyaml import load_lilacinfo from .const import _G, PACMAN_DB_DIR, mydir -from .repo import Repo -from . import intl +from . import intl, api logger = logging.getLogger(__name__) @@ -57,6 +56,12 @@ def may_update_pkgrel() -> Generator[None, None, None]: # pkgrel is not a number, resetting to 1 update_pkgrel(1) +def get_bindmounts(bindmounts: dict[str, str]) -> list[str]: + items = [(os.path.expanduser(src), dst) + for src, dst in bindmounts.items()] + items.sort(reverse=True) + return [f'{src}:{dst}' for src, dst in items] + def lilac_build( worker_no: int, mod: LilacMod, @@ -109,7 +114,7 @@ def lilac_build( if not isinstance(build_prefix, str): raise TypeError('build_prefix', build_prefix) - build_args: List[str] = [] + build_args: list[str] = [] if hasattr(mod, 'build_args'): build_args = mod.build_args @@ -141,12 +146,12 @@ def lilac_build( post_build_always(success=success) def call_build_cmd( - build_prefix: str, depends: List[str], + build_prefix: str, depends: list[str], bindmounts: list[str] = [], tmpfs: list[str] = [], build_args: list[str] = [], - makechrootpkg_args: List[str] = [], - makepkg_args: List[str] = [], + makechrootpkg_args: list[str] = [], + makepkg_args: list[str] = [], ) -> None: cmd: Cmd if build_prefix == 'makepkg': @@ -207,16 +212,15 @@ def run_build_cmd(cmd: Cmd) -> None: def main() -> None: enable_pretty_logging('DEBUG') - from .tools import read_config - config = read_config() - repo = _G.repo = Repo(config) pkgbuild.load_data(PACMAN_DB_DIR) input = json.load(sys.stdin) - logger.debug('got input: %r', input) + logger.debug('[worker] got input: %r', input) _G.commit_msg_template = input['commit_msg_template'] + _G.reponame = input['reponame'] + r: dict[str, Any] try: with load_lilac(Path('.')) as mod: _G.mod = mod @@ -226,7 +230,7 @@ def main() -> None: depend_packages = input['depend_packages'], update_info = NvResults.from_list(input['update_info']), on_build_vers = input.get('on_build_vers', []), - bindmounts = input['bindmounts'], + bindmounts = get_bindmounts(input['bindmounts']), tmpfs = input['tmpfs'], ) r = {'status': 'done'} @@ -241,12 +245,7 @@ def main() -> None: 'msg': repr(e), } sys.stdout.flush() - try: - handle_failure(e, repo, mod, Path(input['logfile'])) - except UnboundLocalError: - # mod failed to load - info = load_lilacinfo(Path('.')) - handle_failure(e, repo, info, Path(input['logfile'])) + r['report'] = gen_failure_report(e) except KeyboardInterrupt: logger.info('KeyboardInterrupt received') r = { @@ -257,17 +256,17 @@ def main() -> None: # say goodbye to all our children kill_child_processes() - r['version'] = getattr(_G, 'built_version', None) # type: ignore + r['version'] = getattr(_G, 'built_version', None) with open(input['result'], 'w') as f: json.dump(r, f) -def handle_failure( - e: Exception, repo: Repo, mod: Union[LilacMod, LilacInfo], logfile: Path, -) -> None: +def gen_failure_report(e: Exception) -> dict[str, str]: logger.error('build failed', exc_info=e) l10n = intl.get_l10n('mail') + report = {} + if isinstance(e, 
pkgbuild.ConflictWithOfficialError):
     reason = ''
     if e.groups:
@@ -275,23 +274,46 @@
     if e.packages:
       reason += l10n.format_value('package-replacing-official-package', {'packages': repr(e.packages)}) + '\n'
     subj = l10n.format_value('package-conflicts-with-official-repos')
-    repo.send_error_report(
-      mod, subject = subj, msg = reason,
-    )
+    report['subject'] = subj
+    report['msg'] = reason
   elif isinstance(e, pkgbuild.DowngradingError):
-    repo.send_error_report(
-      mod,
-      subject = l10n.format_value('package-older-subject'),
-      msg = l10n.format_value('package-older-body', {
-        'pkg': e.pkgname,
-        'built_version': e.built_version,
-        'repo_version': e.repo_version,
-      }) + '\n',
-    )
+    report['subject'] = l10n.format_value('package-older-subject')
+    report['msg'] = l10n.format_value('package-older-body', {
+      'pkg': e.pkgname,
+      'built_version': e.built_version,
+      'repo_version': e.repo_version,
+    }) + '\n'
   else:
-    repo.send_error_report(mod, exc=e, logfile=logfile)
+    msgs = []
+    tb = ''.join(traceback.format_exception(type(e), e, e.__traceback__))
+    if isinstance(e, subprocess.CalledProcessError):
+      subject = l10n.format_value('packaging-error-subprocess-subject')
+      msg1 = l10n.format_value('packaging-error-subprocess', {
+        'cmd': repr(e.cmd),
+        'returncode': e.returncode,
+      })
+      msgs.append(msg1)
+      if e.output:
+        msg1 = l10n.format_value('packaging-error-subprocess-output')
+        msgs.append(msg1 + '\n\n' + e.output)
+      msg1 = l10n.format_value('packaging-error-traceback')
+      msgs.append(msg1 + '\n\n' + tb)
+    elif isinstance(e, api.AurDownloadError):
+      subject = l10n.format_value('packaging-error-aur-subject')
+      msg1 = l10n.format_value('packaging-error-aur')
+      msgs.append(msg1 + '\n\n')
+      msg1 = l10n.format_value('packaging-error-traceback')
+      msgs.append(msg1 + '\n\n' + tb)
+    else:
+      subject = l10n.format_value('packaging-error-unknown-subject')
+      msg1 = l10n.format_value('packaging-error-unknown')
+      msgs.append(msg1 + '\n\n' + tb)
+    report['subject'] = subject
+    report['msg'] = '\n'.join(msgs)
+
+  return report
 
 if __name__ == '__main__':
   main()
diff --git a/lilac2/workerman.py b/lilac2/workerman.py
new file mode 100644
index 00000000..429ef33c
--- /dev/null
+++ b/lilac2/workerman.py
@@ -0,0 +1,349 @@
+from typing import override, Callable, Optional, Any
+import logging
+import subprocess
+import os
+import json
+import signal
+import sys
+import tempfile
+
+from .typing import PkgToBuild, Rusages
+
+logger = logging.getLogger(__name__)
+
+class WorkerManager:
+  name: str
+  max_concurrency: int
+  workers_before_me: int = 0
+  current_task_count: int = 0
+
+  def get_worker_cmd(self, pkgbase: str) -> list[str]:
+    raise NotImplementedError
+
+  def get_resource_usage(self) -> tuple[float, int]:
+    raise NotImplementedError
+
+  def sync_depended_packages(self, depends: list[str]) -> None:
+    raise NotImplementedError
+
+  def prepare_batch(
+    self,
+    pacman_conf: Optional[str],
+  ) -> None:
+    raise NotImplementedError
+
+  def finish_batch(self) -> None:
+    raise NotImplementedError
+
+  def try_accept_package(
+    self,
+    ready_to_build: list[str],
+    rusages: Rusages,
+    priority_func: Callable[[str], int],
+    check_buildability: Callable[[str], Optional[PkgToBuild]],
+  ) -> list[PkgToBuild]:
+    if self.current_task_count >= self.max_concurrency:
+      return []
+
+    cpu_ratio, memory_avail = self.get_resource_usage()
+
+    if cpu_ratio > 1.0 and self.current_task_count > 0:
+      return []
+
+    def sort_key(pkg):
+      p = priority_func(pkg)
+      r = rusages.for_package(pkg, [self.name])
+      if r is not None:
+        cpu =
r.cputime / r.elapsed + else: + cpu = 1.0 + return (p, cpu) + ready_to_build.sort(key=sort_key) + logger.debug('[%s] sorted ready_to_build: %r', + self.name, ready_to_build) + + if cpu_ratio < 0.9: + # low cpu usage, build a big package + p = priority_func(ready_to_build[0]) + for idx, pkg in enumerate(ready_to_build): + if priority_func(pkg) != p: + if idx > 2: + ready_to_build.insert(0, ready_to_build.pop(idx-1)) + break + else: + logger.info('high cpu usage (%.2f), preferring low-cpu-usage builds', cpu_ratio) + + ret: list[PkgToBuild] = [] + + limited_by_memory = False + for pkg in ready_to_build: + r = rusages.for_package(pkg, [self.name]) + if r and r.memory > memory_avail: + logger.debug('package %s used %d memory last time, but now only %d is available', pkg, r.memory, memory_avail) + limited_by_memory = True + continue + + to_build = check_buildability(pkg) + if to_build is None: + continue + + to_build.workerman = self + ret.append(to_build) + if len(ret) + self.current_task_count >= self.max_concurrency: + break + + if r: + memory_avail -= r.memory + else: + memory_avail -= 10 * 1024 ** 3 + + if not ret and limited_by_memory: + logger.info('insufficient memory, not starting another concurrent build (available: %d)', memory_avail) + + self.current_task_count += len(ret) + return ret + + @staticmethod + def from_name(config: dict[str, Any], name: str): + if name == 'local': + max_concurrency = config['lilac'].get('max_concurrency', 1) + return LocalWorkerManager(max_concurrency) + else: + remote = [ + x for x in config['remoteworker'] + if x.get('enabled', False) and x['name'] == name + ][0] + return RemoteWorkerManager(remote) + +class LocalWorkerManager(WorkerManager): + name: str = 'local' + max_concurrency: int + + def __init__(self, max_concurrency) -> None: + self.max_concurrency = max_concurrency + + @override + def get_worker_cmd(self, pkgbase: str) -> list[str]: + return [ + sys.executable, + '-Xno_debug_ranges', # save space + '-P', # don't prepend cwd to sys.path where unexpected directories may exist + '-m', 'lilac2.worker', pkgbase, + ] + + @override + def get_resource_usage(self) -> tuple[float, int]: + from . import tools + cpu_ratio = tools.get_running_task_cpu_ratio() + memory_avail = tools.get_avail_memory() + return cpu_ratio, memory_avail + + @override + def sync_depended_packages(self, depends: list[str]) -> None: + pass + + @override + def prepare_batch( + self, + pacman_conf: Optional[str], + ) -> None: + from . 
import pkgbuild + logger.info('[%s] updating pacman databases', self.name) + pkgbuild.update_data(pacman_conf) + + @override + def finish_batch(self) -> None: + pass + +class RemoteWorkerManager(WorkerManager): + name: str + max_concurrency: int + repodir: str + host: str + config: dict[str, Any] + + def __init__(self, remote: dict[str, Any]) -> None: + self.name = remote['name'] + self.repodir = remote['repodir'] + self.host = remote['host'] + self.max_concurrency = remote.get('max_concurrency', 1) + self.config = remote + + @override + def get_worker_cmd(self, pkgbase: str) -> list[str]: + return [ + sys.executable, + '-Xno_debug_ranges', # save space + '-P', # don't prepend cwd to sys.path where unexpected directories may exist + '-m', 'lilac2.remote.worker', pkgbase, self.name, + ] + + @override + def get_resource_usage(self) -> tuple[float, int]: + sshcmd = self.get_sshcmd_prefix() + ['python', '-m', 'lilac2.tools'] + out = subprocess.check_output(sshcmd, text=True) + cpu, mem = out.split() + return float(cpu), int(mem) + + @override + def sync_depended_packages(self, depends: list[str]) -> None: + if not depends: + return + + includes = ''.join(f'/{p.rsplit('/', 2)[1]}\n' for p in depends) + rsync_cmd = [ + 'rsync', '-avi', + '--include-from=-', + '--exclude=/.*', '--exclude=*/', '--include=*.pkg.tar.zst', '--exclude=*/*', + '--delete', + './', f'{self.host}:{self.repodir.removesuffix('/')}', + ] + logger.info('[%s] sync_depended_packages: %s', self.name, rsync_cmd) + subprocess.run(rsync_cmd, text=True, input=includes, check=True) + + @override + def prepare_batch( + self, + pacman_conf: Optional[str], + ) -> None: + # update pacman databases + sshcmd = self.get_sshcmd_prefix() + [ + 'python', '-Xno_debug_ranges', '-P', + '-m', 'lilac2.pkgbuild', pacman_conf or '', + ] + logger.info('[%s] running %s', self.name, sshcmd) + subprocess.check_call(sshcmd) + + sshcmd = self.get_sshcmd_prefix() + [ + 'python', '-Xno_debug_ranges', '-P', + '-m', 'lilac2.remote.git_pull', f'"{self.repodir}"', + ] + logger.info('[%s] running %s', self.name, sshcmd) + subprocess.run(sshcmd, check=True) + + if prerun := self.config.get('prerun'): + self.run_cmds(prerun) + + @override + def finish_batch(self) -> None: + out = subprocess.check_output(['git', 'remote'], text=True) + remotes = out.splitlines() + if self.name not in remotes: + sshcmd = self.get_sshcmd_prefix() + [ + f'cd "{self.repodir}" && git rev-parse --show-prefix' + ] + out = subprocess.check_output(sshcmd, text=True).strip('\n/') + if out: + reporoot = self.repodir.removesuffix(out).rstrip('/') + else: + reporoot = self.repodir + subprocess.check_call([ + 'git', 'remote', 'add', self.name, f'{self.host}:{reporoot}', + ]) + subprocess.check_call([ + 'git', 'pull', '--no-edit', self.name, 'master', + ]) + + if postrun := self.config.get('postrun'): + self.run_cmds(postrun) + + def fetch_files(self, pkgname: str) -> None: + # run in remote.worker + rsync_cmd = [ + 'rsync', '-avi', + '--include=*.pkg.tar.zst', '--exclude=*', + f'{self.host}:{self.repodir.removesuffix('/')}/{pkgname}/', + '.', + ] + logger.info('[%s] fetch_files: %s', self.name, rsync_cmd) + subprocess.run(rsync_cmd, check=True) + + def run_remote( + self, + pkgname: str, + deadline: float, + worker_no: int, + input: dict[str, Any], + ) -> dict[str, Any]: + # run in remote.worker + + setenv = { + 'PACKAGER': os.environ.get('PACKAGER', ''), + 'LANG': os.environ.get('LANG', 'C.UTF-8'), + } + if tz := os.environ.get('TZ'): + setenv['TZ'] = tz + + name = f'lilac-worker-{worker_no}' + 
+    fd, resultpath = tempfile.mkstemp(prefix=f'{name}-', suffix='.lilac')
+    os.close(fd)
+
+    input = {
+      'name': name,
+      'deadline': deadline,
+      'result': resultpath,
+      'pkgdir': os.path.join(self.repodir, pkgname),
+      'setenv': setenv,
+      **input,
+    }
+
+    input_bytes = json.dumps(input).encode()
+    sshcmd: list[str] = self.get_sshcmd_prefix(pty=True) + [
+      'python',
+      '-Xno_debug_ranges', # save space
+      '-P', # don't prepend cwd to sys.path where unexpected directories may exist
+      '-m', 'lilac2.remote.runner', pkgname, str(worker_no),
+    ]
+    p = subprocess.Popen(
+      sshcmd,
+      stdin = subprocess.PIPE,
+    )
+    p.stdin.write(input_bytes) # type: ignore
+    p.stdin.close() # type: ignore
+
+    e: Optional[BaseException] = None
+    stop_countdown = None
+    while True:
+      try:
+        # timeout for waiting for the subprocess to terminate
+        if stop_countdown is not None:
+          stop_countdown -= 1
+          if stop_countdown == 0:
+            break
+
+        try:
+          code = p.wait(10)
+        except subprocess.TimeoutExpired:
+          st = os.stat(1)
+          if st.st_size > 1024 ** 3: # larger than 1G
+            logger.error('\n\nToo much output, killed.')
+        else:
+          if code != 0 and e is None:
+            e = subprocess.CalledProcessError(code, 'lilac2.remote.runner')
+          break
+      except KeyboardInterrupt as e2:
+        logger.info('SIGINT received, relaying to remoteworker')
+        p.send_signal(signal.SIGINT)
+        stop_countdown = 6
+        e = e2
+    p.wait()
+
+    try:
+      sshcmd = self.get_sshcmd_prefix() + ['cat', resultpath]
+      out = subprocess.check_output(sshcmd, text=True)
+      r = json.loads(out)
+      return r
+    finally:
+      if e:
+        raise e
+
+  def get_sshcmd_prefix(self, pty: bool = False) -> list[str]:
+    if pty:
+      return ['ssh', '-t', self.host]
+    else:
+      return ['ssh', '-T', self.host]
+
+  def run_cmds(self, cmds: list[str]) -> None:
+    for cmd in cmds:
+      subprocess.check_call(self.get_sshcmd_prefix() + [cmd])
diff --git a/processes.md b/processes.md
new file mode 100644
index 00000000..cb30afe3
--- /dev/null
+++ b/processes.md
@@ -0,0 +1,9 @@
+* lilac
+  * local workerman thread (collect rusage)
+    * systemd-run lilac2.worker (handle SIGINT)
+      * build cmd
+  * remote workerman thread (collect rusage)
+    * systemd-run lilac2.remote.worker (handle SIGINT)
+      * ssh host lilac2.remote.runner (collect rusage)
+        * systemd-run lilac2.worker (handle SIGINT)
+          * build cmd
diff --git a/scripts/dbsetup.sql b/scripts/dbsetup.sql
index ad8a7da2..83871092 100644
--- a/scripts/dbsetup.sql
+++ b/scripts/dbsetup.sql
@@ -15,7 +15,8 @@ create table pkglog (
   memory bigint,
   msg text,
   build_reasons jsonb,
-  maintainers jsonb
+  maintainers jsonb,
+  builder text not null
 );
 
 create index pkglog_ts_idx on pkglog (ts);
diff --git a/scripts/tailf-build-log b/scripts/tailf-build-log
index 039a518e..9135cd37 100755
--- a/scripts/tailf-build-log
+++ b/scripts/tailf-build-log
@@ -18,7 +18,7 @@ FMT = {
   'staged': f'[{c(12)}%(ts)s{c(7)}] {c(15)}%(pkgbase)s{c(7)} %(nv_version)s %(action)s{c(7)} as {c(15)}%(pkg_version)s{c(7)} in {c(6)}%(elapsed)s',
   'failed': f'[{c(12)}%(ts)s{c(7)}] {c(15)}%(pkgbase)s{c(7)} %(nv_version)s %(action)s{c(7)} to build as {c(15)}%(pkg_version)s{c(7)} in {c(6)}%(elapsed)s',
   'skipped': f'[{c(12)}%(ts)s{c(7)}] {c(15)}%(pkgbase)s{c(7)} %(nv_version)s %(action)s{c(7)} because {c(15)}%(msg)s',
-  '_rusage': f'{c(7)}; CPU time: {c(6)}%(cputime)s{c(7)} (%(cpupercent)s%%{c(7)}), Memory: {c(5)}%(memory)s\n',
+  '_rusage': f'{c(7)}; CPU time: {c(6)}%(cputime)s{c(7)} (%(cpupercent)s%%{c(7)}), Memory: {c(5)}%(memory)s{c(7)}',
   '_batch': f'[{c(12)}%(ts)s{c(7)}] {c(14)}build %(event)s\n',
 }
 
@@ -29,7 +29,9 @@ ACTION = {
   'skipped': f'{c(3)}skipped',
} -N_CORES = os.cpu_count() +N_CORES = { + 'local': os.cpu_count(), +} def color_gradient(v): r = 255 - v * 255 @@ -63,11 +65,13 @@ def pretty_print(log): cputime = 0 memory = 0 + builder = log['builder'] + n_cores = N_CORES.get(builder, N_CORES['local']) if log['elapsed']: cpupercent = round(100 * cputime / log['elapsed']) else: cpupercent = 0 - cpupercent = color_gradient(1 - cpupercent / 100 / N_CORES) + str(cpupercent) + cpupercent = color_gradient(1 - cpupercent / 100 / n_cores) + str(cpupercent) args = { 'ts': log['ts'].strftime('%Y-%m-%d %H:%M:%S'), @@ -80,10 +84,14 @@ def pretty_print(log): 'cputime': humantime(cputime), 'cpupercent': cpupercent, 'memory': filesize(memory), + 'builder': builder, } fmt = FMT[result] out = c(7) + fmt % args + FMT['_rusage'] % args + if builder != 'local': + out += f' on {c(6)}{builder}' + out += '\n' if result == 'failed': out += f'{c(8)}{log["msg"][:1000]}\n' sys.stdout.write(out) diff --git a/scripts/useful.sql b/scripts/useful.sql index 83f1268c..6be7f869 100644 --- a/scripts/useful.sql +++ b/scripts/useful.sql @@ -1,7 +1,7 @@ -- some useful SQL commands (for PostgreSQL) -- show build log -select id, ts, pkgbase, nv_version, pkg_version, elapsed, result, cputime, case when elapsed = 0 then 0 else cputime * 100 / elapsed end as "cpu%", round(memory / 1073741824.0, 3) as "memory (GiB)", substring(msg for 20) as msg, build_reasons, (select array_agg(github) from jsonb_to_recordset(maintainers) as m(github text)) as maintainers from pkglog order by id desc limit 10; +select id, ts, pkgbase, nv_version, pkg_version, elapsed, result, cputime, case when elapsed = 0 then 0 else cputime * 100 / elapsed end as "cpu%", round(memory / 1073741824.0, 3) as "memory (GiB)", substring(msg for 20) as msg, build_reasons, (select array_agg(github) from jsonb_to_recordset(maintainers) as m(github text)) as maintainers, builder from pkglog order by id desc limit 10; -- show current build status and expected time select index, c.pkgbase, updated_at, status, elapsed as last_time, c.build_reasons from pkgcurrent as c left join lateral ( diff --git a/setup.py b/setup.py index 4fb829ca..cb778fdc 100755 --- a/setup.py +++ b/setup.py @@ -7,7 +7,7 @@ description = 'The build bot for archlinuxcn', author = 'lilydjwg', author_email = 'lilydjwg@gmail.com', - python_requires = '>=3.12.0', + python_requires = '>=3.13.0', url = 'https://github.com/archlinuxcn/lilac', zip_safe = False, packages = find_packages(exclude=('tests',)) + ['nvchecker_source'], @@ -26,7 +26,6 @@ classifiers = [ 'Programming Language :: Python', 'Programming Language :: Python :: 3', - 'Programming Language :: Python :: 3.12', 'Programming Language :: Python :: 3.13', ], )
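A quick illustration of the new `Rusages` container from `lilac2/typing.py` (a minimal sketch; the package names and numbers below are made up): it maps pkgbase to per-builder `UsedResource` records, preferring a hinted builder and falling back to any recorded one.

```python
from lilac2.typing import Rusages, UsedResource

# pkgbase -> builder -> UsedResource(cputime, memory, elapsed); illustrative values only
rusages = Rusages({
  'somepkg': {
    'local': UsedResource(1200, 2 * 1024**3, 900),
    'remotebuilder': UsedResource(1100, 3 * 1024**3, 700),
  },
})

# prefer the record of the builder we intend to schedule on
r = rusages.for_package('somepkg', ['remotebuilder'])
assert r is not None and r.memory == 3 * 1024**3

# fall back to any recorded builder when the hinted one has no record
assert rusages.for_package('somepkg', ['no-such-builder']) is not None

# packages never built before yield None
assert rusages.for_package('neverbuilt', ['local']) is None
```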
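With the new `builder` column in `pkglog` (see `scripts/dbsetup.sql` and the README migration note), per-builder history can be inspected in the spirit of `scripts/useful.sql`. A minimal sketch, assuming the database connection pool has already been initialised the way the `lilac` daemon does at startup; the helper name `builder_stats` is hypothetical:

```python
from lilac2 import db

def builder_stats():
  # per-builder build counts and average wall time for successful/staged builds;
  # query shape only, adjust columns and filters to taste
  with db.get_session() as s:
    s.execute('''
      select builder, count(*) as builds, round(avg(elapsed)) as avg_elapsed
      from pkglog
      where result in ('successful', 'staged')
      group by builder
      order by builds desc''')
    return s.fetchall()
```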