From 938cd3c8a2503a3355f22a024c4a2db301a8ca67 Mon Sep 17 00:00:00 2001 From: Krzesimir Nowak Date: Thu, 3 Dec 2015 18:00:00 +0100 Subject: [PATCH 1/6] initial structure --- Makefile | 2 +- configure.ac | 8 +++-- stage1/stage1.mk | 5 +++ stage1_fly/aci/aci-manifest.in | 33 ++++++++++++++++++++ stage1_fly/aci/aci.mk | 53 ++++++++++++++++++++++++++++++++ stage1_fly/enter/enter.mk | 1 + stage1_fly/gc/gc.mk | 1 + stage1_fly/makelib/aci_binary.mk | 30 ++++++++++++++++++ stage1_fly/run/run.mk | 1 + stage1_fly/stage1_fly.mk | 33 ++++++++++++++++++++ 10 files changed, 163 insertions(+), 4 deletions(-) create mode 100644 stage1_fly/aci/aci-manifest.in create mode 100644 stage1_fly/aci/aci.mk create mode 100644 stage1_fly/enter/enter.mk create mode 100644 stage1_fly/gc/gc.mk create mode 100644 stage1_fly/makelib/aci_binary.mk create mode 100644 stage1_fly/run/run.mk create mode 100644 stage1_fly/stage1_fly.mk diff --git a/Makefile b/Makefile index 2b6ee138ee..e0b11f2c64 100644 --- a/Makefile +++ b/Makefile @@ -13,7 +13,7 @@ TOPLEVEL_STAMPS := TOPLEVEL_CHECK_STAMPS := TOPLEVEL_UNIT_CHECK_STAMPS := TOPLEVEL_FUNCTIONAL_CHECK_STAMPS := -TOPLEVEL_SUBDIRS := rkt tests stage1 +TOPLEVEL_SUBDIRS := rkt tests stage1 stage1_fly $(call inc-one,tools/tools.mk) $(call inc-many,$(foreach sd,$(TOPLEVEL_SUBDIRS),$(sd)/$(sd).mk)) diff --git a/configure.ac b/configure.ac index b206023219..6ccc0ae7ed 100644 --- a/configure.ac +++ b/configure.ac @@ -59,7 +59,7 @@ AC_ARG_WITH([stage1-default-flavor], AC_ARG_WITH([stage1-flavors], [AS_HELP_STRING([--with-stage1-flavors], - [comma-separated list of stage1 flavors; choose from 'src', 'coreos', 'host', 'kvm'; default: 'coreos,kvm'])], + [comma-separated list of stage1 flavors; choose from 'src', 'coreos', 'host', 'kvm', 'fly'; default: 'coreos,kvm'])], [RKT_STAGE1_FLAVORS="${withval}"], [RKT_STAGE1_FLAVORS=auto]) @@ -172,7 +172,7 @@ AS_VAR_IF([RKT_STAGE1_FLAVORS_VERSION_OVERRIDE], [auto], ## Built stage1 flavors verification dnl a list of all flavors 
-RKT_STAGE1_ALL_FLAVORS=coreos,kvm,host,src +RKT_STAGE1_ALL_FLAVORS=coreos,kvm,host,src,fly dnl RKT_ITERATE_FLAVORS iterates all comma-separated flavors stored in dnl $1 using an iterator variable $2 and executes body $3. @@ -188,7 +188,7 @@ dnl additional string to an error message. AC_DEFUN([RKT_IS_VALID_FLAVOR], [AS_CASE([$1], dnl Correct flavor, nothing to do. - [coreos|kvm|host|src], + [coreos|kvm|host|src|fly], [], dnl Bogus flavor, bail out. [AC_MSG_ERROR([*** unknown stage1 flavor "$1" $2])])]) @@ -231,6 +231,8 @@ RKT_ITERATE_FLAVORS([${RKT_STAGE1_FLAVORS}],[flavor], RKT_REQ_PROG([BC],[bc],[bc])], [host], [], + [fly], + [], [AC_MSG_ERROR([*** Unhandled flavor "${flavor}", should not happen])])]) dnl Validate passed default flavor, it should be one of the built diff --git a/stage1/stage1.mk b/stage1/stage1.mk index f1fee55c0f..fba22868c0 100644 --- a/stage1/stage1.mk +++ b/stage1/stage1.mk @@ -53,8 +53,13 @@ # # STAGE1_ENTER_CMD_$(flavor) - an enter command in stage1 to be used # for the "rkt enter" command. 
+ STAGE1_FLAVORS := $(call commas-to-spaces,$(RKT_STAGE1_ALL_FLAVORS)) STAGE1_BUILT_FLAVORS := $(call commas-to-spaces,$(RKT_STAGE1_FLAVORS)) +# filter out the fly flavor - it is special +STAGE1_FLAVORS := $(filter-out fly,$(STAGE1_FLAVORS)) +STAGE1_BUILT_FLAVORS := $(filter-out fly,$(STAGE1_BUILT_FLAVORS)) + $(foreach f,$(STAGE1_FLAVORS), \ $(eval STAGE1_COPY_SO_DEPS_$f :=) \ $(eval STAGE1_USR_STAMPS_$f :=) \ diff --git a/stage1_fly/aci/aci-manifest.in b/stage1_fly/aci/aci-manifest.in new file mode 100644 index 0000000000..34a00283bb --- /dev/null +++ b/stage1_fly/aci/aci-manifest.in @@ -0,0 +1,33 @@ +{ + "acKind": "ImageManifest", + "acVersion": "0.7.3", + "name": "coreos.com/rkt/stage1-fly", + "labels": [ + { + "name": "version", + "value": "@RKT_STAGE1_VERSION@" + }, + { + "name": "arch", + "value": "amd64" + }, + { + "name": "os", + "value": "linux" + } + ], + "annotations": [ + { + "name": "coreos.com/rkt/stage1/run", + "value": "/run" + }, + { + "name": "coreos.com/rkt/stage1/enter", + "value": "/enter" + }, + { + "name": "coreos.com/rkt/stage1/gc", + "value": "/gc" + } + ] +} diff --git a/stage1_fly/aci/aci.mk b/stage1_fly/aci/aci.mk new file mode 100644 index 0000000000..675527503d --- /dev/null +++ b/stage1_fly/aci/aci.mk @@ -0,0 +1,53 @@ +$(call setup-stamp-file,FLY_ACI_STAMP,aci-manifest) +$(call setup-tmp-dir,FLY_ACI_TMPDIR_BASE) + +FLY_ACI_TMPDIR := $(FLY_ACI_TMPDIR_BASE)/fly +# a manifest template +FLY_ACI_SRC_MANIFEST := $(MK_SRCDIR)/aci-manifest.in +# generated manifest to be copied to the ACI directory +FLY_ACI_GEN_MANIFEST := $(FLY_ACI_TMPDIR)/manifest +# manifest in the ACI directory +FLY_ACI_MANIFEST := $(FLY_ACIDIR)/manifest +# escaped values of the ACI image name, version and enter command, so +# they can be safely used in the replacement part of sed's s/// +# command. 
+FLY_ACI_VERSION := $(call sed-replacement-escape,$(RKT_VERSION))
+# stamp and dep file for invalidating the generated manifest if the
+# version (the only key tracked by generate-kv-deps below) changes
+$(call setup-stamp-file,FLY_ACI_MANIFEST_KV_DEPMK_STAMP,manifest-kv-dep)
+$(call setup-dep-file,FLY_ACI_MANIFEST_KV_DEPMK,manifest-kv-dep)
+FLY_ACI_DIRS := \
+	$(FLY_ACIROOTFSDIR)/rkt \
+	$(FLY_ACIROOTFSDIR)/rkt/status \
+	$(FLY_ACIROOTFSDIR)/opt \
+	$(FLY_ACIROOTFSDIR)/opt/stage2
+
+# main stamp rule - makes sure manifest and deps files are generated
+$(call generate-stamp-rule,$(FLY_ACI_STAMP),$(FLY_ACI_MANIFEST) $(FLY_ACI_MANIFEST_KV_DEPMK_STAMP))
+
+# invalidate generated manifest if version changes
+$(call generate-kv-deps,$(FLY_ACI_MANIFEST_KV_DEPMK_STAMP),$(FLY_ACI_GEN_MANIFEST),$(FLY_ACI_MANIFEST_KV_DEPMK),FLY_ACI_VERSION)
+
+# this rule generates a manifest from the template via sed
+$(call forward-vars,$(FLY_ACI_GEN_MANIFEST), \
+	FLY_ACI_VERSION)
+$(FLY_ACI_GEN_MANIFEST): $(FLY_ACI_SRC_MANIFEST) | $(FLY_ACI_TMPDIR) $(FLY_ACI_DIRS) $(FLY_ACIROOTFSDIR)/flavor
+	$(VQ) \
+	set -e; \
+	$(call vb,vt,MANIFEST,fly) \
+	sed \
+		-e 's/@RKT_STAGE1_VERSION@/$(FLY_ACI_VERSION)/g' \
+	"$<" >"$@.tmp"; \
+	$(call bash-cond-rename,$@.tmp,$@)
+
+INSTALL_DIRS += \
+	$(FLY_ACI_TMPDIR):- \
+	$(foreach d,$(FLY_ACI_DIRS),$d:-)
+INSTALL_SYMLINKS += \
+	fly:$(FLY_ACIROOTFSDIR)/flavor
+FLY_STAMPS += $(FLY_ACI_STAMP)
+INSTALL_FILES += \
+	$(FLY_ACI_GEN_MANIFEST):$(FLY_ACI_MANIFEST):0644
+CLEAN_FILES += $(FLY_ACI_GEN_MANIFEST)
+
+$(call undefine-namespaces,FLY_ACI _FLY_ACI)
diff --git a/stage1_fly/enter/enter.mk b/stage1_fly/enter/enter.mk
new file mode 100644
index 0000000000..bb177a3da7
--- /dev/null
+++ b/stage1_fly/enter/enter.mk
@@ -0,0 +1 @@
+include stage1_fly/makelib/aci_binary.mk
diff --git a/stage1_fly/gc/gc.mk b/stage1_fly/gc/gc.mk
new file mode 100644
index 0000000000..bb177a3da7
--- /dev/null
+++ b/stage1_fly/gc/gc.mk
@@ -0,0 +1 @@
+include stage1_fly/makelib/aci_binary.mk
diff --git 
a/stage1_fly/makelib/aci_binary.mk b/stage1_fly/makelib/aci_binary.mk new file mode 100644 index 0000000000..f4051fb5c4 --- /dev/null +++ b/stage1_fly/makelib/aci_binary.mk @@ -0,0 +1,30 @@ +# The path of this file. This file is included (or at least it should +# be) with a standard include directive instead of our inc-one (or +# inc-many), so the MK_PATH, MK_FILENAME and MK_SRCDIR are set to +# values specific to the parent file (that is - including this one). +_FAB_PATH_ := $(lastword $(MAKEFILE_LIST)) +# Name of a binary, deduced upon filename of a parent Makefile. +_FAB_NAME_ := $(patsubst %.mk,%,$(MK_FILENAME)) +# Path to built binary. Not the one in the ACI rootfs. +_FAB_BINARY_ := $(FLY_TOOLSDIR)/$(_FAB_NAME_) +# Path to the built binary, copied to ACI rootfs directory +_FAB_ACI_BINARY_ := $(FLY_ACIROOTFSDIR)/$(_FAB_NAME_) + +$(call setup-stamp-file,_FAB_STAMP_,binary-build) +$(call generate-stamp-rule,$(_FAB_STAMP_)) + +# variables for makelib/build_go_bin.mk +BGB_STAMP := $(_FAB_STAMP_) +BGB_BINARY := $(_FAB_BINARY_) +BGB_PKG_IN_REPO := $(call go-pkg-from-dir) + +include makelib/build_go_bin.mk + +$(_FAB_BINARY_): $(_FAB_PATH_) $(MK_PATH) | $(FLY_TOOLSDIR) +$(_FAB_STAMP_): $(_FAB_ACI_BINARY_) + +CLEAN_FILES += $(_FAB_BINARY_) +INSTALL_FILES += $(_FAB_BINARY_):$(_FAB_ACI_BINARY_):- +FLY_STAMPS += $(_FAB_STAMP_) + +$(call undefine-namespaces,FAB _FAB) diff --git a/stage1_fly/run/run.mk b/stage1_fly/run/run.mk new file mode 100644 index 0000000000..bb177a3da7 --- /dev/null +++ b/stage1_fly/run/run.mk @@ -0,0 +1 @@ +include stage1_fly/makelib/aci_binary.mk diff --git a/stage1_fly/stage1_fly.mk b/stage1_fly/stage1_fly.mk new file mode 100644 index 0000000000..8513176f06 --- /dev/null +++ b/stage1_fly/stage1_fly.mk @@ -0,0 +1,33 @@ +FLY_ACIDIR := $(BUILDDIR)/aci-for-fly-flavor +FLY_ACIROOTFSDIR := $(FLY_ACIDIR)/rootfs +FLY_TOOLSDIR := $(TOOLSDIR)/fly +FLY_STAMPS := +FLY_SUBDIRS := run gc enter aci +FLY_STAGE1 := $(BINDIR)/stage1-fly.aci + +$(call 
setup-stamp-file,FLY_STAMP,aci-build) + +$(call inc-many,$(foreach sd,$(FLY_SUBDIRS),$(sd)/$(sd).mk)) + +$(call generate-stamp-rule,$(FLY_STAMP),$(FLY_STAMPS) $(ACTOOL_STAMP),, \ + $(call vb,vt,ACTOOL,$(call vsp,$(FLY_STAGE1))) \ + "$(ACTOOL)" build --overwrite --owner-root "$(FLY_ACIDIR)" "$(FLY_STAGE1)") + +INSTALL_DIRS += \ + $(FLY_TOOLSDIR):- \ + $(FLY_ACIDIR):- \ + $(FLY_ACIROOTFSDIR):- + +FLY_FLAVORS := $(call commas-to-spaces,$(RKT_STAGE1_FLAVORS)) + +CLEAN_FILES += $(FLY_STAGE1) + +ifneq ($(filter fly,$(FLY_FLAVORS)),) + +# actually build the fly stage1 only if requested + +TOPLEVEL_STAMPS += $(FLY_STAMP) + +endif + +$(call undefine-namespaces,FLY _FLY) From 96c8677c94eda2b00dde9b45aed5e82b86deb111 Mon Sep 17 00:00:00 2001 From: Stefan Junker Date: Fri, 4 Dec 2015 00:48:16 +0100 Subject: [PATCH 2/6] stage1: refactor common stage1 code --- stage1/common/run.go | 50 ++++++++ stage1/common/types/pod.go | 80 +++++++++++++ .../{kvm/mount.go => common/kvm_mount.go} | 7 +- .../kvm_mount_test.go} | 2 +- stage1/init/{ => common}/path.go | 12 +- stage1/init/{ => common}/pod.go | 107 +++++------------- stage1/init/{ => common}/pod_test.go | 8 +- stage1/init/init.go | 60 ++++------ stage1/init/kvm.go | 9 +- 9 files changed, 196 insertions(+), 139 deletions(-) create mode 100644 stage1/common/run.go create mode 100644 stage1/common/types/pod.go rename stage1/init/{kvm/mount.go => common/kvm_mount.go} (98%) rename stage1/init/{kvm/mount_test.go => common/kvm_mount_test.go} (99%) rename stage1/init/{ => common}/path.go (90%) rename stage1/init/{ => common}/pod.go (83%) rename stage1/init/{ => common}/pod_test.go (95%) diff --git a/stage1/common/run.go b/stage1/common/run.go new file mode 100644 index 0000000000..edabd6dc30 --- /dev/null +++ b/stage1/common/run.go @@ -0,0 +1,50 @@ +// Copyright 2015 The rkt Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package stage1_common
+
+import (
+	"fmt"
+	"io/ioutil"
+	"os"
+	"path/filepath"
+
+	"github.com/coreos/rkt/pkg/sys"
+)
+
+// WithClearedCloExec calls sys.CloseOnExec(lfd, false), runs f, and defers sys.CloseOnExec(lfd, true).
+func WithClearedCloExec(lfd int, f func() error) error {
+	err := sys.CloseOnExec(lfd, false)
+	if err != nil {
+		return err
+	}
+	defer sys.CloseOnExec(lfd, true)
+	return f()
+}
+
+// WritePpid writes pid, followed by a newline, to the file "ppid" in the current
+// working directory, as specified in Documentation/devel/stage1-implementors-guide.md
+func WritePpid(pid int) error {
+	out, err := os.Getwd()
+	if err != nil {
+		return fmt.Errorf("cannot get current working directory: %v", err)
+	}
+	// we are the parent of the process that is PID 1 in the container so we write our PID to "ppid"
+	err = ioutil.WriteFile(filepath.Join(out, "ppid"),
+		[]byte(fmt.Sprintf("%d\n", pid)), 0644)
+	if err != nil {
+		return fmt.Errorf("cannot write ppid file: %v", err)
+	}
+	return nil
+}
diff --git a/stage1/common/types/pod.go b/stage1/common/types/pod.go
new file mode 100644
index 0000000000..7a4dbe24a2
--- /dev/null
+++ b/stage1/common/types/pod.go
@@ -0,0 +1,80 @@
+// Copyright 2015 The rkt Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and +// limitations under the License. + +package types + +import ( + "encoding/json" + "fmt" + "io/ioutil" + + "github.com/coreos/rkt/common" + + "github.com/coreos/rkt/Godeps/_workspace/src/github.com/appc/spec/schema" + "github.com/coreos/rkt/Godeps/_workspace/src/github.com/appc/spec/schema/types" +) + +// Pod encapsulates a PodManifest and ImageManifests +type Pod struct { + Root string // root directory where the pod will be located + UUID types.UUID + Manifest *schema.PodManifest + Images map[string]*schema.ImageManifest + MetadataServiceURL string + Networks []string +} + +// LoadPod loads a Pod Manifest (as prepared by stage0) and +// its associated Application Manifests, under $root/stage1/opt/stage1/$apphash +func LoadPod(root string, uuid *types.UUID) (*Pod, error) { + p := &Pod{ + Root: root, + UUID: *uuid, + Images: make(map[string]*schema.ImageManifest), + } + + buf, err := ioutil.ReadFile(common.PodManifestPath(p.Root)) + if err != nil { + return nil, fmt.Errorf("failed reading pod manifest: %v", err) + } + + pm := &schema.PodManifest{} + if err := json.Unmarshal(buf, pm); err != nil { + return nil, fmt.Errorf("failed unmarshalling pod manifest: %v", err) + } + p.Manifest = pm + + for i, app := range p.Manifest.Apps { + ampath := common.ImageManifestPath(p.Root, app.Name) + buf, err := ioutil.ReadFile(ampath) + if err != nil { + return nil, fmt.Errorf("failed reading app manifest %q: %v", ampath, err) + } + + am := &schema.ImageManifest{} + if err = json.Unmarshal(buf, am); err != nil { + return nil, fmt.Errorf("failed unmarshalling app manifest %q: %v", ampath, err) + } + + if _, ok := p.Images[app.Name.String()]; ok { + return nil, fmt.Errorf("got multiple definitions for app: %v", app.Name) + } + if app.App == nil { + p.Manifest.Apps[i].App = am.App + } + p.Images[app.Name.String()] = am + } + + return p, nil +} diff --git a/stage1/init/kvm/mount.go 
b/stage1/init/common/kvm_mount.go similarity index 98% rename from stage1/init/kvm/mount.go rename to stage1/init/common/kvm_mount.go index 5c6c730dcd..94e82e9757 100644 --- a/stage1/init/kvm/mount.go +++ b/stage1/init/common/kvm_mount.go @@ -33,7 +33,7 @@ // - bind mounting is realized by appToSystemdMountUnits (for each app), // which creates mount.units (bind) required and ordered before particular application // note: systemd mount units require /usr/bin/mount -package kvm +package common import ( "fmt" @@ -47,7 +47,6 @@ import ( "github.com/coreos/rkt/Godeps/_workspace/src/github.com/appc/spec/schema/types" "github.com/coreos/rkt/Godeps/_workspace/src/github.com/coreos/go-systemd/unit" "github.com/coreos/rkt/common" - initcommon "github.com/coreos/rkt/stage1/init/common" ) const ( @@ -162,7 +161,7 @@ func AppToSystemdMountUnits(root string, appName types.ACName, volumes []types.V vols[v.Name] = v } - mounts := initcommon.GenerateMounts(ra, vols) + mounts := GenerateMounts(ra, vols) for _, m := range mounts { vol := vols[m.Volume] @@ -212,7 +211,7 @@ func AppToSystemdMountUnits(root string, appName types.ACName, volumes []types.V // TODO(iaguis) when we update util-linux to 2.27, this code can go // away and we can bind-mount RO with one unit file. 
// http://ftp.kernel.org/pub/linux/utils/util-linux/v2.27/v2.27-ReleaseNotes - if initcommon.IsMountReadOnly(vol, app.MountPoints) { + if IsMountReadOnly(vol, app.MountPoints) { opts := []*unit.UnitOption{ unit.NewUnitOption("Unit", "Description", fmt.Sprintf("Remount read-only unit for %s", wherePath)), unit.NewUnitOption("Unit", "DefaultDependencies", "false"), diff --git a/stage1/init/kvm/mount_test.go b/stage1/init/common/kvm_mount_test.go similarity index 99% rename from stage1/init/kvm/mount_test.go rename to stage1/init/common/kvm_mount_test.go index 17c4a9e3cf..98d79c47cb 100644 --- a/stage1/init/kvm/mount_test.go +++ b/stage1/init/common/kvm_mount_test.go @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -package kvm +package common import ( "testing" diff --git a/stage1/init/path.go b/stage1/init/common/path.go similarity index 90% rename from stage1/init/path.go rename to stage1/init/common/path.go index 903f0bddef..3e5d78af5c 100644 --- a/stage1/init/path.go +++ b/stage1/init/common/path.go @@ -14,7 +14,7 @@ //+build linux -package main +package common import ( "path/filepath" @@ -26,9 +26,9 @@ import ( const ( envDir = "/rkt/env" // TODO(vc): perhaps this doesn't belong in /rkt? - unitsDir = "/usr/lib/systemd/system" - defaultWantsDir = unitsDir + "/default.target.wants" - socketsWantsDir = unitsDir + "/sockets.target.wants" + UnitsDir = "/usr/lib/systemd/system" + defaultWantsDir = UnitsDir + "/default.target.wants" + socketsWantsDir = UnitsDir + "/sockets.target.wants" ) // ServiceUnitName returns a systemd service unit name for the given app name. @@ -39,7 +39,7 @@ func ServiceUnitName(appName types.ACName) string { // ServiceUnitPath returns the path to the systemd service file for the given // app name. 
func ServiceUnitPath(root string, appName types.ACName) string { - return filepath.Join(common.Stage1RootfsPath(root), unitsDir, ServiceUnitName(appName)) + return filepath.Join(common.Stage1RootfsPath(root), UnitsDir, ServiceUnitName(appName)) } // RelEnvFilePath returns the path to the environment file for the given app name @@ -74,7 +74,7 @@ func SocketUnitName(appName types.ACName) string { // SocketUnitPath returns the path to the systemd socket file for the given app name. func SocketUnitPath(root string, appName types.ACName) string { - return filepath.Join(common.Stage1RootfsPath(root), unitsDir, SocketUnitName(appName)) + return filepath.Join(common.Stage1RootfsPath(root), UnitsDir, SocketUnitName(appName)) } // SocketWantPath returns the systemd sockets.target.wants symlink path for the diff --git a/stage1/init/pod.go b/stage1/init/common/pod.go similarity index 83% rename from stage1/init/pod.go rename to stage1/init/common/pod.go index 55b21a3ee3..24b5cd674c 100644 --- a/stage1/init/pod.go +++ b/stage1/init/common/pod.go @@ -14,11 +14,10 @@ //+build linux -package main +package common import ( "bytes" - "encoding/json" "fmt" "io" "io/ioutil" @@ -29,29 +28,19 @@ import ( "strconv" "strings" + stage1commontypes "github.com/coreos/rkt/stage1/common/types" + "github.com/coreos/rkt/Godeps/_workspace/src/github.com/appc/spec/schema" "github.com/coreos/rkt/Godeps/_workspace/src/github.com/appc/spec/schema/types" "github.com/coreos/rkt/Godeps/_workspace/src/github.com/coreos/go-systemd/unit" "github.com/coreos/rkt/common" "github.com/coreos/rkt/common/cgroup" "github.com/coreos/rkt/pkg/uid" - initcommon "github.com/coreos/rkt/stage1/init/common" - "github.com/coreos/rkt/stage1/init/kvm" ) -// Pod encapsulates a PodManifest and ImageManifests -type Pod struct { - Root string // root directory where the pod will be located - UUID types.UUID - Manifest *schema.PodManifest - Images map[string]*schema.ImageManifest - MetadataServiceURL string - Networks []string -} 
- const ( // Name of the file storing the pod's flavor - flavorFile = "flavor" + FlavorFile = "flavor" sharedVolPerm = os.FileMode(0755) ) @@ -65,50 +54,6 @@ var ( } ) -// LoadPod loads a Pod Manifest (as prepared by stage0) and -// its associated Application Manifests, under $root/stage1/opt/stage1/$apphash -func LoadPod(root string, uuid *types.UUID) (*Pod, error) { - p := &Pod{ - Root: root, - UUID: *uuid, - Images: make(map[string]*schema.ImageManifest), - } - - buf, err := ioutil.ReadFile(common.PodManifestPath(p.Root)) - if err != nil { - return nil, fmt.Errorf("failed reading pod manifest: %v", err) - } - - pm := &schema.PodManifest{} - if err := json.Unmarshal(buf, pm); err != nil { - return nil, fmt.Errorf("failed unmarshalling pod manifest: %v", err) - } - p.Manifest = pm - - for i, app := range p.Manifest.Apps { - ampath := common.ImageManifestPath(p.Root, app.Name) - buf, err := ioutil.ReadFile(ampath) - if err != nil { - return nil, fmt.Errorf("failed reading app manifest %q: %v", ampath, err) - } - - am := &schema.ImageManifest{} - if err = json.Unmarshal(buf, am); err != nil { - return nil, fmt.Errorf("failed unmarshalling app manifest %q: %v", ampath, err) - } - - if _, ok := p.Images[app.Name.String()]; ok { - return nil, fmt.Errorf("got multiple definitions for app: %v", app.Name) - } - if app.App == nil { - p.Manifest.Apps[i].App = am.App - } - p.Images[app.Name.String()] = am - } - - return p, nil -} - // execEscape uses Golang's string quoting for ", \, \n, and regex for special cases func execEscape(i int, str string) string { escapeMap := map[string]string{ @@ -146,7 +91,7 @@ func quoteExec(exec []string) string { return strings.Join(qexec, " ") } -func (p *Pod) WriteDefaultTarget() error { +func WriteDefaultTarget(p *stage1commontypes.Pod) error { opts := []*unit.UnitOption{ unit.NewUnitOption("Unit", "Description", "rkt apps target"), unit.NewUnitOption("Unit", "DefaultDependencies", "false"), @@ -159,7 +104,7 @@ func (p *Pod) 
WriteDefaultTarget() error { opts = append(opts, unit.NewUnitOption("Unit", "Wants", serviceName)) } - unitsPath := filepath.Join(common.Stage1RootfsPath(p.Root), unitsDir) + unitsPath := filepath.Join(common.Stage1RootfsPath(p.Root), UnitsDir) file, err := os.OpenFile(filepath.Join(unitsPath, "default.target"), os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0644) if err != nil { return err @@ -173,7 +118,7 @@ func (p *Pod) WriteDefaultTarget() error { return nil } -func (p *Pod) WritePrepareAppTemplate() error { +func WritePrepareAppTemplate(p *stage1commontypes.Pod) error { opts := []*unit.UnitOption{ unit.NewUnitOption("Unit", "Description", "Prepare minimum environment for chrooted applications"), unit.NewUnitOption("Unit", "DefaultDependencies", "false"), @@ -188,7 +133,7 @@ func (p *Pod) WritePrepareAppTemplate() error { unit.NewUnitOption("Service", "CapabilityBoundingSet", "CAP_SYS_ADMIN CAP_DAC_OVERRIDE"), } - unitsPath := filepath.Join(common.Stage1RootfsPath(p.Root), unitsDir) + unitsPath := filepath.Join(common.Stage1RootfsPath(p.Root), UnitsDir) file, err := os.OpenFile(filepath.Join(unitsPath, "prepare-app@.service"), os.O_WRONLY|os.O_CREATE, 0644) if err != nil { return fmt.Errorf("failed to create service unit file: %v", err) @@ -202,7 +147,7 @@ func (p *Pod) WritePrepareAppTemplate() error { return nil } -func (p *Pod) writeAppReaper(appName string) error { +func writeAppReaper(p *stage1commontypes.Pod, appName string) error { opts := []*unit.UnitOption{ unit.NewUnitOption("Unit", "Description", fmt.Sprintf("%s Reaper", appName)), unit.NewUnitOption("Unit", "DefaultDependencies", "false"), @@ -216,7 +161,7 @@ func (p *Pod) writeAppReaper(appName string) error { unit.NewUnitOption("Service", "ExecStop", fmt.Sprintf("/reaper.sh %s", appName)), } - unitsPath := filepath.Join(common.Stage1RootfsPath(p.Root), unitsDir) + unitsPath := filepath.Join(common.Stage1RootfsPath(p.Root), UnitsDir) file, err := os.OpenFile(filepath.Join(unitsPath, 
fmt.Sprintf("reaper-%s.service", appName)), os.O_WRONLY|os.O_CREATE, 0644) if err != nil { return fmt.Errorf("failed to create service unit file: %v", err) @@ -239,7 +184,7 @@ func generateGidArg(gid int, supplGid []int) string { } // appToSystemd transforms the provided RuntimeApp+ImageManifest into systemd units -func (p *Pod) appToSystemd(ra *schema.RuntimeApp, interactive bool, flavor string, privateUsers string) error { +func appToSystemd(p *stage1commontypes.Pod, ra *schema.RuntimeApp, interactive bool, flavor string, privateUsers string) error { app := ra.App appName := ra.Name image, ok := p.Images[appName.String()] @@ -265,7 +210,7 @@ func (p *Pod) appToSystemd(ra *schema.RuntimeApp, interactive bool, flavor strin env.Set("AC_METADATA_URL", p.MetadataServiceURL) } - if err := p.writeEnvFile(env, appName, privateUsers); err != nil { + if err := writeEnvFile(p, env, appName, privateUsers); err != nil { return fmt.Errorf("unable to write environment file: %v", err) } @@ -411,14 +356,14 @@ func (p *Pod) appToSystemd(ra *schema.RuntimeApp, interactive bool, flavor strin if flavor == "kvm" { // bind mount all shared volumes from /mnt/volumeName (we don't use mechanism for bind-mounting given by nspawn) - err := kvm.AppToSystemdMountUnits(common.Stage1RootfsPath(p.Root), appName, p.Manifest.Volumes, ra, unitsDir) + err := AppToSystemdMountUnits(common.Stage1RootfsPath(p.Root), appName, p.Manifest.Volumes, ra, UnitsDir) if err != nil { return fmt.Errorf("failed to prepare mount units: %v", err) } } - if err = p.writeAppReaper(appName.String()); err != nil { + if err = writeAppReaper(p, appName.String()); err != nil { return fmt.Errorf("Failed to write app %q reaper service: %v\n", appName, err) } @@ -428,7 +373,7 @@ func (p *Pod) appToSystemd(ra *schema.RuntimeApp, interactive bool, flavor strin // writeEnvFile creates an environment file for given app name, the minimum // required environment variables by the appc spec will be set to sensible // defaults here if 
they're not provided by env. -func (p *Pod) writeEnvFile(env types.Environment, appName types.ACName, privateUsers string) error { +func writeEnvFile(p *stage1commontypes.Pod, env types.Environment, appName types.ACName, privateUsers string) error { ef := bytes.Buffer{} for dk, dv := range defaultEnv { @@ -462,11 +407,11 @@ func (p *Pod) writeEnvFile(env types.Environment, appName types.ACName, privateU // PodToSystemd creates the appropriate systemd service unit files for // all the constituent apps of the Pod -func (p *Pod) PodToSystemd(interactive bool, flavor string, privateUsers string) error { +func PodToSystemd(p *stage1commontypes.Pod, interactive bool, flavor string, privateUsers string) error { for i := range p.Manifest.Apps { ra := &p.Manifest.Apps[i] - if err := p.appToSystemd(ra, interactive, flavor, privateUsers); err != nil { + if err := appToSystemd(p, ra, interactive, flavor, privateUsers); err != nil { return fmt.Errorf("failed to transform app %q into systemd service: %v", ra.Name, err) } } @@ -475,7 +420,7 @@ func (p *Pod) PodToSystemd(interactive bool, flavor string, privateUsers string) // appToNspawnArgs transforms the given app manifest, with the given associated // app name, into a subset of applicable systemd-nspawn argument -func (p *Pod) appToNspawnArgs(ra *schema.RuntimeApp) ([]string, error) { +func appToNspawnArgs(p *stage1commontypes.Pod, ra *schema.RuntimeApp) ([]string, error) { var args []string appName := ra.Name app := ra.App @@ -493,7 +438,7 @@ func (p *Pod) appToNspawnArgs(ra *schema.RuntimeApp) ([]string, error) { vols[v.Name] = v } - mounts := initcommon.GenerateMounts(ra, vols) + mounts := GenerateMounts(ra, vols) for _, m := range mounts { vol := vols[m.Volume] @@ -516,7 +461,7 @@ func (p *Pod) appToNspawnArgs(ra *schema.RuntimeApp) ([]string, error) { opt := make([]string, 4) - if initcommon.IsMountReadOnly(vol, app.MountPoints) { + if IsMountReadOnly(vol, app.MountPoints) { opt[0] = "--bind-ro=" } else { opt[0] = 
"--bind=" @@ -560,15 +505,15 @@ func (p *Pod) appToNspawnArgs(ra *schema.RuntimeApp) ([]string, error) { // PodToNspawnArgs renders a prepared Pod as a systemd-nspawn // argument list ready to be executed -func (p *Pod) PodToNspawnArgs() ([]string, error) { +func PodToNspawnArgs(p *stage1commontypes.Pod) ([]string, error) { args := []string{ "--uuid=" + p.UUID.String(), - "--machine=" + p.GetMachineID(), + "--machine=" + GetMachineID(p), "--directory=" + common.Stage1RootfsPath(p.Root), } for i := range p.Manifest.Apps { - aa, err := p.appToNspawnArgs(&p.Manifest.Apps[i]) + aa, err := appToNspawnArgs(p, &p.Manifest.Apps[i]) if err != nil { return nil, err } @@ -578,7 +523,7 @@ func (p *Pod) PodToNspawnArgs() ([]string, error) { return args, nil } -func (p *Pod) getFlavor() (flavor string, systemdVersion string, err error) { +func GetFlavor(p *stage1commontypes.Pod) (flavor string, systemdVersion string, err error) { flavor, err = os.Readlink(filepath.Join(common.Stage1RootfsPath(p.Root), "flavor")) if err != nil { return "", "", fmt.Errorf("unable to determine stage1 flavor: %v", err) @@ -598,7 +543,7 @@ func (p *Pod) getFlavor() (flavor string, systemdVersion string, err error) { } // GetAppHashes returns a list of hashes of the apps in this pod -func (p *Pod) GetAppHashes() []types.Hash { +func GetAppHashes(p *stage1commontypes.Pod) []types.Hash { var names []types.Hash for _, a := range p.Manifest.Apps { names = append(names, a.Image.ID) @@ -609,6 +554,6 @@ func (p *Pod) GetAppHashes() []types.Hash { // GetMachineID returns the machine id string of the pod to be passed to // systemd-nspawn -func (p *Pod) GetMachineID() string { +func GetMachineID(p *stage1commontypes.Pod) string { return "rkt-" + p.UUID.String() } diff --git a/stage1/init/pod_test.go b/stage1/init/common/pod_test.go similarity index 95% rename from stage1/init/pod_test.go rename to stage1/init/common/pod_test.go index 84bf69eb7c..84261f4801 100644 --- a/stage1/init/pod_test.go +++ 
b/stage1/init/common/pod_test.go @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -package main +package common import ( "io/ioutil" @@ -20,6 +20,8 @@ import ( "regexp" "testing" + stage1commontypes "github.com/coreos/rkt/stage1/common/types" + "github.com/coreos/rkt/Godeps/_workspace/src/github.com/appc/spec/schema" "github.com/coreos/rkt/Godeps/_workspace/src/github.com/appc/spec/schema/types" ) @@ -150,8 +152,8 @@ func TestAppToNspawnArgsOverridesImageManifestReadOnly(t *testing.T) { } defer os.RemoveAll(tmpDir) - p := &Pod{Manifest: podManifest, Root: tmpDir} - output, err := p.appToNspawnArgs(appManifest) + p := &stage1commontypes.Pod{Manifest: podManifest, Root: tmpDir} + output, err := appToNspawnArgs(p, appManifest) if err != nil { t.Errorf("#%d: unexpected error: `%v`", i, err) } diff --git a/stage1/init/init.go b/stage1/init/init.go index 8a0dcc1679..ac5bfca2a7 100644 --- a/stage1/init/init.go +++ b/stage1/init/init.go @@ -39,6 +39,10 @@ import ( "github.com/coreos/rkt/Godeps/_workspace/src/github.com/godbus/dbus" "github.com/coreos/rkt/Godeps/_workspace/src/github.com/godbus/dbus/introspect" + stage1common "github.com/coreos/rkt/stage1/common" + stage1commontypes "github.com/coreos/rkt/stage1/common/types" + stage1initcommon "github.com/coreos/rkt/stage1/init/common" + "github.com/coreos/rkt/common" "github.com/coreos/rkt/common/cgroup" "github.com/coreos/rkt/networking" @@ -214,12 +218,12 @@ func installAssets() error { } // getArgsEnv returns the nspawn or lkvm args and env according to the flavor used -func getArgsEnv(p *Pod, flavor string, debug bool, n *networking.Networking) ([]string, []string, error) { +func getArgsEnv(p *stage1commontypes.Pod, flavor string, debug bool, n *networking.Networking) ([]string, []string, error) { var args []string env := os.Environ() // We store the pod's flavor so we can later garbage collect it correctly - if err := os.Symlink(flavor, 
filepath.Join(p.Root, flavorFile)); err != nil { + if err := os.Symlink(flavor, filepath.Join(p.Root, stage1initcommon.FlavorFile)); err != nil { return nil, nil, fmt.Errorf("failed to create flavor symlink: %v", err) } @@ -285,7 +289,7 @@ func getArgsEnv(p *Pod, flavor string, debug bool, n *networking.Networking) ([] } // host volume sharing with 9p - nsargs := kvm.VolumesToKvmDiskArgs(p.Manifest.Volumes) + nsargs := stage1initcommon.VolumesToKvmDiskArgs(p.Manifest.Volumes) args = append(args, nsargs...) // lkvm requires $HOME to be defined, @@ -405,7 +409,7 @@ func getArgsEnv(p *Pod, flavor string, debug bool, n *networking.Networking) ([] args = append(args, "--private-users="+privateUsers) } - nsargs, err := p.PodToNspawnArgs() + nsargs, err := stage1initcommon.PodToNspawnArgs(p) if err != nil { return nil, nil, fmt.Errorf("failed to generate nspawn args: %v", err) } @@ -424,17 +428,7 @@ func getArgsEnv(p *Pod, flavor string, debug bool, n *networking.Networking) ([] return args, env, nil } -func withClearedCloExec(lfd int, f func() error) error { - err := sys.CloseOnExec(lfd, false) - if err != nil { - return err - } - defer sys.CloseOnExec(lfd, true) - - return f() -} - -func forwardedPorts(pod *Pod) ([]networking.ForwardedPort, error) { +func forwardedPorts(pod *stage1commontypes.Pod) ([]networking.ForwardedPort, error) { var fps []networking.ForwardedPort for _, ep := range pod.Manifest.Ports { @@ -468,22 +462,6 @@ func forwardedPorts(pod *Pod) ([]networking.ForwardedPort, error) { return fps, nil } -func writePpid(pid int) error { - // write ppid file as specified in - // Documentation/devel/stage1-implementors-guide.md - out, err := os.Getwd() - if err != nil { - return fmt.Errorf("Cannot get current working directory: %v\n", err) - } - // we are the parent of the process that is PID 1 in the container so we write our PID to "ppid" - err = ioutil.WriteFile(filepath.Join(out, "ppid"), - []byte(fmt.Sprintf("%d\n", pid)), 0644) - if err != nil { - return 
fmt.Errorf("Cannot write ppid file: %v\n", err) - } - return nil -} - func stage1() int { uuid, err := types.NewUUID(flag.Arg(0)) if err != nil { @@ -492,7 +470,7 @@ func stage1() int { } root := "." - p, err := LoadPod(root, uuid) + p, err := stage1commontypes.LoadPod(root, uuid) if err != nil { fmt.Fprintf(os.Stderr, "Failed to load pod: %v\n", err) return 1 @@ -513,7 +491,7 @@ func stage1() int { mirrorLocalZoneInfo(p.Root) - flavor, _, err := p.getFlavor() + flavor, _, err := stage1initcommon.GetFlavor(p) if err != nil { fmt.Fprintf(os.Stderr, "Failed to get stage1 flavor: %v\n", err) return 3 @@ -558,24 +536,24 @@ func stage1() int { } } - if err = p.WriteDefaultTarget(); err != nil { + if err = stage1initcommon.WriteDefaultTarget(p); err != nil { fmt.Fprintf(os.Stderr, "Failed to write default.target: %v\n", err) return 2 } - if err = p.WritePrepareAppTemplate(); err != nil { + if err = stage1initcommon.WritePrepareAppTemplate(p); err != nil { fmt.Fprintf(os.Stderr, "Failed to write prepare-app service template: %v\n", err) return 2 } if flavor == "kvm" { - if err := p.KvmPodToSystemd(n); err != nil { + if err := KvmPodToSystemd(p, n); err != nil { fmt.Fprintf(os.Stderr, "Failed to configure systemd for kvm: %v\n", err) return 2 } } - if err = p.PodToSystemd(interactive, flavor, privateUsers); err != nil { + if err = stage1initcommon.PodToSystemd(p, interactive, flavor, privateUsers); err != nil { fmt.Fprintf(os.Stderr, "Failed to configure systemd: %v\n", err) return 2 } @@ -618,10 +596,10 @@ func stage1() int { var serviceNames []string for _, app := range p.Manifest.Apps { - serviceNames = append(serviceNames, ServiceUnitName(app.Name)) + serviceNames = append(serviceNames, stage1initcommon.ServiceUnitName(app.Name)) } s1Root := common.Stage1RootfsPath(p.Root) - machineID := p.GetMachineID() + machineID := stage1initcommon.GetMachineID(p) subcgroup, err := getContainerSubCgroup(machineID) if err == nil { if err := mountContainerCgroups(s1Root, 
enabledCgroups, subcgroup, serviceNames); err != nil { @@ -632,12 +610,12 @@ func stage1() int { fmt.Fprintf(os.Stderr, "Continuing with per-app isolators disabled: %v\n", err) } - if err = writePpid(os.Getpid()); err != nil { + if err = stage1common.WritePpid(os.Getpid()); err != nil { fmt.Fprintln(os.Stderr, err.Error()) return 4 } - err = withClearedCloExec(lfd, func() error { + err = stage1common.WithClearedCloExec(lfd, func() error { return syscall.Exec(args[0], args, env) }) if err != nil { diff --git a/stage1/init/kvm.go b/stage1/init/kvm.go index 3f7bfbb782..3c35777848 100644 --- a/stage1/init/kvm.go +++ b/stage1/init/kvm.go @@ -24,14 +24,17 @@ import ( "github.com/coreos/rkt/common" "github.com/coreos/rkt/networking" "github.com/coreos/rkt/stage1/init/kvm" + + stage1commontypes "github.com/coreos/rkt/stage1/common/types" + stage1initcommon "github.com/coreos/rkt/stage1/init/common" ) -func (p *Pod) KvmPodToSystemd(n *networking.Networking) error { +func KvmPodToSystemd(p *stage1commontypes.Pod, n *networking.Networking) error { podRoot := common.Stage1RootfsPath(p.Root) // networking netDescriptions := kvm.GetNetworkDescriptions(n) - if err := kvm.GenerateNetworkInterfaceUnits(filepath.Join(podRoot, unitsDir), netDescriptions); err != nil { + if err := kvm.GenerateNetworkInterfaceUnits(filepath.Join(podRoot, stage1initcommon.UnitsDir), netDescriptions); err != nil { return fmt.Errorf("failed to transform networking to units: %v", err) } @@ -44,7 +47,7 @@ func (p *Pod) KvmPodToSystemd(n *networking.Networking) error { } // mount host volumes through some remote file system e.g. 
9p to /mnt/volumeName location // order is important here: podToSystemHostMountUnits prepares folders that are checked by each appToSystemdMountUnits later - if err := kvm.PodToSystemdHostMountUnits(podRoot, p.Manifest.Volumes, appNames, unitsDir); err != nil { + if err := stage1initcommon.PodToSystemdHostMountUnits(podRoot, p.Manifest.Volumes, appNames, stage1initcommon.UnitsDir); err != nil { return fmt.Errorf("failed to transform pod volumes into mount units: %v", err) } From aae2e4e719145fd192df5def839dbc81fc45c5ce Mon Sep 17 00:00:00 2001 From: Stefan Junker Date: Fri, 4 Dec 2015 00:43:18 +0100 Subject: [PATCH 3/6] stage1_fly: add gc and enter stubs --- stage1_fly/enter/main.go | 25 +++++++++++++++++++++++++ stage1_fly/gc/main.go | 25 +++++++++++++++++++++++++ 2 files changed, 50 insertions(+) create mode 100644 stage1_fly/enter/main.go create mode 100644 stage1_fly/gc/main.go diff --git a/stage1_fly/enter/main.go b/stage1_fly/enter/main.go new file mode 100644 index 0000000000..24fa27b45b --- /dev/null +++ b/stage1_fly/enter/main.go @@ -0,0 +1,25 @@ +// Copyright 2015 The rkt Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package main + +import ( + "log" + "os" +) + +func main() { + log.Printf("Not doing anything here! 
(%+v)", os.Args) + return +} diff --git a/stage1_fly/gc/main.go b/stage1_fly/gc/main.go new file mode 100644 index 0000000000..24fa27b45b --- /dev/null +++ b/stage1_fly/gc/main.go @@ -0,0 +1,25 @@ +// Copyright 2015 The rkt Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package main + +import ( + "log" + "os" +) + +func main() { + log.Printf("Not doing anything here! (%+v)", os.Args) + return +} From 294d0d0a3dcbe558413cafb0e00d1ccf61f41f22 Mon Sep 17 00:00:00 2001 From: Stefan Junker Date: Thu, 3 Dec 2015 17:59:42 +0100 Subject: [PATCH 4/6] fly: add new stage1 --- configure.ac | 6 +- stage1_fly/gc/main.go | 22 ++- stage1_fly/run/main.go | 346 +++++++++++++++++++++++++++++++++++++++++ 3 files changed, 369 insertions(+), 5 deletions(-) create mode 100644 stage1_fly/run/main.go diff --git a/configure.ac b/configure.ac index 6ccc0ae7ed..03b034851c 100644 --- a/configure.ac +++ b/configure.ac @@ -59,7 +59,7 @@ AC_ARG_WITH([stage1-default-flavor], AC_ARG_WITH([stage1-flavors], [AS_HELP_STRING([--with-stage1-flavors], - [comma-separated list of stage1 flavors; choose from 'src', 'coreos', 'host', 'kvm', 'fly'; default: 'coreos,kvm'])], + [comma-separated list of stage1 flavors; choose from 'src', 'coreos', 'host', 'kvm', 'fly'; default: 'coreos,kvm,fly'])], [RKT_STAGE1_FLAVORS="${withval}"], [RKT_STAGE1_FLAVORS=auto]) @@ -153,7 +153,7 @@ AS_CASE([${RKT_STAGE1_SETUP_KIND}], dnl and set the first flavor in the list as a dnl default unless specified. 
[AS_VAR_IF([RKT_STAGE1_FLAVORS],[auto], - [RKT_STAGE1_FLAVORS='coreos,kvm']) + [RKT_STAGE1_FLAVORS='coreos,kvm,fly']) AS_VAR_IF([RKT_STAGE1_DEFAULT_FLAVOR],[auto], [RKT_STAGE1_DEFAULT_FLAVOR=`AS_ECHO([${RKT_STAGE1_FLAVORS}]) | cut -d, -f1`]) RKT_STAGE1_DEFAULT_NAME='' @@ -232,7 +232,7 @@ RKT_ITERATE_FLAVORS([${RKT_STAGE1_FLAVORS}],[flavor], [host], [], [fly], - [], + [AC_MSG_WARN([* fly is an experimental stage1 implementation with almost no isolation and less features])], [AC_MSG_ERROR([*** Unhandled flavor "${flavor}", should not happen])])]) dnl Validate passed default flavor, it should be one of the built diff --git a/stage1_fly/gc/main.go b/stage1_fly/gc/main.go index 24fa27b45b..763ceccb86 100644 --- a/stage1_fly/gc/main.go +++ b/stage1_fly/gc/main.go @@ -15,11 +15,29 @@ package main import ( + "flag" + "io/ioutil" "log" - "os" ) +const ( + mountinfoPath = "/proc/self/mountinfo" +) + +var ( + debug bool +) + +func init() { + flag.BoolVar(&debug, "debug", false, "Run in debug mode") +} + func main() { - log.Printf("Not doing anything here! (%+v)", os.Args) + flag.Parse() + + if !debug { + log.SetOutput(ioutil.Discard) + } + log.Printf("Not doing anything since stage0 is cleaning up the mounts") return } diff --git a/stage1_fly/run/main.go b/stage1_fly/run/main.go new file mode 100644 index 0000000000..b41a0dd344 --- /dev/null +++ b/stage1_fly/run/main.go @@ -0,0 +1,346 @@ +// Copyright 2015 The rkt Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +package main + +import ( + "bufio" + "flag" + "fmt" + "io/ioutil" + "log" + "os" + "path/filepath" + "strings" + "syscall" + + stage1common "github.com/coreos/rkt/stage1/common" + stage1commontypes "github.com/coreos/rkt/stage1/common/types" + + "github.com/coreos/rkt/Godeps/_workspace/src/github.com/appc/spec/schema" + "github.com/coreos/rkt/Godeps/_workspace/src/github.com/appc/spec/schema/types" + + "github.com/coreos/rkt/common" + "github.com/coreos/rkt/pkg/sys" +) + +const ( + flavor = "fly" +) + +type flyMount struct { + HostPath string + TargetPrefixPath string + RelTargetPath string + Fs string + Flags uintptr +} + +type volumeMountTuple struct { + V types.Volume + M schema.Mount +} + +var ( + debug bool + + discardNetlist common.NetList + discardBool bool + discardString string +) + +func getHostMounts() (map[string]struct{}, error) { + hostMounts := map[string]struct{}{} + + mi, err := os.Open("/proc/self/mountinfo") + if err != nil { + return nil, err + } + defer mi.Close() + + sc := bufio.NewScanner(mi) + for sc.Scan() { + var ( + discard string + mountPoint string + ) + + _, err := fmt.Sscanf(sc.Text(), + "%s %s %s %s %s", + &discard, &discard, &discard, &discard, &mountPoint, + ) + if err != nil { + return nil, err + } + + hostMounts[mountPoint] = struct{}{} + } + if sc.Err() != nil { + return nil, fmt.Errorf("problem parsing mountinfo: %v", sc.Err()) + } + return hostMounts, nil +} + +func init() { + flag.BoolVar(&debug, "debug", false, "Run in debug mode") + + // The following flags need to be supported by stage1 according to + // https://github.com/coreos/rkt/blob/master/Documentation/devel/stage1-implementors-guide.md + // TODO: either implement functionality or give not implemented warnings + flag.Var(&discardNetlist, "net", "Setup networking") + flag.BoolVar(&discardBool, "interactive", true, "The pod is interactive") + 
flag.StringVar(&discardString, "mds-token", "", "MDS auth token") + flag.StringVar(&discardString, "local-config", common.DefaultLocalConfigDir, "Local config path") +} + +func evaluateMounts(rfs string, app string, p *stage1commontypes.Pod) ([]flyMount, error) { + imApp := p.Images[app].App + namedVolumeMounts := map[types.ACName]volumeMountTuple{} + + for _, m := range p.Manifest.Apps[0].Mounts { + _, exists := namedVolumeMounts[m.Volume] + if exists { + return nil, fmt.Errorf("duplicate mount given: %q", m.Volume) + } + namedVolumeMounts[m.Volume] = volumeMountTuple{M: m} + log.Printf("Adding %+v", namedVolumeMounts[m.Volume]) + } + + // Merge command-line Mounts with ImageManifest's MountPoints + for _, mp := range imApp.MountPoints { + tuple, exists := namedVolumeMounts[mp.Name] + switch { + case exists && tuple.M.Path != mp.Path: + return nil, fmt.Errorf("conflicting path information from mount and mountpoint %q", mp.Name) + case !exists: + namedVolumeMounts[mp.Name] = volumeMountTuple{M: schema.Mount{Volume: mp.Name, Path: mp.Path}} + log.Printf("Adding %+v", namedVolumeMounts[mp.Name]) + } + } + + // Insert the command-line Volumes + for _, v := range p.Manifest.Volumes { + // Check if we have a mount for this volume + tuple, exists := namedVolumeMounts[v.Name] + if !exists { + return nil, fmt.Errorf("missing mount for volume %q", v.Name) + } else if tuple.M.Volume != v.Name { + // assertion regarding the implementation, should never happen + return nil, fmt.Errorf("mismatched volume:mount pair: %q != %q", v.Name, tuple.M.Volume) + } + namedVolumeMounts[v.Name] = volumeMountTuple{V: v, M: tuple.M} + log.Printf("Adding %+v", namedVolumeMounts[v.Name]) + } + + // Merge command-line Volumes with ImageManifest's MountPoints + for _, mp := range imApp.MountPoints { + // Check if we have a volume for this mountpoint + tuple, exists := namedVolumeMounts[mp.Name] + if !exists || tuple.V.Name == "" { + return nil, fmt.Errorf("missing volume for mountpoint %q", 
mp.Name) + } + + // If empty, fill in ReadOnly bit + if tuple.V.ReadOnly == nil { + v := tuple.V + v.ReadOnly = &mp.ReadOnly + namedVolumeMounts[mp.Name] = volumeMountTuple{M: tuple.M, V: v} + log.Printf("Adding %+v", namedVolumeMounts[mp.Name]) + } + } + + // Gather host mounts which we make MS_SHARED if passed as a volume source + hostMounts, err := getHostMounts() + if err != nil { + return nil, fmt.Errorf("can't gather host mounts: %v", err) + } + + argFlyMounts := []flyMount{} + var flags uintptr = syscall.MS_BIND // TODO: allow optional | syscall.MS_REC + for _, tuple := range namedVolumeMounts { + if _, isHostMount := hostMounts[tuple.V.Source]; isHostMount { + // Mark the host mount as SHARED so the container's changes to the mount are propagated to the host + argFlyMounts = append(argFlyMounts, + flyMount{"", "", tuple.V.Source, "none", syscall.MS_REC | syscall.MS_SHARED}, + ) + } + argFlyMounts = append(argFlyMounts, + flyMount{tuple.V.Source, rfs, tuple.M.Path, "none", flags}, + ) + + if tuple.V.ReadOnly != nil && *tuple.V.ReadOnly { + argFlyMounts = append(argFlyMounts, + flyMount{"", rfs, tuple.M.Path, "none", flags | syscall.MS_REMOUNT | syscall.MS_RDONLY}, + ) + } + } + return argFlyMounts, nil +} + +func stage1() int { + uuid, err := types.NewUUID(flag.Arg(0)) + if err != nil { + fmt.Fprintf(os.Stderr, "UUID is missing or malformed\n") + return 1 + } + + root := "." 
+ p, err := stage1commontypes.LoadPod(root, uuid) + if err != nil { + fmt.Fprintf(os.Stderr, "can't load pod: %v\n", err) + return 1 + } + + if len(p.Manifest.Apps) != 1 { + fmt.Fprintf(os.Stderr, "flavor %q only supports 1 application per Pod for now.\n", flavor) + return 1 + } + + lfd, err := common.GetRktLockFD() + if err != nil { + fmt.Fprintf(os.Stderr, "can't get rkt lock fd: %v\n", err) + return 1 + } + + // set close-on-exec flag on RKT_LOCK_FD so it gets correctly closed after execution is finished + if err := sys.CloseOnExec(lfd, true); err != nil { + fmt.Fprintf(os.Stderr, "can't set FD_CLOEXEC on rkt lock: %v\n", err) + return 1 + } + + // TODO: insert environment from manifest + env := []string{"PATH=/bin:/sbin:/usr/bin:/usr/local/bin"} + args := p.Manifest.Apps[0].App.Exec + rfs := filepath.Join(common.AppPath(p.Root, p.Manifest.Apps[0].Name), "rootfs") + + argFlyMounts, err := evaluateMounts(rfs, string(p.Manifest.Apps[0].Name), p) + if err != nil { + fmt.Fprintf(os.Stderr, "can't evaluate mounts: %v\n", err) + return 1 + } + + effectiveMounts := append( + []flyMount{ + {"", "", "/dev", "none", syscall.MS_REC | syscall.MS_SHARED}, + {"/dev", rfs, "/dev", "none", syscall.MS_BIND | syscall.MS_REC}, + + {"", "", "/proc", "none", syscall.MS_REC | syscall.MS_SHARED}, + {"/proc", rfs, "/proc", "none", syscall.MS_BIND | syscall.MS_REC}, + + {"", "", "/sys", "none", syscall.MS_REC | syscall.MS_SHARED}, + {"/sys", rfs, "/sys", "none", syscall.MS_BIND | syscall.MS_REC}, + + {"tmpfs", rfs, "/tmp", "tmpfs", 0}, + }, + argFlyMounts..., + ) + + for _, mount := range effectiveMounts { + var ( + err error + hostPathInfo os.FileInfo + targetPathInfo os.FileInfo + ) + + if strings.HasPrefix(mount.HostPath, "/") { + if hostPathInfo, err = os.Stat(mount.HostPath); err != nil { + fmt.Fprintf(os.Stderr, "stat of host directory %s: \n%v", mount.HostPath, err) + return 1 + } + } else { + hostPathInfo = nil + } + + absTargetPath := filepath.Join(mount.TargetPrefixPath, 
mount.RelTargetPath) + if targetPathInfo, err = os.Stat(absTargetPath); err != nil && !os.IsNotExist(err) { + fmt.Fprintf(os.Stderr, "stat of target directory %s: \n%v\n", absTargetPath, err) + return 1 + } + + switch { + case targetPathInfo == nil: + absTargetPathParent, _ := filepath.Split(absTargetPath) + if err := os.MkdirAll(absTargetPathParent, 0700); err != nil { + fmt.Fprintf(os.Stderr, "can't create directory %q: \n%v", absTargetPath, err) + return 1 + } + switch { + case hostPathInfo == nil || hostPathInfo.IsDir(): + if err := os.Mkdir(absTargetPath, 0700); err != nil { + fmt.Fprintf(os.Stderr, "can't create directory %q: \n%v", absTargetPath, err) + return 1 + } + case !hostPathInfo.IsDir(): + file, err := os.OpenFile(absTargetPath, os.O_CREATE, 0700) + if err != nil { + fmt.Fprintf(os.Stderr, "can't create file %q: \n%v\n", absTargetPath, err) + return 1 + } + file.Close() + } + case hostPathInfo != nil: + switch { + case hostPathInfo.IsDir() && !targetPathInfo.IsDir(): + fmt.Fprintf(os.Stderr, "can't mount: %q is a directory while %q is not\n", mount.HostPath, absTargetPath) + return 1 + case !hostPathInfo.IsDir() && targetPathInfo.IsDir(): + fmt.Fprintf(os.Stderr, "can't mount: %q is not a directory while %q is\n", mount.HostPath, absTargetPath) + return 1 + } + } + + if err := syscall.Mount(mount.HostPath, absTargetPath, mount.Fs, mount.Flags, ""); err != nil { + fmt.Fprintf(os.Stderr, "can't mount %q on %q with flags %v: %v\n", mount.HostPath, absTargetPath, mount.Flags, err) + return 1 + } + } + + if err = stage1common.WritePpid(os.Getpid()); err != nil { + fmt.Fprintln(os.Stderr, err.Error()) + return 4 + } + + log.Printf("Chroot to %q", rfs) + if err := syscall.Chroot(rfs); err != nil { + fmt.Fprintf(os.Stderr, "can't chroot: %v\n", err) + return 1 + } + + if err := os.Chdir("/"); err != nil { + fmt.Fprintf(os.Stderr, "can't change to root new directory: %v\n", err) + return 1 + } + + log.Printf("Execing %q in %q", args, rfs) + err = 
stage1common.WithClearedCloExec(lfd, func() error { + return syscall.Exec(args[0], args, env) + }) + if err != nil { + fmt.Fprintf(os.Stderr, "can't execute %q: %v\n", args[0], err) + return 7 + } + + return 0 +} + +func main() { + flag.Parse() + + if !debug { + log.SetOutput(ioutil.Discard) + } + + // move code into stage1() helper so defered fns get run + os.Exit(stage1()) +} From 41edab8b113ddadadd5cb83505ec8f776f5d5e89 Mon Sep 17 00:00:00 2001 From: Stefan Junker Date: Fri, 18 Dec 2015 13:01:31 +0100 Subject: [PATCH 5/6] docs: stage1 fly and stub for rkt fly --- Documentation/build-configure.md | 3 +- Documentation/running-fly-stage1.md | 94 +++++++++++++++++++++++++++++ Documentation/subcommands/fly.md | 6 ++ 3 files changed, 102 insertions(+), 1 deletion(-) create mode 100644 Documentation/running-fly-stage1.md create mode 100644 Documentation/subcommands/fly.md diff --git a/Documentation/build-configure.md b/Documentation/build-configure.md index 9396673796..9807da432a 100644 --- a/Documentation/build-configure.md +++ b/Documentation/build-configure.md @@ -10,7 +10,7 @@ For a quick help of available parameters, run `./configure --help`. #### `--with-stage1-flavors` This parameter takes a comma-separated list of all the flavors that the build system should assemble. -Depending on a default stage1 image setup, this list is by default either empty or set to `coreos,kvm` for, respectively, detailed setup and flavor setup. +Depending on a default stage1 image setup, this list is by default either empty or set to `coreos,kvm,fly` for, respectively, detailed setup and flavor setup. Note that specifying this parameter does not necessarily mean that rkt will use them in the end. 
Available flavors are: @@ -18,6 +18,7 @@ Available flavors are: - `kvm` - it takes systemd, bash and other binaries from a CoreOS PXE image; uses lkvm - `src` - it builds systemd, takes bash from the host at build time; uses built systemd-nspawn - `host` - it takes systemd and bash from host at runtime; uses systemd-nspawn from the host +- `fly` - chroot-only approach for single-application minimal isolation containers; native Go implementation The `host` flavor is probably the best suited flavor for distributions that have strict rules about software sources. diff --git a/Documentation/running-fly-stage1.md b/Documentation/running-fly-stage1.md new file mode 100644 index 0000000000..ea9bcee337 --- /dev/null +++ b/Documentation/running-fly-stage1.md @@ -0,0 +1,94 @@ +# Running rkt with the *fly* stage1 + +The *fly* stage1 is an alternative stage1 that runs a single-application ACI with only `chroot`-isolation. + + +## Motivation + +The motivation of the fly feature is to add the ability to run applications with full privileges on the host but still benefit from the image management and discovery from rkt. +The Kubernetes kubelet is one candidate for rkt fly. + + +## How does it work? + +In comparison to the default stage1, there is no process manager involved in the stage1. + +The rkt application sets up bind mounts for `/dev`, `/proc`, `/sys`, and the user-provided volumes. +In addition to the bind mounts, an additional *tmpfs* mount is done at `/tmp`. +After the mounts are set up, rkt `chroot`s to the application's RootFS and finally executes the application. + +Here's a comparison of the default and fly stage1: + +stage1-coreos.aci: + +``` +host OS + └─ rkt + └─ systemd-nspawn + └─ systemd + └─ chroot + └─ user-app1 +``` + + +stage1-fly.aci: + +``` +host OS + └─ rkt + └─ chroot + └─ user-app1 +``` + +### Mount propagation modes +The *fly* stage1 makes use of Linux' [mount propagation modes](https://www.kernel.org/doc/Documentation/filesystems/sharedsubtree.txt).
+If a volume source path is a mountpoint on the host, this mountpoint is made recursively shared before the host path is mounted on the target path in the container. +Hence, changes to the mounts on the target mount path inside the container will be propagated back to the host. + +The bind mounts for `/dev`, `/proc`, and `/sys` are done automatically and are recursive, because their hierarchy contains mounts which also need to be available for the container to function properly. +User-provided volumes are not mounted recursively. +This is a safety measure to prevent system crashes when multiple containers are started that mount `/` into the container. + + +## Getting started + +You can either use `stage1-fly.aci` from the official release, or build rkt yourself with the right options: + +``` +$ ./autogen.sh && ./configure --with-stage1-flavors=fly && make +``` + +For more details about configure parameters, see [configure script parameters documentation](build-configure.md). +This will build the rkt binary and the stage1-fly.aci in `build-rkt-0.13.0+git/bin/`. + +### Selecting stage1 at runtime + +Curious readers can read a whole document on how to [choose which stage1.aci to use at runtime](https://github.com/coreos/rkt/blob/master/Documentation/commands.md#use-a-custom-stage-1). + +Here is a quick example of how to use a container stage1 named `stage1-fly.aci` in `/usr/local/rkt/`: +``` +# rkt run --stage1-image=/usr/local/rkt/stage1-fly.aci coreos.com/etcd:v2.0.9 +``` + + +## WARNING: missing isolation and security features + +The *fly* stage1 does **NOT** support the isolators and security features that the default stage1 does. + +Here's an incomplete list of features that are missing: +- network namespace isolation +- CPU isolators +- Memory isolators +- CAPABILITY bounding +- SELinux + +### Winning missing features back with systemd + +If you run systemd on your host, you can [wrap rkt with a systemd unit file](using-rkt-with-systemd.md#advanced-unit-file).
+For more information please consult the systemd manual. + +The following should get you started: + +* [systemd.resource-control](http://www.freedesktop.org/software/systemd/man/systemd.resource-control.html) +* [systemd.directives](http://www.freedesktop.org/software/systemd/man/systemd.directives.html) + diff --git a/Documentation/subcommands/fly.md b/Documentation/subcommands/fly.md new file mode 100644 index 0000000000..866bd82e93 --- /dev/null +++ b/Documentation/subcommands/fly.md @@ -0,0 +1,6 @@ +# rkt fly + +***This subcommand does not exist yet.*** +It will be a synonym for `rkt run --stage1-image=/path/to/stage1-fly.aci`. + +For more information please read about [running the fly stage1](../running-fly-stage1.md). From e3e6cb6ff00d6ae9c7c1bf95d82b4b9fb05b94c3 Mon Sep 17 00:00:00 2001 From: Stefan Junker Date: Fri, 18 Dec 2015 19:22:49 +0100 Subject: [PATCH 6/6] changelog: add entry for fly stage1 --- CHANGELOG.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1544394881..e97ac02c0e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,6 @@ ## vUNRELEASED -rkt vUNRELEASED is an important release with new features like resource isolators in the kvm stage1, bug fixes and improved documentation. +rkt vUNRELEASED is an important release with new features like resource isolators in the kvm stage1, a new stage1 flavor called *fly*, bug fixes and improved documentation. The appc spec version has been updated to v0.7.4 #### New features and UX changes @@ -9,6 +9,7 @@ The appc spec version has been updated to v0.7.4 - CPU and memory resource isolators can be specified on the command line to override the limits specified in the image manifest ([#1851](https://github.com/coreos/rkt/pull/1851), [#1874](https://github.com/coreos/rkt/pull/1874)). See rkt's [overriding isolators](https://github.com/coreos/rkt/blob/master/Documentation/subcommands/run.md#overriding-isolators) documentation.
- CPU and memory resource isolators can now be used within the kvm stage1 ([#1404](https://github.com/coreos/rkt/pull/1404)) - The `rkt image list` command can now display the image size ([#1865](https://github.com/coreos/rkt/pull/1865)). +- A new stage1 flavor, *fly*, has been added; it is the first experimental implementation of the upcoming rkt fly feature ([#1833](https://github.com/coreos/rkt/pull/1833)). #### Build improvements