Thanks to visit codestin.com
Credit goes to github.com

Skip to content

chore: track usage of organizations in telemetry (abandoned) #16307

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 8 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 26 additions & 3 deletions cli/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -781,6 +781,8 @@ func (r *RootCmd) Server(newAPI func(context.Context, *coderd.Options) (*coderd.
// This should be output before the logs start streaming.
cliui.Infof(inv.Stdout, "\n==> Logs will stream in below (press ctrl+c to gracefully exit):")

// We set this to a valid function pointer later
orgSyncEnabledFn := new(func() bool)
if vals.Telemetry.Enable {
vals, err := vals.WithoutSecrets()
if err != nil {
Expand Down Expand Up @@ -810,6 +812,7 @@ func (r *RootCmd) Server(newAPI func(context.Context, *coderd.Options) (*coderd.
lic.Trial = &trial
return nil
},
OrganizationSyncEnabled: orgSyncEnabledFn,
})
if err != nil {
return xerrors.Errorf("create telemetry reporter: %w", err)
Expand Down Expand Up @@ -860,6 +863,24 @@ func (r *RootCmd) Server(newAPI func(context.Context, *coderd.Options) (*coderd.
return xerrors.Errorf("create coder API: %w", err)
}

if vals.Telemetry.Enable && options.Telemetry != nil {
// We can initialize this pointer only now because the function
// depends on the coderAPI. Read the docstring on
// Options.OrganizationSyncEnabled in coderd/telemetry/telemetry.go
// for more context.
*orgSyncEnabledFn = func() bool {
// Sanity check just in case.
if coderAPI == nil || coderAPI.IDPSync == nil {
return false
}
// nolint:gocritic // AsSystemRestricted is fine here because it's a read-only operation
// used for telemetry reporting.
return coderAPI.IDPSync.OrganizationSyncEnabled(dbauthz.AsSystemRestricted(ctx), options.Database)
}

options.Telemetry.Start()
}

if vals.Prometheus.Enable {
// Agent metrics require reference to the tailnet coordinator, so must be initiated after Coder API.
closeAgentsFunc, err := prometheusmetrics.Agents(ctx, logger, options.PrometheusRegistry, coderAPI.Database, &coderAPI.TailnetCoordinator, coderAPI.DERPMap, coderAPI.Options.AgentInactiveDisconnectTimeout, 0)
Expand Down Expand Up @@ -1159,6 +1180,11 @@ func (r *RootCmd) Server(newAPI func(context.Context, *coderd.Options) (*coderd.
}
wg.Wait()

// Ensures a last report can be sent before exit!
// This should be closed before the API, as telemetry holds
// a reference to at least one of the fields of the API object.
options.Telemetry.Close()

cliui.Info(inv.Stdout, "Waiting for WebSocket connections to close..."+"\n")
_ = coderAPICloser.Close()
cliui.Info(inv.Stdout, "Done waiting for WebSocket connections"+"\n")
Expand All @@ -1171,9 +1197,6 @@ func (r *RootCmd) Server(newAPI func(context.Context, *coderd.Options) (*coderd.
cliui.Infof(inv.Stdout, "Done waiting for tunnel")
}

// Ensures a last report can be sent before exit!
options.Telemetry.Close()

// Trigger context cancellation for any remaining services.
cancel()

Expand Down
89 changes: 82 additions & 7 deletions coderd/telemetry/telemetry.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ import (
"github.com/coder/coder/v2/buildinfo"
clitelemetry "github.com/coder/coder/v2/cli/telemetry"
"github.com/coder/coder/v2/coderd/database"
"github.com/coder/coder/v2/coderd/database/dbauthz"
"github.com/coder/coder/v2/coderd/database/dbtime"
"github.com/coder/coder/v2/codersdk"
tailnetproto "github.com/coder/coder/v2/tailnet/proto"
Expand All @@ -53,11 +54,33 @@ type Options struct {

SnapshotFrequency time.Duration
ParseLicenseJWT func(lic *License) error

// OrganizationSyncEnabled is stored as a function pointer for two reasons:
//
// 1. It avoids a circular import with the IDPSync interface. We only need the
// OrganizationSyncEnabled() method, so passing it as a function pointer is
// simpler than including the entire interface, which would require restructuring
// packages.
//
// 2. It works with Coder's initialization order:
// - Telemetry is created first (so the function behind this pointer starts as nil).
// - Next, the IDPSync interface is created by the Coder API, which depends
// on the telemetry reporter being created first.
// - Finally, the function behind this pointer is set to a closure that calls
// IDPSync's OrganizationSyncEnabled method.
//
// This is extremely janky, but we'd need to refactor the entire initialization
// process to avoid it.
OrganizationSyncEnabled *func() bool
}

// New constructs a reporter for telemetry data.
// Duplicate data will be sent, it's on the server-side to index by UUID.
// Data is anonymized prior to being sent!
//
// The reporter has to be started with Start() before it will begin sending data.
// This allows for the deferred initialization of the OrganizationSyncEnabled
// function on the Options struct.
func New(options Options) (Reporter, error) {
if options.SnapshotFrequency == 0 {
// Report once every 30mins by default!
Expand All @@ -82,7 +105,6 @@ func New(options Options) (Reporter, error) {
snapshotURL: snapshotURL,
startedAt: dbtime.Now(),
}
go reporter.runSnapshotter()
return reporter, nil
}

Expand All @@ -100,13 +122,16 @@ type Reporter interface {
Report(snapshot *Snapshot)
Enabled() bool
Close()
Start()
}

type remoteReporter struct {
ctx context.Context
closed chan struct{}
closeMutex sync.Mutex
closeFunc context.CancelFunc
ctx context.Context
closed chan struct{}
closeMutex sync.Mutex
closeFunc context.CancelFunc
startedOnce sync.Once
started bool

options Options
deploymentURL,
Expand All @@ -115,8 +140,15 @@ type remoteReporter struct {
shutdownAt *time.Time
}

func (*remoteReporter) Enabled() bool {
return true
// Start marks the reporter as started and launches the snapshotter in its
// own goroutine. The work is guarded by startedOnce, so only the first call
// has any effect; later calls return without doing anything.
func (r *remoteReporter) Start() {
	// launch is the one-time startup routine handed to startedOnce.
	launch := func() {
		// Flip the flag that Enabled reports before spawning the goroutine.
		r.started = true
		go r.runSnapshotter()
	}
	r.startedOnce.Do(launch)
}

// Enabled reports whether this reporter has been started: it returns the
// started flag, which Start sets to true the first time it is called.
//
// NOTE(review): started is written inside Start and read here with no
// synchronization visible in this method; confirm whether Enabled can be
// called concurrently with Start and, if so, whether the field needs to be
// guarded or made atomic.
func (r *remoteReporter) Enabled() bool {
return r.started
}

func (r *remoteReporter) Report(snapshot *Snapshot) {
Expand Down Expand Up @@ -244,6 +276,13 @@ func (r *remoteReporter) deployment() error {
return xerrors.Errorf("install source must be <=64 chars: %s", installSource)
}

idpOrgSync := false
if r.options.OrganizationSyncEnabled != nil && *r.options.OrganizationSyncEnabled != nil {
idpOrgSync = (*r.options.OrganizationSyncEnabled)()
} else {
r.options.Logger.Debug(r.ctx, "organization sync enabled function is nil, skipping IDP org sync check")
}

data, err := json.Marshal(&Deployment{
ID: r.options.DeploymentID,
Architecture: sysInfo.Architecture,
Expand All @@ -263,6 +302,7 @@ func (r *remoteReporter) deployment() error {
MachineID: sysInfo.UniqueID,
StartedAt: r.startedAt,
ShutdownAt: r.shutdownAt,
IDPOrgSync: idpOrgSync,
})
if err != nil {
return xerrors.Errorf("marshal deployment: %w", err)
Expand Down Expand Up @@ -518,6 +558,24 @@ func (r *remoteReporter) createSnapshot() (*Snapshot, error) {
}
return nil
})
eg.Go(func() error {
// Warning: When an organization is deleted, it's completely removed from
// the database. This means that if an organization is deleted, it will
// no longer be reported, and there will be no other indicator that it
// was deleted. This requires special handling when interpreting the
// telemetry data later.
// nolint:gocritic // AsSystemRestricted is fine here because it's a read-only operation
// used for telemetry reporting.
orgs, err := r.options.Database.GetOrganizations(dbauthz.AsSystemRestricted(r.ctx), database.GetOrganizationsParams{})
if err != nil {
return xerrors.Errorf("get organizations: %w", err)
}
snapshot.Organizations = make([]Organization, 0, len(orgs))
for _, org := range orgs {
snapshot.Organizations = append(snapshot.Organizations, ConvertOrganization(org))
}
return nil
})

err := eg.Wait()
if err != nil {
Expand Down Expand Up @@ -916,6 +974,14 @@ func ConvertExternalProvisioner(id uuid.UUID, tags map[string]string, provisione
}
}

// ConvertOrganization maps a database organization onto the telemetry
// Organization type. Only the ID, creation time, and default flag are
// copied; no other organization fields are carried over.
func ConvertOrganization(org database.Organization) Organization {
	var converted Organization
	converted.ID = org.ID
	converted.CreatedAt = org.CreatedAt
	converted.IsDefault = org.IsDefault
	return converted
}

// Snapshot represents a point-in-time anonymized database dump.
// Data is aggregated by latest on the server-side, so partial data
// can be sent without issue.
Expand All @@ -942,6 +1008,7 @@ type Snapshot struct {
WorkspaceModules []WorkspaceModule `json:"workspace_modules"`
Workspaces []Workspace `json:"workspaces"`
NetworkEvents []NetworkEvent `json:"network_events"`
Organizations []Organization `json:"organizations"`
}

// Deployment contains information about the host running Coder.
Expand All @@ -964,6 +1031,7 @@ type Deployment struct {
MachineID string `json:"machine_id"`
StartedAt time.Time `json:"started_at"`
ShutdownAt *time.Time `json:"shutdown_at"`
IDPOrgSync bool `json:"idp_org_sync"`
}

type APIKey struct {
Expand Down Expand Up @@ -1457,8 +1525,15 @@ func NetworkEventFromProto(proto *tailnetproto.TelemetryEvent) (NetworkEvent, er
}, nil
}

// Organization is the telemetry representation of an organization as it is
// serialized into a snapshot. It is populated by ConvertOrganization.
type Organization struct {
// ID is the organization's unique identifier.
ID uuid.UUID `json:"id"`
// IsDefault mirrors the IsDefault flag of the source database record.
IsDefault bool `json:"is_default"`
// CreatedAt mirrors the CreatedAt timestamp of the source database record.
CreatedAt time.Time `json:"created_at"`
}

// noopReporter is a reporter whose methods all do nothing. It carries no
// state and always reports itself as disabled.
type noopReporter struct{}

// Report discards the given snapshot.
func (*noopReporter) Report(*Snapshot) {
}

// Enabled always returns false.
func (*noopReporter) Enabled() bool {
	return false
}

// Close does nothing.
func (*noopReporter) Close() {
}

// Start does nothing.
func (*noopReporter) Start() {
}
51 changes: 44 additions & 7 deletions coderd/telemetry/telemetry_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,22 +40,33 @@ func TestTelemetry(t *testing.T) {
db := dbmem.New()

ctx := testutil.Context(t, testutil.WaitMedium)

org, err := db.GetDefaultOrganization(ctx)
require.NoError(t, err)

_, _ = dbgen.APIKey(t, db, database.APIKey{})
_ = dbgen.ProvisionerJob(t, db, nil, database.ProvisionerJob{
Provisioner: database.ProvisionerTypeTerraform,
StorageMethod: database.ProvisionerStorageMethodFile,
Type: database.ProvisionerJobTypeTemplateVersionDryRun,
Provisioner: database.ProvisionerTypeTerraform,
StorageMethod: database.ProvisionerStorageMethodFile,
Type: database.ProvisionerJobTypeTemplateVersionDryRun,
OrganizationID: org.ID,
})
_ = dbgen.Template(t, db, database.Template{
Provisioner: database.ProvisionerTypeTerraform,
Provisioner: database.ProvisionerTypeTerraform,
OrganizationID: org.ID,
})
sourceExampleID := uuid.NewString()
_ = dbgen.TemplateVersion(t, db, database.TemplateVersion{
SourceExampleID: sql.NullString{String: sourceExampleID, Valid: true},
OrganizationID: org.ID,
})
_ = dbgen.TemplateVersion(t, db, database.TemplateVersion{
OrganizationID: org.ID,
})
_ = dbgen.TemplateVersion(t, db, database.TemplateVersion{})
user := dbgen.User(t, db, database.User{})
_ = dbgen.Workspace(t, db, database.WorkspaceTable{})
_ = dbgen.Workspace(t, db, database.WorkspaceTable{
OrganizationID: org.ID,
})
_ = dbgen.WorkspaceApp(t, db, database.WorkspaceApp{
SharingLevel: database.AppSharingLevelOwner,
Health: database.WorkspaceAppHealthDisabled,
Expand Down Expand Up @@ -95,7 +106,9 @@ func TestTelemetry(t *testing.T) {

_ = dbgen.WorkspaceModule(t, db, database.WorkspaceModule{})

_, snapshot := collectSnapshot(t, db, nil)
deployment, snapshot := collectSnapshot(t, db, nil)
require.False(t, deployment.IDPOrgSync)

require.Len(t, snapshot.ProvisionerJobs, 1)
require.Len(t, snapshot.Licenses, 1)
require.Len(t, snapshot.Templates, 1)
Expand All @@ -112,6 +125,7 @@ func TestTelemetry(t *testing.T) {
require.Len(t, snapshot.WorkspaceAgentStats, 1)
require.Len(t, snapshot.WorkspaceProxies, 1)
require.Len(t, snapshot.WorkspaceModules, 1)
require.Len(t, snapshot.Organizations, 1)

wsa := snapshot.WorkspaceAgents[0]
require.Len(t, wsa.Subsystems, 2)
Expand All @@ -128,6 +142,28 @@ func TestTelemetry(t *testing.T) {
})
require.Equal(t, tvs[0].SourceExampleID, &sourceExampleID)
require.Nil(t, tvs[1].SourceExampleID)

for _, entity := range snapshot.Workspaces {
require.Equal(t, entity.OrganizationID, org.ID)
}
for _, entity := range snapshot.ProvisionerJobs {
require.Equal(t, entity.OrganizationID, org.ID)
}
for _, entity := range snapshot.TemplateVersions {
require.Equal(t, entity.OrganizationID, org.ID)
}
for _, entity := range snapshot.Templates {
require.Equal(t, entity.OrganizationID, org.ID)
}

deployment2, _ := collectSnapshot(t, db, func(opts telemetry.Options) telemetry.Options {
opts.OrganizationSyncEnabled = new(func() bool)
*opts.OrganizationSyncEnabled = func() bool {
return true
}
return opts
})
require.True(t, deployment2.IDPOrgSync)
})
t.Run("HashedEmail", func(t *testing.T) {
t.Parallel()
Expand Down Expand Up @@ -290,6 +326,7 @@ func collectSnapshot(t *testing.T, db database.Store, addOptionsFn func(opts tel

reporter, err := telemetry.New(options)
require.NoError(t, err)
reporter.Start()
t.Cleanup(reporter.Close)
return <-deployment, <-snapshot
}
Loading