@@ -7,6 +7,8 @@ package azureblobstorage
77import (
88 "context"
99 "fmt"
10+ "slices"
11+ "sort"
1012 "sync"
1113
1214 azruntime "github.com/Azure/azure-sdk-for-go/sdk/azcore/runtime"
@@ -190,41 +192,19 @@ func (s *scheduler) fetchBlobPager(batchSize int32) *azruntime.Pager[azblob.List
190192// moveToLastSeenJob, moves to the latest job position past the last seen job
191193// Jobs are stored in lexicographical order always, hence the latest position can be found either on the basis of job name or timestamp
192194func (s * scheduler ) moveToLastSeenJob (jobs []* job ) []* job {
193- var latestJobs []* job
194- jobsToReturn := make ([]* job , 0 )
195- counter := 0
196- flag := false
197- ignore := false
198-
199- for _ , job := range jobs {
200- switch {
201- case job .timestamp ().After (s .state .checkpoint ().LatestEntryTime ):
202- latestJobs = append (latestJobs , job )
203- case job .name () == s .state .checkpoint ().BlobName :
204- flag = true
205- case job .name () > s .state .checkpoint ().BlobName :
206- flag = true
207- counter --
208- case job .name () <= s .state .checkpoint ().BlobName && (! ignore ):
209- ignore = true
210- }
211- counter ++
212- }
213-
214- if flag && (counter < len (jobs )- 1 ) {
215- jobsToReturn = jobs [counter + 1 :]
216- } else if ! flag && ! ignore {
217- jobsToReturn = jobs
218- }
219-
220- // in a senario where there are some jobs which have a greater timestamp
221- // but lesser alphanumeric order and some jobs have greater alphanumeric order
222- // than the current checkpoint blob name, then we append the latest jobs
223- if len (jobsToReturn ) != len (jobs ) && len (latestJobs ) > 0 {
224- jobsToReturn = append (latestJobs , jobsToReturn ... )
225- }
195+ cp := s .state .checkpoint ()
196+ jobs = slices .DeleteFunc (jobs , func (j * job ) bool {
197+ return ! (j .timestamp ().After (cp .LatestEntryTime ) || j .name () > cp .BlobName )
198+ })
226199
227- return jobsToReturn
200+ // In a scenario where there are some jobs which have a greater timestamp
201+ // but lesser lexicographic order and some jobs have greater lexicographic order
202+ // than the current checkpoint blob name, we then sort around the pivot checkpoint
203+ // timestamp.
204+ sort .SliceStable (jobs , func (i , _ int ) bool {
205+ return jobs [i ].timestamp ().After (cp .LatestEntryTime )
206+ })
207+ return jobs
228208}
229209
230210func (s * scheduler ) isFileSelected (name string ) bool {
0 commit comments