Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 6f51a2f

Browse files
committed
refactor(MergeFeedsJob): set retrieval method on feed merge; store regional merge in fs
re ibi-group/datatools-ui#544
1 parent 065d509 commit 6f51a2f

File tree

8 files changed

+121
-44
lines changed

8 files changed

+121
-44
lines changed

src/main/java/com/conveyal/datatools/manager/controllers/api/ProjectController.java

Lines changed: 32 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -16,6 +16,7 @@
1616
import com.conveyal.datatools.manager.persistence.Persistence;
1717
import com.conveyal.datatools.manager.utils.json.JsonManager;
1818
import org.bson.Document;
19+
import org.eclipse.jetty.http.HttpStatus;
1920
import org.slf4j.Logger;
2021
import org.slf4j.LoggerFactory;
2122
import spark.Request;
@@ -219,21 +220,45 @@ private static Project checkProjectPermissions(Request req, Project project, Str
219220
static String mergeProjectFeeds(Request req, Response res) {
220221
Project project = requestProjectById(req, "view");
221222
Auth0UserProfile userProfile = req.attribute("user");
222-
// TODO: make this an authenticated call?
223+
if (!userProfile.canAdministerProject(project.id)) {
224+
logMessageAndHalt(req, HttpStatus.UNAUTHORIZED_401, "Must be a project admin to merge project feeds.");
225+
}
223226
Set<FeedVersion> feedVersions = new HashSet<>();
224227
// Get latest version for each feed source in project
225228
Collection<FeedSource> feedSources = project.retrieveProjectFeedSources();
226-
for (FeedSource fs : feedSources) {
227-
// check if feed version exists
228-
FeedVersion version = fs.retrieveLatest();
229+
for (FeedSource feedSource : feedSources) {
230+
if (feedSource.retrievalMethod.equals(FeedSource.FeedRetrievalMethod.REGIONAL_MERGE)) {
231+
LOG.warn("Skipping {} feed source because it contains the regionally merged feed.", feedSource.name);
232+
continue;
233+
}
234+
// Check if feed version exists.
235+
// TODO: check that version passes baseline validation checks?
236+
FeedVersion version = feedSource.retrieveLatest();
229237
if (version == null) {
230-
LOG.warn("Skipping {} because it has no feed versions", fs.name);
238+
LOG.warn("Skipping {} because it has no feed versions", feedSource.name);
231239
continue;
232240
}
233-
// modify feed version to use prepended feed id
234-
LOG.info("Adding {} feed to merged zip", fs.name);
241+
LOG.info("Adding {} feed to merged zip", feedSource.name);
235242
feedVersions.add(version);
236243
}
244+
// Check that the latest regionally merged feed does not already contain input feed versions.
245+
if (project.regionalFeedSourceId != null) {
246+
Set<String> versionIds = feedVersions.stream().map(FeedVersion::retrieveId).collect(Collectors.toSet());
247+
// Check that latest merged feed version is not a copy of what has already been merged.
248+
FeedSource regionalFeedSource = Persistence.feedSources.getById(project.regionalFeedSourceId);
249+
if (regionalFeedSource != null) {
250+
FeedVersion latest = regionalFeedSource.retrieveLatest();
251+
if (latest != null && latest.inputVersions.equals(versionIds)) {
252+
logMessageAndHalt(
253+
req,
254+
HttpStatus.BAD_REQUEST_400,
255+
"Merge feeds job aborted. Regional merge already exists for latest feed versions found in project.",
256+
null,
257+
"path/to/feedSource"
258+
);
259+
}
260+
}
261+
}
237262
MergeFeedsJob mergeFeedsJob = new MergeFeedsJob(userProfile, feedVersions, project.id, REGIONAL);
238263
DataManager.heavyExecutor.execute(mergeFeedsJob);
239264
// Return job ID to requester for monitoring job status.

src/main/java/com/conveyal/datatools/manager/jobs/MergeFeedsJob.java

Lines changed: 55 additions & 29 deletions
Original file line number | Diff line number | Diff line change
@@ -5,8 +5,11 @@
55
import com.conveyal.datatools.manager.auth.Auth0UserProfile;
66
import com.conveyal.datatools.manager.gtfsplus.tables.GtfsPlusTable;
77
import com.conveyal.datatools.manager.models.FeedSource;
8+
import com.conveyal.datatools.manager.models.FeedSource.FeedRetrievalMethod;
89
import com.conveyal.datatools.manager.models.FeedVersion;
10+
import com.conveyal.datatools.manager.models.Project;
911
import com.conveyal.datatools.manager.persistence.FeedStore;
12+
import com.conveyal.datatools.manager.persistence.Persistence;
1013
import com.conveyal.gtfs.error.NewGTFSError;
1114
import com.conveyal.gtfs.error.NewGTFSErrorType;
1215
import com.conveyal.gtfs.loader.Field;
@@ -42,8 +45,9 @@
4245
import java.util.zip.ZipFile;
4346
import java.util.zip.ZipOutputStream;
4447

45-
import static com.conveyal.datatools.manager.jobs.MergeFeedsType.MTC;
48+
import static com.conveyal.datatools.manager.jobs.MergeFeedsType.SERVICE_PERIOD;
4649
import static com.conveyal.datatools.manager.jobs.MergeFeedsType.REGIONAL;
50+
import static com.conveyal.datatools.manager.models.FeedSource.FeedRetrievalMethod.REGIONAL_MERGE;
4751
import static com.conveyal.datatools.manager.utils.StringUtils.getCleanName;
4852
import static com.conveyal.gtfs.loader.DateField.GTFS_DATE_FORMATTER;
4953
import static com.conveyal.gtfs.loader.Field.getFieldIndex;
@@ -56,7 +60,7 @@
5660
* found in any other feed version. Note: There is absolutely no attempt to merge
5761
* entities based on either expected shared IDs or entity location (e.g., stop
5862
* coordinates).
59-
* - {@link MergeFeedsType#MTC}: this strategy is defined in detail at https://github.com/conveyal/datatools-server/issues/185,
63+
* - {@link MergeFeedsType#SERVICE_PERIOD}: this strategy is defined in detail at https://github.com/conveyal/datatools-server/issues/185,
6064
* but in essence, this strategy attempts to merge a current and future feed into
6165
* a combined file. For certain entities (specifically stops and routes) it uses
6266
* alternate fields as primary keys (stop_code and route_short_name) if they are
@@ -143,18 +147,39 @@ public MergeFeedsJob(Auth0UserProfile owner, Set<FeedVersion> feedVersions, Stri
143147
super(owner, mergeType.equals(REGIONAL) ? "Merging project feeds" : "Merging feed versions",
144148
JobType.MERGE_FEED_VERSIONS);
145149
this.feedVersions = feedVersions;
146-
// Grab parent feed source if performing non-regional merge (each version should share the
147-
// same feed source).
148-
this.feedSource =
149-
mergeType.equals(REGIONAL) ? null : feedVersions.iterator().next().parentFeedSource();
150150
// Construct full filename with extension
151151
this.filename = String.format("%s.zip", file);
152152
// If the merge type is regional, the file string should be equivalent to projectId, which
153153
// is used by the client to download the merged feed upon job completion.
154154
this.projectId = mergeType.equals(REGIONAL) ? file : null;
155155
this.mergeType = mergeType;
156156
// Assuming job is successful, mergedVersion will contain the resulting feed version.
157-
this.mergedVersion = mergeType.equals(REGIONAL) ? null : new FeedVersion(this.feedSource);
157+
Project project = Persistence.projects.getById(projectId);
158+
// Grab parent feed source depending on merge type.
159+
FeedSource regionalFeedSource = null;
160+
// If storing a regional merge as a new version, find the feed source designated by the project.
161+
if (mergeType.equals(REGIONAL)) {
162+
regionalFeedSource = Persistence.feedSources.getById(project.regionalFeedSourceId);
163+
// Create new feed source if this is the first regional merge.
164+
if (regionalFeedSource == null) {
165+
regionalFeedSource = new FeedSource("REGIONAL MERGE", project.id, REGIONAL_MERGE);
166+
// Store new feed source.
167+
Persistence.feedSources.create(regionalFeedSource);
168+
// Update regional feed source ID on project.
169+
project.regionalFeedSourceId = regionalFeedSource.id;
170+
Persistence.projects.replace(project.id, project);
171+
}
172+
}
173+
// Assign regional feed source or simply the first parent feed source found in the feed version list (these
174+
// should all belong to the same feed source if the merge is not regional).
175+
this.feedSource = mergeType.equals(REGIONAL)
176+
? regionalFeedSource
177+
: feedVersions.iterator().next().parentFeedSource();
178+
// Set feed source for merged version.
179+
this.mergedVersion = new FeedVersion(this.feedSource);
180+
this.mergedVersion.retrievalMethod = mergeType.equals(REGIONAL)
181+
? FeedRetrievalMethod.REGIONAL_MERGE
182+
: FeedRetrievalMethod.SERVICE_PERIOD_MERGE;
158183
this.mergeFeedsResult = new MergeFeedsResult(mergeType);
159184
}
160185

@@ -226,21 +251,22 @@ public void jobFinished() {
226251
}
227252
// Close output stream for zip file.
228253
out.close();
229-
// Handle writing file to storage (local or s3).
230254
if (mergeFeedsResult.failed) {
255+
// Fail job if the merge result indicates something went wrong.
231256
status.fail("Merging feed versions failed.");
232257
} else {
258+
// Store feed locally and (if applicable) upload regional feed to S3.
233259
storeMergedFeed();
234260
status.update(false, "Merged feed created successfully.", 100, true);
235261
}
236262
LOG.info("Feed merge is complete.");
237-
if (!mergeType.equals(REGIONAL) && !status.error && !mergeFeedsResult.failed) {
238-
// Handle the processing of the new version for non-regional merges (note: s3 upload is handled within this job).
263+
if (mergedVersion != null && !status.error && !mergeFeedsResult.failed) {
264+
mergedVersion.hash();
265+
mergedVersion.inputVersions = feedVersions.stream().map(FeedVersion::retrieveId).collect(Collectors.toSet());
266+
// If a new version is being stored, handle its processing (note: s3 upload is handled within this job).
239267
// We must add this job in jobLogic (rather than jobFinished) because jobFinished is called after this job's
240268
// subJobs are run.
241-
ProcessSingleFeedJob processSingleFeedJob =
242-
new ProcessSingleFeedJob(mergedVersion, owner, true);
243-
addNextJob(processSingleFeedJob);
269+
addNextJob(new ProcessSingleFeedJob(mergedVersion, owner, true));
244270
}
245271
}
246272

@@ -254,8 +280,7 @@ private List<FeedToMerge> collectAndSortFeeds(Set<FeedVersion> feedVersions) {
254280
try {
255281
return new FeedToMerge(version);
256282
} catch (Exception e) {
257-
LOG.error("Could not create zip file for version {}:", version.parentFeedSource(),
258-
version.version);
283+
LOG.error("Could not create zip file for version: {}", version.version);
259284
return null;
260285
}
261286
}).filter(Objects::nonNull).filter(entry -> entry.version.validationResult != null
@@ -271,6 +296,16 @@ private List<FeedToMerge> collectAndSortFeeds(Set<FeedVersion> feedVersions) {
271296
* Otherwise, it will write to a new version.
272297
*/
273298
private void storeMergedFeed() throws IOException {
299+
if (mergedVersion != null) {
300+
// Store the zip file for the merged feed version.
301+
try {
302+
FeedVersion.feedStore.newFeed(mergedVersion.id, new FileInputStream(mergedTempFile), feedSource);
303+
} catch (IOException e) {
304+
LOG.error("Could not store merged feed for new version");
305+
throw e;
306+
}
307+
}
308+
// Write the new latest regional merge file to s3://$BUCKET/project/$PROJECT_ID.zip
274309
if (mergeType.equals(REGIONAL)) {
275310
status.update(false, "Saving merged feed.", 95);
276311
// Store the project merged zip locally or on s3
@@ -289,15 +324,6 @@ private void storeMergedFeed() throws IOException {
289324
throw e;
290325
}
291326
}
292-
} else {
293-
// Store the zip file for the merged feed version.
294-
try {
295-
FeedVersion.feedStore
296-
.newFeed(mergedVersion.id, new FileInputStream(mergedTempFile), feedSource);
297-
} catch (IOException e) {
298-
LOG.error("Could not store merged feed for new version");
299-
throw e;
300-
}
301327
}
302328
}
303329

@@ -315,7 +341,7 @@ private int constructMergedTable(Table table, List<FeedToMerge> feedsToMerge,
315341
CsvListWriter writer = new CsvListWriter(new OutputStreamWriter(out), CsvPreference.STANDARD_PREFERENCE);
316342
String keyField = table.getKeyFieldName();
317343
String orderField = table.getOrderFieldName();
318-
if (mergeType.equals(MTC)) {
344+
if (mergeType.equals(SERVICE_PERIOD) && DataManager.isExtensionEnabled("mtc")) {
319345
// MTC requires that the stop and route records be merged based on different key fields.
320346
switch (table.name) {
321347
case "stops":
@@ -389,7 +415,7 @@ private int constructMergedTable(Table table, List<FeedToMerge> feedsToMerge,
389415
// Iterate over rows in table, writing them to the out file.
390416
while (csvReader.readRecord()) {
391417
String keyValue = csvReader.get(keyFieldIndex);
392-
if (feedIndex > 0 && mergeType.equals(MTC)) {
418+
if (feedIndex > 0 && mergeType.equals(SERVICE_PERIOD)) {
393419
// Always prefer the "future" file for the feed_info table, which means
394420
// we can skip any iterations following the first one. If merging the agency
395421
// table, we should only skip the following feeds if performing an MTC merge
@@ -454,7 +480,7 @@ private int constructMergedTable(Table table, List<FeedToMerge> feedsToMerge,
454480
}
455481
fieldsFoundList = Arrays.asList(fieldsFoundInZip);
456482
}
457-
if (mergeType.equals(MTC) && table.name.equals("stops")) {
483+
if (mergeType.equals(SERVICE_PERIOD) && table.name.equals("stops")) {
458484
// For the first line of the stops table, check that the alt. key
459485
// field (stop_code) is present. If it is not, revert to the original
460486
key field. This is only pertinent for the SERVICE_PERIOD merge type.
@@ -541,7 +567,7 @@ private int constructMergedTable(Table table, List<FeedToMerge> feedsToMerge,
541567
// reference tracker will get far too large if we attempt to use it to
542568
// track references for a large number of feeds (e.g., every feed in New
543569
// York State).
544-
if (mergeType.equals(MTC)) {
570+
if (mergeType.equals(SERVICE_PERIOD)) {
545571
Set<NewGTFSError> idErrors;
546572
// If analyzing the second feed (non-future feed), the service_id always gets feed scoped.
547573
// See https://github.com/ibi-group/datatools-server/issues/244
@@ -834,7 +860,7 @@ private int constructMergedTable(Table table, List<FeedToMerge> feedsToMerge,
834860
rowValues[specFieldIndex] = valueToWrite;
835861
} // End of iteration over each field for a row.
836862
// Do not write rows that are designated to be skipped.
837-
if (skipRecord && this.mergeType.equals(MTC)) {
863+
if (skipRecord && this.mergeType.equals(SERVICE_PERIOD)) {
838864
mergeFeedsResult.recordsSkipCount++;
839865
continue;
840866
}

src/main/java/com/conveyal/datatools/manager/jobs/MergeFeedsType.java

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -2,5 +2,5 @@
22

33
public enum MergeFeedsType {
44
REGIONAL,
5-
MTC
5+
SERVICE_PERIOD
66
}

src/main/java/com/conveyal/datatools/manager/models/FeedSource.java

Lines changed: 15 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -135,6 +135,13 @@ public FeedSource (String name) {
135135
this.retrievalMethod = FeedRetrievalMethod.MANUALLY_UPLOADED;
136136
}
137137

138+
public FeedSource (String name, String projectId, FeedRetrievalMethod retrievalMethod) {
139+
super();
140+
this.name = name;
141+
this.projectId = projectId;
142+
this.retrievalMethod = retrievalMethod;
143+
}
144+
138145
/**
139146
* No-arg constructor to yield an uninitialized feed source, for dump/restore.
140147
* Should not be used in general code.
@@ -526,12 +533,18 @@ public void renumberSnapshots() {
526533
}
527534

528535
/**
529-
* Represents ways feeds can be retrieved
536+
* Represents ways feeds can be retrieved. Note: this enum was originally developed for feed sources, which were
537+
* limited to a single retrieval method per source; however, use of this software has evolved in such a way that
538+
* it supports retrieving GTFS data for a single feed source in a multitude of ways, including: fetching via URL,
539+
* uploading manually, creating with the editor, or transforming in some way (e.g., merging multiple versions or
540+
* post-processing a single version).
530541
*/
531542
public enum FeedRetrievalMethod {
532543
FETCHED_AUTOMATICALLY, // automatically retrieved over HTTP on some regular basis
533544
MANUALLY_UPLOADED, // manually uploaded by someone, perhaps the agency, or perhaps an internal user
534-
PRODUCED_IN_HOUSE // produced in-house in a GTFS Editor instance
545+
PRODUCED_IN_HOUSE, // produced in-house in a GTFS Editor instance
546+
REGIONAL_MERGE,
547+
SERVICE_PERIOD_MERGE
535548
}
536549

537550
/**

src/main/java/com/conveyal/datatools/manager/models/FeedVersion.java

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -29,6 +29,7 @@
2929
import java.text.SimpleDateFormat;
3030
import java.time.LocalDate;
3131
import java.util.Date;
32+
import java.util.Set;
3233
import java.util.UUID;
3334

3435
import static com.conveyal.datatools.manager.utils.StringUtils.getCleanName;
@@ -50,6 +51,10 @@ public class FeedVersion extends Model implements Serializable {
5051
private static final Logger LOG = LoggerFactory.getLogger(FeedVersion.class);
5152
// FIXME: move this out of FeedVersion (also, it should probably not be public)?
5253
public static FeedStore feedStore = new FeedStore();
54+
/**
55+
* Input feed versions used to create a merged version.
56+
*/
57+
public Set<String> inputVersions;
5358

5459
/**
5560
* We generate IDs manually, but we need a bit of information to do so

src/main/java/com/conveyal/datatools/manager/models/Model.java

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -39,6 +39,8 @@ public Model () {
3939

4040
public String id;
4141

42+
/**
 * Returns the value of this model's {@code id} field.
 *
 * @return the model ID
 */
public String retrieveId() {
    return id;
}
43+
4244
// FIXME: should this be stored here? Should we use lastUpdated as a nonce to protect against race conditions in DB
4345
// writes?
4446
public Date lastUpdated;

src/main/java/com/conveyal/datatools/manager/models/Project.java

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -67,6 +67,12 @@ public List<OtpServer> availableOtpServers() {
6767
// the "pinned" deployment are shown and compared to the most recent version in the feed sources.
6868
public String pinnedDeploymentId;
6969

70+
/**
71+
* Feed source in which to store regionally merged GTFS feeds. If specified, during a regional feed merge all feeds
72+
* in the project will be merged except for the feed source defined here.
73+
*/
74+
public String regionalFeedSourceId;
75+
7076
public Project() {
7177
this.buildConfig = new OtpBuildConfig();
7278
this.routerConfig = new OtpRouterConfig();

0 commit comments

Comments
 (0)