diff --git a/src/archivist/recorder/repositories/git/dataMapper.js b/src/archivist/recorder/repositories/git/dataMapper.js index c9dadd267..8fcd3fafb 100644 --- a/src/archivist/recorder/repositories/git/dataMapper.js +++ b/src/archivist/recorder/repositories/git/dataMapper.js @@ -91,6 +91,11 @@ function generateFileName(termsType, documentId, extension) { } export function generateFilePath(serviceId, termsType, documentId, mimeType) { + // If only serviceId is provided, return a pattern to match all files for that service + if (termsType === undefined) { + return `${serviceId}/*`; + } + const extension = mime.getExtension(mimeType) || '*'; // If mime type is undefined, an asterisk is set as an extension. Used to match all files for the given service ID, terms type and document ID when mime type is unknown return `${serviceId}/${generateFileName(termsType, documentId, extension)}`; // Do not use `path.join` as even for Windows, the path should be with `/` and not `\` diff --git a/src/archivist/recorder/repositories/git/git.js b/src/archivist/recorder/repositories/git/git.js index 791c39310..a031317ae 100644 --- a/src/archivist/recorder/repositories/git/git.js +++ b/src/archivist/recorder/repositories/git/git.js @@ -68,8 +68,12 @@ export default class Git { return this.git.push(); } - listCommits(options = []) { - return this.log([ '--reverse', '--no-merges', '--name-only', ...options ]); // Returns all commits in chronological order (`--reverse`), excluding merge commits (`--no-merges`), with modified files names (`--name-only`) + listCommits(options = [], { reverse = true, skip, maxCount } = {}) { + const reverseOption = reverse ? ['--reverse'] : []; + const skipOption = skip !== undefined ? [`--skip=${skip}`] : []; + const maxCountOption = maxCount !== undefined ? [`--max-count=${maxCount}`] : []; + + return this.log([ ...reverseOption, '--author-date-order', '--no-merges', '--name-only', ...skipOption, ...maxCountOption, ...options ]); // Returns commits in chronological order with `--reverse` (oldest first) or reverse chronological without it (newest first), sorted by author date (`--author-date-order`), excluding merge commits (`--no-merges`), with modified files names (`--name-only`), with optional pagination (`--skip`, `--max-count`) } async getCommit(options) { @@ -150,4 +154,25 @@ export default class Git { async updateCommitGraph() { await this.git.raw([ 'commit-graph', 'write', '--reachable', '--changed-paths', '--append' ]); } + + async getDiffStats(commitHash) { + const output = await this.git.raw([ 'show', '--numstat', '--format=', commitHash ]); + + let additions = 0; + let deletions = 0; + + for (const line of output.trim().split('\n')) { + if (!line) { + continue; + } + + const [ added, deleted ] = line.split('\t'); + + // Binary files show '-' for additions/deletions + if (added !== '-') additions += parseInt(added, 10); + if (deleted !== '-') deletions += parseInt(deleted, 10); + } + + return { additions, deletions }; + } } diff --git a/src/archivist/recorder/repositories/git/index.js b/src/archivist/recorder/repositories/git/index.js index 5caf59948..ea2b4e2df 100644 --- a/src/archivist/recorder/repositories/git/index.js +++ b/src/archivist/recorder/repositories/git/index.js @@ -88,16 +88,89 @@ export default class GitRepository extends RepositoryInterface { return this.#toDomain(commit); } - async findAll() { - return Promise.all((await this.#getCommits()).map(commit => this.#toDomain(commit, { deferContentLoading: true }))); + async findMetadataById(recordId) { + const commit = await this.git.getCommit([recordId]); + + return this.#toDomain(commit, { deferContentLoading: true }); + } + + async findAll({ limit, offset } = {}) { + return Promise.all((await this.#getCommits({ limit, offset })).map(commit => this.#toDomain(commit, { deferContentLoading: true }))); + } + + async findByServiceAndTermsType(serviceId, termsType, { limit, offset } = {}) { + const pathPattern = DataMapper.generateFilePath(serviceId, termsType); + + return Promise.all((await this.#getCommits({ pathFilter: pathPattern, limit, offset })).map(commit => this.#toDomain(commit, { deferContentLoading: true }))); + } + + async findByService(serviceId, { limit, offset } = {}) { + const pathPattern = DataMapper.generateFilePath(serviceId); + + return Promise.all((await this.#getCommits({ pathFilter: pathPattern, limit, offset })).map(commit => this.#toDomain(commit, { deferContentLoading: true }))); + } + + async findFirst(serviceId, termsType) { + const pathPattern = DataMapper.generateFilePath(serviceId, termsType); + const commits = await this.#getCommits({ pathFilter: pathPattern, reverse: true }); + + return commits.length > 0 ? this.#toDomain(commits[0], { deferContentLoading: true }) : null; + } + + async findPrevious(versionId) { + const version = await this.findById(versionId); + + if (!version) { + return null; + } + + return this.findByDate(version.serviceId, version.termsType, new Date(version.fetchDate.getTime() - 1)); + } + + async findNext(versionId) { + const version = await this.findById(versionId); + + if (!version) { + return null; + } + + const pathPattern = DataMapper.generateFilePath(version.serviceId, version.termsType); + + // Use --ancestry-path to follow the direct lineage from versionId to HEAD + // This gets commits that are both descendants of versionId and ancestors of HEAD + const [commit] = await this.git.listCommits([ + '--ancestry-path', + `${versionId}..HEAD`, + '--', + pathPattern, + ]); + + return this.#toDomain(commit, { deferContentLoading: true }); } - async count() { - return (await this.git.log(Object.values(DataMapper.COMMIT_MESSAGE_PREFIXES).map(prefix => `--grep=${prefix}`))).length; + async count(serviceId, termsType) { + const grepOptions = Object.values(DataMapper.COMMIT_MESSAGE_PREFIXES).map(prefix => `--grep=${prefix}`); + const pathOptions = []; + + if (serviceId && termsType) { + const pathPattern = DataMapper.generateFilePath(serviceId, termsType); + + pathOptions.push('--', pathPattern); + } else if (serviceId) { + // Count all records for a service (all terms types) + const pathPattern = DataMapper.generateFilePath(serviceId); + + pathOptions.push('--', pathPattern); + } else { + // Count all records (exclude root directory files) + pathOptions.push('--', '*/*'); + } + + return (await this.git.log([ ...grepOptions, ...pathOptions ])).length; } async* iterate() { - const commits = await this.#getCommits(); + const commits = await this.#getCommits({ reverse: true }); for (const commit of commits) { yield this.#toDomain(commit); @@ -131,12 +204,43 @@ export default class GitRepository extends RepositoryInterface { record.content = pdfBuffer; } - async #getCommits() { - return (await this.git.listCommits()) - .filter(commit => // Skip non-record commits (e.g., README or LICENSE updates) - DataMapper.COMMIT_MESSAGE_PREFIXES_REGEXP.test(commit.message) // Commits generated by the engine have messages that match predefined prefixes - && path.dirname(commit.diff.files[0].file) !== '.') // Assumes one record per commit; records must be in a serviceId folder, not root - .sort((commitA, commitB) => new Date(commitA.date) - new Date(commitB.date)); // Make sure that the commits are sorted in ascending chronological order + getDiffStats(recordId) { + return this.git.getDiffStats(recordId); + } + + async #getCommits({ pathFilter, reverse = false, limit, offset } = {}) { + const grepOptions = Object.values(DataMapper.COMMIT_MESSAGE_PREFIXES).flatMap(prefix => [ '--grep', prefix ]); + const pathOptions = pathFilter + ? [ '--', pathFilter ] + : [ '--', '*/*' ]; // Exclude root directory files by only matching files in subdirectories + + const options = [ ...grepOptions, ...pathOptions ]; + + // Use git-level pagination when available + // Note: --skip and --max-count work in topological order, not chronological order + // This means pagination may not be strictly chronological, but it's acceptable for performance + const paginationOptions = {}; + + if (offset !== undefined) { + paginationOptions.skip = offset; + } + + if (limit !== undefined) { + paginationOptions.maxCount = limit; + } + + const commits = await this.git.listCommits(options, { reverse: false, ...paginationOptions }); // Get commits without git's --reverse for better performance, filtered at git level + + // Sort by date in JavaScript for accuracy - git's date ordering may not be reliable with backdated commits + // Default order is descending (newest to oldest), reverse gives ascending (oldest to newest) + commits.sort((commitA, commitB) => { + const dateA = new Date(commitA.date); + const dateB = new Date(commitB.date); + + return reverse ? dateA - dateB : dateB - dateA; + }); + + return commits; } static async writeFile({ filePath, content }) { diff --git a/src/archivist/recorder/repositories/git/index.test.js b/src/archivist/recorder/repositories/git/index.test.js index 6c7e1dea0..9dcfd2c5d 100644 --- a/src/archivist/recorder/repositories/git/index.test.js +++ b/src/archivist/recorder/repositories/git/index.test.js @@ -540,8 +540,87 @@ describe('GitRepository', () => { } }); - it('returns records in ascending order', () => { - expect(records.map(record => record.fetchDate)).to.deep.equal([ FETCH_DATE_EARLIER, FETCH_DATE, FETCH_DATE_LATER ]); + it('returns records in descending order', () => { + expect(records.map(record => record.fetchDate)).to.deep.equal([ FETCH_DATE_LATER, FETCH_DATE, FETCH_DATE_EARLIER ]); + }); + }); + + describe('#findByServiceAndTermsType', () => { + const expectedIds = []; + let records; + + before(async function () { + this.timeout(5000); + + const { id: id1 } = await subject.save(new Version({ + serviceId: SERVICE_PROVIDER_ID, + termsType: TERMS_TYPE, + content: CONTENT, + fetchDate: FETCH_DATE, + snapshotIds: [SNAPSHOT_ID], + })); + + expectedIds.push(id1); + + const { id: id2 } = await subject.save(new Version({ + serviceId: SERVICE_PROVIDER_ID, + termsType: TERMS_TYPE, + content: `${CONTENT} - updated`, + fetchDate: FETCH_DATE_LATER, + snapshotIds: [SNAPSHOT_ID], + })); + + expectedIds.push(id2); + + await subject.save(new Version({ + serviceId: 'other_service', + termsType: 'Privacy Policy', + content: `${CONTENT} - other`, + fetchDate: FETCH_DATE, + snapshotIds: [SNAPSHOT_ID], + })); + + (records = await subject.findByServiceAndTermsType(SERVICE_PROVIDER_ID, TERMS_TYPE)); + }); + + after(() => subject.removeAll()); + + it('returns only matching records', () => { + expect(records.length).to.equal(2); + }); + + it('returns Version objects', () => { + for (const record of records) { + expect(record).to.be.an.instanceof(Version); + } + }); + + it('returns records with matching service ID', () => { + for (const record of records) { + expect(record.serviceId).to.equal(SERVICE_PROVIDER_ID); + } + }); + + it('returns records with matching terms type', () => { + for (const record of records) { + expect(record.termsType).to.equal(TERMS_TYPE); + } + }); + + it('returns records in descending order', () => { + expect(records.map(record => record.fetchDate)).to.deep.equal([ FETCH_DATE_LATER, FETCH_DATE ]); + }); + + it('returns records with correct IDs', () => { + expect(records.map(record => record.id)).to.have.members(expectedIds); + }); + + context('when no matching records exist', () => { + it('returns an empty array', async () => { + const result = await subject.findByServiceAndTermsType('non_existent_service', 'Non Existent Terms'); + + expect(result).to.be.an('array').that.is.empty; + }); }); }); @@ -582,6 +661,228 @@ describe('GitRepository', () => { it('returns the proper count', () => { expect(count).to.equal(3); }); + + context('with serviceId and termsType filters', () => { + it('returns count for specific service and terms type', async () => { + const filteredCount = await subject.count(SERVICE_PROVIDER_ID, TERMS_TYPE); + + expect(filteredCount).to.equal(3); + }); + + it('returns zero for non-existent service', async () => { + const filteredCount = await subject.count('non-existent-service', TERMS_TYPE); + + expect(filteredCount).to.equal(0); + }); + }); + + context('with only serviceId filter', () => { + it('returns count for all terms types of a service', async () => { + // Add a version with different terms type + await subject.save(new Version({ + serviceId: SERVICE_PROVIDER_ID, + termsType: 'Different Terms', + content: CONTENT, + fetchDate: FETCH_DATE, + snapshotIds: [SNAPSHOT_ID], + })); + + const filteredCount = await subject.count(SERVICE_PROVIDER_ID); + + expect(filteredCount).to.equal(4); // 3 from TERMS_TYPE + 1 from 'Different Terms' + }); + }); + }); + + describe('#findFirst', () => { + let firstVersion; + let result; + + before(async function () { + this.timeout(5000); + + firstVersion = await subject.save(new Version({ + serviceId: SERVICE_PROVIDER_ID, + termsType: TERMS_TYPE, + content: 'first content', + fetchDate: FETCH_DATE_EARLIER, + snapshotIds: [SNAPSHOT_ID], + })); + + await subject.save(new Version({ + serviceId: SERVICE_PROVIDER_ID, + termsType: TERMS_TYPE, + content: 'middle content', + fetchDate: FETCH_DATE, + snapshotIds: [SNAPSHOT_ID], + })); + + await subject.save(new Version({ + serviceId: SERVICE_PROVIDER_ID, + termsType: TERMS_TYPE, + content: 'last content', + fetchDate: FETCH_DATE_LATER, + snapshotIds: [SNAPSHOT_ID], + })); + + result = await subject.findFirst(SERVICE_PROVIDER_ID, TERMS_TYPE); + }); + + after(() => subject.removeAll()); + + it('returns a Version object', () => { + expect(result).to.be.an.instanceof(Version); + }); + + it('returns the oldest version', () => { + expect(result.id).to.equal(firstVersion.id); + }); + + it('returns the correct fetch date', () => { + expect(result.fetchDate).to.deep.equal(firstVersion.fetchDate); + }); + + context('when no versions exist', () => { + it('returns null', async () => { + expect(await subject.findFirst('non_existent_service', 'Non Existent Terms')).to.be.null; + }); + }); + }); + + describe('#findPrevious', () => { + let firstVersion; + let middleVersion; + let lastVersion; + + before(async function () { + this.timeout(5000); + + firstVersion = await subject.save(new Version({ + serviceId: SERVICE_PROVIDER_ID, + termsType: TERMS_TYPE, + content: 'first content', + fetchDate: FETCH_DATE_EARLIER, + snapshotIds: [SNAPSHOT_ID], + })); + + middleVersion = await subject.save(new Version({ + serviceId: SERVICE_PROVIDER_ID, + termsType: TERMS_TYPE, + content: 'middle content', + fetchDate: FETCH_DATE, + snapshotIds: [SNAPSHOT_ID], + })); + + lastVersion = await subject.save(new Version({ + serviceId: SERVICE_PROVIDER_ID, + termsType: TERMS_TYPE, + content: 'last content', + fetchDate: FETCH_DATE_LATER, + snapshotIds: [SNAPSHOT_ID], + })); + }); + + after(() => subject.removeAll()); + + context('when requesting previous of middle version', () => { + it('returns the version before it', async () => { + const result = await subject.findPrevious(middleVersion.id); + + expect(result).to.be.an.instanceof(Version); + expect(result.id).to.equal(firstVersion.id); + }); + }); + + context('when requesting previous of last version', () => { + it('returns the version before it', async () => { + const result = await subject.findPrevious(lastVersion.id); + + expect(result).to.be.an.instanceof(Version); + expect(result.id).to.equal(middleVersion.id); + }); + }); + + context('when requesting previous of first version', () => { + it('returns null', async () => { + const result = await subject.findPrevious(firstVersion.id); + + expect(result).to.be.null; + }); + }); + + context('when the version does not exist', () => { + it('returns null', async () => { + expect(await subject.findPrevious('non_existent_version_id')).to.be.null; + }); + }); + }); + + describe('#findNext', () => { + let firstVersion; + let middleVersion; + let lastVersion; + + before(async function () { + this.timeout(5000); + + firstVersion = await subject.save(new Version({ + serviceId: SERVICE_PROVIDER_ID, + termsType: TERMS_TYPE, + content: 'first content', + fetchDate: FETCH_DATE_EARLIER, + snapshotIds: [SNAPSHOT_ID], + })); + + middleVersion = await subject.save(new Version({ + serviceId: SERVICE_PROVIDER_ID, + termsType: TERMS_TYPE, + content: 'middle content', + fetchDate: FETCH_DATE, + snapshotIds: [SNAPSHOT_ID], + })); + + lastVersion = await subject.save(new Version({ + serviceId: SERVICE_PROVIDER_ID, + termsType: TERMS_TYPE, + content: 'last content', + fetchDate: FETCH_DATE_LATER, + snapshotIds: [SNAPSHOT_ID], + })); + }); + + after(() => subject.removeAll()); + + context('when requesting next of first version', () => { + it('returns the version after it', async () => { + const result = await subject.findNext(firstVersion.id); + + expect(result).to.be.an.instanceof(Version); + expect(result.id).to.equal(middleVersion.id); + }); + }); + + context('when requesting next of middle version', () => { + it('returns the version after it', async () => { + const result = await subject.findNext(middleVersion.id); + + expect(result).to.be.an.instanceof(Version); + expect(result.id).to.equal(lastVersion.id); + }); + }); + + context('when requesting next of last version', () => { + it('returns null', async () => { + const result = await subject.findNext(lastVersion.id); + + expect(result).to.be.null; + }); + }); + + context('when the version does not exist', () => { + it('returns null', async () => { + expect(await subject.findNext('non_existent_version_id')).to.be.null; + }); + }); }); describe('#findLatest', () => { @@ -1101,8 +1402,8 @@ describe('GitRepository', () => { } }); - it('returns records in ascending order', () => { - expect(records.map(record => record.fetchDate)).to.deep.equal([ FETCH_DATE_EARLIER, FETCH_DATE, FETCH_DATE_LATER ]); + it('returns records in descending order', () => { + expect(records.map(record => record.fetchDate)).to.deep.equal([ FETCH_DATE_LATER, FETCH_DATE, FETCH_DATE_EARLIER ]); }); }); @@ -1462,8 +1763,8 @@ describe('GitRepository', () => { } }); - it('returns records in ascending order', () => { - expect(records.map(record => record.fetchDate)).to.deep.equal(expectedDates); + it('returns records in descending order', () => { + expect(records.map(record => record.fetchDate)).to.deep.equal([...expectedDates].reverse()); }); }); diff --git a/src/archivist/recorder/repositories/interface.js b/src/archivist/recorder/repositories/interface.js index 1d9270944..0dca2b5eb 100644 --- a/src/archivist/recorder/repositories/interface.js +++ b/src/archivist/recorder/repositories/interface.js @@ -69,22 +69,93 @@ class RepositoryInterface { throw new Error(`#findById method is not implemented in ${this.constructor.name}`); } + /** + * Find the metadata of the record that matches the given record ID, without loading its content + * @param {string} recordId - Record ID of the record to find + * @returns {Promise} Promise that will be resolved with the found record (without content) or null if none match the given ID + */ + async findMetadataById(recordId) { + throw new Error(`#findMetadataById method is not implemented in ${this.constructor.name}`); + } + /** * Find all records * For performance reasons, the content of the records will not be loaded by default. Use #loadRecordContent to load the content of individual records - * @see RepositoryInterface#loadRecordContent - * @returns {Promise>} Promise that will be resolved with an array of all records + * @see RepositoryInterface#loadRecordContent + * @param {object} [options] - Pagination options + * @param {number} [options.limit] - Maximum number of records to return + * @param {number} [options.offset] - Number of records to skip + * @returns {Promise>} Promise that will be resolved with an array of all records */ - async findAll() { + async findAll(options = {}) { throw new Error(`#findAll method is not implemented in ${this.constructor.name}`); } + /** + * Find all records for a specific service and terms type + * For performance reasons, the content of the records will not be loaded by default. Use #loadRecordContent to load the content of individual records + * @see RepositoryInterface#loadRecordContent + * @param {string} serviceId - Service ID of records to find + * @param {string} termsType - Terms type of records to find + * @param {object} [options] - Pagination options + * @param {number} [options.limit] - Maximum number of records to return + * @param {number} [options.offset] - Number of records to skip + * @returns {Promise>} Promise that will be resolved with an array of matching records + */ + async findByServiceAndTermsType(serviceId, termsType, options = {}) { + throw new Error(`#findByServiceAndTermsType method is not implemented in ${this.constructor.name}`); + } + + /** + * Find all records for a specific service (across all terms types) + * For performance reasons, the content of the records will not be loaded by default. Use #loadRecordContent to load the content of individual records + * @see RepositoryInterface#loadRecordContent + * @param {string} serviceId - Service ID of records to find + * @param {object} [options] - Pagination options + * @param {number} [options.limit] - Maximum number of records to return + * @param {number} [options.offset] - Number of records to skip + * @returns {Promise>} Promise that will be resolved with an array of matching records + */ + async findByService(serviceId, options = {}) { + throw new Error(`#findByService method is not implemented in ${this.constructor.name}`); + } + + /** + * Find the first (oldest) record for a specific service and terms type + * @param {string} serviceId - Service ID of record to find + * @param {string} termsType - Terms type of record to find + * @returns {Promise} Promise that will be resolved with the found record or null if none match + */ + async findFirst(serviceId, termsType) { + throw new Error(`#findFirst method is not implemented in ${this.constructor.name}`); + } + + /** + * Find the previous record (the one before the given version) + * @param {string} versionId - Version ID to find the previous record for + * @returns {Promise} Promise that will be resolved with the found record or null if none match + */ + async findPrevious(versionId) { + throw new Error(`#findPrevious method is not implemented in ${this.constructor.name}`); + } + + /** + * Find the next record (the one after the given version) + * @param {string} versionId - Version ID to find the next record for + * @returns {Promise} Promise that will be resolved with the found record or null if none match + */ + async findNext(versionId) { + throw new Error(`#findNext method is not implemented in ${this.constructor.name}`); + } + /** * Count the total number of records in the repository * For performance reasons, use this method rather than counting the number of entries returned by #findAll if you only need the size of a repository - * @returns {Promise} Promise that will be resolved with the total number of records + * @param {string} [serviceId] - Optional service ID to filter records + * @param {string} [termsType] - Optional terms type to filter records (requires serviceId) + * @returns {Promise} Promise that will be resolved with the total number of records */ - async count() { + async count(serviceId, termsType) { throw new Error(`#count method is not implemented in ${this.constructor.name}`); } @@ -114,6 +185,15 @@ class RepositoryInterface { async loadRecordContent(record) { throw new Error(`#loadRecordContent method is not implemented in ${this.constructor.name}`); } + + /** + * Get diff statistics for a specific record + * @param {string} recordId - Record ID to get diff stats for + * @returns {Promise<{additions: number, deletions: number}>} Promise that will be resolved with the diff statistics + */ + async getDiffStats(recordId) { + throw new Error(`#getDiffStats method is not implemented in ${this.constructor.name}`); + } } export default RepositoryInterface; diff --git a/src/archivist/recorder/repositories/mongo/index.js b/src/archivist/recorder/repositories/mongo/index.js index 2a4abb18c..b659e0566 100644 --- a/src/archivist/recorder/repositories/mongo/index.js +++ b/src/archivist/recorder/repositories/mongo/index.js @@ -88,13 +88,106 @@ export default class MongoRepository extends RepositoryInterface { return this.#toDomain(mongoDocument); } - async findAll() { - return Promise.all((await this.collection.find().project({ content: 0 }).sort({ fetchDate: 1 }).toArray()) + async findMetadataById(recordId) { + if (!ObjectId.isValid(recordId)) { + return null; + } + + const document = await this.collection.findOne( + { _id: ObjectId.createFromHexString(recordId) }, + { projection: { content: 0 } } + ); + + return document ? this.#toDomain(document, { deferContentLoading: true }) : null; + } + + async findAll({ limit, offset } = {}) { + let query = this.collection.find().project({ content: 0 }).sort({ fetchDate: -1 }); + + if (offset !== undefined) { + query = query.skip(offset); + } + + if (limit !== undefined) { + query = query.limit(limit); + } + + return Promise.all((await query.toArray()) + .map(mongoDocument => this.#toDomain(mongoDocument, { deferContentLoading: true }))); + } + + async findByServiceAndTermsType(serviceId, termsType, { limit, offset } = {}) { + let query = this.collection.find({ serviceId, termsType }).project({ content: 0 }).sort({ fetchDate: -1 }); + + if (offset !== undefined) { + query = query.skip(offset); + } + + if (limit !== undefined) { + query = query.limit(limit); + } + + return Promise.all((await query.toArray()) + .map(mongoDocument => this.#toDomain(mongoDocument, { deferContentLoading: true }))); + } + + async findByService(serviceId, { limit, offset } = {}) { + let query = this.collection.find({ serviceId }).project({ content: 0 }).sort({ fetchDate: -1 }); + + if (offset !== undefined) { + query = query.skip(offset); + } + + if (limit !== undefined) { + query = query.limit(limit); + } + + return Promise.all((await query.toArray()) .map(mongoDocument => this.#toDomain(mongoDocument, { deferContentLoading: true }))); } - count() { - return this.collection.countDocuments(); + async findFirst(serviceId, termsType) { + const [mongoDocument] = await this.collection.find({ serviceId, termsType }).limit(1).sort({ fetchDate: 1 }).toArray(); + + return this.#toDomain(mongoDocument, { deferContentLoading: true }); + } + + async findPrevious(versionId) { + const version = await this.findById(versionId); + + if (!version) { + return null; + } + + const [mongoDocument] = await this.collection.find({ serviceId: version.serviceId, termsType: version.termsType, fetchDate: { $lt: new Date(version.fetchDate) } }).limit(1).sort({ fetchDate: -1 }).toArray(); + + return this.#toDomain(mongoDocument, { deferContentLoading: true }); + } + + async findNext(versionId) { + const version = await this.findById(versionId); + + if (!version) { + return null; + } + + const [mongoDocument] = await this.collection.find({ serviceId: version.serviceId, termsType: version.termsType, fetchDate: { $gt: new Date(version.fetchDate) } }).limit(1).sort({ fetchDate: 1 }).toArray(); + + return this.#toDomain(mongoDocument, { deferContentLoading: true }); + } + + count(serviceId, termsType) { + const filter = {}; + + if (serviceId) { + filter.serviceId = serviceId; + } + + if (termsType) { + filter.termsType = termsType; + } + + return this.collection.countDocuments(filter); } async* iterate() { @@ -117,6 +210,11 @@ export default class MongoRepository extends RepositoryInterface { record.content = content instanceof Binary ? content.buffer : content; } + // eslint-disable-next-line no-unused-vars + async getDiffStats(recordId) { + return { additions: null, deletions: null }; // Diff stats are not available for MongoDB storage + } + async #toDomain(mongoDocument, { deferContentLoading } = {}) { if (!mongoDocument) { return null; diff --git a/src/archivist/recorder/repositories/mongo/index.test.js b/src/archivist/recorder/repositories/mongo/index.test.js index 61ecfd1d0..dfeeb9514 100644 --- a/src/archivist/recorder/repositories/mongo/index.test.js +++ b/src/archivist/recorder/repositories/mongo/index.test.js @@ -629,45 +629,383 @@ describe('MongoRepository', () => { } }); - it('returns records in ascending order', () => { - expect(records.map(record => record.fetchDate)).to.deep.equal([ FETCH_DATE_EARLIER, FETCH_DATE, FETCH_DATE_LATER ]); + it('returns records in descending order', () => { + expect(records.map(record => record.fetchDate)).to.deep.equal([ FETCH_DATE_LATER, FETCH_DATE, FETCH_DATE_EARLIER ]); }); }); - describe('#count', () => { - let count; + describe('#findByServiceAndTermsType', () => { + const expectedIds = []; + let records; before(async () => { - await subject.save(new Version({ + const { id: id1 } = await subject.save(new Version({ serviceId: SERVICE_PROVIDER_ID, termsType: TERMS_TYPE, content: CONTENT, fetchDate: FETCH_DATE, snapshotIds: [SNAPSHOT_ID], })); - await subject.save(new Version({ + + expectedIds.push(id1); + + const { id: id2 } = await subject.save(new Version({ serviceId: SERVICE_PROVIDER_ID, termsType: TERMS_TYPE, content: `${CONTENT} - updated`, fetchDate: FETCH_DATE_LATER, snapshotIds: [SNAPSHOT_ID], })); + + expectedIds.push(id2); + + await subject.save(new Version({ + serviceId: 'other_service', + termsType: 'Privacy Policy', + content: `${CONTENT} - other`, + fetchDate: FETCH_DATE, + snapshotIds: [SNAPSHOT_ID], + })); + + (records = await subject.findByServiceAndTermsType(SERVICE_PROVIDER_ID, TERMS_TYPE)); + }); + + after(() => subject.removeAll()); + + it('returns only matching records', () => { + expect(records.length).to.equal(2); + }); + + it('returns Version objects', () => { + for (const record of records) { + expect(record).to.be.an.instanceof(Version); + } + }); + + it('returns records with matching service ID', () => { + for (const record of records) { + expect(record.serviceId).to.equal(SERVICE_PROVIDER_ID); + } + }); + + it('returns records with matching terms type', () => { + for (const record of records) { + expect(record.termsType).to.equal(TERMS_TYPE); + } + }); + + it('returns records in descending order', () => { + expect(records.map(record => record.fetchDate)).to.deep.equal([ FETCH_DATE_LATER, FETCH_DATE ]); + }); + + it('returns records with correct IDs', () => { + expect(records.map(record => record.id)).to.have.members(expectedIds); + }); + + context('when no matching records exist', () => { + it('returns an empty array', async () => { + const result = await subject.findByServiceAndTermsType('non_existent_service', 'Non Existent Terms'); + + expect(result).to.be.an('array').that.is.empty; + }); + }); + }); + + describe('#count', () => { + context('without filters', () => { + let count; + + before(async () => { + await subject.save(new Version({ + serviceId: SERVICE_PROVIDER_ID, + termsType: TERMS_TYPE, + content: CONTENT, + fetchDate: FETCH_DATE, + snapshotIds: [SNAPSHOT_ID], + })); + await subject.save(new Version({ + serviceId: SERVICE_PROVIDER_ID, + termsType: TERMS_TYPE, + content: `${CONTENT} - updated`, + fetchDate: FETCH_DATE_LATER, + snapshotIds: [SNAPSHOT_ID], + })); + await subject.save(new Version({ + serviceId: SERVICE_PROVIDER_ID, + termsType: TERMS_TYPE, + content: `${CONTENT} - updated 2`, + isExtractOnly: true, + fetchDate: FETCH_DATE_EARLIER, + snapshotIds: [SNAPSHOT_ID], + })); + + (count = await subject.count()); + }); + + after(() => subject.removeAll()); + + it('returns the proper count', () => { + expect(count).to.equal(3); + }); + }); + + context('with serviceId and termsType filters', () => { + before(async () => { + await subject.save(new Version({ + serviceId: SERVICE_PROVIDER_ID, + termsType: TERMS_TYPE, + content: CONTENT, + fetchDate: FETCH_DATE, + snapshotIds: [SNAPSHOT_ID], + })); + await subject.save(new Version({ + serviceId: SERVICE_PROVIDER_ID, + termsType: TERMS_TYPE, + content: `${CONTENT} - updated`, + fetchDate: FETCH_DATE_LATER, + snapshotIds: [SNAPSHOT_ID], + })); + await subject.save(new Version({ + serviceId: 'other_service', + termsType: 'Privacy Policy', + content: 'Other content', + fetchDate: FETCH_DATE, + snapshotIds: [SNAPSHOT_ID], + })); + }); + + after(() => subject.removeAll()); + + it('returns count for specific service and terms type', async () => { + const filteredCount = await subject.count(SERVICE_PROVIDER_ID, TERMS_TYPE); + + expect(filteredCount).to.equal(2); + }); + + it('returns zero for non-existent service', async () => { + const filteredCount = await subject.count('non-existent-service', TERMS_TYPE); + + expect(filteredCount).to.equal(0); + }); + }); + + context('with only serviceId filter', () => { + before(async () => { + await subject.save(new Version({ + serviceId: SERVICE_PROVIDER_ID, + termsType: TERMS_TYPE, + content: CONTENT, + fetchDate: FETCH_DATE, + snapshotIds: [SNAPSHOT_ID], + })); + await subject.save(new Version({ + serviceId: SERVICE_PROVIDER_ID, + termsType: 'Different Terms', + content: 'Different content', + fetchDate: FETCH_DATE_LATER, + snapshotIds: [SNAPSHOT_ID], + })); + await subject.save(new Version({ + serviceId: 'other_service', + termsType: 'Privacy Policy', + content: 'Other content', + fetchDate: FETCH_DATE, + snapshotIds: [SNAPSHOT_ID], + })); + }); + + after(() => subject.removeAll()); + + it('returns count for all terms types of a service', async () => { + const filteredCount = await subject.count(SERVICE_PROVIDER_ID); + + expect(filteredCount).to.equal(2); + }); + }); + }); + + describe('#findFirst', () => { + let firstVersion; + let result; + + before(async () => { + firstVersion = await subject.save(new Version({ + serviceId: SERVICE_PROVIDER_ID, + termsType: TERMS_TYPE, + content: 'first content', + fetchDate: FETCH_DATE_EARLIER, + snapshotIds: [SNAPSHOT_ID], + })); + await subject.save(new Version({ serviceId: SERVICE_PROVIDER_ID, termsType: TERMS_TYPE, - content: `${CONTENT} - updated 2`, - isTechnicalUpgrade: true, + content: 'middle content', + fetchDate: FETCH_DATE, + snapshotIds: [SNAPSHOT_ID], + })); + + await subject.save(new Version({ + serviceId: SERVICE_PROVIDER_ID, + termsType: TERMS_TYPE, + content: 'last content', + fetchDate: FETCH_DATE_LATER, + snapshotIds: [SNAPSHOT_ID], + })); + + result = await subject.findFirst(SERVICE_PROVIDER_ID, TERMS_TYPE); + }); + + after(() => subject.removeAll()); + + it('returns a Version object', () => { + expect(result).to.be.an.instanceof(Version); + }); + + it('returns the oldest version', () => { + expect(result.id).to.equal(firstVersion.id); + }); + + it('returns the correct fetch date', () => { + expect(result.fetchDate).to.deep.equal(firstVersion.fetchDate); + }); + + context('when no versions exist', () => { + it('returns null', async () => { + expect(await subject.findFirst('non_existent_service', 'Non Existent Terms')).to.be.null; + }); + }); + }); + + describe('#findPrevious', () => { + let firstVersion; + let middleVersion; + let lastVersion; + + before(async () => { + firstVersion = await subject.save(new Version({ + serviceId: SERVICE_PROVIDER_ID, + termsType: TERMS_TYPE, + content: 'first content', fetchDate: FETCH_DATE_EARLIER, snapshotIds: [SNAPSHOT_ID], })); - (count = await subject.count()); + middleVersion = await subject.save(new Version({ + serviceId: SERVICE_PROVIDER_ID, + termsType: TERMS_TYPE, + content: 'middle content', + fetchDate: FETCH_DATE, + snapshotIds: [SNAPSHOT_ID], + })); + + lastVersion = await subject.save(new Version({ + serviceId: SERVICE_PROVIDER_ID, + termsType: TERMS_TYPE, + content: 'last content', + fetchDate: FETCH_DATE_LATER, + snapshotIds: [SNAPSHOT_ID], + })); }); after(() => subject.removeAll()); - it('returns the proper count', () => { - expect(count).to.equal(3); + context('when requesting previous of middle version', () => { + it('returns the version before it', async () => { + const result = await subject.findPrevious(middleVersion.id); + + expect(result).to.be.an.instanceof(Version); + expect(result.id).to.equal(firstVersion.id); + }); + }); + + context('when requesting previous of last version', () => { + it('returns the version before it', async () => { + const result = await subject.findPrevious(lastVersion.id); + + expect(result).to.be.an.instanceof(Version); + expect(result.id).to.equal(middleVersion.id); + }); + }); + + context('when requesting previous of first version', () => { + it('returns null', async () => { + const result = await subject.findPrevious(firstVersion.id); + + expect(result).to.be.null; + }); + }); + + context('when the version does not exist', () => { + it('returns null', async () => { + expect(await subject.findPrevious('non_existent_version_id')).to.be.null; + }); + }); + }); + + describe('#findNext', () => { + let firstVersion; + let middleVersion; + let lastVersion; + + before(async () => { + firstVersion = await subject.save(new Version({ + serviceId: SERVICE_PROVIDER_ID, + termsType: TERMS_TYPE, + content: 'first content', + fetchDate: FETCH_DATE_EARLIER, + snapshotIds: [SNAPSHOT_ID], + })); + + middleVersion = await subject.save(new Version({ + serviceId: SERVICE_PROVIDER_ID, + termsType: TERMS_TYPE, + content: 'middle content', + fetchDate: FETCH_DATE, + snapshotIds: [SNAPSHOT_ID], + })); + + lastVersion = await subject.save(new Version({ + serviceId: SERVICE_PROVIDER_ID, + termsType: TERMS_TYPE, + content: 'last content', + fetchDate: FETCH_DATE_LATER, + snapshotIds: [SNAPSHOT_ID], + })); + }); + + after(() => subject.removeAll()); + + context('when requesting next of first version', () => { + it('returns the version after it', async () => { + const result = await subject.findNext(firstVersion.id); + + expect(result).to.be.an.instanceof(Version); + expect(result.id).to.equal(middleVersion.id); + }); + }); + + context('when requesting next of middle version', () => { + it('returns the version after it', async () => { + const result = await subject.findNext(middleVersion.id); + + expect(result).to.be.an.instanceof(Version); + expect(result.id).to.equal(lastVersion.id); + }); + }); + + context('when requesting next of last version', () => { + it('returns null', async () => { + const result = await subject.findNext(lastVersion.id); + + expect(result).to.be.null; + }); + }); + + context('when the version does not exist', () => { + it('returns null', async () => { + expect(await subject.findNext('non_existent_version_id')).to.be.null; + }); }); }); @@ -1197,8 +1535,8 @@ describe('MongoRepository', () => { } }); - it('returns records in ascending order', () => { - expect(records.map(record => record.fetchDate)).to.deep.equal([ FETCH_DATE_EARLIER, FETCH_DATE, FETCH_DATE_LATER ]); + it('returns records in descending order', () => { + expect(records.map(record => record.fetchDate)).to.deep.equal([ FETCH_DATE_LATER, FETCH_DATE, FETCH_DATE_EARLIER ]); }); }); diff --git a/src/collection-api/routes/services.js b/src/collection-api/routes/services.js index f13879d2d..e3a305858 100644 --- a/src/collection-api/routes/services.js +++ b/src/collection-api/routes/services.js @@ -9,6 +9,24 @@ import express from 'express'; * description: Services API * components: * schemas: + * ServiceListItem: + * type: object + * properties: + * id: + * type: string + * description: The ID of the service. + * name: + * type: string + * description: The name of the service. + * terms: + * type: array + * description: The declared terms types for this service. + * items: + * type: object + * properties: + * type: + * type: string + * description: The type of terms. * Service: * type: object * description: Definition of a service and the agreements its provider sets forth. While the information is the same, the format differs from the JSON declaration files that are designed for readability by contributors. @@ -51,6 +69,19 @@ import express from 'express'; * description: The names of filters to apply to the content. * items: * type: string + * ErrorResponse: + * type: object + * properties: + * error: + * type: string + * description: Error message. + * responses: + * NotFoundError: + * description: Resource not found. + * content: + * application/json: + * schema: + * $ref: '#/components/schemas/ErrorResponse' */ export default function servicesRouter(services) { const router = express.Router(); @@ -71,23 +102,7 @@ export default function servicesRouter(services) { * schema: * type: array * items: - * type: object - * properties: - * id: - * type: string - * description: The ID of the service. - * name: - * type: string - * description: The name of the service. - * terms: - * type: array - * description: The declared terms types for this service. - * items: - * type: object - * properties: - * type: - * type: string - * description: The type of terms. + * $ref: '#/components/schemas/ServiceListItem' */ router.get('/services', (req, res) => { res.status(200).json(Object.values(services).map(service => ({ @@ -127,16 +142,14 @@ export default function servicesRouter(services) { * schema: * $ref: '#/components/schemas/Service' * 404: - * description: No service matching the provided ID is found. + * $ref: '#/components/responses/NotFoundError' */ router.get('/service/:serviceId', (req, res) => { const matchedServiceID = Object.keys(services).find(key => key.toLowerCase() === req.params.serviceId?.toLowerCase()); const service = services[matchedServiceID]; if (!service) { - res.status(404).send('Service not found'); - - return; + return res.status(404).json({ error: 'Service not found' }); } res.status(200).json({ diff --git a/src/collection-api/routes/versions.js b/src/collection-api/routes/versions.js index e420f8998..27a96a379 100644 --- a/src/collection-api/routes/versions.js +++ b/src/collection-api/routes/versions.js @@ -11,25 +11,529 @@ import { toISODateWithoutMilliseconds } from '../../archivist/utils/date.js'; * name: Versions * description: Versions API * components: + * parameters: + * LimitParam: + * in: query + * name: limit + * description: | + * The maximum number of versions to return. + * + * **Note for Git storage**: Pagination uses Git's `--skip` and `--max-count` options, + * which work in topological order rather than strictly chronological order. + * This means paginated results may not be in perfect chronological sequence, + * but this is an acceptable performance trade-off. + * schema: + * type: integer + * minimum: 1 + * maximum: 500 + * default: 100 + * required: false + * OffsetParam: + * in: query + * name: offset + * description: | + * The number of versions to skip before returning results. + * + * **Note for Git storage**: Pagination uses Git's `--skip` and `--max-count` options, + * which work in topological order rather than strictly chronological order. + * schema: + * type: integer + * minimum: 0 + * default: 0 + * required: false * schemas: * Version: * type: object * description: Version content and metadata * properties: + * id: + * type: string + * description: The ID of the version. + * serviceId: + * type: string + * description: The ID of the service. + * termsType: + * type: string + * description: The type of terms. * fetchDate: * type: string * format: date-time * description: The ISO 8601 datetime string when the version was recorded. + * isFirstRecord: + * type: boolean + * description: Whether this version is the first one recorded for this service and terms type. + * content: + * type: string + * description: The JSON-escaped Markdown content of the version + * VersionListItem: + * type: object + * properties: * id: * type: string * description: The ID of the version. - * content: + * serviceId: * type: string - * description: The JSON-escaped Markdown content of the version + * description: The ID of the service. + * termsType: + * type: string + * description: The type of terms. + * fetchDate: + * type: string + * format: date-time + * description: The ISO 8601 datetime string when the version was recorded. + * isFirstRecord: + * type: boolean + * description: Whether this version is the first one recorded for this service and terms type. + * VersionListItemWithStats: + * allOf: + * - $ref: '#/components/schemas/VersionListItem' + * - type: object + * properties: + * additions: + * type: integer + * nullable: true + * description: The number of lines added in this version, or null if not available. + * deletions: + * type: integer + * nullable: true + * description: The number of lines deleted in this version, or null if not available. + * PaginatedVersionsResponse: + * type: object + * properties: + * data: + * type: array + * description: The list of versions. + * items: + * $ref: '#/components/schemas/VersionListItem' + * count: + * type: integer + * description: The total number of versions found. + * limit: + * type: integer + * description: The maximum number of versions returned in this response. + * offset: + * type: integer + * description: The number of versions skipped before returning results. + * PaginatedVersionsWithStatsResponse: + * type: object + * properties: + * data: + * type: array + * description: The list of versions with diff statistics. + * items: + * $ref: '#/components/schemas/VersionListItemWithStats' + * count: + * type: integer + * description: The total number of versions found. + * limit: + * type: integer + * description: The maximum number of versions returned in this response. + * offset: + * type: integer + * description: The number of versions skipped before returning results. + * VersionWithLinks: + * allOf: + * - $ref: '#/components/schemas/Version' + * - type: object + * properties: + * additions: + * type: integer + * nullable: true + * description: The number of lines added in this version, or null if not available. + * deletions: + * type: integer + * nullable: true + * description: The number of lines deleted in this version, or null if not available. + * fetchUrls: + * type: array + * description: The URLs of the source documents that were fetched to produce this version. + * items: + * type: string + * format: uri + * links: + * type: object + * description: Navigation links to related versions. + * properties: + * first: + * type: string + * description: The ID of the first version for this service and terms type. + * nullable: true + * prev: + * type: string + * description: The ID of the previous version, or null if this is the first. + * nullable: true + * next: + * type: string + * description: The ID of the next version, or null if this is the last. + * nullable: true + * last: + * type: string + * description: The ID of the last version for this service and terms type. + * nullable: true + * ErrorResponse: + * type: object + * properties: + * error: + * type: string + * description: Error message. + * responses: + * BadRequestError: + * description: Invalid pagination parameters. + * content: + * application/json: + * schema: + * $ref: '#/components/schemas/ErrorResponse' + * NotFoundError: + * description: Resource not found. + * content: + * application/json: + * schema: + * $ref: '#/components/schemas/ErrorResponse' */ const router = express.Router(); const versionsRepository = await RepositoryFactory.create(config.get('@opentermsarchive/engine.recorder.versions.storage')).initialize(); +const snapshotsRepository = await RepositoryFactory.create(config.get('@opentermsarchive/engine.recorder.snapshots.storage')).initialize(); + +function parsePaginationParams(query) { + const limit = query.limit ? parseInt(query.limit, 10) : 100; + const offset = query.offset ? parseInt(query.offset, 10) : 0; + + return { limit, offset }; +} + +function validatePaginationParams(limit, offset) { + if (Number.isNaN(limit) || limit < 1) { + return { error: 'Invalid limit parameter. Must be a positive integer.' }; + } + + if (limit > 500) { + return { error: 'Invalid limit parameter. Must not exceed 500.' }; + } + + if (Number.isNaN(offset) || offset < 0) { + return { error: 'Invalid offset parameter. Must be a non-negative integer.' }; + } + + return null; +} + +function mapVersionToListItem(version) { + return { + id: version.id, + serviceId: version.serviceId, + termsType: version.termsType, + fetchDate: toISODateWithoutMilliseconds(version.fetchDate), + isFirstRecord: version.isFirstRecord, + isTechnicalUpgrade: version.isTechnicalUpgrade, + }; +} + +async function getFetchUrls(snapshotIds) { + if (!snapshotIds?.length) return []; + const snapshots = await Promise.all(snapshotIds.map(id => snapshotsRepository.findMetadataById(id))); + + return snapshots + .filter(Boolean) + .map(snapshot => snapshot.metadata?.['x-source-document-location']) + .filter(Boolean); +} + +function mapVersionToDetailResponse(version, links, fetchUrls) { + return { + id: version.id, + serviceId: version.serviceId, + termsType: version.termsType, + fetchDate: toISODateWithoutMilliseconds(version.fetchDate), + content: version.content, + isFirstRecord: version.isFirstRecord, + isTechnicalUpgrade: version.isTechnicalUpgrade, + fetchUrls, + links: { + first: links.first?.id || null, + prev: links.prev?.id || null, + next: links.next?.id || null, + last: links.last?.id || null, + }, + }; +} + +/** + * @private + * @swagger + * /versions: + * get: + * summary: Get all versions. + * tags: [Versions] + * produces: + * - application/json + * parameters: + * - $ref: '#/components/parameters/LimitParam' + * - $ref: '#/components/parameters/OffsetParam' + * responses: + * 200: + * description: A JSON object containing the list of all versions and metadata. + * content: + * application/json: + * schema: + * $ref: '#/components/schemas/PaginatedVersionsResponse' + * 400: + * $ref: '#/components/responses/BadRequestError' + */ +router.get('/versions', async (req, res) => { + const { limit, offset } = parsePaginationParams(req.query); + const validationError = validatePaginationParams(limit, offset); + + if (validationError) { + return res.status(400).json(validationError); + } + + const paginatedVersions = await versionsRepository.findAll({ limit, offset }); + + const versionsList = paginatedVersions.map(mapVersionToListItem); + + const response = { + data: versionsList, + count: await versionsRepository.count(), + limit, + offset, + }; + + return res.status(200).json(response); +}); + +/** + * @private + * @swagger + * /versions/{serviceId}: + * get: + * summary: Get all versions for a specific service. + * tags: [Versions] + * produces: + * - application/json + * parameters: + * - in: path + * name: serviceId + * description: The ID of the service whose versions will be returned. + * schema: + * type: string + * required: true + * - $ref: '#/components/parameters/LimitParam' + * - $ref: '#/components/parameters/OffsetParam' + * responses: + * 200: + * description: A JSON object containing the list of versions and metadata. + * content: + * application/json: + * schema: + * $ref: '#/components/schemas/PaginatedVersionsResponse' + * 400: + * $ref: '#/components/responses/BadRequestError' + * 404: + * $ref: '#/components/responses/NotFoundError' + */ +router.get('/versions/:serviceId', async (req, res) => { + const { serviceId } = req.params; + const { limit, offset } = parsePaginationParams(req.query); + const validationError = validatePaginationParams(limit, offset); + + if (validationError) { + return res.status(400).json(validationError); + } + + const totalCount = await versionsRepository.count(serviceId); + + if (totalCount === 0) { + return res.status(404).json({ error: `No versions found for service "${serviceId}"` }); + } + + const paginatedVersions = await versionsRepository.findByService(serviceId, { limit, offset }); + + const versionsList = paginatedVersions.map(mapVersionToListItem); + + const response = { + data: versionsList, + count: totalCount, + limit, + offset, + }; + + return res.status(200).json(response); +}); + +/** + * @private + * @swagger + * /versions/{serviceId}/{termsType}: + * get: + * summary: Get all versions of some terms for a specific service. + * tags: [Versions] + * produces: + * - application/json + * parameters: + * - in: path + * name: serviceId + * description: The ID of the service whose versions will be returned. + * schema: + * type: string + * required: true + * - in: path + * name: termsType + * description: The type of terms whose versions will be returned. + * schema: + * type: string + * required: true + * - $ref: '#/components/parameters/LimitParam' + * - $ref: '#/components/parameters/OffsetParam' + * responses: + * 200: + * description: A JSON object containing the list of versions with diff statistics and metadata. + * content: + * application/json: + * schema: + * $ref: '#/components/schemas/PaginatedVersionsWithStatsResponse' + * 400: + * $ref: '#/components/responses/BadRequestError' + * 404: + * $ref: '#/components/responses/NotFoundError' + */ +router.get('/versions/:serviceId/:termsType', async (req, res) => { + const { serviceId, termsType } = req.params; + const { limit, offset } = parsePaginationParams(req.query); + const validationError = validatePaginationParams(limit, offset); + + if (validationError) { + return res.status(400).json(validationError); + } + + const totalCount = await versionsRepository.count(serviceId, termsType); + + if (totalCount === 0) { + return res.status(404).json({ error: `No versions found for service "${serviceId}" and terms type "${termsType}"` }); + } + + const paginatedVersions = await versionsRepository.findByServiceAndTermsType(serviceId, termsType, { limit, offset }); + + const versionsList = await Promise.all(paginatedVersions.map(async version => { + const stats = await versionsRepository.getDiffStats(version.id); + + return { + ...mapVersionToListItem(version), + ...stats, + }; + })); + + const response = { + data: versionsList, + count: totalCount, + limit, + offset, + }; + + return res.status(200).json(response); +}); + +/** + * @private + * @swagger + * /version/{versionId}: + * get: + * summary: Get a specific version by its ID. + * tags: [Versions] + * produces: + * - application/json + * parameters: + * - in: path + * name: versionId + * description: The ID of the version to retrieve. + * schema: + * type: string + * required: true + * responses: + * 200: + * description: A JSON object containing the version content, metadata, and navigation links. + * content: + * application/json: + * schema: + * $ref: '#/components/schemas/VersionWithLinks' + * 404: + * $ref: '#/components/responses/NotFoundError' + */ +router.get('/version/:versionId', async (req, res) => { + const { versionId } = req.params; + + const version = await versionsRepository.findById(versionId); + + if (!version) { + return res.status(404).json({ error: `No version found with ID "${versionId}"` }); + } + + const [ first, prev, next, last, stats, fetchUrls ] = await Promise.all([ + versionsRepository.findFirst(version.serviceId, version.termsType), + versionsRepository.findPrevious(versionId), + versionsRepository.findNext(versionId), + versionsRepository.findLatest(version.serviceId, version.termsType), + versionsRepository.getDiffStats(versionId), + getFetchUrls(version.snapshotIds), + ]); + + return res.status(200).json({ + ...mapVersionToDetailResponse(version, { first, prev, next, last }, fetchUrls), + ...stats, + }); +}); + +/** + * @private + * @swagger + * /version/{serviceId}/{termsType}/latest: + * get: + * summary: Get the latest version of some terms for a service. + * tags: [Versions] + * produces: + * - application/json + * parameters: + * - in: path + * name: serviceId + * description: The ID of the service whose version will be returned. + * schema: + * type: string + * required: true + * - in: path + * name: termsType + * description: The type of terms whose version will be returned. + * schema: + * type: string + * required: true + * responses: + * 200: + * description: A JSON object containing the version content, metadata, and navigation links. + * content: + * application/json: + * schema: + * $ref: '#/components/schemas/VersionWithLinks' + * 404: + * $ref: '#/components/responses/NotFoundError' + */ +router.get('/version/:serviceId/:termsType/latest', async (req, res) => { + const { serviceId, termsType } = req.params; + + const version = await versionsRepository.findLatest(serviceId, termsType); + + if (!version) { + return res.status(404).json({ error: `No version found for service "${serviceId}" and terms type "${termsType}"` }); + } + + const [ first, prev, next, last, fetchUrls ] = await Promise.all([ + versionsRepository.findFirst(version.serviceId, version.termsType), + versionsRepository.findPrevious(version.id), + versionsRepository.findNext(version.id), + versionsRepository.findLatest(version.serviceId, version.termsType), + getFetchUrls(version.snapshotIds), + ]); + + return res.status(200).json(mapVersionToDetailResponse(version, { first, prev, next, last }, fetchUrls)); +}); /** * @private @@ -62,31 +566,19 @@ const versionsRepository = await RepositoryFactory.create(config.get('@openterms * required: true * responses: * 200: - * description: A JSON object containing the version content and metadata. + * description: A JSON object containing the version content, metadata, and navigation links. * content: * application/json: * schema: - * $ref: '#/components/schemas/Version' + * $ref: '#/components/schemas/VersionWithLinks' * 404: - * description: No version found for the specified combination of service ID, terms type and date. - * content: - * application/json: - * schema: - * type: object - * properties: - * error: - * type: string - * description: Error message indicating that no version is found. + * $ref: '#/components/responses/NotFoundError' * 416: * description: The requested date is in the future. * content: * application/json: * schema: - * type: object - * properties: - * error: - * type: string - * description: Error message indicating that the requested date is in the future. + * $ref: '#/components/schemas/ErrorResponse' */ router.get('/version/:serviceId/:termsType/:date', async (req, res) => { const { serviceId, termsType, date } = req.params; @@ -99,14 +591,18 @@ router.get('/version/:serviceId/:termsType/:date', async (req, res) => { const version = await versionsRepository.findByDate(serviceId, termsType, requestedDate); if (!version) { - return res.status(404).json({ error: `No version found for date ${date}` }); + return res.status(404).json({ error: `No version found for service "${serviceId}" and terms type "${termsType}" at date ${date}` }); } - return res.status(200).json({ - id: version.id, - fetchDate: toISODateWithoutMilliseconds(version.fetchDate), - content: version.content, - }); + const [ first, prev, next, last, fetchUrls ] = await Promise.all([ + versionsRepository.findFirst(version.serviceId, version.termsType), + versionsRepository.findPrevious(version.id), + versionsRepository.findNext(version.id), + versionsRepository.findLatest(version.serviceId, version.termsType), + getFetchUrls(version.snapshotIds), + ]); + + return res.status(200).json(mapVersionToDetailResponse(version, { first, prev, next, last }, fetchUrls)); }); export default router; diff --git a/src/collection-api/routes/versions.test.js b/src/collection-api/routes/versions.test.js index aadcfe14b..b993d2958 100644 --- a/src/collection-api/routes/versions.test.js +++ b/src/collection-api/routes/versions.test.js @@ -12,52 +12,588 @@ const basePath = config.get('@opentermsarchive/engine.collection-api.basePath'); const request = supertest(app); describe('Versions API', () => { - describe('GET /version/:serviceId/:termsType/:date', () => { - let expectedResult; - let versionsRepository; - const FETCH_DATE = new Date('2023-01-01T12:00:00Z'); - const VERSION_COMMON_ATTRIBUTES = { - serviceId: 'service-1', - termsType: 'Terms of Service', - snapshotId: ['snapshot_id'], - }; + let versionsRepository; + const FETCH_DATE = new Date('2023-01-01T12:00:00Z'); + const VERSION_COMMON_ATTRIBUTES = { + serviceId: 'service-1', + termsType: 'Terms of Service', + snapshotId: ['snapshot_id'], + }; - before(async () => { - versionsRepository = RepositoryFactory.create(config.get('@opentermsarchive/engine.recorder.versions.storage')); + before(async () => { + versionsRepository = RepositoryFactory.create(config.get('@opentermsarchive/engine.recorder.versions.storage')); + await versionsRepository.initialize(); + }); + + after(() => versionsRepository.removeAll()); - await versionsRepository.initialize(); + describe('GET /versions/:serviceId/:termsType', () => { + let version1; + let version2; + let version3; + before(async () => { const ONE_HOUR = 60 * 60 * 1000; - await versionsRepository.save(new Version({ + version1 = new Version({ ...VERSION_COMMON_ATTRIBUTES, content: 'initial content', fetchDate: new Date(new Date(FETCH_DATE).getTime() - ONE_HOUR), - })); + }); + await versionsRepository.save(version1); - const version = new Version({ + version2 = new Version({ ...VERSION_COMMON_ATTRIBUTES, content: 'updated content', fetchDate: FETCH_DATE, }); + await versionsRepository.save(version2); - await versionsRepository.save(version); + version3 = new Version({ + ...VERSION_COMMON_ATTRIBUTES, + content: 'latest content', + fetchDate: new Date(new Date(FETCH_DATE).getTime() + ONE_HOUR), + }); + await versionsRepository.save(version3); await versionsRepository.save(new Version({ + serviceId: 'service-2', + termsType: 'Privacy Policy', + snapshotId: ['snapshot_id'], + content: 'other service content', + fetchDate: FETCH_DATE, + })); + }); + + let response; + + context('when versions are found', () => { + before(async () => { + response = await request.get(`${basePath}/v1/versions/service-1/Terms%20of%20Service`); + }); + + it('responds with 200 status code', () => { + expect(response.status).to.equal(200); + }); + + it('responds with Content-Type application/json', () => { + expect(response.type).to.equal('application/json'); + }); + + it('returns response with metadata structure', () => { + expect(response.body).to.have.all.keys('data', 'count', 'limit', 'offset'); + }); + + it('returns all versions for the service and terms type', () => { + expect(response.body.data).to.be.an('array').with.lengthOf(3); + }); + + it('returns correct count', () => { + expect(response.body.count).to.equal(3); + }); + + it('returns versions with id, serviceId, termsType, fetchDate, isFirstRecord, isTechnicalUpgrade, additions and deletions', () => { + response.body.data.forEach(version => { + expect(version).to.have.all.keys('id', 'serviceId', 'termsType', 'fetchDate', 'isFirstRecord', 'isTechnicalUpgrade', 'additions', 'deletions'); + expect(version).to.not.have.property('content'); + }); + }); + + it('returns versions with correct serviceId and termsType', () => { + response.body.data.forEach(version => { + expect(version.serviceId).to.equal('service-1'); + expect(version.termsType).to.equal('Terms of Service'); + }); + }); + + it('returns versions in reverse chronological order', () => { + expect(response.body.data[0].id).to.equal(version3.id); + expect(response.body.data[1].id).to.equal(version2.id); + expect(response.body.data[2].id).to.equal(version1.id); + }); + + it('returns versions with correct fetchDates', () => { + expect(response.body.data[0].fetchDate).to.equal(toISODateWithoutMilliseconds(version3.fetchDate)); + expect(response.body.data[1].fetchDate).to.equal(toISODateWithoutMilliseconds(version2.fetchDate)); + expect(response.body.data[2].fetchDate).to.equal(toISODateWithoutMilliseconds(version1.fetchDate)); + }); + }); + + context('with pagination', () => { + context('with default limit (no query parameters)', () => { + before(async () => { + response = await request.get(`${basePath}/v1/versions/service-1/Terms%20of%20Service`); + }); + + it('responds with 200 status code', () => { + expect(response.status).to.equal(200); + }); + + it('returns all versions when total is less than default limit', () => { + expect(response.body.data).to.be.an('array').with.lengthOf(3); + }); + + it('includes default pagination metadata', () => { + expect(response.body).to.have.property('limit', 100); + expect(response.body).to.have.property('offset', 0); + }); + }); + + context('with limit parameter', () => { + before(async () => { + response = await request.get(`${basePath}/v1/versions/service-1/Terms%20of%20Service?limit=2`); + }); + + it('responds with 200 status code', () => { + expect(response.status).to.equal(200); + }); + + it('returns limited number of versions', () => { + expect(response.body.data).to.be.an('array').with.lengthOf(2); + }); + + it('returns correct total count', () => { + expect(response.body.count).to.equal(3); + }); + + it('includes pagination metadata', () => { + expect(response.body).to.have.property('limit', 2); + expect(response.body).to.have.property('offset', 0); + }); + + it('returns first two versions in reverse chronological order', () => { + expect(response.body.data[0].id).to.equal(version3.id); + expect(response.body.data[1].id).to.equal(version2.id); + }); + }); + + context('with limit and offset parameters', () => { + before(async () => { + response = await request.get(`${basePath}/v1/versions/service-1/Terms%20of%20Service?limit=1&offset=1`); + }); + + it('responds with 200 status code', () => { + expect(response.status).to.equal(200); + }); + + it('returns limited number of versions starting from offset', () => { + expect(response.body.data).to.be.an('array').with.lengthOf(1); + }); + + it('returns correct total count', () => { + expect(response.body.count).to.equal(3); + }); + + it('includes pagination metadata', () => { + expect(response.body).to.have.property('limit', 1); + expect(response.body).to.have.property('offset', 1); + }); + + it('returns second version', () => { + expect(response.body.data[0].id).to.equal(version2.id); + }); + }); + + context('with only offset parameter', () => { + before(async () => { + response = await request.get(`${basePath}/v1/versions/service-1/Terms%20of%20Service?offset=1`); + }); + + it('responds with 200 status code', () => { + expect(response.status).to.equal(200); + }); + + it('returns all versions starting from offset', () => { + expect(response.body.data).to.be.an('array').with.lengthOf(2); + }); + + it('returns correct total count', () => { + expect(response.body.count).to.equal(3); + }); + + it('includes offset in metadata', () => { + expect(response.body).to.have.property('offset', 1); + }); + + it('returns last two versions', () => { + expect(response.body.data[0].id).to.equal(version2.id); + expect(response.body.data[1].id).to.equal(version1.id); + }); + }); + + context('with invalid limit parameter (too small)', () => { + before(async () => { + response = await request.get(`${basePath}/v1/versions/service-1/Terms%20of%20Service?limit=0`); + }); + + it('responds with 400 status code', () => { + expect(response.status).to.equal(400); + }); + + it('returns error message', () => { + expect(response.body).to.have.property('error'); + expect(response.body.error).to.include('Invalid limit parameter'); + }); + }); + + context('with invalid limit parameter (exceeds maximum)', () => { + before(async () => { + response = await request.get(`${basePath}/v1/versions/service-1/Terms%20of%20Service?limit=501`); + }); + + it('responds with 400 status code', () => { + expect(response.status).to.equal(400); + }); + + it('returns error message', () => { + expect(response.body).to.have.property('error'); + expect(response.body.error).to.include('Invalid limit parameter'); + expect(response.body.error).to.include('500'); + }); + }); + + context('with invalid offset parameter', () => { + before(async () => { + response = await request.get(`${basePath}/v1/versions/service-1/Terms%20of%20Service?offset=-1`); + }); + + it('responds with 400 status code', () => { + expect(response.status).to.equal(400); + }); + + it('returns error message', () => { + expect(response.body).to.have.property('error'); + expect(response.body.error).to.include('Invalid offset parameter'); + }); + }); + }); + + context('when no versions are found', () => { + before(async () => { + response = await request.get(`${basePath}/v1/versions/non-existent-service/Terms%20of%20Service`); + }); + + it('responds with 404 status code', () => { + expect(response.status).to.equal(404); + }); + + it('responds with Content-Type application/json', () => { + expect(response.type).to.equal('application/json'); + }); + + it('returns an error message', () => { + expect(response.body.error).to.contain('No versions found').and.to.contain('non-existent-service').and.to.contain('Terms of Service'); + }); + }); + }); + + describe('GET /version/:versionId', () => { + const ONE_HOUR = 60 * 60 * 1000; + let firstVersion; + let middleVersion; + let lastVersion; + + before(async () => { + await versionsRepository.removeAll(); + + firstVersion = new Version({ + ...VERSION_COMMON_ATTRIBUTES, + content: 'first content', + fetchDate: new Date(FETCH_DATE.getTime() - ONE_HOUR), + }); + await versionsRepository.save(firstVersion); + + middleVersion = new Version({ + ...VERSION_COMMON_ATTRIBUTES, + content: 'middle content', + fetchDate: FETCH_DATE, + }); + await versionsRepository.save(middleVersion); + + lastVersion = new Version({ + ...VERSION_COMMON_ATTRIBUTES, + content: 'last content', + fetchDate: new Date(FETCH_DATE.getTime() + ONE_HOUR), + }); + await versionsRepository.save(lastVersion); + }); + + let response; + + context('when requesting the first version', () => { + before(async () => { + response = await request.get(`${basePath}/v1/version/${firstVersion.id}`); + }); + + it('responds with 200 status code', () => { + expect(response.status).to.equal(200); + }); + + it('responds with Content-Type application/json', () => { + expect(response.type).to.equal('application/json'); + }); + + it('returns the version content and metadata', () => { + expect(response.body.id).to.equal(firstVersion.id); + expect(response.body.serviceId).to.equal(firstVersion.serviceId); + expect(response.body.termsType).to.equal(firstVersion.termsType); + expect(response.body.fetchDate).to.equal(toISODateWithoutMilliseconds(firstVersion.fetchDate)); + expect(response.body.content).to.equal(firstVersion.content); + }); + + it('returns fetchUrls as an empty array when no matching snapshots exist', () => { + expect(response.body.fetchUrls).to.deep.equal([]); + }); + + it('returns links object', () => { + expect(response.body.links).to.be.an('object'); + }); + + it('returns first link pointing to itself', () => { + expect(response.body.links.first).to.equal(firstVersion.id); + }); + + it('returns null for prev', () => { + expect(response.body.links.prev).to.be.null; + }); + + it('returns next link', () => { + expect(response.body.links.next).to.equal(middleVersion.id); + }); + + it('returns last link', () => { + expect(response.body.links.last).to.equal(lastVersion.id); + }); + }); + + context('when requesting a middle version', () => { + before(async () => { + response = await request.get(`${basePath}/v1/version/${middleVersion.id}`); + }); + + it('responds with 200 status code', () => { + expect(response.status).to.equal(200); + }); + + it('returns the version content and metadata', () => { + expect(response.body.id).to.equal(middleVersion.id); + expect(response.body.serviceId).to.equal(middleVersion.serviceId); + expect(response.body.termsType).to.equal(middleVersion.termsType); + expect(response.body.content).to.equal(middleVersion.content); + }); + + it('returns first link', () => { + expect(response.body.links.first).to.equal(firstVersion.id); + }); + + it('returns prev link', () => { + expect(response.body.links.prev).to.equal(firstVersion.id); + }); + + it('returns next link', () => { + expect(response.body.links.next).to.equal(lastVersion.id); + }); + + it('returns last link', () => { + expect(response.body.links.last).to.equal(lastVersion.id); + }); + }); + + context('when requesting the last version', () => { + before(async () => { + response = await request.get(`${basePath}/v1/version/${lastVersion.id}`); + }); + + it('responds with 200 status code', () => { + expect(response.status).to.equal(200); + }); + + it('returns the version content and metadata', () => { + expect(response.body.id).to.equal(lastVersion.id); + expect(response.body.serviceId).to.equal(lastVersion.serviceId); + expect(response.body.termsType).to.equal(lastVersion.termsType); + expect(response.body.content).to.equal(lastVersion.content); + }); + + it('returns first link', () => { + expect(response.body.links.first).to.equal(firstVersion.id); + }); + + it('returns prev link', () => { + expect(response.body.links.prev).to.equal(middleVersion.id); + }); + + it('returns null for next', () => { + expect(response.body.links.next).to.be.null; + }); + + it('returns last link pointing to itself', () => { + expect(response.body.links.last).to.equal(lastVersion.id); + }); + }); + + context('when the version does not exist', () => { + before(async () => { + response = await request.get(`${basePath}/v1/version/non-existent-id`); + }); + + it('responds with 404 status code', () => { + expect(response.status).to.equal(404); + }); + + it('responds with Content-Type application/json', () => { + expect(response.type).to.equal('application/json'); + }); + + it('returns an error message', () => { + expect(response.body.error).to.contain('No version found').and.to.contain('non-existent-id'); + }); + }); + }); + + describe('GET /version/:serviceId/:termsType/latest', () => { + let firstVersion; + let middleVersion; + let lastVersion; + + before(async () => { + await versionsRepository.removeAll(); + + const ONE_HOUR = 60 * 60 * 1000; + + firstVersion = await versionsRepository.save(new Version({ + ...VERSION_COMMON_ATTRIBUTES, + content: 'first content', + fetchDate: new Date(FETCH_DATE.getTime() - ONE_HOUR), + })); + + middleVersion = await versionsRepository.save(new Version({ + ...VERSION_COMMON_ATTRIBUTES, + content: 'middle content', + fetchDate: FETCH_DATE, + })); + + lastVersion = await versionsRepository.save(new Version({ + ...VERSION_COMMON_ATTRIBUTES, + content: 'last content', + fetchDate: new Date(FETCH_DATE.getTime() + ONE_HOUR), + })); + }); + + let response; + + context('when versions exist', () => { + before(async () => { + response = await request.get(`${basePath}/v1/version/service-1/Terms%20of%20Service/latest`); + }); + + it('responds with 200 status code', () => { + expect(response.status).to.equal(200); + }); + + it('responds with Content-Type application/json', () => { + expect(response.type).to.equal('application/json'); + }); + + it('returns the latest version', () => { + expect(response.body.id).to.equal(lastVersion.id); + expect(response.body.serviceId).to.equal(lastVersion.serviceId); + expect(response.body.termsType).to.equal(lastVersion.termsType); + expect(response.body.content).to.equal(lastVersion.content); + expect(response.body.fetchDate).to.equal(toISODateWithoutMilliseconds(lastVersion.fetchDate)); + }); + + it('returns fetchUrls as an empty array when no matching snapshots exist', () => { + expect(response.body.fetchUrls).to.deep.equal([]); + }); + + it('returns links object', () => { + expect(response.body.links).to.be.an('object'); + }); + + it('returns first link', () => { + expect(response.body.links.first).to.equal(firstVersion.id); + }); + + it('returns prev link', () => { + expect(response.body.links.prev).to.equal(middleVersion.id); + }); + + it('returns null for next', () => { + expect(response.body.links.next).to.be.null; + }); + + it('returns last link pointing to itself', () => { + expect(response.body.links.last).to.equal(lastVersion.id); + }); + }); + + context('when no versions exist', () => { + before(async () => { + response = await request.get(`${basePath}/v1/version/non-existent-service/Non%20Existent%20Terms/latest`); + }); + + it('responds with 404 status code', () => { + expect(response.status).to.equal(404); + }); + + it('responds with Content-Type application/json', () => { + expect(response.type).to.equal('application/json'); + }); + + it('returns an error message', () => { + expect(response.body.error).to.contain('No version found').and.to.contain('non-existent-service'); + }); + }); + }); + + describe('GET /version/:serviceId/:termsType/:date', () => { + let expectedResult; + let firstVersion; + let middleVersion; + let lastVersion; + + before(async () => { + await versionsRepository.removeAll(); + + const ONE_HOUR = 60 * 60 * 1000; + + firstVersion = await versionsRepository.save(new Version({ + ...VERSION_COMMON_ATTRIBUTES, + content: 'initial content', + fetchDate: new Date(new Date(FETCH_DATE).getTime() - ONE_HOUR), + })); + + middleVersion = await versionsRepository.save(new Version({ + ...VERSION_COMMON_ATTRIBUTES, + content: 'updated content', + fetchDate: FETCH_DATE, + })); + + lastVersion = await versionsRepository.save(new Version({ ...VERSION_COMMON_ATTRIBUTES, content: 'latest content', fetchDate: new Date(new Date(FETCH_DATE).getTime() + ONE_HOUR), })); expectedResult = { - id: version.id, - fetchDate: toISODateWithoutMilliseconds(version.fetchDate), - content: version.content, + id: middleVersion.id, + serviceId: middleVersion.serviceId, + termsType: middleVersion.termsType, + fetchDate: toISODateWithoutMilliseconds(middleVersion.fetchDate), + content: middleVersion.content, + isFirstRecord: false, + isTechnicalUpgrade: false, + fetchUrls: [], + links: { + first: firstVersion.id, + prev: firstVersion.id, + next: lastVersion.id, + last: lastVersion.id, + }, }; }); - after(() => versionsRepository.removeAll()); - let response; context('when a version is found', () => {