-
Notifications
You must be signed in to change notification settings - Fork 193
Description
Describe the bug
I've run two tests - one where I insert documents with a single field of 2500 'a' characters and another with a single field of 2500 random characters a..z and A..Z. The average document size reported for the 'a' character test is far lower than reality as it reports average document size of 80 whereas the random text field test reports average document size of 3352. It appears to report average document size after compression which is incorrect.
Expected behavior
Average document size should be actual size, not size after compression.
Environment
- Using the official Docker container image with the `latest` tag
Reproduction Steps
const { MongoClient } = require('mongodb');
// Connection string is taken from the environment; the script will fail to
// connect if MONGO_URI is unset — NOTE(review): no fallback/validation here.
const uri = process.env.MONGO_URI;
const dbName = 'testdb';
const collName = 'testcoll1';
// Total number of documents to insert per run, written in batches.
const totalDocs = 1000000;
const batchSize = 1000;
// Size (in characters) of the single text field in each test document.
const textFieldSize = 2500;
/**
 * Build a string of `fieldSize` characters drawn uniformly at random from
 * the ASCII letters a-z and A-Z — a poorly-compressible payload.
 * @param {number} fieldSize - number of characters to generate
 * @returns {string} random alphabetic string of length `fieldSize`
 */
function randomString(fieldSize) {
  const alphabet = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ';
  return Array.from(
    { length: fieldSize },
    () => alphabet[Math.floor(Math.random() * alphabet.length)]
  ).join('');
}
/**
 * Build a string of `fieldSize` repeated 'a' characters — a highly
 * compressible payload.
 * @param {number} fieldSize - number of characters to generate
 * @returns {string} string of `fieldSize` 'a' characters
 */
function fixedString(fieldSize) {
  return ''.padEnd(fieldSize, 'a');
}
/**
 * Drop and repopulate the test collection with `totalDocs` documents, then
 * print collStats (avgObjSize, size, storageSize) and a sample document.
 *
 * NOTE(review): the flag name is inverted relative to what it inserts —
 * `compressibleData === true` inserts *random* (uncompressible) text, while
 * `false` inserts repeated-'a' (compressible) text. Behavior is kept as-is
 * so the existing callers below are unchanged.
 *
 * @param {boolean} compressibleData - true → random text, false → 'a'-repeat
 * @returns {Promise<void>}
 */
async function bulkInsert(compressibleData) {
  const client = new MongoClient(uri);
  // Use strict comparison; also hoist the per-document generator choice out
  // of the insert loop so the branch isn't re-evaluated a million times.
  const useRandomData = compressibleData === true;
  const makeField = useRandomData ? randomString : fixedString;
  console.log(
    'inserting ',
    totalDocs.toLocaleString(),
    useRandomData ? ' uncompressible documents' : ' compressible documents'
  );
  try {
    await client.connect();
    const db = client.db(dbName);
    const collection = db.collection(collName);
    // drop() throws "ns not found" (NamespaceNotFound) when the collection
    // does not exist yet — e.g. on the very first run. Ignore only that case.
    try {
      await collection.drop();
    } catch (err) {
      if (err.codeName !== 'NamespaceNotFound') {
        throw err;
      }
    }
    let inserted = 0;
    let lastPercent = 0;
    while (inserted < totalDocs) {
      const bulk = collection.initializeUnorderedBulkOp();
      for (let i = 0; i < batchSize; i++) {
        bulk.insert({ field1: makeField(textFieldSize) });
      }
      await bulk.execute();
      inserted += batchSize;
      // Report progress once per 10% milestone.
      const percent = Math.floor((inserted / totalDocs) * 100);
      if (percent !== lastPercent && percent % 10 === 0) {
        console.log(`${percent}% complete (${inserted.toLocaleString()} / ${totalDocs.toLocaleString()})`);
        lastPercent = percent;
      }
    }
    const stats = await db.command({ collStats: collName });
    console.log('-------------------------------------------------------------------');
    console.log('Average Object Size (bytes):', stats.avgObjSize.toLocaleString());
    console.log('Collection Size (GB):', (stats.size / 1024 / 1024 / 1024).toLocaleString('en-US', { minimumFractionDigits: 2, maximumFractionDigits: 2 }));
    console.log('Collection Storage Size (GB):', (stats.storageSize / 1024 / 1024 / 1024).toLocaleString('en-US', { minimumFractionDigits: 2, maximumFractionDigits: 2 }));
    console.log('-------------------------------------------------------------------');
    const sampleDoc = await collection.findOne();
    console.log('Sample Document:', sampleDoc);
    console.log('-------------------------------------------------------------------');
  } finally {
    // Always release the connection, even if insert/stats failed.
    await client.close();
  }
}
// Entry point: insert the compressible ('a'-repeated) dataset first, then
// the uncompressible (random-text) dataset; log any failure to stderr-style
// console output.
async function main() {
  await bulkInsert(false);
  await bulkInsert(true);
}

main().catch(console.error);