Thanks to visit codestin.com
Credit goes to github.com

Skip to content

Inaccurate average object size reported by db.<collection>.stats() #394

@tmcallaghan

Description

@tmcallaghan

Describe the bug

I've run two tests — one where I insert documents with a single field of 2500 'a' characters, and another with a single field of 2500 random characters a..z and A..Z. The average document size reported for the 'a'-character test is far lower than reality: it reports an average document size of 80, whereas the random-text test reports an average document size of 3352. It appears to report the average document size after compression, which is incorrect.

Expected behavior

Average document size should be actual size, not size after compression.

Environment

  • Using the Docker container with the `latest` image tag

Reproduction Steps

// MongoDB Node.js driver (CommonJS require); connection string is read
// from the environment, so MONGO_URI must be set before running.
const { MongoClient } = require('mongodb');

const uri = process.env.MONGO_URI;

// Test-run parameters shared by both scenarios.
const dbName = 'testdb';
const collName = 'testcoll1';
const totalDocs = 1000000; // documents inserted per run
const batchSize = 1000; // documents per unordered bulk operation
const textFieldSize = 2500; // characters in each document's field1 string

/**
 * Build a string of `fieldSize` characters drawn uniformly at random from
 * [a-zA-Z]. Random text like this compresses poorly, which makes it useful
 * for the "uncompressible" test scenario.
 *
 * @param {number} fieldSize - number of characters to generate
 * @returns {string} random string of length `fieldSize`
 */
function randomString(fieldSize) {
  const alphabet = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ';
  const picked = Array.from(
    { length: fieldSize },
    () => alphabet[Math.floor(Math.random() * alphabet.length)]
  );
  return picked.join('');
}

/**
 * Build a string of `fieldSize` repeated 'a' characters — maximally
 * repetitive text, used for the "compressible" test scenario.
 *
 * @param {number} fieldSize - number of characters to generate
 * @returns {string} 'a' repeated `fieldSize` times
 */
function fixedString(fieldSize) {
  const filler = 'a';
  return filler.repeat(fieldSize);
}

/**
 * Bulk-insert `totalDocs` documents, each holding a single `field1` string of
 * `textFieldSize` characters, then print the collection's collStats.
 *
 * NOTE(review): the parameter was originally named `compressibleData`, but
 * passing `true` actually inserted *random* (incompressible) text — the name
 * contradicted the behavior. It is renamed here to describe what the code
 * really does; callers pass positionally, so behavior and interface are
 * backward compatible.
 *
 * @param {boolean} useRandomData - true: random a-zA-Z text (compresses
 *   poorly); false: repeated 'a' characters (compresses well).
 * @returns {Promise<void>}
 */
async function bulkInsert(useRandomData) {
  const client = new MongoClient(uri);

  const kind = useRandomData ? 'uncompressible' : 'compressible';
  console.log('inserting ', totalDocs.toLocaleString(), ` ${kind} documents`);

  try {
    await client.connect();
    const db = client.db(dbName);
    const collection = db.collection(collName);

    // Drop any previous run's data. Older driver versions throw
    // "ns not found" (code 26) when the collection does not yet exist;
    // ignore that case so the first run works, rethrow anything else.
    try {
      await collection.drop();
    } catch (err) {
      if (err.code !== 26 && err.codeName !== 'NamespaceNotFound') {
        throw err;
      }
    }

    let inserted = 0;
    let lastPercent = 0;

    while (inserted < totalDocs) {
      const bulk = collection.initializeUnorderedBulkOp();

      for (let i = 0; i < batchSize; i++) {
        const field1 = useRandomData
          ? randomString(textFieldSize)
          : fixedString(textFieldSize);
        bulk.insert({ field1 });
      }

      await bulk.execute();
      inserted += batchSize;

      // Log progress once at each 10% boundary.
      const percent = Math.floor((inserted / totalDocs) * 100);
      if (percent !== lastPercent && percent % 10 === 0) {
        console.log(`${percent}% complete (${inserted.toLocaleString()} / ${totalDocs.toLocaleString()})`);
        lastPercent = percent;
      }
    }

    const stats = await db.command({ collStats: collName });
    console.log('-------------------------------------------------------------------');
    console.log('Average Object Size (bytes):', stats.avgObjSize.toLocaleString());
    console.log('Collection Size (GB):', (stats.size / 1024 / 1024 / 1024).toLocaleString('en-US', { minimumFractionDigits: 2, maximumFractionDigits: 2 }));
    console.log('Collection Storage Size (GB):', (stats.storageSize / 1024 / 1024 / 1024).toLocaleString('en-US', { minimumFractionDigits: 2, maximumFractionDigits: 2 }));
    console.log('-------------------------------------------------------------------');
    const sampleDoc = await collection.findOne();
    console.log('Sample Document:', sampleDoc);
    console.log('-------------------------------------------------------------------');
  } finally {
    // Always release the client, even if connect/insert/stats failed.
    await client.close();
  }
}

// Entry point: run the compressible ('a'-repeat) scenario first, then the
// uncompressible (random text) one; report any rejection on stderr.
async function main() {
  await bulkInsert(false);
  await bulkInsert(true);
}

main().catch(console.error);

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions