Dobrica Pavlinušić's random unstructured stuff
MongoDB: Revision 23
MongoDB
Checkout source
dpavlin@t61p:/rest/cvs$ git clone git://github.com/mongodb/mongo.git Initialized empty Git repository in /rest/cvs/mongo/.git/ remote: Counting objects: 32011, done. remote: Compressing objects: 100% (9340/9340), done. remote: Total 32011 (delta 22724), reused 31556 (delta 22412) Receiving objects: 100% (32011/32011), 20.57 MiB | 1.12 MiB/s, done. Resolving deltas: 100% (22724/22724), done.
Install build dependencies
dpavlin@t61p:/rest/cvs/mongo$ sudo apt-get install \ libboost-dev libboost-thread-dev libboost-filesystem-dev libboost-program-options-dev libboost-date-time-dev \ libpcre3-dev xulrunner-dev libreadline-dev
Build Debian package
The debian/control file needs modification for unstable: http://svn.rot13.org/index.cgi/pxelator/view/mongodb/mongo-debian-control-xulrunner.diff dpavlin@t61p:/rest/cvs$ cd mongo/ # patch source dpavlin@klin:/rest/cvs/mongo$ patch -p1 < /srv/pxelator/mongodb/mongo-debian-control-xulrunner.diff patching file debian/control # clean before new build dpavlin@t61p:/rest/cvs/mongo$ sudo rm -Rf debian/mongodb dpavlin@t61p:/rest/cvs/mongo$ time dpkg-buildpackage -rfakeroot -b ... real 6m16.744s user 5m41.701s sys 0m19.393s
Perl driver
dpavlin@t61p:/rest/cvs$ git clone git://github.com/mongodb/mongo-perl-driver.git Initialized empty Git repository in /rest/cvs/mongo-perl-driver/.git/ remote: Counting objects: 1782, done. remote: Compressing objects: 100% (1673/1673), done. remote: Total 1782 (delta 1122), reused 0 (delta 0) Receiving objects: 100% (1782/1782), 1.45 MiB | 747 KiB/s, done. Resolving deltas: 100% (1122/1122), done. 
sudo apt-get install libany-moose-perl libdata-types-perl dpavlin@t61p:/rest/cvs$ cd mongo-perl-driver/ perl Makefile.PL make test sudo dh-make-perl
Binaries
Queries
PXElator audit examples
> use pxelator > db.audit.group({ key: { daemon:true } ,initial: { count: 0 } ,reduce: function(o,p) { p.count++ } }); We are really interested only in daemons which aren't null: > db.audit.ensureIndex( { daemon: true } ) > db.audit.group({ key: { daemon:true } ,cond: { daemon: { $exists: true } } ,initial: { count: 0 } ,reduce: function(o,p) { p.count++ } }); dhcp count usage by ip > db.audit.ensureIndex( { "package.name": true } ) > db.audit.group({ key:{ ip:true }, cond: { "package.name": "dhcpd" }, initial: { count: 0 }, reduce: function(o,p) { p.count++ } }); package usage > db.setProfilingLevel(2,1000); > db.audit.group({ key:{ "package.name":true }, initial:{ count:0 }, reduce:function(o,p){ p.count++ } }) > db.system.profile.find().sort({$natural:-1}).limit(10) { "ts" : "Sun Jan 24 2010 15:07:53 GMT+0100 (CET)", "info" : "query pxelator.$cmd ntoreturn:1 reslen:642 nscanned:0 query: { group: { key: { package.name: true }, initial: { count: 0.0 }, ns: \"audit\", $reduce: function (o, p) { p.count++; } } } nreturned:1 bytes:626 13887ms", "millis" : 13887 } > db.audit.ensureIndex({ "package.name":true }) > db.audit.group({ key:{ "package.name":true }, initial:{ count:0 }, reduce:function(o,p){ p.count++ } }) The index doesn't help much here, because we don't have a cond in the query. 
Profile
> db.setProfilingLevel(2,1000); { "was" : 2, "ok" : 1 } > db.system.profile.find()
Indexes
> db.system.indexes.find() { "name" : "_id_", "ns" : "pxelator.audit", "key" : { "_id" : ObjectId("000000000000000000000000") } } { "ns" : "pxelator.audit", "key" : { "daemon" : true }, "name" : "daemon_" } { "ns" : "pxelator.audit", "key" : { "key" : "package.time" }, "name" : "key_" } { "ns" : "pxelator.audit", "key" : { "package.name" : true }, "name" : "package.name_" }
Comparison with CouchDB
Migrate from CouchDB to MongoDB using http://svn.rot13.org/index.cgi/pxelator/view/bin/couchdb2mongodb.pl
Disk usage
root@opr:~# du -hc /var/lib/couchdb/0.9.0/.pxelator* /var/lib/couchdb/0.9.0/pxelator.couch 655M /var/lib/couchdb/0.9.0/.pxelator_design 23M /var/lib/couchdb/0.9.0/.pxelator_temp 7.8G /var/lib/couchdb/0.9.0/pxelator.couch 8.4G total root@opr:~# du -hc /var/lib/mongodb/pxelator.* 65M /var/lib/mongodb/pxelator.0 129M /var/lib/mongodb/pxelator.1 257M /var/lib/mongodb/pxelator.2 513M /var/lib/mongodb/pxelator.3 513M /var/lib/mongodb/pxelator.4 513M /var/lib/mongodb/pxelator.5 17M /var/lib/mongodb/pxelator.ns 2.0G total
Map/Reduce differences
CouchDB # map function(doc) { if ( doc.package.name == 'dnsd' ) emit(doc.peerhost,1); } # reduce function (k,v) { return sum(v); } MongoDB > m = function() { emit(this.peerhost,1) } > r = function(k,vals) { var sum = 0; for (var i in vals) sum += vals[i]; return sum; } > res = db.audit.mapReduce(m, r, { query:{"package.name":"dnsd"} } ) { "result" : "tmp.mr.mapreduce_1264448081_3", "timeMillis" : 6040, "counts" : { "input" : { "top" : 0, "bottom" : 204293 }, "emit" : { "top" : 0, "bottom" : 204293 }, "output" : { "top" : 0, "bottom" : 22 } }, "ok" : 1, } > db[res.result].find().limit(10) Comparison with ad-hoc query > db.setProfilingLevel(2,1000); > db.audit.group({ key:{ "peerhost":true }, cond:{ "package.name":"dnsd" }, initial:{ count:0 }, reduce:function(o,p){ p.count++ } }) > 
db.system.profile.find().sort({$natural:-1}).limit(10) { "ts" : "Mon Jan 25 2010 21:21:11 GMT+0100 (CET)", "info" : "query pxelator.$cmd ntoreturn:1 reslen:1148 nscanned:0 query: { group: { key: { peerhost: true }, cond: { package.name: \"dnsd\" }, initial: { count: 0.0 }, ns: \"audit\", $reduce: function (o, p) { p.count++; } } } nreturned:1 bytes:1132 2161ms", "millis" : 2161 } So, going through server-side JavaScript map/reduce (6040 ms) carries roughly a 3x performance penalty compared to the ad-hoc group query (2161 ms).
Blog posts
fetchrss: http://blog.rot13.org/mt/mt-search.cgi?tag=MongoDB&Template=feed&IncludeBlogs=1
Debian amd64 install
root@klin:~/rest/virtual# debootstrap squeeze ./mongodb-squeeze-amd64 http://ftp.hr.debian.org/debian root@klin:~/rest/virtual# chroot mongodb-squeeze-amd64/ root@klin:/# apt-get install git-core root@klin:/# apt-get install locales # git clone to /srv/mongo, patch root@klin:/srv/mongo# apt-get install dpkg-dev debhelper scons root@klin:/srv/mongo# time dpkg-buildpackage -rfakeroot -b |