<?xml version="1.0" encoding="UTF-8"?>

<rss version="2.0"
 xmlns:blogChannel="http://backend.userland.com/blogChannelModule"
>

<channel>
<title><![CDATA[Dobrica Pavlinušić's random unstructured stuff: MongoDB]]></title>
<link>https://saturn.ffzg.hr/rot13/index.cgi?mongodb</link>
<description></description>
<pubDate>Sun, 31 Jan 2010 13:25:52 -0000</pubDate>
<webMaster>root@saturn.ffzg.hr</webMaster>
<generator>Socialtext Workspace v2.19.0.2</generator>

<item>
<title><![CDATA[MongoDB]]></title>
<link>https://saturn.ffzg.hr/rot13/index.cgi?mongodb</link>
<description><![CDATA[<div>Creator: Dobrica Pavlinušić</div><hr/><div>Tags: NoSQL</div><hr/><div class="wiki">
<div class="nlw_phrase"><table class="wafl_container">
  <tr>
    <td>
      <div class="wafl_box">
        
          <div class="wafl_titlebox">
            <div class="wafl_title">
              
                Contents: [Dobrica Pavlinušić's random unstructured stuff]
              
            </div>
          </div>
          <div class="wafl_items">
            
              <div class="wiki">
<ul>
<li><span class="nlw_phrase"><a title="section link" href="https://saturn.ffzg.hr/rot13/index.cgi?dobrica_pavlinu%C5%A1i%C4%87_s_random_unstructured_stuff#mongodb">Dobrica Pavlinušić's random unstructured stuff (MongoDB)</a><!-- wiki: {link: [Dobrica Pavlinušić's random unstructured stuff] MongoDB} --></span></li>

<ul>
<li><span class="nlw_phrase"><a title="section link" href="https://saturn.ffzg.hr/rot13/index.cgi?dobrica_pavlinu%C5%A1i%C4%87_s_random_unstructured_stuff#checkout_source">Dobrica Pavlinušić's random unstructured stuff (Checkout source)</a><!-- wiki: {link: [Dobrica Pavlinušić's random unstructured stuff] Checkout source} --></span></li>
<li><span class="nlw_phrase"><a title="section link" href="https://saturn.ffzg.hr/rot13/index.cgi?dobrica_pavlinu%C5%A1i%C4%87_s_random_unstructured_stuff#install_build_dependencies">Dobrica Pavlinušić's random unstructured stuff (Install build dependencies)</a><!-- wiki: {link: [Dobrica Pavlinušić's random unstructured stuff] Install build dependencies} --></span></li>
<li><span class="nlw_phrase"><a title="section link" href="https://saturn.ffzg.hr/rot13/index.cgi?dobrica_pavlinu%C5%A1i%C4%87_s_random_unstructured_stuff#build_debian_package">Dobrica Pavlinušić's random unstructured stuff (Build Debian package)</a><!-- wiki: {link: [Dobrica Pavlinušić's random unstructured stuff] Build Debian package} --></span></li>
</ul>
<li><span class="nlw_phrase"><a title="section link" href="https://saturn.ffzg.hr/rot13/index.cgi?dobrica_pavlinu%C5%A1i%C4%87_s_random_unstructured_stuff#perl_driver">Dobrica Pavlinušić's random unstructured stuff (Perl driver)</a><!-- wiki: {link: [Dobrica Pavlinušić's random unstructured stuff] Perl driver} --></span></li>
<li><span class="nlw_phrase"><a title="section link" href="https://saturn.ffzg.hr/rot13/index.cgi?dobrica_pavlinu%C5%A1i%C4%87_s_random_unstructured_stuff#binaries">Dobrica Pavlinušić's random unstructured stuff (Binaries)</a><!-- wiki: {link: [Dobrica Pavlinušić's random unstructured stuff] Binaries} --></span></li>
<li><span class="nlw_phrase"><a title="section link" href="https://saturn.ffzg.hr/rot13/index.cgi?dobrica_pavlinu%C5%A1i%C4%87_s_random_unstructured_stuff#queries">Dobrica Pavlinušić's random unstructured stuff (Queries)</a><!-- wiki: {link: [Dobrica Pavlinušić's random unstructured stuff] Queries} --></span></li>

<ul>
<li><span class="nlw_phrase"><a title="section link" href="https://saturn.ffzg.hr/rot13/index.cgi?dobrica_pavlinu%C5%A1i%C4%87_s_random_unstructured_stuff#pxelator_audit_examples">Dobrica Pavlinušić's random unstructured stuff (PXElator audit examples)</a><!-- wiki: {link: [Dobrica Pavlinušić's random unstructured stuff] PXElator audit examples} --></span></li>
<li><span class="nlw_phrase"><a title="section link" href="https://saturn.ffzg.hr/rot13/index.cgi?dobrica_pavlinu%C5%A1i%C4%87_s_random_unstructured_stuff#profile">Dobrica Pavlinušić's random unstructured stuff (Profile)</a><!-- wiki: {link: [Dobrica Pavlinušić's random unstructured stuff] Profile} --></span></li>
</ul>
<li><span class="nlw_phrase"><a title="section link" href="https://saturn.ffzg.hr/rot13/index.cgi?dobrica_pavlinu%C5%A1i%C4%87_s_random_unstructured_stuff#indexes">Dobrica Pavlinušić's random unstructured stuff (Indexes)</a><!-- wiki: {link: [Dobrica Pavlinušić's random unstructured stuff] Indexes} --></span></li>
<li><span class="nlw_phrase"><a title="section link" href="https://saturn.ffzg.hr/rot13/index.cgi?dobrica_pavlinu%C5%A1i%C4%87_s_random_unstructured_stuff#comparison_with_couchdb">Dobrica Pavlinušić's random unstructured stuff (Comparison with CouchDB)</a><!-- wiki: {link: [Dobrica Pavlinušić's random unstructured stuff] Comparison with CouchDB} --></span></li>

<ul>
<li><span class="nlw_phrase"><a title="section link" href="https://saturn.ffzg.hr/rot13/index.cgi?dobrica_pavlinu%C5%A1i%C4%87_s_random_unstructured_stuff#disk_usage">Dobrica Pavlinušić's random unstructured stuff (Disk usage)</a><!-- wiki: {link: [Dobrica Pavlinušić's random unstructured stuff] Disk usage} --></span></li>
<li><span class="nlw_phrase"><a title="section link" href="https://saturn.ffzg.hr/rot13/index.cgi?dobrica_pavlinu%C5%A1i%C4%87_s_random_unstructured_stuff#map_reduce_differences">Dobrica Pavlinušić's random unstructured stuff (Map/Reduce differences)</a><!-- wiki: {link: [Dobrica Pavlinušić's random unstructured stuff] Map/Reduce differences} --></span></li>
</ul>
<li><span class="nlw_phrase"><a title="section link" href="https://saturn.ffzg.hr/rot13/index.cgi?dobrica_pavlinu%C5%A1i%C4%87_s_random_unstructured_stuff#blog_posts">Dobrica Pavlinušić's random unstructured stuff (Blog posts)</a><!-- wiki: {link: [Dobrica Pavlinušić's random unstructured stuff] Blog posts} --></span></li>
<li><span class="nlw_phrase"><a title="section link" href="https://saturn.ffzg.hr/rot13/index.cgi?dobrica_pavlinu%C5%A1i%C4%87_s_random_unstructured_stuff#debian_amd64_version">Dobrica Pavlinušić's random unstructured stuff (Debian amd64 version)</a><!-- wiki: {link: [Dobrica Pavlinušić's random unstructured stuff] Debian amd64 version} --></span></li>

<ul>
<li><span class="nlw_phrase"><a title="section link" href="https://saturn.ffzg.hr/rot13/index.cgi?dobrica_pavlinu%C5%A1i%C4%87_s_random_unstructured_stuff#build">Dobrica Pavlinušić's random unstructured stuff (build)</a><!-- wiki: {link: [Dobrica Pavlinušić's random unstructured stuff] build} --></span></li>
<li><span class="nlw_phrase"><a title="section link" href="https://saturn.ffzg.hr/rot13/index.cgi?dobrica_pavlinu%C5%A1i%C4%87_s_random_unstructured_stuff#run">Dobrica Pavlinušić's random unstructured stuff (run)</a><!-- wiki: {link: [Dobrica Pavlinušić's random unstructured stuff] run} --></span></li>
</ul></ul>
</div>

            
            
          </div>
        
      </div>
    </td>
  </tr>
</table>
<!-- wiki: {toc: }
--></div><br /><br /><h1 id="mongodb">MongoDB</h1>
<h2 id="checkout_source">Checkout source</h2>
<pre>
dpavlin@t61p:/rest/cvs$ git clone git://github.com/mongodb/mongo.git
Initialized empty Git repository in /rest/cvs/mongo/.git/
remote: Counting objects: 32011, done.
remote: Compressing objects: 100% (9340/9340), done.
remote: Total 32011 (delta 22724), reused 31556 (delta 22412)
Receiving objects: 100% (32011/32011), 20.57 MiB | 1.12 MiB/s, done.
Resolving deltas: 100% (22724/22724), done.
</pre>
<br /><h2 id="install_build_dependencies">Install build dependencies</h2>
<pre>
dpavlin@t61p:/rest/cvs/mongo$ sudo apt-get install \
 libboost-dev libboost-thread-dev libboost-filesystem-dev libboost-program-options-dev libboost-date-time-dev \
 libpcre3-dev xulrunner-dev libreadline-dev
</pre>
<br /><h2 id="build_debian_package">Build Debian package</h2>
<p>
<tt>debian/control</tt> file needs modification for unstable: <a target="_blank" title="(external link)" href="http://svn.rot13.org/index.cgi/pxelator/view/mongodb/mongo-debian-control-xulrunner.diff">http://svn.rot13.org/index.cgi/pxelator/view/mongodb/mongo-debian-control-xulrunner.diff</a></p>
<pre>
dpavlin@t61p:/rest/cvs$ cd mongo/

# patch source
dpavlin@klin:/rest/cvs/mongo$ patch -p1 &lt; /srv/pxelator/mongodb/mongo-debian-control-xulrunner.diff 
patching file debian/control

# clean before new build
dpavlin@t61p:/rest/cvs/mongo$ sudo rm -Rf debian/mongodb

dpavlin@t61p:/rest/cvs/mongo$ time dpkg-buildpackage -rfakeroot -b

...

real    6m16.744s
user    5m41.701s
sys     0m19.393s
</pre>
<br /><h1 id="perl_driver">Perl driver</h1>
<pre>
dpavlin@t61p:/rest/cvs$ git clone git://github.com/mongodb/mongo-perl-driver.git
Initialized empty Git repository in /rest/cvs/mongo-perl-driver/.git/
remote: Counting objects: 1782, done.
remote: Compressing objects: 100% (1673/1673), done.
remote: Total 1782 (delta 1122), reused 0 (delta 0)
Receiving objects: 100% (1782/1782), 1.45 MiB | 747 KiB/s, done.
Resolving deltas: 100% (1122/1122), done.

sudo apt-get install libany-moose-perl libdata-types-perl

dpavlin@t61p:/rest/cvs$ cd mongo-perl-driver/

perl Makefile.PL
make test
sudo dh-make-perl
</pre>
<br /><h1 id="binaries">Binaries</h1>
<ul>
<li><a target="_blank" title="(external link)" href="http://debian.rot13.org/">http://debian.rot13.org/</a></li>
</ul>
<h1 id="queries">Queries</h1>
<h2 id="pxelator_audit_examples">PXElator audit examples</h2>
<pre>
&gt; use pxelator

&gt; db.audit.group({ key:{ 'package.name':true }, initial:{ count: 0 }, reduce:function(o,p) { p.count++ } });

&gt; show profile

11052ms Sun Jan 31 2010 13:24:47
query pxelator.$cmd ntoreturn:1 reslen:690 nscanned:0  
query: { group: { key: { package.name: true }, initial: { count: 0.0 }, ns: &quot;audit&quot;, $reduce: function (o, p) {
    p.count++;
} } }  nreturned:1 bytes:674 11052ms

&gt; db.audit.ensureIndex({ 'package.name':true })

&gt; db.audit.group({ key:{ 'package.name':true }, initial:{ count: 0 }, reduce:function(o,p) { p.count++ } });
</pre>
<br /><p>
Adding the index has no visible speed impact.</p>
<p>
We are really interested only in daemons which aren't null:</p>
<pre>
&gt; db.audit.ensureIndex( { daemon: true } )
&gt; db.audit.group({
 key: { daemon:true }
,cond: { daemon: { $exists: true } }
,initial: { count: 0 }
,reduce: function(o,p) { p.count++ }
});
</pre>
<br /><p>
DHCP usage count by IP:</p>
<pre>
&gt; db.audit.ensureIndex( { &quot;package.name&quot;: true } )

&gt; db.audit.group({ key:{ ip:true }, cond: { &quot;package.name&quot;: &quot;dhcpd&quot; }, initial: { count: 0 }, reduce: function(o,p) { p.count++ } });
</pre>
<br /><p>
package usage</p>
<pre>
&gt; db.setProfilingLevel(2,1000);

&gt; db.audit.group({ key:{ &quot;package.name&quot;:true }, initial:{ count:0 }, reduce:function(o,p){ p.count++ } })

&gt; db.system.profile.find().sort({$natural:-1}).limit(10)

{ &quot;ts&quot; : &quot;Sun Jan 24 2010 15:07:53 GMT+0100 (CET)&quot;, &quot;info&quot; : &quot;query pxelator.$cmd ntoreturn:1 reslen:642 nscanned:0  
query: { group: { key: { package.name: true }, initial: { count: 0.0 }, ns: \&quot;audit\&quot;, $reduce: function (o, p) {
    p.count++;
} } }  nreturned:1 bytes:626 13887ms&quot;, &quot;millis&quot; : 13887 }

&gt; db.audit.ensureIndex({ &quot;package.name&quot;:true })

&gt; db.audit.group({ key:{ &quot;package.name&quot;:true }, initial:{ count:0 }, reduce:function(o,p){ p.count++ } })
</pre>
<br /><p>
This doesn't help much, because we don't have a <tt>cond</tt> in the query.</p>
<h2 id="profile">Profile</h2>
<pre>
&gt; db.setProfilingLevel(2,1000);
{ &quot;was&quot; : 2, &quot;ok&quot; : 1 }
&gt; db.system.profile.find()
</pre>
<br /><h1 id="indexes">Indexes</h1>
<pre>
&gt; db.system.indexes.find()
{ &quot;name&quot; : &quot;_id_&quot;, &quot;ns&quot; : &quot;pxelator.audit&quot;, &quot;key&quot; : { &quot;_id&quot; : ObjectId(&quot;000000000000000000000000&quot;) } }
{ &quot;ns&quot; : &quot;pxelator.audit&quot;, &quot;key&quot; : { &quot;daemon&quot; : true }, &quot;name&quot; : &quot;daemon_&quot; }
{ &quot;ns&quot; : &quot;pxelator.audit&quot;, &quot;key&quot; : { &quot;key&quot; : &quot;package.time&quot; }, &quot;name&quot; : &quot;key_&quot; }
{ &quot;ns&quot; : &quot;pxelator.audit&quot;, &quot;key&quot; : { &quot;package.name&quot; : true }, &quot;name&quot; : &quot;package.name_&quot; }
</pre>
<br /><h1 id="comparison_with_couchdb">Comparison with CouchDB</h1>
<p>
Migrate from CouchDB to MongoDB using <a target="_blank" title="(external link)" href="http://svn.rot13.org/index.cgi/pxelator/view/bin/couchdb2mongodb.pl">http://svn.rot13.org/index.cgi/pxelator/view/bin/couchdb2mongodb.pl</a></p>
<h2 id="disk_usage">Disk usage</h2>
<pre>
root@opr:~# du -hc /var/lib/couchdb/0.9.0/.pxelator* /var/lib/couchdb/0.9.0/pxelator.couch
655M    /var/lib/couchdb/0.9.0/.pxelator_design
23M     /var/lib/couchdb/0.9.0/.pxelator_temp
7.8G    /var/lib/couchdb/0.9.0/pxelator.couch
8.4G    total

root@opr:~# du -hc /var/lib/mongodb/pxelator.*
65M     /var/lib/mongodb/pxelator.0
129M    /var/lib/mongodb/pxelator.1
257M    /var/lib/mongodb/pxelator.2
513M    /var/lib/mongodb/pxelator.3
513M    /var/lib/mongodb/pxelator.4
513M    /var/lib/mongodb/pxelator.5
17M     /var/lib/mongodb/pxelator.ns
2.0G    total
</pre>
<br /><h2 id="map_reduce_differences">Map/Reduce differences</h2>
<p>
CouchDB</p>
<pre>
# map
function(doc) {
  if ( doc.package.name == 'dnsd' )
   emit(doc.peerhost,1);
}

# reduce
function (k,v) {
 return sum(v);
}
</pre>
<br /><p>
MongoDB</p>
<pre>
&gt; m = function() { emit(this.peerhost,1) }

&gt; r = function(k,vals) { var sum = 0; for (var i in vals) sum += vals[i]; return sum; }

&gt; res = db.audit.mapReduce(m, r, { query:{&quot;package.name&quot;:&quot;dnsd&quot;} } )
{
        &quot;result&quot; : &quot;tmp.mr.mapreduce_1264448081_3&quot;,
        &quot;timeMillis&quot; : 6040,
        &quot;counts&quot; : {
                &quot;input&quot; : {
                        &quot;top&quot; : 0,
                        &quot;bottom&quot; : 204293
                },
                &quot;emit&quot; : {
                        &quot;top&quot; : 0,
                        &quot;bottom&quot; : 204293
                },
                &quot;output&quot; : {
                        &quot;top&quot; : 0,
                        &quot;bottom&quot; : 22
                }
        },
        &quot;ok&quot; : 1,
}

&gt; db[res.result].find().limit(10)
</pre>
<br /><p>
Comparison with ad-hoc query</p>
<pre>
&gt; db.setProfilingLevel(2,1000);

&gt; db.audit.group({ key:{ &quot;peerhost&quot;:true }, cond:{ &quot;package.name&quot;:&quot;dnsd&quot; },
 initial:{ count:0 }, reduce:function(o,p){ p.count++ } })

&gt; db.system.profile.find().sort({$natural:-1}).limit(10)

{ &quot;ts&quot; : &quot;Mon Jan 25 2010 21:21:11 GMT+0100 (CET)&quot;, &quot;info&quot; : &quot;query pxelator.$cmd ntoreturn:1 reslen:1148 nscanned:0  
query: { group: { key: { peerhost: true }, cond: { package.name: \&quot;dnsd\&quot; }, initial: { count: 0.0 }, ns: \&quot;audit\&quot;, $reduce: function (o, p) {
    p.count++;
} } }  nreturned:1 bytes:1132 2161ms&quot;, &quot;millis&quot; : 2161 }
</pre>
<br /><p>
So, going through server-side JavaScript incurs a <strong>3x performance penalty</strong>.</p>
<h1 id="blog_posts">Blog posts</h1>
<div class="nlw_phrase"><div class="fetchrss_box">
  <div class="fetchrss_titlebox">
    
  
  
 <div class="fetchrss_title">
   <a href="https://blog.rot13.org/mt/mt-search.cgi?IncludeBlogs=1&amp;search=MongoDB">Search Results for MongoDB</a>
 </div>


  </div>
  
 <ul class="fetchrss_items">
  
  
    
  <li class="fetchrss_item">
    <a href="https://blog.rot13.org/2010/01/its-about-system-stupid.html">It's about system, stupid!</a>
    
  </li>

  
    
  <li class="fetchrss_item">
    <a href="https://blog.rot13.org/2010/01/mongodb---so-you-want-fast-nosql-database-which-you-can-grok.html">MongoDB - so you want fast NoSQL database which you can grok</a>
    
  </li>

  
 </ul>

</div>
















<!-- wiki: {fetchrss: http://blog.rot13.org/mt/mt-=search.cgi?tag==MongoDB&Template==feed&IncludeBlogs==1}
--></div><br /><br /><h1 id="debian_amd64_version">Debian amd64 version</h1>
<h2 id="build">build</h2>
<pre>
root@klin:~/rest/virtual# debootstrap --arch amd64 squeeze ./mongodb-amd64 http://10.60.0.91:3142/debian

root@klin:~/rest/virtual# chroot mongodb-amd64/

root@klin:/# apt-get install \
 git-core locales dpkg-dev debhelper scons \
 libboost-dev libboost-thread-dev libboost-filesystem-dev libboost-program-options-dev libboost-date-time-dev \
 libpcre3-dev xulrunner-dev libreadline-dev

root@klin:/# cd /srv/
root@klin:/srv# git clone git://github.com/mongodb/mongo.git

root@klin:/srv# cd mongo/
root@klin:/srv/mongo# time dpkg-buildpackage -rfakeroot -b
</pre>
<br /><h2 id="run">run</h2>
<pre>
dpavlin@klin:~$ sudo chroot /virtual/mongodb-amd64/  su -c '/usr/bin/mongod --dbpath /var/lib/mongodb --logpath /var/log/mongodb/MongoDB.log run' mongodb
</pre>
</div>
]]></description>
<author>Dobrica Pavlinu&#x161;i&#x107;</author>
<category>NoSQL</category>
<guid isPermaLink="true">https://saturn.ffzg.hr/rot13/index.cgi?mongodb</guid>
<pubDate>Sun, 31 Jan 2010 13:25:52 -0000</pubDate>
</item>
</channel>
</rss>