<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0"
	xmlns:content="http://purl.org/rss/1.0/modules/content/"
	xmlns:wfw="http://wellformedweb.org/CommentAPI/"
	xmlns:dc="http://purl.org/dc/elements/1.1/"
	xmlns:atom="http://www.w3.org/2005/Atom"
	xmlns:sy="http://purl.org/rss/1.0/modules/syndication/"
	xmlns:slash="http://purl.org/rss/1.0/modules/slash/"
	xmlns:georss="http://www.georss.org/georss" xmlns:geo="http://www.w3.org/2003/01/geo/wgs84_pos#" xmlns:media="http://search.yahoo.com/mrss/"
	>

<channel>
	<title>The JustOne Database Blog</title>
	<atom:link href="http://blogs.justonedatabase.com/feed/" rel="self" type="application/rss+xml" />
	<link>http://blogs.justonedatabase.com</link>
	<description></description>
	<lastBuildDate>Wed, 20 Jun 2012 17:11:35 +0000</lastBuildDate>
	<language>en</language>
	<sy:updatePeriod>hourly</sy:updatePeriod>
	<sy:updateFrequency>1</sy:updateFrequency>
	<generator>http://wordpress.com/</generator>
<cloud domain='blogs.justonedatabase.com' port='80' path='/?rsscloud=notify' registerProcedure='' protocol='http-post' />
<image>
		<url>http://1.gravatar.com/blavatar/1aac1af4c50c24df3d187c729a8d9e2b?s=96&#038;d=http%3A%2F%2Fs2.wp.com%2Fi%2Fbuttonw-com.png</url>
		<title>The JustOne Database Blog</title>
		<link>http://blogs.justonedatabase.com</link>
	</image>
	<atom:link rel="search" type="application/opensearchdescription+xml" href="http://blogs.justonedatabase.com/osd.xml" title="The JustOne Database Blog" />
	<atom:link rel='hub' href='http://blogs.justonedatabase.com/?pushpress=hub'/>
		<item>
		<title>JustOneDB launched on Engine Yard</title>
		<link>http://blogs.justonedatabase.com/2012/06/20/justonedb-launched-on-engine-yard/</link>
		<comments>http://blogs.justonedatabase.com/2012/06/20/justonedb-launched-on-engine-yard/#comments</comments>
		<pubDate>Wed, 20 Jun 2012 16:24:04 +0000</pubDate>
		<dc:creator>Duncan</dc:creator>
				<category><![CDATA[General]]></category>

		<guid isPermaLink="false">http://blogs.justonedatabase.com/?p=576</guid>
		<description><![CDATA[JustOne Database Inc is pleased to announce that JustOneDB is now available on Engine Yard as an Add-on service. JustOneDB is an excellent relational database for enterprise level customers in the Cloud because of its ability to handle large data &#8230; <a href="http://blogs.justonedatabase.com/2012/06/20/justonedb-launched-on-engine-yard/">Continue reading <span class="meta-nav">&#8594;</span></a><img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=blogs.justonedatabase.com&#038;blog=19003202&#038;post=576&#038;subd=justonedatabase&#038;ref=&#038;feed=1" width="1" height="1" />]]></description>
			<content:encoded><![CDATA[<p><a href="http://justonedatabase.files.wordpress.com/2012/06/engine-yard-logo-long.png"><img class="alignnone size-full wp-image-582" title="engine-yard-logo-long" src="http://justonedatabase.files.wordpress.com/2012/06/engine-yard-logo-long.png?w=500" alt=""   /></a></p>
<p>JustOne Database Inc is pleased to announce that JustOneDB is now available on Engine Yard as an Add-on service.</p>
<p>JustOneDB is an excellent relational database for enterprise level customers in the Cloud because of its ability to handle large data volumes and effortlessly adapt to changing requirements without requiring any specialist database skills &#8211; so the Engine Yard service is a natural fit for JustOneDB.</p>
<p><strong>If you are an Engine Yard customer, follow these steps to access the JustOneDB Add-on:</strong></p>
<p>1. Go to <a href="https://cloud.engineyard.com/addons" target="_blank">https://cloud.engineyard.com/addons</a> (login required) or navigate to “Add-ons” in Engine Yard Cloud<br />
2. Click on the &#8220;Details&#8221; button for JustOneDB<br />
3. Sign up and follow the instructions for “Activate”, “Update Code” and “Deploy”</p>
<p>Further details for using the Engine Yard Add-on service are available <a title="Using JustOneDB on Engine Yard" href="http://www.justonedb.com/engineyard">here</a></p>
<br />  <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gocomments/justonedatabase.wordpress.com/576/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/comments/justonedatabase.wordpress.com/576/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godelicious/justonedatabase.wordpress.com/576/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/delicious/justonedatabase.wordpress.com/576/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gofacebook/justonedatabase.wordpress.com/576/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/facebook/justonedatabase.wordpress.com/576/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gotwitter/justonedatabase.wordpress.com/576/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/twitter/justonedatabase.wordpress.com/576/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gostumble/justonedatabase.wordpress.com/576/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/stumble/justonedatabase.wordpress.com/576/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godigg/justonedatabase.wordpress.com/576/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/digg/justonedatabase.wordpress.com/576/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/goreddit/justonedatabase.wordpress.com/576/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/reddit/justonedatabase.wordpress.com/576/" /></a> <img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=blogs.justonedatabase.com&#038;blog=19003202&#038;post=576&#038;subd=justonedatabase&#038;ref=&#038;feed=1" width="1" height="1" />]]></content:encoded>
			<wfw:commentRss>http://blogs.justonedatabase.com/2012/06/20/justonedb-launched-on-engine-yard/feed/</wfw:commentRss>
		<slash:comments>0</slash:comments>
	
		<media:content url="http://1.gravatar.com/avatar/d61fb61c2837718606307b2dfe3747fd?s=96&#38;d=http%3A%2F%2Fs0.wp.com%2Fi%2Fmu.gif&#38;r=G" medium="image">
			<media:title type="html">justonedatabase</media:title>
		</media:content>

		<media:content url="http://justonedatabase.files.wordpress.com/2012/06/engine-yard-logo-long.png" medium="image">
			<media:title type="html">engine-yard-logo-long</media:title>
		</media:content>
	</item>
		<item>
		<title>Big is in the Eye of the Beholder</title>
		<link>http://blogs.justonedatabase.com/2012/02/27/big-is-in-the-eye-of-the-beholder/</link>
		<comments>http://blogs.justonedatabase.com/2012/02/27/big-is-in-the-eye-of-the-beholder/#comments</comments>
		<pubDate>Mon, 27 Feb 2012 15:07:07 +0000</pubDate>
		<dc:creator>Duncan</dc:creator>
				<category><![CDATA[Database]]></category>
		<category><![CDATA[General]]></category>
		<category><![CDATA[Big Data]]></category>
		<category><![CDATA[Internet of Things]]></category>
		<category><![CDATA[IoT]]></category>
		<category><![CDATA[M2M]]></category>
		<category><![CDATA[Structured Data]]></category>
		<category><![CDATA[Unstructured Data]]></category>

		<guid isPermaLink="false">http://blogs.justonedatabase.com/?p=566</guid>
		<description><![CDATA[One of the really hot topics for 2012 is Big Data and every vendor seems keen to ride the Big Data wave. But what is big? According to Wikipedia, “Big data is a term applied to data sets whose size &#8230; <a href="http://blogs.justonedatabase.com/2012/02/27/big-is-in-the-eye-of-the-beholder/">Continue reading <span class="meta-nav">&#8594;</span></a><img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=blogs.justonedatabase.com&#038;blog=19003202&#038;post=566&#038;subd=justonedatabase&#038;ref=&#038;feed=1" width="1" height="1" />]]></description>
			<content:encoded><![CDATA[<p>One of the really hot topics for 2012 is Big Data and every vendor seems keen to ride the Big Data wave. But what is big? According to Wikipedia, “<em>Big data is a term applied to data sets whose size is beyond the ability of commonly used software tools to capture, manage, and process the data within a tolerable elapsed time.</em>” In other words, Big Data is data that overwhelms a conventional database.</p>
<p>It is often perceived that Big Data is about unstructured data, but this is not so. Any data can be big – structured or unstructured. Indeed communications network operators have struggled with billions of daily structured network events and managed databases with multiple terabytes of structured data for decades now. This data was generated long before social networks arrived, so Big Data is nothing new, but the widespread adoption of the Internet has expanded the domain of the Big Data problem.</p>
<p>Currently, you can think of the Internet as connecting both places and people where those people initiate the vast majority of the data generated either directly or indirectly. The content of an unstructured document is typically written by a human, a web transaction is usually initiated by a human and most network events arise directly or indirectly from human activity on that network.</p>
<p>But the Internet is now hosting devices that autonomously create information about time, location, orientation, velocity and various other measurements as structured content. These are not just smartphones, these are sensors scattered and embedded everywhere to monitor and measure everything from energy use and health to asset tracking. This is the age of the “Internet of Things”.</p>
<p>Now consider that the Internet currently connects about 1 billion places and about 5 billion people. Yet it is estimated that over 50 billion devices will become connected by 2020 – each generating structured data at regular and frequent intervals. This machine-to-machine communications (M2M) market is widely regarded as a multi-trillion dollar opportunity and network operators are planning some fundamental changes to their network architecture to handle the deluge expected from  its exponential growth.</p>
<p>Not only is this device data more profuse than the data we see now, but it is mostly structured too. Big Data? As the song goes, you ain’t seen nothing yet.</p>
<br />  <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gocomments/justonedatabase.wordpress.com/566/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/comments/justonedatabase.wordpress.com/566/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godelicious/justonedatabase.wordpress.com/566/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/delicious/justonedatabase.wordpress.com/566/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gofacebook/justonedatabase.wordpress.com/566/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/facebook/justonedatabase.wordpress.com/566/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gotwitter/justonedatabase.wordpress.com/566/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/twitter/justonedatabase.wordpress.com/566/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gostumble/justonedatabase.wordpress.com/566/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/stumble/justonedatabase.wordpress.com/566/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godigg/justonedatabase.wordpress.com/566/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/digg/justonedatabase.wordpress.com/566/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/goreddit/justonedatabase.wordpress.com/566/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/reddit/justonedatabase.wordpress.com/566/" /></a> <img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=blogs.justonedatabase.com&#038;blog=19003202&#038;post=566&#038;subd=justonedatabase&#038;ref=&#038;feed=1" width="1" height="1" />]]></content:encoded>
			<wfw:commentRss>http://blogs.justonedatabase.com/2012/02/27/big-is-in-the-eye-of-the-beholder/feed/</wfw:commentRss>
		<slash:comments>0</slash:comments>
	
		<media:content url="http://1.gravatar.com/avatar/d61fb61c2837718606307b2dfe3747fd?s=96&#38;d=http%3A%2F%2Fs0.wp.com%2Fi%2Fmu.gif&#38;r=G" medium="image">
			<media:title type="html">justonedatabase</media:title>
		</media:content>
	</item>
		<item>
		<title>Memento</title>
		<link>http://blogs.justonedatabase.com/2012/01/03/memento/</link>
		<comments>http://blogs.justonedatabase.com/2012/01/03/memento/#comments</comments>
		<pubDate>Tue, 03 Jan 2012 16:18:58 +0000</pubDate>
		<dc:creator>Duncan</dc:creator>
				<category><![CDATA[Relational Database]]></category>
		<category><![CDATA[Software]]></category>
		<category><![CDATA[Architecture]]></category>
		<category><![CDATA[Auditing]]></category>
		<category><![CDATA[Event Sourcing]]></category>
		<category><![CDATA[Logging]]></category>
		<category><![CDATA[Performance]]></category>
		<category><![CDATA[Write Ahead]]></category>

		<guid isPermaLink="false">http://blogs.justonedatabase.com/?p=558</guid>
		<description><![CDATA[Event sourcing is a design pattern for retaining an exhaustive history of events such that the state of the system now (or at some point in the past) can be derived by a replay of those events. The goal of &#8230; <a href="http://blogs.justonedatabase.com/2012/01/03/memento/">Continue reading <span class="meta-nav">&#8594;</span></a><img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=blogs.justonedatabase.com&#038;blog=19003202&#038;post=558&#038;subd=justonedatabase&#038;ref=&#038;feed=1" width="1" height="1" />]]></description>
			<content:encoded><![CDATA[<p>Event sourcing is a design pattern for retaining an exhaustive history of events such that the state of the system now (or at some point in the past) can be derived by a replay of those events. The goal of event sourcing is to derive the current system state from the combination of past events with current application logic such that changes to logic are applied retrospectively and become implicitly reflected in the current system state.</p>
<p><em>Conceptually</em>, event sourcing only requires a single chronological log to record every event about every entity and a primary key generator for creating primary keys for new entities. Furthermore, in extremis, application state need never be persisted since it can always be recreated &#8211; you can think of this as lazy evaluation of system state. You can find a thorough discussion of event sourcing <a title="Martin Fowler - Event Sourcing" href="http://martinfowler.com/eaaDev/EventSourcing.html">here</a>.</p>
<p>There may appear to be analogies here to audit logs and write-ahead logging in databases; but closer inspection shows that an audit log only provides a history of events for a known system state; and a write-ahead log provides a limited history of recent changes which is retained only until the whole system state can be reliably persisted.</p>
<p>In principle, a relational database could apply an event sourcing design pattern such that every definition command, every insert, every update and every delete statement is recorded as an autonomous event so that the state of a row is recreated by replaying all of the events related to it. Of course, it is time consuming and inefficient to reconstruct everything from first principles for every query and therefore databases retain current state to avoid the reconstruction costs. Indeed, reconstruction of system state typically only occurs at database start-up when system state is recovered from any pending write-ahead or recovery logs. Hopefully, a database doesn’t expect to change its internal processing logic that frequently, so any potential advantages are enormously outweighed by the disadvantages of a wholly lazy evaluation.</p>
<p>While more volatile applications may choose an event sourcing pattern to effect a robust delivery environment for rapidly evolving requirements, performance considerations will often dictate a hybrid approach whereby current system state is recreated by applying recent events (rather than all events) to recent system state (rather than an empty state).</p>
<p>We also have to be careful to understand what an event is. Simply recording a change in state (such as a new attribute value) is not really event sourcing; whereas recording the cause (such as a command) that gave rise to that change is. While the latter allows system state to be fully revised according to changes in application logic; the former only allows us to recreate state at any given point in time &#8211; yet  this is still very useful within a database for the purposes of auditing and analytics.</p>
<p>Much like Memento (the film), which offers a delightfully ambiguous interpretation of reality, event sourcing allows a new reality to be created simply through an alternative interpretation of events – but, thankfully, databases are expected to hold a more definitive view of their world.</p>
<br />  <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gocomments/justonedatabase.wordpress.com/558/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/comments/justonedatabase.wordpress.com/558/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godelicious/justonedatabase.wordpress.com/558/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/delicious/justonedatabase.wordpress.com/558/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gofacebook/justonedatabase.wordpress.com/558/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/facebook/justonedatabase.wordpress.com/558/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gotwitter/justonedatabase.wordpress.com/558/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/twitter/justonedatabase.wordpress.com/558/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gostumble/justonedatabase.wordpress.com/558/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/stumble/justonedatabase.wordpress.com/558/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godigg/justonedatabase.wordpress.com/558/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/digg/justonedatabase.wordpress.com/558/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/goreddit/justonedatabase.wordpress.com/558/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/reddit/justonedatabase.wordpress.com/558/" /></a> <img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=blogs.justonedatabase.com&#038;blog=19003202&#038;post=558&#038;subd=justonedatabase&#038;ref=&#038;feed=1" width="1" height="1" />]]></content:encoded>
			<wfw:commentRss>http://blogs.justonedatabase.com/2012/01/03/memento/feed/</wfw:commentRss>
		<slash:comments>0</slash:comments>
	
		<media:content url="http://1.gravatar.com/avatar/d61fb61c2837718606307b2dfe3747fd?s=96&#38;d=http%3A%2F%2Fs0.wp.com%2Fi%2Fmu.gif&#38;r=G" medium="image">
			<media:title type="html">justonedatabase</media:title>
		</media:content>
	</item>
		<item>
		<title>Black and white</title>
		<link>http://blogs.justonedatabase.com/2011/08/02/black-and-white/</link>
		<comments>http://blogs.justonedatabase.com/2011/08/02/black-and-white/#comments</comments>
		<pubDate>Tue, 02 Aug 2011 09:53:25 +0000</pubDate>
		<dc:creator>Duncan</dc:creator>
				<category><![CDATA[Database]]></category>
		<category><![CDATA[Relational Database]]></category>
		<category><![CDATA[ACID]]></category>
		<category><![CDATA[Column Store]]></category>
		<category><![CDATA[Data Structures]]></category>
		<category><![CDATA[Logical]]></category>
		<category><![CDATA[Physical]]></category>
		<category><![CDATA[Row Store]]></category>
		<category><![CDATA[Schema]]></category>
		<category><![CDATA[Storage]]></category>

		<guid isPermaLink="false">http://blogs.justonedatabase.com/?p=552</guid>
		<description><![CDATA[The constant background debate about relational versus NoSQL has become entrenched in dogma and misses many fundamental aspects of data management.  Fortunately, the notion that the difference between SQL and NoSQL has anything to do with SQL has already been &#8230; <a href="http://blogs.justonedatabase.com/2011/08/02/black-and-white/">Continue reading <span class="meta-nav">&#8594;</span></a><img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=blogs.justonedatabase.com&#038;blog=19003202&#038;post=552&#038;subd=justonedatabase&#038;ref=&#038;feed=1" width="1" height="1" />]]></description>
			<content:encoded><![CDATA[<p>The constant background debate about relational versus NoSQL has become entrenched in dogma and misses many fundamental aspects of data management.  Fortunately, the notion that the difference between SQL and NoSQL has anything to do with SQL has already been thoroughly dispelled &#8211; but there still remains an active debate about the virtues or otherwise of schemas and ACID transactions.</p>
<p>Let’s deal with schemas first. A schema is just an abstraction of the data and is not a method of storage. The schema is simply there to provide a conceptual model for data access and exists as a logical layer above the storage structure in much the same way that a file system provides a logical abstraction of the physical storage system.</p>
<p>While the relational model was conceived to provide an intuitive framework for understanding real-world entities and a self-documenting structure for supporting queries, the database industry made the mistake of closely coupling that model with the storage organisation. This has resulted in the unnecessary polarisation between row stores and column stores and the horrible complexity and inertia typically experienced with traditional relational databases. Whereas the underlying database storage structure <em>should</em> be schema-less and the relational model <em>should</em> simply be an optional schema layered upon it.</p>
<p>By using a schema-less storage structure that efficiently resolves arbitrary access paths between stored elements it does not matter whether the logical access model is relational or not. Indeed, there is no reason why the relational schema cannot co-exist with other models in the same database at the same time.</p>
<p>ACID or not? Actually, there’s a false premise to that question. ACID is not a single property, it includes atomicity, durability and consistency and these are all orthogonal features which can be varied independently. Should a sequence of updates be applied atomically as a single change? Does durability of a change need to be guaranteed? How consistent should these changes be across the database infrastructure? These are not the same question and that last one actually requires more than a simple binary answer because consistency is really a spectrum of possibilities ranging from none, to eventual, to immediate.</p>
<p>In fact the requirements for atomicity, durability and consistency can vary by individual transaction. It is easy to imagine an application where a class of transactions that require durability coexist with a class of transactions that do not require it at all. Similarly for atomicity and consistency.  So it is not even adequate to configure these guarantees globally across the whole database – let alone hardwire them into the database infrastructure itself, which is the typical state of affairs.</p>
<p>While the relational model may choose to enforce global atomicity, durability and immediate consistency for every transaction – the database engine need not.</p>
<p>Even durability is not a simple yes or no.</p>
<p>Does committing a change to a disk make that change durable? Not if the disk fails. Is mirrored storage sufficient? How many mirrors are deemed safe? Is a single-site sufficient? How distant do these replicas need to be? One man’s durable is another man’s ephemeral.</p>
<p>Forget the arguments about choosing black or white &#8211; we need grey-scale.</p>
<br />  <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gocomments/justonedatabase.wordpress.com/552/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/comments/justonedatabase.wordpress.com/552/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godelicious/justonedatabase.wordpress.com/552/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/delicious/justonedatabase.wordpress.com/552/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gofacebook/justonedatabase.wordpress.com/552/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/facebook/justonedatabase.wordpress.com/552/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gotwitter/justonedatabase.wordpress.com/552/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/twitter/justonedatabase.wordpress.com/552/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gostumble/justonedatabase.wordpress.com/552/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/stumble/justonedatabase.wordpress.com/552/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godigg/justonedatabase.wordpress.com/552/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/digg/justonedatabase.wordpress.com/552/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/goreddit/justonedatabase.wordpress.com/552/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/reddit/justonedatabase.wordpress.com/552/" /></a> <img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=blogs.justonedatabase.com&#038;blog=19003202&#038;post=552&#038;subd=justonedatabase&#038;ref=&#038;feed=1" width="1" height="1" />]]></content:encoded>
			<wfw:commentRss>http://blogs.justonedatabase.com/2011/08/02/black-and-white/feed/</wfw:commentRss>
		<slash:comments>0</slash:comments>
	
		<media:content url="http://1.gravatar.com/avatar/d61fb61c2837718606307b2dfe3747fd?s=96&#38;d=http%3A%2F%2Fs0.wp.com%2Fi%2Fmu.gif&#38;r=G" medium="image">
			<media:title type="html">justonedatabase</media:title>
		</media:content>
	</item>
		<item>
		<title>Cover up</title>
		<link>http://blogs.justonedatabase.com/2011/07/28/cover-up/</link>
		<comments>http://blogs.justonedatabase.com/2011/07/28/cover-up/#comments</comments>
		<pubDate>Thu, 28 Jul 2011 11:36:34 +0000</pubDate>
		<dc:creator>Duncan</dc:creator>
				<category><![CDATA[Relational Database]]></category>
		<category><![CDATA[B-Tree]]></category>
		<category><![CDATA[Bit Map]]></category>
		<category><![CDATA[Column Store]]></category>
		<category><![CDATA[Covering Index]]></category>
		<category><![CDATA[Data Structures]]></category>
		<category><![CDATA[EVI]]></category>
		<category><![CDATA[Fractal Tree]]></category>
		<category><![CDATA[Index]]></category>
		<category><![CDATA[Inverted List]]></category>
		<category><![CDATA[LSM-Tree]]></category>
		<category><![CDATA[Row Projection]]></category>
		<category><![CDATA[Row Store]]></category>
		<category><![CDATA[Trie]]></category>

		<guid isPermaLink="false">http://blogs.justonedatabase.com/?p=536</guid>
		<description><![CDATA[A traditional relational database falls into one of two camps – it is either a row store or a column store. These are both intuitive arrangements for storing a table structure, where either each row or each column is stored &#8230; <a href="http://blogs.justonedatabase.com/2011/07/28/cover-up/">Continue reading <span class="meta-nav">&#8594;</span></a><img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=blogs.justonedatabase.com&#038;blog=19003202&#038;post=536&#038;subd=justonedatabase&#038;ref=&#038;feed=1" width="1" height="1" />]]></description>
			<content:encoded><![CDATA[<p>A traditional relational database falls into one of two camps – it is either a row store or a column store. These are both intuitive arrangements for storing a table structure, where either each row or each column is stored contiguously. The idea being that if you want to fetch a row, you will find it in one place or if you want to scan a column you will find all of the column values in one place.</p>
<p>Of course, the problem with both storage models is that they are overly simplistic and neither helps you find relevant rows in a selective OLTP query or eliminate irrelevant rows in a collective analytical query and the only way to make these storage models effective is to layer indexes on top or to throw more hardware at them.</p>
<p>Indexes are very familiar in a row store &#8211; but they also appear in column stores as row projections where multiple columns are stitched together to avoid the high cost of row re-construction; and more familiar index structures can also be found in some column stores too, for filtering columns and for joining between tables.</p>
<p>In its most general sense, any index can be considered to be a store of key and address pairs where both the key and the address may be stored either explicitly or implicitly for an index entry. So for example, in a key comparative structure (such as a B-Tree, LSM tree, fractal tree etc) both the key and the address are explicitly recorded in the index entry; whereas in a bit map, both the key and the address are implied from their position within the index. Naturally, a variety of possible index structures covers all four possible combinations  as shown in the table below (which is not an exhaustive list of structures by any means).</p>
<p style="text-align:left;"><a href="http://justonedatabase.files.wordpress.com/2011/07/cover-up1.jpg"><img class="aligncenter size-full wp-image-535" title="Cover up1" src="http://justonedatabase.files.wordpress.com/2011/07/cover-up1.jpg?w=500" alt=""   /></a>The advantage of an index with an implicit address is that the row address order will be preserved and scans across multiple indexes against the same table can be efficiently resolved to yield a combined result. Thus with bit map indexes, multiple indexes can be logically conjoined or disjoined for the same table without any need to sort/merge explicit addresses. With these types of indexes there is rarely any need for a composite key index because separate indexes can be easily combined into a single composite result.</p>
<p>An index with an explicit address will also typically retain the address order for a single and specific key value (since there is typically no advantage to changing the address order). Thus, with an inverted list we can easily merge the results from different lists; and similarly for a B-Tree, where we choose one particular key from each index we can merge the results directly because the address order is preserved.</p>
<p>However, when we attempt to scan an index with an explicit key and address for multiple keys, the addresses may no longer be in row address order and we have to either sort the addresses or create an ordered address map (such as a bit map) for each index to be able to merge addresses across indexes on the same table. Naturally, as a query covers more of the key domain, the more addresses must be mapped or sorted prior to merge and the less trivial this task becomes.</p>
<p>However, the important point here is that as soon as a query covers <em>more than one key</em> in an explicit key and address index, the overhead on index merging changes significantly.</p>
<p>Why is this important? Well, it is widely assumed that an index built with composite keys (that is, multiple keys concatenated together) covers and removes the need for indexes that might be built on a subset of those keys. A classic example of this is building a compound B-Tree index on the department number and employee salary columns in an employees table. This index might be used to find employees in a specific department in a certain salary range. Since we have already included the department number, this index can also be used to find all employees in a specific department regardless of their salary. Therefore, there is no need to build a separate department number index because the composite index will suffice. Right? Not quite. Whereas an index built solely on department number can be easily merged with an index built solely on employee gender to find all male employees in a specific department; the composite index requires the employees in the queried department to be sorted or address mapped before they can be merged with the gender index.</p>
<p>Worse still, while index scans that retain address order can be easily decomposed into an arbitrary number of address partitions across the address space with all partitions scanned and merged in parallel, this is no longer immediately possible if one or more of the indexes has to be sorted or mapped first.</p>
<p>Therefore, the idea that a row store can be made analytical by overlaying covering indexes is only effective where those indexes <em>precisely</em> match the query requirements &#8211; but to cover all those requirements typically requires an awful lot of indexes.</p>
<br />  <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gocomments/justonedatabase.wordpress.com/536/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/comments/justonedatabase.wordpress.com/536/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godelicious/justonedatabase.wordpress.com/536/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/delicious/justonedatabase.wordpress.com/536/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gofacebook/justonedatabase.wordpress.com/536/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/facebook/justonedatabase.wordpress.com/536/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gotwitter/justonedatabase.wordpress.com/536/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/twitter/justonedatabase.wordpress.com/536/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gostumble/justonedatabase.wordpress.com/536/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/stumble/justonedatabase.wordpress.com/536/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godigg/justonedatabase.wordpress.com/536/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/digg/justonedatabase.wordpress.com/536/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/goreddit/justonedatabase.wordpress.com/536/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/reddit/justonedatabase.wordpress.com/536/" /></a> <img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=blogs.justonedatabase.com&#038;blog=19003202&#038;post=536&#038;subd=justonedatabase&#038;ref=&#038;feed=1" width="1" height="1" />]]></content:encoded>
			<wfw:commentRss>http://blogs.justonedatabase.com/2011/07/28/cover-up/feed/</wfw:commentRss>
		<slash:comments>0</slash:comments>
	
		<media:content url="http://1.gravatar.com/avatar/d61fb61c2837718606307b2dfe3747fd?s=96&#38;d=http%3A%2F%2Fs0.wp.com%2Fi%2Fmu.gif&#38;r=G" medium="image">
			<media:title type="html">justonedatabase</media:title>
		</media:content>

		<media:content url="http://justonedatabase.files.wordpress.com/2011/07/cover-up1.jpg" medium="image">
			<media:title type="html">Cover up1</media:title>
		</media:content>
	</item>
		<item>
		<title>Divide and ponder</title>
		<link>http://blogs.justonedatabase.com/2011/06/08/divide-and-ponder/</link>
		<comments>http://blogs.justonedatabase.com/2011/06/08/divide-and-ponder/#comments</comments>
		<pubDate>Wed, 08 Jun 2011 08:49:17 +0000</pubDate>
		<dc:creator>Duncan</dc:creator>
				<category><![CDATA[Database]]></category>
		<category><![CDATA[B-Tree]]></category>
		<category><![CDATA[Index]]></category>
		<category><![CDATA[Life-Cycle]]></category>
		<category><![CDATA[Parallel]]></category>
		<category><![CDATA[Partition]]></category>
		<category><![CDATA[Partitioning]]></category>
		<category><![CDATA[Performance]]></category>
		<category><![CDATA[Scalability]]></category>

		<guid isPermaLink="false">http://blogs.justonedatabase.com/?p=475</guid>
		<description><![CDATA[Partitioning is an essential component to achieving scalability in a database server and there are several reasons for adopting partitioning. One reason is to overcome the performance shortcomings of index structures and the following two partitioning  methods can be used &#8230; <a href="http://blogs.justonedatabase.com/2011/06/08/divide-and-ponder/">Continue reading <span class="meta-nav">&#8594;</span></a><img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=blogs.justonedatabase.com&#038;blog=19003202&#038;post=475&#038;subd=justonedatabase&#038;ref=&#038;feed=1" width="1" height="1" />]]></description>
			<content:encoded><![CDATA[<p>Partitioning is an essential component to achieving scalability in a database server and there are several reasons for adopting partitioning.</p>
<p>One reason is to overcome the performance shortcomings of index structures and the following two partitioning methods can be used to mitigate them:</p>
<ul>
<li>An index can be partitioned on time to reduce the active insert area of an index, such that a greater proportion of the active area can be cached than would be possible without partitioning. This can be used to alleviate poor insert and update performance in large indexes.</li>
<li>An index can be partitioned in line with queried keys to reduce the search area of an index, such that a query can eliminate index partitions based on key search criteria. This can be used to reduce query time in large indexes.</li>
</ul>
<p>Clearly, it is unlikely that the different partitioning requirements above will be in alignment and therefore attempting to partition indexes for both fast inserts and fast queries typically creates conflicting partitioning requirements and these schemes require careful design to achieve an optimal partitioning arrangement.</p>
<p>In addition, the following partitioning methods can be used to achieve general scalability by allowing parallel processing and by easing data life-cycle management.</p>
<ul>
<li>A table or index can be abundantly and equitably partitioned to allow arbitrarily parallel and independent operations against it.</li>
<li>A table or index can be partitioned by time divisions to facilitate life cycle management such that old and obsolete partitions can be dropped.</li>
</ul>
<p>In contrast to partitioning used to rescue index performance, these schemes are mutually accommodating and do not conflict with each other; nor do they require design decisions &#8211; data can be implicitly divided into arbitrarily granular time divisions for lifecycle management and parallel query operations can exploit those partitions at will.</p>
<p>However, while it is beneficial to partition most structures by time division for general scalability and life-cycle management, tree indexes (B-Trees etc) do not behave well in this respect. This is because an index partitioned orthogonally to the keys used in a query requires all of the partitions to be scanned for that query. While adding more partitions may reduce the size of each individual partition, the cost of scanning multiple partitions rises linearly with the number of partitions, yet the search depth of each partition reduces logarithmically with partition size. Hence the more partitions we add, the costlier the query for a tree index.</p>
<p>This increase in retrieval cost for a tree index partitioned orthogonally to a query key over a non-partitioned index is given by P(1-log<sub>N</sub>(P)) where P is the number of partitions and N is the total number of keys. We can see that for any non-trivial value of N, the retrieval cost effectively increases linearly with the number of partitions.</p>
<p>Of course, we can scan those tree index partitions in parallel, but the linear increase in overall cost means we gain nothing – it will take about the same time as it would to scan an unpartitioned index and yet we have to perform a lot more work and consume many more resources to achieve much the same result.</p>
<p>So, while a tree index will offer logarithmic scaling for specific queries, when general scalability requirements impose query independent partitioning upon it, that benefit is lost.</p>
<br />  <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gocomments/justonedatabase.wordpress.com/475/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/comments/justonedatabase.wordpress.com/475/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godelicious/justonedatabase.wordpress.com/475/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/delicious/justonedatabase.wordpress.com/475/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gofacebook/justonedatabase.wordpress.com/475/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/facebook/justonedatabase.wordpress.com/475/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gotwitter/justonedatabase.wordpress.com/475/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/twitter/justonedatabase.wordpress.com/475/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gostumble/justonedatabase.wordpress.com/475/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/stumble/justonedatabase.wordpress.com/475/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godigg/justonedatabase.wordpress.com/475/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/digg/justonedatabase.wordpress.com/475/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/goreddit/justonedatabase.wordpress.com/475/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/reddit/justonedatabase.wordpress.com/475/" /></a> <img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=blogs.justonedatabase.com&#038;blog=19003202&#038;post=475&#038;subd=justonedatabase&#038;ref=&#038;feed=1" width="1" height="1" />]]></content:encoded>
			<wfw:commentRss>http://blogs.justonedatabase.com/2011/06/08/divide-and-ponder/feed/</wfw:commentRss>
		<slash:comments>0</slash:comments>
	
		<media:content url="http://1.gravatar.com/avatar/d61fb61c2837718606307b2dfe3747fd?s=96&#38;d=http%3A%2F%2Fs0.wp.com%2Fi%2Fmu.gif&#38;r=G" medium="image">
			<media:title type="html">justonedatabase</media:title>
		</media:content>
	</item>
		<item>
		<title>Gone in a flash</title>
		<link>http://blogs.justonedatabase.com/2011/06/02/gone-in-a-flash/</link>
		<comments>http://blogs.justonedatabase.com/2011/06/02/gone-in-a-flash/#comments</comments>
		<pubDate>Thu, 02 Jun 2011 11:22:24 +0000</pubDate>
		<dc:creator>Duncan</dc:creator>
				<category><![CDATA[Hardware]]></category>
		<category><![CDATA[Architecture]]></category>
		<category><![CDATA[CPU]]></category>
		<category><![CDATA[CPU Cache]]></category>
		<category><![CDATA[Flash]]></category>
		<category><![CDATA[Hard Disk]]></category>
		<category><![CDATA[Memory]]></category>
		<category><![CDATA[Non-Volatile Memory]]></category>
		<category><![CDATA[Storage]]></category>

		<guid isPermaLink="false">http://blogs.justonedatabase.com/?p=449</guid>
		<description><![CDATA[Flash storage has become mainstream. But is it here to stay? Probably not. There is a whole raft of non-volatile memory technologies snapping at the heels of flash memory. These new technologies use different effects such as magnetic, resistive and &#8230; <a href="http://blogs.justonedatabase.com/2011/06/02/gone-in-a-flash/">Continue reading <span class="meta-nav">&#8594;</span></a><img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=blogs.justonedatabase.com&#038;blog=19003202&#038;post=449&#038;subd=justonedatabase&#038;ref=&#038;feed=1" width="1" height="1" />]]></description>
			<content:encoded><![CDATA[<p>Flash storage has become mainstream. But is it here to stay? Probably not.</p>
<p>There is a whole raft of non-volatile memory technologies snapping at the heels of flash memory. These new technologies use different effects such as magnetic, resistive and material phase effects to store data rather than the charge effects used by flash memory. These new technologies include Phase Change Memory, Spin Torque Transfer Memory, Resistive Memory and Racetrack Memory and some of these are already in production.</p>
<p>The important thing about these new technologies is that they are all much faster than flash, they all offer greater density and have much lower power requirements than flash. In many cases, these new forms of memory promise orders of magnitude greater speed and density. In fact, we are likely to see non-volatile memory that is both faster and more dense than current DRAM.</p>
<p>In the longer term, these technologies have the potential to change the architecture of computing whereby storage is memory and is integrated with the CPU. It may be that memory becomes sufficiently dense and close to the CPU that caching becomes unnecessary. That makes CPU cores a whole lot simpler and removes the need for pre-fetching, branch predictions, pipelining or super-scaling. Simpler cores can mean faster cores and many more cores. We may be looking at an architecture of interconnected memory modules with integrated cores (and no storage).</p>
<p>In fact, recent research has proposed the idea that resistive memory can perform logic operations in addition to functioning as memory. That could create a very new and challenging architecture indeed.</p>
<p>Ultimately, we will probably need to forget about making software cache efficient (because there won&#8217;t be one) but we will need to ensure that software is embarrassingly parallel instead.</p>
<p>In the meantime, flash will have its decade and will become temporarily ubiquitous in mobile devices. What about enterprise storage? Can it replace hard disk in a decade? There is so much hard disk storage out there and new disk remains such a cheap option that flash can probably only become dominant as a storage cache to accelerate mechanical systems &#8211; before it is overtaken by one of these rapidly emerging technologies.</p>
<p>Flash storage. RIP.</p>
<br />  <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gocomments/justonedatabase.wordpress.com/449/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/comments/justonedatabase.wordpress.com/449/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godelicious/justonedatabase.wordpress.com/449/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/delicious/justonedatabase.wordpress.com/449/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gofacebook/justonedatabase.wordpress.com/449/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/facebook/justonedatabase.wordpress.com/449/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gotwitter/justonedatabase.wordpress.com/449/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/twitter/justonedatabase.wordpress.com/449/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gostumble/justonedatabase.wordpress.com/449/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/stumble/justonedatabase.wordpress.com/449/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godigg/justonedatabase.wordpress.com/449/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/digg/justonedatabase.wordpress.com/449/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/goreddit/justonedatabase.wordpress.com/449/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/reddit/justonedatabase.wordpress.com/449/" /></a> <img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=blogs.justonedatabase.com&#038;blog=19003202&#038;post=449&#038;subd=justonedatabase&#038;ref=&#038;feed=1" width="1" height="1" />]]></content:encoded>
			<wfw:commentRss>http://blogs.justonedatabase.com/2011/06/02/gone-in-a-flash/feed/</wfw:commentRss>
		<slash:comments>0</slash:comments>
	
		<media:content url="http://1.gravatar.com/avatar/d61fb61c2837718606307b2dfe3747fd?s=96&#38;d=http%3A%2F%2Fs0.wp.com%2Fi%2Fmu.gif&#38;r=G" medium="image">
			<media:title type="html">justonedatabase</media:title>
		</media:content>
	</item>
		<item>
		<title>A blunt tool</title>
		<link>http://blogs.justonedatabase.com/2011/05/25/an-inappropriate-tool/</link>
		<comments>http://blogs.justonedatabase.com/2011/05/25/an-inappropriate-tool/#comments</comments>
		<pubDate>Wed, 25 May 2011 08:54:56 +0000</pubDate>
		<dc:creator>Duncan</dc:creator>
				<category><![CDATA[Relational Database]]></category>
		<category><![CDATA[B-Tree]]></category>
		<category><![CDATA[Data Structures]]></category>
		<category><![CDATA[Primary Key]]></category>

		<guid isPermaLink="false">http://blogs.justonedatabase.com/?p=432</guid>
		<description><![CDATA[B-Trees are regularly used by relational databases to enforce primary key constraints. Let’s look at what we know about a primary key… It is unique The semantic ordering of a primary key is irrelevant (we do not care that one &#8230; <a href="http://blogs.justonedatabase.com/2011/05/25/an-inappropriate-tool/">Continue reading <span class="meta-nav">&#8594;</span></a><img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=blogs.justonedatabase.com&#038;blog=19003202&#038;post=432&#038;subd=justonedatabase&#038;ref=&#038;feed=1" width="1" height="1" />]]></description>
			<content:encoded><![CDATA[<p>B-Trees are regularly used by relational databases to enforce primary key constraints.</p>
<p>Let’s look at what we know about a primary key…</p>
<ul>
<li>It is unique</li>
<li>The semantic ordering of a primary key is irrelevant (we do not care that one primary key is more or less than any other)</li>
</ul>
<p>But what properties does a B-Tree have in these respects?</p>
<ul>
<li>The structure does not implicitly enforce uniqueness (you have to find a key first and then make a separate judgement).</li>
<li>The structure rigorously enforces semantic ordering upon the keys.</li>
</ul>
<p>At first glance, the B-Tree does not sound like the best tool for the job and on  closer inspection things don&#8217;t get any better&#8230;</p>
<p>For each key inspected in random order, we have to visit a block at every level of the B-Tree and for each of those blocks that we visit we then have to perform either a binary search across the block (at best) or iterate sequentially through it (at worst). So even for a modest population  of 1 billion keys, using a B-Tree with 3 levels and 1000 entries per block will require somewhere between 30 and 3000 memory transfers just to find one key &#8211; and that’s on a good day i.e. when it happens to be completely cached within several gigabytes of memory.</p>
<p>Of course, the B-Tree is also available to service queries too. But the semantic ordering enforced by the B-Tree index is irrelevant to a query navigating a primary/foreign key relationship and the key order doesn&#8217;t assist with either nested loop or hash join processing. While the key order can be exploited by a merge join, the merge join is rarely a strategy of first choice and that limited advantage evaporates if additional query predicates need to be applied to the same table that holds the primary key index.</p>
<p>In essence, the B-Tree contains redundant information which is maintained at a high cost but yields minimal benefit. That&#8217;s not a good cost/benefit ratio.</p>
<br />  <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gocomments/justonedatabase.wordpress.com/432/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/comments/justonedatabase.wordpress.com/432/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godelicious/justonedatabase.wordpress.com/432/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/delicious/justonedatabase.wordpress.com/432/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gofacebook/justonedatabase.wordpress.com/432/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/facebook/justonedatabase.wordpress.com/432/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gotwitter/justonedatabase.wordpress.com/432/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/twitter/justonedatabase.wordpress.com/432/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gostumble/justonedatabase.wordpress.com/432/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/stumble/justonedatabase.wordpress.com/432/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godigg/justonedatabase.wordpress.com/432/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/digg/justonedatabase.wordpress.com/432/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/goreddit/justonedatabase.wordpress.com/432/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/reddit/justonedatabase.wordpress.com/432/" /></a> <img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=blogs.justonedatabase.com&#038;blog=19003202&#038;post=432&#038;subd=justonedatabase&#038;ref=&#038;feed=1" width="1" height="1" />]]></content:encoded>
			<wfw:commentRss>http://blogs.justonedatabase.com/2011/05/25/an-inappropriate-tool/feed/</wfw:commentRss>
		<slash:comments>0</slash:comments>
	
		<media:content url="http://1.gravatar.com/avatar/d61fb61c2837718606307b2dfe3747fd?s=96&#38;d=http%3A%2F%2Fs0.wp.com%2Fi%2Fmu.gif&#38;r=G" medium="image">
			<media:title type="html">justonedatabase</media:title>
		</media:content>
	</item>
		<item>
		<title>Up and out</title>
		<link>http://blogs.justonedatabase.com/2011/05/16/up-and-out/</link>
		<comments>http://blogs.justonedatabase.com/2011/05/16/up-and-out/#comments</comments>
		<pubDate>Mon, 16 May 2011 12:34:22 +0000</pubDate>
		<dc:creator>Duncan</dc:creator>
				<category><![CDATA[Database]]></category>
		<category><![CDATA[Hardware]]></category>
		<category><![CDATA[Software]]></category>
		<category><![CDATA[Architecture]]></category>
		<category><![CDATA[Blocking]]></category>
		<category><![CDATA[Cache Coherency]]></category>
		<category><![CDATA[CPU]]></category>
		<category><![CDATA[CPU Cache]]></category>
		<category><![CDATA[Lock Free]]></category>
		<category><![CDATA[Non Blocking]]></category>
		<category><![CDATA[Obstruction Free]]></category>
		<category><![CDATA[Performance]]></category>
		<category><![CDATA[Scale Out]]></category>
		<category><![CDATA[Scale Up]]></category>
		<category><![CDATA[Wait Free]]></category>

		<guid isPermaLink="false">http://blogs.justonedatabase.com/?p=418</guid>
		<description><![CDATA[Scaling out a database across a network of database servers requires an architecture which equitably distributes independent tasks across the network; where equitable distribution is required to ensure balanced use of the resources available and independence is required to minimise &#8230; <a href="http://blogs.justonedatabase.com/2011/05/16/up-and-out/">Continue reading <span class="meta-nav">&#8594;</span></a><img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=blogs.justonedatabase.com&#038;blog=19003202&#038;post=418&#038;subd=justonedatabase&#038;ref=&#038;feed=1" width="1" height="1" />]]></description>
			<content:encoded><![CDATA[<p>Scaling out a database across a network of database servers requires an architecture which <em>equitably</em> distributes <em>independent</em> tasks across the network; where equitable distribution is required to ensure balanced use of the resources available and independence is required to minimise communication between those tasks. This latter point is important because communication can create serialization between tasks and can saturate the network bandwidth as the number of tasks is increased. Both of these aspects hurt scalability.</p>
<p>Serialization is undesirable, because it forces tasks to wait on one another and a waiting task is an unproductive one &#8211; so serialization reduces the effectiveness of scale out. Network bandwidth saturation creates a ceiling for the number of tasks that can be executed simultaneously because adding more tasks just starves the remaining tasks of bandwidth &#8211; so bandwidth saturation limits the extent of scale out. Hence, both serialization and saturation make the architecture less scalable.</p>
<p>Of course, there has to be some communication at the start and end of each task if a task is to perform any useful general-purpose work; but each task needs to operate independently between those communication points to achieve scalability. The Map-Reduce framework exemplifies this approach wherein the map phase distributes the tasks which operate independently until completion and then the reduce phase consolidates the results.</p>
<p>Interestingly, a similar approach is also required within a single database server. It is rare to find a server with a single CPU core these days (outside of the mobile computing environment) and to scale up performance the database must equitably distribute independent tasks across the CPU cores. Again communication between the cores must be kept to a minimum to avoid serialization between tasks, memory bandwidth saturation and cache coherency traffic. Let’s take a closer look at that last point.</p>
<p>In a multi-core environment, each core will have a private cache as well as a shared cache to overcome the latencies experienced with memory access. The private caches will be small but will execute at near CPU speed while the shared cache will be larger and slower but still much faster than normal memory access. Indeed, memory access is likely to be non-uniform in a multi-core environment and different cores will likely experience different latencies when accessing the same memory location. These caches attempt to overcome memory latency and provide a cache hierarchy whereby content works its way from memory to shared cache to private cache for a memory read access and vice versa for a memory write access. Therefore, an update to a memory address starts in the private cache of a core and is not immediately visible to the other cores (because it is a private cache) and to avoid consistency problems with memory access across multiple cores, a cache coherency protocol has to be observed. A typical protocol will invalidate the content of a memory address in a private cache when the corresponding address is updated in the private cache of another CPU core and will force a flush of the memory address from the updated private cache into the shared cache and possibly memory also. Any core attempting to access their invalidated private cache location will then be forced to fetch the content again from the shared cache and/or memory. The cache coherency protocol involves traffic between cores and/or snooping between cores (to see which cores have accessed which memory locations) and forces access to slower shared cache or memory when a memory address is invalidated by another core. In other words, cache coherency is an expensive protocol which hits core performance and the only way to minimise the cache coherency overhead is to avoid updating the content of memory locations shared between cores wherever possible. 
Therefore, not only can communication between tasks create serialization and saturate memory bandwidth, it will also scupper core performance through the burden of cache coherency.</p>
<p>Note that sharing a memory location across cores for read only access is perfectly fine. In this case, each private cache will get its own copy of the data and provided no core updates it, the cache coherency protocol need never be invoked for it. Problems only arise when a core updates the content of a shared memory address.</p>
<p>Inevitably, there will be some updating of memory shared between cores and there is a plethora of research regarding blocking, obstruction-free, lock-free and wait-free algorithms for dealing with shared data across contending threads. However, outside of the research community there is some misunderstanding about the effects and benefits of these algorithms, so let’s take a closer look.</p>
<p>A blocking algorithm is the classic approach that places a guard or latch around a critical section to prevent more than one thread accessing the critical section at the same time. It typically uses a low level mutex or semaphore for this controlled access and these structures deliberately force serialized access to critical sections and hence serialization between tasks. Ultimately, a semaphore is a shared memory location with atomic test and increment/decrement operations. The use of semaphores becomes particularly unfortunate if it is used to count the number of concurrent readers of a critical section to prevent concurrent read/write access; in this case every read of the critical section may invoke cache coherency traffic even though the shared critical section is only being read and not updated at all.</p>
<p>Moreover, a thread holding a block on a critical section may be suspended and cause active threads using the same critical section to wait until the controlling thread becomes active again. Even worse, a block may be accidentally left in place if a controlling thread abnormally terminates. Hence, with a blocking protocol, the progress of any single thread and the system overall is heavily dependent upon the activities of all of the threads sharing the same critical section.</p>
<p>In light of this, the following non-blocking algorithms can be used as an alternative &#8211; they are designed to provide a guarantee about the progress of one or more threads competing for shared data.</p>
<ul>
<li>An obstruction-free algorithm guarantees that a hibernating thread cannot block an active thread from accessing the same data;</li>
<li>A lock-free algorithm guarantees that at least one active thread will make (genuine) progress amongst multiple active threads accessing the same data;</li>
<li>A wait-free algorithm guarantees that all active threads will make (genuine) progress when accessing the same data.</li>
</ul>
<p>Such algorithms are generally available for common structures such as stacks and queues etc. But they still require the use of atomic increment/decrement, test and exchange instructions operating on shared memory locations. Hence they still incur the burden of cache coherency and still remain limited by memory access bandwidth. They only alleviate the serialization experienced between threads. Nothing more.</p>
<p>Indeed, the benefit of the reduced serialization can incur additional cost in memory and processing cycles.  As the serialization guarantee gets more stringent, the cost of that guarantee also increases. Some phrase involving the words lunch and free comes to mind.</p>
<p>Clearly, the wait-free algorithm is the most stringent of all and only makes sense when threads accessing the same data are performing logically independent operations (albeit against the same physical structure). For example, if I push an item into a queue at the same time you push another independent item into the same queue, these are logically independent operations and both can progress independently. In effect, two versions of the queue are created (requiring more memory) and subsequently merged (requiring more processing) and memory reclaimed (requiring garbage collection) to achieve progress for both operations simultaneously. (Note that there is an analogy here with eventual consistency in a scaled-out architecture.)</p>
<p>A lock-free algorithm will cause non-progressing threads to spin around with repeated unsuccessful attempts at accessing the shared data and likely incurring cache coherency overhead on the progressing thread.</p>
<p>An obstruction-free algorithm may allow optimistic access to shared data and force unsuccessful threads to clean-up their misdemeanours after the fact.</p>
<p>Hence the use of the more stringent non-blocking algorithms does not deliver the best overall throughput. In particular, the wait-free guarantee is very expensive and is usually confined to real-time environments where any given thread cannot afford to wait too long.</p>
<p>Therefore a database architecture wishing to scale up effectively will use lock-free or obstruction-free algorithms to reduce serialization to a reasonable degree while maintaining a high overall throughput.</p>
<p>But more importantly, these non-blocking protocols are not a magic bullet and regardless of the protocols used, it is far more important to minimise the use of communication as much as possible. Less communication between cores means better scale-up and a database designed to scale-up well, will need to be scaled out a lot less than one that isn’t.</p>
<br />  <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gocomments/justonedatabase.wordpress.com/418/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/comments/justonedatabase.wordpress.com/418/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godelicious/justonedatabase.wordpress.com/418/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/delicious/justonedatabase.wordpress.com/418/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gofacebook/justonedatabase.wordpress.com/418/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/facebook/justonedatabase.wordpress.com/418/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gotwitter/justonedatabase.wordpress.com/418/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/twitter/justonedatabase.wordpress.com/418/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gostumble/justonedatabase.wordpress.com/418/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/stumble/justonedatabase.wordpress.com/418/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godigg/justonedatabase.wordpress.com/418/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/digg/justonedatabase.wordpress.com/418/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/goreddit/justonedatabase.wordpress.com/418/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/reddit/justonedatabase.wordpress.com/418/" /></a> <img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=blogs.justonedatabase.com&#038;blog=19003202&#038;post=418&#038;subd=justonedatabase&#038;ref=&#038;feed=1" width="1" height="1" />]]></content:encoded>
			<wfw:commentRss>http://blogs.justonedatabase.com/2011/05/16/up-and-out/feed/</wfw:commentRss>
		<slash:comments>1</slash:comments>
	
		<media:content url="http://1.gravatar.com/avatar/d61fb61c2837718606307b2dfe3747fd?s=96&#38;d=http%3A%2F%2Fs0.wp.com%2Fi%2Fmu.gif&#38;r=G" medium="image">
			<media:title type="html">justonedatabase</media:title>
		</media:content>
	</item>
		<item>
		<title>Permit or forgive?</title>
		<link>http://blogs.justonedatabase.com/2011/04/25/permit-or-forgive/</link>
		<comments>http://blogs.justonedatabase.com/2011/04/25/permit-or-forgive/#comments</comments>
		<pubDate>Mon, 25 Apr 2011 18:25:28 +0000</pubDate>
		<dc:creator>Duncan</dc:creator>
				<category><![CDATA[Relational Database]]></category>
		<category><![CDATA[Software]]></category>
		<category><![CDATA[Collision Detection]]></category>
		<category><![CDATA[Collision Prevention]]></category>
		<category><![CDATA[Collisions]]></category>
		<category><![CDATA[Contention]]></category>
		<category><![CDATA[Locking]]></category>
		<category><![CDATA[Optimistic Locking]]></category>
		<category><![CDATA[Performance]]></category>
		<category><![CDATA[Pessimistic Locking]]></category>
		<category><![CDATA[Updates]]></category>

		<guid isPermaLink="false">http://blogs.justonedatabase.com/?p=402</guid>
		<description><![CDATA[This post is about row update locking within a relational database instance. But before we delve into the nuances of locking, it is important to lay out the context of this discussion. We will be only talking about locking for &#8230; <a href="http://blogs.justonedatabase.com/2011/04/25/permit-or-forgive/">Continue reading <span class="meta-nav">&#8594;</span></a><img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=blogs.justonedatabase.com&#038;blog=19003202&#038;post=402&#038;subd=justonedatabase&#038;ref=&#038;feed=1" width="1" height="1" />]]></description>
			<content:encoded><![CDATA[<p>This post is about row update locking within a relational database instance.</p>
<p>But before we delve into the nuances of locking, it is important to lay out the context of this discussion.</p>
<p>We will be only talking about locking for the purposes of resolving <em>competing updates</em> to the same data. The practice of a writer locking out reads for the same data (or vice versa) is unnecessary; similarly locking a whole table for one or more row updates is beyond rudimentary. Both practices have a devastating effect on performance amongst concurrent users and frankly, if you are running a shared database that invokes either of these dubious practices then it’s time to move on, you can do a lot better. OK, now that’s out of the way, let’s get into the detail of row update locking.</p>
<p>Locking can be pessimistic or optimistic – in the former we prevent any nastiness happening from competing updates and in the latter we clean up any nastiness that did happen because we didn’t stop it. In that sense, locking can be thought of as collision prevention (pessimistic locking with no nastiness) or collision detection (optimistic locking with nastiness clean-up).</p>
<p>There has been some debate over which type of locking/collision management affords the best performance and because it depends on so many factors, databases typically offer a choice. But how do you choose?</p>
<p>The first factor to be considered is the amount of <em>real</em> contention that goes on. If any two users are <em>likely</em> to update the same row at the same time then prevention is more efficient than clean-up; whereas if real contention is exceptional then it may be more efficient to deal with the consequences of a collision rather than prevent it.</p>
<p>It’s a bit like operating traffic lights at a road junction. If the traffic is sufficiently light you can get away with turning the lights off and clearing up after occasional accidents – but if the traffic gets heavy you’d better turn them back on to avoid carnage.</p>
<p>If you really don’t know what the likelihood of a collision is, then it’s probably better to opt for prevention for all the reasons about to be outlined.</p>
<p>Collision detection (optimistic locking) requires the application using the database to deal with the consequences of a collision. If updates from two users do collide, then one of them has to be denied their update and informed that the data has changed so that they can choose to re-submit an action, change an action or cancel it entirely. Realistically, the database cannot decide on the user’s behalf and the user or application will have to resolve it.</p>
<p>Collision detection is easy for a database that performs an update-in-place (see <a title="Update-in-place vs. append semantics" href="http://blogs.justonedatabase.com/2011/04/19/tradition/">previous post</a>). In this case, writers place a new version number against each row updated so that a process can take note of a row version at read time and see if it has changed at update time (which implies a collision happened since the read). But collision detection gets a whole lot harder for a database that appends updates. Here, the original version of the data never changes and the update is placed elsewhere. Hence a writer has to go look and see if there have been any colliding appends since the original version was read and if there is a lot of update activity this can become a costly overhead. Plus what happens if the collision occurs between the collision check and the append? We now have to marshal access to the append area to prevent this and it starts to sound a lot more like collision prevention.</p>
<p>Ideally, any form of collision prevention (pessimistic locking) would be sufficiently fast and lightweight such that it presents a marginal cost in cases where genuine contention does not occur. But pessimistic locking is often considered a major overhead for a number of reasons.</p>
<p>It may be that lock requests become serialised because they are managed by a centralised lock manager and processes that have no natural contention are then forced to wait on each other.</p>
<p>It may be that locks are such a limited resource that processes have to wait for their availability or enlarge the granularity of each lock in order to use fewer of them. Again, this creates artificial contention which will hit performance.</p>
<p>However, these performance issues arise from the locking implementations used and not from the principle of pessimistic locking itself.</p>
<p>Then there are deadlocks. A deadlock occurs where two or more processes hold mutual locks between each other such that no process can proceed, unless one or more processes relinquish one or more locks allowing the other processes to continue.</p>
<p>If there’s only one knife and one fork on a dining table and I grab the fork at the same time you grab the knife, then neither of us can eat until one of us gives up an item of cutlery. We could be obstinate and both starve; or we could be altruistic and both starve; or we could defer to the choice of a third party in which case only one of us will starve. In the same way, deadlock detection has to choose which process will be forced to relinquish its locks and will use a graph of processes and locks to determine which process has the least to lose and the most to offer. This can be complicated and time consuming if the graph is large with lots of locks to consider.</p>
<p>However, if locks are sufficiently granular then deadlocks can only occur where there is genuine conflict and then it is quite reasonable to back out one or more of the conflicting parties in that case.</p>
<p>But if each party requests all of its locks through a single atomic request which is either granted or denied in whole then deadlocks cannot occur between those parties. In our dining example, we can each make a single request &#8211; for both a knife and a fork; one of us will succeed and the other will fail – but there is no deadlock. In the context of updating rows in a table, the rows to be updated are identified first and then locked atomically – rather than requesting locks incrementally. Deadlocks could still arise where multiple tables are being updated because each set of locks for each table will be requested separately. However, the deadlock graph is much simpler because it need only consider a handful of table dependencies rather than thousands of individual lock dependencies and this makes deadlock detection a much faster proposition.</p>
<p>Plus requesting locks in a bulk manner will minimise the number of requests made &#8211; which in turn reduces request serialization and amortizes the cost of lock requests.</p>
<p>Then there are a couple of classic red herrings often raised about pessimistic locking…</p>
<p>A user who can issue ad-hoc SQL locks a set of rows for update and then goes off to lunch before finishing his transaction &#8211; leaving anybody else needing to update the same rows waiting until he returns later in the afternoon.</p>
<p>If this is just a test system, then who cares? If this is a production system then who allows users access to the SQL prompt anyway? If you do that kind of thing on a production system then it is probably already toast for a whole bunch of reasons unrelated to locking.</p>
<p>Another objection is that long-lasting web transactions may disconnect and leave table rows locked.</p>
<p>The clue to the misdirection here is the phrase long-lasting web transaction. Nobody (of sane mind) designs a web transaction to be long-lasting or allows it to span across temporary database connections. Web transactions should be encased in procedures such that they either happen or don’t &#8211; they can’t partially happen. Browsing available stock, selecting a stock item and purchasing a stock item are all separate and independent transactions. You never lock rows in a database for the period between adding an item to a cart and paying for it.</p>
<p>This all rather begs the question of why would you use collision detection (optimistic locking) with append semantics if you also have a fast and efficient method of preventing collisions? You wouldn’t.</p>
<p>It may be easier to ask for forgiveness rather than permission &#8211; but only if you’re not doing it all the time.</p>
<br />  <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gocomments/justonedatabase.wordpress.com/402/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/comments/justonedatabase.wordpress.com/402/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godelicious/justonedatabase.wordpress.com/402/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/delicious/justonedatabase.wordpress.com/402/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gofacebook/justonedatabase.wordpress.com/402/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/facebook/justonedatabase.wordpress.com/402/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gotwitter/justonedatabase.wordpress.com/402/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/twitter/justonedatabase.wordpress.com/402/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gostumble/justonedatabase.wordpress.com/402/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/stumble/justonedatabase.wordpress.com/402/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godigg/justonedatabase.wordpress.com/402/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/digg/justonedatabase.wordpress.com/402/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/goreddit/justonedatabase.wordpress.com/402/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/reddit/justonedatabase.wordpress.com/402/" /></a> <img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=blogs.justonedatabase.com&#038;blog=19003202&#038;post=402&#038;subd=justonedatabase&#038;ref=&#038;feed=1" width="1" height="1" />]]></content:encoded>
			<wfw:commentRss>http://blogs.justonedatabase.com/2011/04/25/permit-or-forgive/feed/</wfw:commentRss>
		<slash:comments>3</slash:comments>
	
		<media:content url="http://1.gravatar.com/avatar/d61fb61c2837718606307b2dfe3747fd?s=96&#38;d=http%3A%2F%2Fs0.wp.com%2Fi%2Fmu.gif&#38;r=G" medium="image">
			<media:title type="html">justonedatabase</media:title>
		</media:content>
	</item>
	</channel>
</rss>