This article outlines how to create a realtime heatmap of your syscall latency using HTML5, some great node modules, and DTrace. It was inspired by a talk that Bryan Cantrill and Brendan Gregg gave on Joyent's cool cloud analytics tools. While specific, the code provided could easily be adapted to provide a heatmap of any type of aggregation DTrace is capable of providing.
System Requirements
First things first: you're going to need a system with DTrace. This likely means Solaris (or one of its descendants), OS X, or a BSD variant. There doesn't appear to be DTrace available for Linux.
Security
Secondly, please be aware that at the time of writing the demo code contains a fairly substantial security vulnerability. Namely, the D script is sent from the client with no authentication whatsoever. If you bind to localhost this shouldn't be a big deal for a demo. Time permitting, I intend to clean up the code.
Dependencies
For this tutorial you'll also need:
node - http://nodejs.org/#download (duh)
npm - https://github.com/isaacs/npm (makes installing modules a breeze)
node-libdtrace - https://github.com/bcantrill/node-libdtrace (provides dtrace functionality)
Socket.IO - 'npm install socket.io' (web sockets made easy)
Server
Now we're ready to start writing our web server:
var http = require('http');
var libdtrace = require('libdtrace');
var io = require('socket.io');
var express = require('express');
/* Build the express server. Everything under ./public (the client-side
   JavaScript) is handed to express' static file provider. */
var app = express.createServer();
app.configure(function serve_public_directory() {
    var public_dir = __dirname + '/public';
    app.use(express.staticProvider(public_dir));
});
/* Before we go any further we must realize that each time a user connects we're going to want
to send them the dtrace aggregation roughly every second. We can do so using 'setInterval', but
we must keep track of both the intervals we set and the dtrace consumers that are created, as
we'll need them later when the client disconnects.

Both maps are keyed by the socket's sessionId:
interval_id_by_session_id - the setInterval id of that client's periodic send
dtp_by_session_id         - the libdtrace consumer created for that client
*/
var interval_id_by_session_id = {};
var dtp_by_session_id = {};
/* In order to efficiently send packets we're going to use the Socket.IO library, which
seamlessly integrates with express: io.listen is handed the express app created above.
*/
var websocket_server = io.listen(app);
/* Release everything held for one client session: cancel its periodic
   aggregation push and stop its dtrace consumer. Safe to call for a session
   that never started tracing (nothing is registered, so nothing happens). */
function stop_session(session_id) {
    var interval_id = interval_id_by_session_id[session_id];
    if (interval_id !== undefined) {
        clearInterval(interval_id);
        delete interval_id_by_session_id[session_id];
    }

    var dtp = dtp_by_session_id[session_id];
    if (dtp !== undefined) {
        delete dtp_by_session_id[session_id];
        try {
            dtp.stop();
        } catch (err) {
            /* A consumer that fails to stop must not take the server down. */
            console.log(err);
        }
    }
}

/* Now that we have a web socket server, we need to create a handler for connection events.
   These events represent a client connecting to our server. */
websocket_server.on('connection', function (socket) {
    /* Like the web server object, we must also define handlers for the socket events that
       happen during the lifetime of the connection. The first is the message event, which
       occurs when the client sends something to the server. */
    socket.on('message', function (message) {
        /* The client is expected to send exactly one message, right after connecting,
           carrying the D script that defines the aggregation to walk.

           SECURITY: the script comes from the client unauthenticated and is compiled and
           run as-is. That is the vulnerability the article warns about; it is only
           tolerable for a localhost demo. */
        if (!message || typeof message['dscript'] !== 'string') {
            console.log('ignoring message without a dscript');
            return;
        }

        /* If a client does send a second script, replace its previous session instead of
           leaking the old consumer and timer. */
        stop_session(socket.sessionId);

        var dscript = message['dscript'];
        console.log( dscript );

        var dtp;
        try {
            dtp = new libdtrace.Consumer();
            dtp.strcompile(dscript);
            dtp.go();
        } catch (err) {
            /* A script that fails to compile or start must not crash the server. */
            console.log(err);
            return;
        }
        dtp_by_session_id[socket.sessionId] = dtp;

        /* All that's left to do is periodically send the aggregation data from the script. */
        interval_id_by_session_id[socket.sessionId] = setInterval(function () {
            var aggdata = {};
            try {
                dtp.aggwalk(function (id, key, val) {
                    /* Only forward aggregations that actually carry bucket entries. */
                    if (Array.isArray(val) && val.length > 0) {
                        aggdata[key] = val;
                    }
                } );
                socket.send( aggdata );
            } catch( err ) {
                console.log(err);
            }
        }, 1001 );
    } );

    /* Not so fast. If a client disconnects we don't want their dtrace consumer to keep
       collecting data, nor do we want to keep trying to send them anything. So clean up. */
    socket.on('disconnect', function () {
        stop_session(socket.sessionId);
        console.log('disconnected');
    });
} );
/* Listen on the loopback interface only. The server compiles and runs whatever D script a
   client sends, with no authentication, so it must not be reachable from other machines.
   Port 80 is what the client page expects (it loads everything from http://localhost/). */
app.listen(80, 'localhost');
Client
In order to display our heatmap, we're going to need some basic HTML with a canvas element:
<!-- Minimal host page for the heatmap: loads the two client scripts from
     http://localhost/ and starts tracing once the page has loaded. -->
<html>
<head>
<!-- Socket.IO client library, which provides the `io` object used by the client script -->
<script src="http://localhost/socket.io/socket.io.js"></script>
<!-- The heat tracer client script, which defines heat_tracer() -->
<script src="http://localhost/heat_tracer_client.js"></script>
</head>
<!-- heat_tracer() is the client entry point; it runs once the page has loaded -->
<body onLoad='heat_tracer()'>
<!-- The heatmap is drawn on this canvas; the client script looks it up by id 'canvas' -->
<canvas id='canvas' width='1024' height='512'></canvas>
</body>
</html>
Finally, the JavaScript client which translates the raw streaming data into a pretty picture:
/* Client entry point, called from the page's onLoad handler.
   Resets the heatmap grid, opens a socket to the server on localhost and, once connected,
   sends the D script we want traced. Every message that comes back is handed to draw().
   Letting the client choose the script is extremely powerful and *insecure*. */
function heat_tracer() {
    /* The D script, one source line per array element. It records a timestamp on syscall
       entry and, on return, lquantizes the elapsed time (divided by 1000) per syscall. */
    var dscript_lines = [
        "syscall:::entry",
        "{",
        "self->syscall_entry_ts[probefunc] = vtimestamp;",
        "}",
        "syscall:::return",
        "/self->syscall_entry_ts[probefunc]/",
        "{",
        "",
        "@time[probefunc] = lquantize((vtimestamp - self->syscall_entry_ts[probefunc] ) / 1000, 0, 63, 2);",
        "self->syscall_entry_ts[probefunc] = 0;",
        "}"
    ];

    /* Sent exactly once per connection: tell the server what to trace. */
    function send_dscript() {
        console.log('on connection');
        client.send( { 'dscript' : dscript_lines.join("\n") } );
    }

    /* The only messages we receive should contain the dtrace aggregation data we
       requested on connection, so each one becomes a new heatmap column. */
    function handle_aggregation(message) {
        draw(message);
    }

    /* Nothing to clean up on the client side. */
    function handle_disconnect() {
    }

    /* Initialize the global grid before any data can arrive. */
    setup();

    var client = new io.Socket('localhost'); // connect to localhost presently
    client.connect();
    client.on('connect', send_dscript);
    client.on('message', handle_aggregation);
    client.on('disconnect', handle_disconnect);
}
/* Take the aggregation data and update the heatmap.

   message: object keyed by syscall name. Each value is read as a list of
            [[latency_start, ...], count] buckets (presumably the lquantize
            output forwarded by the server; verify against the sender).

   Builds one new column with the per-latency-band totals across all syscalls,
   drops the oldest column from the global console_columns grid, appends the
   new one and redraws via drawArray(). */
function draw(message) {
    var ROW_COUNT = 32;    /* rows in a heatmap column */
    var BUCKET_WIDTH = 2;  /* the d script lquantizes from 0 to 63 in steps of two */

    /* Latest data goes in the right most column, initialize it */
    var syscalls_by_latency = [];
    for ( var row = 0; row < ROW_COUNT; row++ ) {
        syscalls_by_latency[row] = 0;
    }

    /* Presently we have the latency for each system call quantized in our message. Merge
       the data such that we have all the system call latency quantized together. This
       gives us the number of syscalls made with latencies in each particular band. */
    for ( var syscall in message ) {
        var buckets = message[syscall] || [];
        for ( var i = 0; i < buckets.length; i++ ) {
            var latency_start = buckets[i][0][0];
            var count = buckets[i][1];

            /* Dividing by the bucket width tells us which row this result belongs in. */
            var target_row = Math.floor(latency_start / BUCKET_WIDTH);
            if ( isNaN(target_row) ) {
                continue;
            }
            /* Buckets starting outside 0..63 are folded into the nearest edge row, so
               they can never create stray (NaN) entries outside the column. */
            target_row = Math.min(Math.max(target_row, 0), ROW_COUNT - 1);
            syscalls_by_latency[target_row] += count;
        }
    }

    /* We just created a new column, shift the console to the left and add it. */
    console_columns.shift();
    console_columns.push(syscalls_by_latency);
    drawArray(console_columns);
}
/* Draw the columns and rows that make up the heatmap on to the canvas element.

   console_columns: array of columns, each an array of counts. Column c, row r is painted
                    as a 16x16 cell at x = c*16, y = 496 - r*16, so row 0 sits at the bottom
                    of the 512 pixel high canvas.

   Does nothing when the 'canvas' element is missing or has no getContext. */
function drawArray(console_columns) {
    var CELL_SIZE = 16;
    var BOTTOM_ROW_Y = 496;

    var canvas = document.getElementById('canvas');
    if ( !canvas || !canvas.getContext ) {
        return;
    }
    var ctx = canvas.getContext('2d');

    for ( var column_index = 0; column_index < console_columns.length; column_index++ ) {
        var column = console_columns[column_index];
        for ( var entry_index = 0; entry_index < column.length; entry_index++ ) {
            var entry = column[entry_index];

            /* We're using a logarithmic scale for the brightness. This was all arrived at
               by trial and error and found to work well on my Mac. In the future this
               could all be adjustable with controls */
            var red_value = 0;
            if ( entry > 0 ) {
                red_value = Math.max(0, Math.floor(Math.log(entry)/Math.log(2)));
            }

            /* Keep the channel inside the 0..255 range that rgb() accepts. */
            ctx.fillStyle = 'rgb(' + Math.min(255, red_value * 25) + ',0,0)';
            ctx.fillRect(column_index * CELL_SIZE, BOTTOM_ROW_Y - (entry_index * CELL_SIZE),
                         CELL_SIZE, CELL_SIZE);
        }
    }
}
/* The heatmap is really a 64x32 grid: 64 columns (oldest on the left, newest on the right)
   of 32 rows each. This shared grid is declared explicitly, rather than created by an
   undeclared assignment inside setup(), so the script also works in strict mode. */
var console_columns = [];

/* Initialize (or reset) the grid data: 64 independent columns of 32 zeroed entries. */
function setup() {
    console_columns = [];
    for ( var column_index = 0; column_index < 64; column_index++ ) {
        var column = [];
        for ( var entry_index = 0; entry_index < 32; entry_index++ ) {
            column[entry_index] = 0;
        }
        console_columns.push(column);
    }
}
Run It!
Run Heat Tracer with the following. Note that sudo is required because dtrace does kernel magic.
sudo node heat_tracer.js
If all goes well you should see a moving version of something like the image below.
Contribute
You can find the latest version of Heat Tracer here. It is my hope that this article will provide the groundwork for a much more ambitious performance analytics project. If you're interested in contributing, please let me know.
Further Research
More information about Bryan and Brendan's demo can be found here.
Socket.IO can be found here.