/*
 * node-rdkafka - Node.js wrapper for RdKafka C/C++ library
 *
 * Copyright (c) 2016 Blizzard Entertainment
 *
 * This software may be modified and distributed under the terms
 * of the MIT license. See the LICENSE.txt file for details.
 */

'use strict';

module.exports = KafkaConsumerStream;

var Readable = require('stream').Readable;
var util = require('util');

util.inherits(KafkaConsumerStream, Readable);

/**
 * ReadableStream integrating with the Kafka Consumer.
 *
 * This class is used to read data off of Kafka in a streaming way. It is
 * useful if you'd like to have a way to pipe Kafka into other systems. You
 * should generally not make this class yourself, as it is not even exposed
 * as part of module.exports. Instead, you should use
 * KafkaConsumer.createReadStream.
 *
 * The stream implementation is slower than the continuous subscribe callback.
 * If you don't care so much about backpressure and would rather squeeze
 * out performance, use that method. Using the stream will ensure you read only
 * as fast as you write.
 *
 * The stream detects if Kafka is already connected. If it is, it will begin
 * reading. If it is not, it will connect and read when it is ready.
 *
 * This stream operates in objectMode. It streams {Consumer~Message}.
 *
 * @param {Consumer} consumer - The Kafka Consumer object.
 * @param {object} options - Options to configure the stream.
 * @param {number} options.waitInterval - Number of ms to wait if Kafka reports
 * that it has timed out or that we are out of messages (right now).
 * @param {array|string|RegExp|function} options.topics - Topic(s) to subscribe
 * to, or a function that maps metadata to an array of topics.
 * @constructor
 * @extends stream.Readable
 * @see Consumer~Message
 */
function KafkaConsumerStream(consumer, options) {
  if (!(this instanceof KafkaConsumerStream)) {
    return new KafkaConsumerStream(consumer, options);
  }

  if (options === undefined) {
    options = { waitInterval: 1000 };
  } else if (typeof options === 'number') {
    options = { waitInterval: options };
  } else if (options === null || typeof options !== 'object') {
    throw new TypeError('"options" argument must be a number or an object');
  }

  var topics = options.topics;

  if (typeof topics === 'function') {
    // Just ignore the rest of the checks here
  } else if (!Array.isArray(topics)) {
    if (typeof topics !== 'string' && !(topics instanceof RegExp)) {
      throw new TypeError('"topics" argument must be a string, regex, or an array');
    } else {
      topics = [topics];
    }
  }

  options = Object.create(options);

  var fetchSize = options.fetchSize || 1;

  // Run in object mode by default.
  if (options.objectMode === null || options.objectMode === undefined) {
    options.objectMode = true;

    // If they did not explicitly set high water mark, and we are running
    // in object mode, set it to the fetch size + 2 to ensure there is room
    // for a standard fetch
    if (!options.highWaterMark) {
      options.highWaterMark = fetchSize + 2;
    }
  }

  if (options.objectMode !== true) {
    this._read = this._read_buffer;
  } else {
    this._read = this._read_message;
  }

  Readable.call(this, options);

  this.consumer = consumer;
  this.topics = topics;
  this.autoClose = options.autoClose === undefined ? true : !!options.autoClose;
  this.waitInterval = options.waitInterval === undefined ? 1000 : options.waitInterval;
  this.fetchSize = fetchSize;
  this.connectOptions = options.connectOptions || {};
  this.streamAsBatch = options.streamAsBatch || false;

  // Hold the messages in here
  this.messages = [];

  var self = this;

  this.consumer.on('unsubscribed', function() {
    // Invalidate the stream when we unsubscribe
    self.push(null);
  });

  // Call connect. Handles potentially being connected already
  this.connect(this.connectOptions);

  this.once('end', function() {
    if (this.autoClose) {
      this.destroy();
    }
  });
}
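/*
 * Usage sketch (not part of the module, and not executed): this is roughly
 * how a stream like the above is obtained through
 * KafkaConsumer.createReadStream. The broker address, group id, and topic
 * name below are placeholder values.
 *
 *   var Kafka = require('node-rdkafka');
 *
 *   var stream = Kafka.KafkaConsumer.createReadStream({
 *     'group.id': 'example-group',
 *     'metadata.broker.list': 'localhost:9092'
 *   }, {}, {
 *     topics: ['example-topic'],
 *     waitInterval: 1000
 *   });
 *
 *   stream.on('data', function(message) {
 *     console.log(message.value.toString());
 *   });
 */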
/**
 * Internal stream read method. This method reads message objects.
 * @param {number} size - Advisory number of messages to read. It is used as
 * an upper bound on the fetch size.
 * @private
 */
KafkaConsumerStream.prototype._read_message = function(size) {
  if (this.messages.length > 0) {
    return this.push(this.messages.shift());
  }

  if (!this.consumer) {
    // This consumer is set to `null` in the close function
    return;
  }

  if (!this.consumer.isConnected()) {
    this.consumer.once('ready', function() {
      // This is the way Node.js does it
      // https://github.com/nodejs/node/blob/master/lib/fs.js#L1733
      this._read(size);
    }.bind(this));
    return;
  }

  if (this.destroyed) {
    return;
  }

  var self = this;

  // If the size (number of messages) we are being advised to fetch is
  // greater than or equal to the fetch size, use the fetch size.
  // Only opt to use the size in case it is LESS than the fetch size.
  // Essentially, we want to use the smaller value here
  var fetchSize = size >= this.fetchSize ? this.fetchSize : size;

  this.consumer.consume(fetchSize, onread);

  // Retry function. Will wait up to the wait interval, with some
  // random noise if one is provided. Otherwise, will go immediately.
  function retry() {
    if (!self.waitInterval) {
      setImmediate(function() {
        self._read(size);
      });
    } else {
      setTimeout(function() {
        self._read(size);
      }, self.waitInterval * Math.random()).unref();
    }
  }

  function onread(err, messages) {
    // If there was an error we still want to emit it.
    // Essentially, if the user does not register an error
    // handler, it will still cause the stream to blow up.
    //
    // But... if one is provided, consumption will move on
    // as normal
    if (err) {
      self.emit('error', err);
    }

    // If there are no messages it means we reached EOF or a timeout.
    // Do what we used to do
    if (err || messages.length < 1) {
      // If we got an error or if there were no messages, initiate a retry
      retry();
      return;
    } else {
      if (self.streamAsBatch) {
        self.push(messages);
      } else {
        for (var i = 0; i < messages.length; i++) {
          self.messages.push(messages[i]);
        }

        // Now that we have added them all to the inner messages buffer,
        // we can just push the oldest one
        self.push(self.messages.shift());
      }
    }
  }
};
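/*
 * For reference, a sketch of the message object pushed above in objectMode.
 * The field names follow what node-rdkafka reports for a consumed message;
 * the values shown here are illustrative only:
 *
 *   {
 *     value: Buffer.from('hi'),  // message contents as a Buffer
 *     topic: 'example-topic',    // topic the message came from
 *     partition: 0,              // partition the message was on
 *     offset: 1337,              // offset the message was read from
 *     key: 'someKey',            // key of the message, if one was set
 *     size: 2,                   // size of the message in bytes
 *     timestamp: 1510325354780   // broker timestamp, if available
 *   }
 */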
/**
 * Internal stream read method. This method reads message buffers.
 * @param {number} size - Advisory number of messages to read. It is used as
 * an upper bound on the fetch size.
 * @private
 */
KafkaConsumerStream.prototype._read_buffer = function(size) {
  if (this.messages.length > 0) {
    return this.push(this.messages.shift());
  }

  if (!this.consumer) {
    // This consumer is set to `null` in the close function
    return;
  }

  if (!this.consumer.isConnected()) {
    this.consumer.once('ready', function() {
      // This is the way Node.js does it
      // https://github.com/nodejs/node/blob/master/lib/fs.js#L1733
      this._read(size);
    }.bind(this));
    return;
  }

  if (this.destroyed) {
    return;
  }

  var self = this;

  // If the size (number of messages) we are being advised to fetch is
  // greater than or equal to the fetch size, use the fetch size.
  // Only opt to use the size in case it is LESS than the fetch size.
  // Essentially, we want to use the smaller value here
  var fetchSize = size >= this.fetchSize ? this.fetchSize : size;

  this.consumer.consume(fetchSize, onread);

  // Retry function. Will wait up to the wait interval, with some
  // random noise if one is provided. Otherwise, will go immediately.
  function retry() {
    if (!self.waitInterval) {
      setImmediate(function() {
        self._read(size);
      });
    } else {
      setTimeout(function() {
        self._read(size);
      }, self.waitInterval * Math.random()).unref();
    }
  }

  function onread(err, messages) {
    // If there was an error we still want to emit it.
    // Essentially, if the user does not register an error
    // handler, it will still cause the stream to blow up.
    //
    // But... if one is provided, consumption will move on
    // as normal
    if (err) {
      self.emit('error', err);
    }

    // If there are no messages it means we reached EOF or a timeout.
    // Do what we used to do
    if (err || messages.length < 1) {
      // If we got an error or if there were no messages, initiate a retry
      retry();
      return;
    } else {
      if (self.streamAsBatch) {
        self.push(messages);
      } else {
        for (var i = 0; i < messages.length; i++) {
          self.messages.push(messages[i].value);
        }

        // Now that we have added them all to the inner messages buffer,
        // we can just push the oldest one
        self.push(self.messages.shift());
      }
    }
  }
};

KafkaConsumerStream.prototype.connect = function(options) {
  var self = this;

  function connectCallback(err, metadata) {
    if (err) {
      self.emit('error', err);
      self.destroy();
      return;
    }

    try {
      // Subscribe to the topics as well so we will be ready.
      // If this throws, the stream is invalid.

      // This is the magic part. If topics is a function, pass the
      // metadata in before we subscribe
      if (typeof self.topics === 'function') {
        var topics = self.topics(metadata);
        self.consumer.subscribe(topics);
      } else {
        self.consumer.subscribe(self.topics);
      }
    } catch (e) {
      self.emit('error', e);
      self.destroy();
      return;
    }

    // start the flow of data
    self.read();
  }

  if (!this.consumer.isConnected()) {
    self.consumer.connect(options, connectCallback);
  } else {
    // Immediately call the connect callback
    setImmediate(function() {
      connectCallback(null, self.consumer._metadata);
    });
  }
};

KafkaConsumerStream.prototype.destroy = function() {
  if (this.destroyed) {
    return;
  }
  this.destroyed = true;
  this.close();
};

KafkaConsumerStream.prototype.close = function(cb) {
  var self = this;
  if (cb) {
    this.once('close', cb);
  }

  if (!self.consumer._isConnecting && !self.consumer._isConnected) {
    // If we aren't even connected just exit. We are done.
    close();
    return;
  }

  if (self.consumer._isConnecting) {
    self.consumer.once('ready', function() {
      // Don't pass the CB because it has already been passed.
      self.close();
    });
    return;
  }

  if (self.consumer._isConnected) {
    self.consumer.unsubscribe();
    self.consumer.disconnect(function() {
      close();
    });
  }

  function close() {
    // Null out the consumer so any pending _read calls become no-ops,
    // matching the guard at the top of _read_message and _read_buffer.
    self.consumer = null;
    self.emit('close');
  }
};
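/*
 * Teardown sketch (illustrative only, not executed by this module): close()
 * accepts an optional callback, which is registered against the 'close'
 * event emitted above once the consumer has disconnected.
 *
 *   stream.close(function() {
 *     console.log('stream closed and consumer disconnected');
 *   });
 */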