失眠网 > Node.js Readable Stream的实现简析

Node.js Readable Stream的实现简析

时间：2020-09-18 19:17:17

作者：肖磊

个人主页：github

Readable Stream是对数据源的一种抽象。它提供了从数据源获取数据并缓存，以及将数据提供给数据消费者的能力。

接下来分别通过Readable Stream的2种模式来学习下可读流是如何获取数据以及将数据提供给消费者的。

Flowing模式

在flowing模式下，可读流自动从系统的底层读取数据，并通过EventEmitter接口的事件提供给消费者。如果不是开发者需要自己去实现可读流，大家可使用最为简单的readable.pipe()方法去消费数据。

接下来我们就通过一个简单的实例去具体分析下flowing模式下，可读流是如何工作的。

const { Readable } = require('stream')let c = 97 - 1// 实例化一个可读流const rs = new Readable({read () {if (c >= 'z'.charCodeAt(0)) return rs.push(null)setTimeout(() => {// 向可读流中推送数据rs.push(String.fromCharCode(++c))}, 100)}})// 将可读流的数据pipe到标准输出并打印出来rs.pipe(process.stdout)process.on('exit', () => {console.error('\n_read() called ' + (c - 97) + ' times')})复制代码

首先我们先来看下Readable构造函数的实现：

function Readable(options) {if (!(this instanceof Readable))return new Readable(options);// _readableState里面保存了关于可读流的不同阶段的状态值，下面会具体的分析this._readableState = new ReadableState(options, this);// legacythis.readable = true;if (options) {// 重写内部的_read方法，用以自定义从数据源获取数据if (typeof options.read === 'function')this._read = options.read;if (typeof options.destroy === 'function')// 重写内部的_destory方法this._destroy = options.destroy;}Stream.call(this);}复制代码

在我们创建可读流实例时，传入了一个read方法，用以自定义从数据源获取数据的方法，如果是开发者需要自己去实现可读流，那么这个方法一定需要去自定义，否则在程序的运行过程中会报错。ReadableState构造函数中定义了很多关于可读流的不同阶段的状态值：

在上面的例子中，当实例化一个可读流rs后，调用可读流实例的pipe方法。这正式开始了可读流在flowing模式下从数据源开始获取数据，以及process.stdout对数据的消费。

Readable.prototype.pipe = function (dest, pipeOpts) {var src = thisvar state = this._readableState...// 可读流实例监听data，可读流会从数据源获取数据，同时数据被传递到了消费者src.on('data', ondata)function ondata (chunk) {...var ret = dest.write(chunk)...}...}复制代码

Node提供的可读流有3种方式可以将初始态flowing = null的可读流转化为flowing = true：

监听data事件；调用stream.resume()方法；调用stream.pipe()方法。

事实上这3种方式都回归到了一种方式上:stream.resume()，通过调用这个方法，将可读流的模式改变为flowing态。继续回到上面的例子当中，在调用了rs.pipe()方法后，实际上内部是调用了src.on('data', ondata)监听data事件，那么我们就来看下这个方法当中做了哪些工作。

Readable.prototype.on = function (ev, fn) {...// 监听data事件if (ev === 'data') {// 可读流一开始的flowing状态是null// Start flowing on next tick if stream isn't explicitly pausedif (this._readableState.flowing !== false)this.resume();} else if (ev === 'readable') {...}return res;}复制代码

可读流监听data事件，并调用resume方法：

Readable.prototype.resume = function() {var state = this._readableState;if (!state.flowing) {debug('resume');// 置为flowing状态state.flowing = true;resume(this, state);}return this;};function resume(stream, state) {if (!state.resumeScheduled) {state.resumeScheduled = true;process.nextTick(resume_, stream, state);}}function resume_(stream, state) {if (!state.reading) {debug('resume read 0');// 开始从数据源中获取数据stream.read(0);}state.resumeScheduled = false;// 如果是flowing状态的话，那么将awaitDrain置为0state.awaitDrain = 0;stream.emit('resume');flow(stream);if (state.flowing && !state.reading)stream.read(0);}复制代码

resume方法会判断这个可读流是否处于flowing模式下，同时在内部调用stream.read(0)开始从数据源中获取数据(其中stream.read()方法根据所接受到的参数会有不同的行为)：

TODO: 这个地方可说明stream.read(size)方法接收到的不同的参数

Readable.prototype.read = function (n) {...if (n === 0 &&state.needReadable &&(state.length >= state.highWaterMark || state.ended)) {debug('read: emitReadable', state.length, state.ended);// 如果缓存中没有数据且处于end状态if (state.length === 0 && state.ended)// 流状态结束endReadable(this);else// 触发readable事件emitReadable(this);return null;}...// 从缓存中可以读取的数据n = howMuchToRead(n, state);// 判断是否应该从数据源中获取数据// if we need a readable event, then we need to do some reading.var doRead = state.needReadable;debug('need readable', doRead);// if we currently have less than the highWaterMark, then also read some// 如果buffer的长度为0或者buffer的长度减去需要读取的数据的长度 < hwm 的时候，那么这个时候还需要继续读取数据// state.length - n 即表示当前buffer已有的数据长度减去需要读取的数据长度后，如果还小于hwm话，那么doRead仍然置为trueif (state.length === 0 || state.length - n < state.highWaterMark) {// 继续read数据doRead = true;debug('length less than watermark', doRead);}// however, if we've ended, then there's no point, and if we're already// reading, then it's unnecessary.// 如果数据已经读取完毕，或者处于正在读取的状态，那么doRead置为false表明不需要读取数据if (state.ended || state.reading) {doRead = false;debug('reading or ended', doRead);} else if (doRead) {debug('do read');state.reading = true;state.sync = true;// if the length is currently zero, then we *need* a readable event.// 如果当前缓冲区的长度为0，首先将needReadable置为true，那么再当缓冲区有数据的时候就触发readable事件if (state.length === 0)state.needReadable = true;// call internal read method// 从数据源获取数据，可能是同步也可能是异步的状态，这个取决于自定义_read方法的内部实现，可参见study里面的示例代码this._read(state.highWaterMark);state.sync = false;// If _read pushed data synchronously, then `reading` will be false,// and we need to re-evaluate how much data we can return to the user.// 如果_read方法是同步，那么reading字段将会为false。这个时候需要重新计算有多少数据需要重新返回给消费者if (!state.reading)n = howMuchToRead(nOrig, state);}// ret为输出给消费者的数据var ret;if (n > 0)ret = fromList(n, state);elseret = null;if (ret === null) {state.needReadable = true;n = 0;} else {state.length -= n;}if (state.length === 0) {// If we have nothing in the buffer, then we want to know// as soon 
as we *do* get something into the buffer.if (!state.ended)state.needReadable = true;// If we tried to read() past the EOF, then emit end on the next tick.if (nOrig !== n && state.ended)endReadable(this);}// 只要从数据源获取的数据不为null，即未EOF时，那么每次读取数据都会触发data事件if (ret !== null)this.emit('data', ret);return ret;}复制代码

这个时候可读流从数据源开始获取数据，调用this._read(state.highWaterMark)方法，对应着例子当中实现的read()方法：

const rs = new Readable({read () {if (c >= 'z'.charCodeAt(0)) return rs.push(null)setTimeout(() => {// 向可读流中推送数据rs.push(String.fromCharCode(++c))}, 100)}})复制代码

在read方法当中有一个非常重要的方法需要开发者自己去调用，就是stream.push方法，这个方法即完成从数据源获取数据，并供消费者去调用。

Readable.prototype.push = function (chunk, encoding) {....// 对从数据源拿到的数据做处理return readableAddChunk(this, chunk, encoding, false, skipChunkCheck);}function readableAddChunk (stream, chunk, encoding, addToFront, skipChunkCheck) {... // 是否添加数据到头部if (addToFront) {// 如果不能在写入数据if (state.endEmitted)stream.emit('error',new errors.Error('ERR_STREAM_UNSHIFT_AFTER_END_EVENT'));elseaddChunk(stream, state, chunk, true);} else if (state.ended) { // 已经EOF，但是仍然还在推送数据，这个时候会报错stream.emit('error', new errors.Error('ERR_STREAM_PUSH_AFTER_EOF'));} else {// 完成一次读取后，立即将reading的状态置为falsestate.reading = false;if (state.decoder && !encoding) {chunk = state.decoder.write(chunk);if (state.objectMode || chunk.length !== 0)// 添加数据到尾部addChunk(stream, state, chunk, false);elsemaybeReadMore(stream, state);} else {// 添加数据到尾部addChunk(stream, state, chunk, false);}}...return needMoreData(state);}// 根据stream的状态来对数据做处理function addChunk(stream, state, chunk, addToFront) {// flowing为readable stream的状态，length为buffer的长度// flowing模式下且为异步读取数据的过程时，可读流的缓冲区并不保存数据，而是直接获取数据后触发data事件供消费者使用if (state.flowing && state.length === 0 && !state.sync) {// 对于flowing模式的Reabable，可读流自动从系统底层读取数据，直接触发data事件，且继续从数据源读取数据stream.read(0)stream.emit('data', chunk);// 继续从缓存池中获取数据stream.read(0);} else {// update the buffer info.// 数据的长度state.length += state.objectMode ? 1 : chunk.length;// 将数据添加到头部if (addToFront)state.buffer.unshift(chunk);else// 将数据添加到尾部state.buffer.push(chunk);// 触发readable事件，即通知缓存当中现在有数据可读if (state.needReadable)emitReadable(stream);}maybeReadMore(stream, state);}复制代码

在addChunk方法中完成对数据的处理，这里需要注意的就是，在flowing态下，数据被消耗的途径可能还不一样：

从数据源获取的数据可能进入可读流的缓冲区，然后被消费者使用;不进入可读流的缓冲区，直接被消费者使用。

这2种情况到底使用哪一种，还要看开发者是同步还是异步地去调用push方法，对应着state.sync的状态值。

当push方法被异步调用时，即state.sync为false：这个时候对于从数据源获取到的数据是直接通过触发data事件以供消费者来使用，而不用存放到缓冲区。然后调用stream.read(0)方法重复读取数据并供消费者使用。

当push方法是同步时，即state.sync为true：这个时候从数据源获取数据后，就不是直接通过触发data事件来供消费者直接使用，而是首先将数据缓冲到可读流的缓冲区。这个时候你看代码可能会疑惑，将数据缓存起来后，那么在flowing模式下，是如何流动起来的呢？事实上在一开始调用resume_方法时：

function resume_() {...// flow(stream);if (state.flowing && !state.reading)stream.read(0); // 继续从数据源获取数据}function flow(stream) {...// 如果处理flowing状态，那么调用stream.read()方法用以从stream的缓冲区中获取数据并供消费者来使用while (state.flowing && stream.read() !== null);}复制代码

在flow方法内部调用stream.read()方法取出可读流缓冲区的数据供消费者使用，同时继续调用stream.read(0)来继续从数据源获取数据。

以上就是在flowing模式下，可读流是如何完成从数据源获取数据并提供给消费者使用的大致流程。

paused模式

在paused模式下，消费者如果要获取数据需要手动调用stream.read()方法去获取数据。

举个例子:

const { Readable } = require('stream')let c = 97 - 1const rs = new Readable({highWaterMark: 3,read () {if (c >= 'f'.charCodeAt(0)) return rs.push(null)setTimeout(() => {rs.push(String.fromCharCode(++c))}, 1000)}})rs.setEncoding('utf8')rs.on('readable', () => {// console.log(rs._readableState.length)console.log('get the data from readable: ', rs.read())})复制代码

通过监听readable事件，开始触发可读流从数据源获取数据。

Readable.prototype.on = function (env) {if (env === 'data') {...} else if (env === 'readable') {// 监听readable事件const state = this._readableState;if (!state.endEmitted && !state.readableListening) {state.readableListening = state.needReadable = true;state.emittedReadable = false;if (!state.reading) {process.nextTick(nReadingNextTick, this);} else if (state.length) {emitReadable(this);}}}}function nReadingNextTick(self) {debug('readable nexttick read 0');// 开始从数据源获取数据self.read(0);}复制代码

在nReadingNextTick当中调用self.read(0)方法后，后面的流程和上面分析的flowing模式的可读流从数据源获取数据的流程相似，最后都要调用addChunk方法，将数据获取到后推入可读流的缓冲区：

function addChunk(stream, state, chunk, addToFront) {if (state.flowing && state.length === 0 && !state.sync) {...} else {// update the buffer info.// 数据的长度state.length += state.objectMode ? 1 : chunk.length;// 将数据添加到头部if (addToFront)state.buffer.unshift(chunk);else// 将数据添加到尾部state.buffer.push(chunk);// 触发readable事件，即通知缓存当中现在有数据可读if (state.needReadable)emitReadable(stream);}maybeReadMore(stream, state);}复制代码

一旦有数据被加入到了缓冲区，且needReadable(这个字段表示是否需要触发readable事件用以通知消费者来消费数据)为true，这个时候会触发readable告诉消费者有新的数据被push进了可读流的缓冲区。此外还会调用maybeReadMore方法，异步的从数据源获取更多的数据：

每当可读流有新的数据被推进缓冲区，触发readable事件后，消费者通过调用stream.read()方法来从可读流中获取数据。

背压

当消费者消费数据的速度慢于可读流向消费者提供数据的速度时，就会产生背压。

还是通过pipe管道来看：

Readable.prototype.pipe = function () {...// 监听drain事件var ondrain = pipeOnDrain(src);dest.on('drain', ondrain);...src.on('data', ondata)function ondata () {increasedAwaitDrain = false;// 向writable中写入数据var ret = dest.write(chunk);if (false === ret && !increasedAwaitDrain) {...src.pause();}}...}function pipeOnDrain(src) {return function() {var state = src._readableState;debug('pipeOnDrain', state.awaitDrain);// 减少pipes中awaitDrain的数量if (state.awaitDrain)state.awaitDrain--;// 如果awaitDrain的数量为0，且readable上绑定了data事件时(EE.listenerCount返回绑定的事件回调数量)if (state.awaitDrain === 0 && EE.listenerCount(src, 'data')) {// 重新开启flowing模式state.flowing = true;flow(src);}};}复制代码

当dest.write(chunk)返回false的时候，即代表可读流给可写流提供的数据过快，这个时候调用src.pause方法，暂停flowing状态，同时也暂停可读流从数据源获取数据以及向可写流输入数据。这个时候只有当可写流触发drain事件时，会调用ondrain来恢复flowing，同时可读流继续向可写流输入数据。关于可写流的背压可参见关于Writable_stream的源码分析。

以上就是通过可读流的2种模式分析了下可读流的内部工作机制。当然还有一些细节处大家有兴趣的话可以阅读相关的源码。

如果觉得《Node.js Readable Stream的实现简析》对你有帮助，请点赞、收藏，并留下你的观点哦！

本内容不代表本网观点和政治立场，如有侵犯你的权益请联系我们处理。

网友评论

网友评论仅供其表达个人看法，并不表明网站立场。