// utf8-decoder.js
const b4a = require('b4a')
/**
 * https://encoding.spec.whatwg.org/#utf-8-decoder
 */
module.exports = class UTF8Decoder {
  constructor () {
    this.codePoint = 0
    this.bytesSeen = 0
    this.bytesNeeded = 0
    this.lowerBoundary = 0x80
    this.upperBoundary = 0xbf
  }
  get remaining () {
    return this.bytesSeen
  }
  decode (data) {
    // If we have a fast path, just sniff if the last part is a boundary
    if (this.bytesNeeded === 0) {
      let isBoundary = true
      for (let i = Math.max(0, data.byteLength - 4), n = data.byteLength; i < n && isBoundary; i++) {
        isBoundary = data[i] <= 0x7f
      }
      if (isBoundary) return b4a.toString(data, 'utf8')
    }
    let result = ''
    for (let i = 0, n = data.byteLength; i < n; i++) {
      const byte = data[i]
      if (this.bytesNeeded === 0) {
        if (byte <= 0x7f) {
          result += String.fromCharCode(byte)
        } else {
          this.bytesSeen = 1
          if (byte >= 0xc2 && byte <= 0xdf) {
            this.bytesNeeded = 2
            this.codePoint = byte & 0x1f
          } else if (byte >= 0xe0 && byte <= 0xef) {
            if (byte === 0xe0) this.lowerBoundary = 0xa0
            else if (byte === 0xed) this.upperBoundary = 0x9f
            this.bytesNeeded = 3
            this.codePoint = byte & 0xf
          } else if (byte >= 0xf0 && byte <= 0xf4) {
            if (byte === 0xf0) this.lowerBoundary = 0x90
            if (byte === 0xf4) this.upperBoundary = 0x8f
            this.bytesNeeded = 4
            this.codePoint = byte & 0x7
          } else {
            result += '\ufffd'
          }
        }
        continue
      }
      if (byte < this.lowerBoundary || byte > this.upperBoundary) {
        this.codePoint = 0
        this.bytesNeeded = 0
        this.bytesSeen = 0
        this.lowerBoundary = 0x80
        this.upperBoundary = 0xbf
        result += '\ufffd'
        continue
      }
      this.lowerBoundary = 0x80
      this.upperBoundary = 0xbf
      this.codePoint = (this.codePoint << 6) | (byte & 0x3f)
      this.bytesSeen++
      if (this.bytesSeen !== this.bytesNeeded) continue
      result += String.fromCodePoint(this.codePoint)
      this.codePoint = 0
      this.bytesNeeded = 0
      this.bytesSeen = 0
    }
    return result
  }
  flush () {
    const result = this.bytesNeeded > 0 ? '\ufffd' : ''
    this.codePoint = 0
    this.bytesNeeded = 0
    this.bytesSeen = 0
    this.lowerBoundary = 0x80
    this.upperBoundary = 0xbf
    return result
  }
}