import { describe, it, expect } from 'vitest';
import { readStdinAsUtf8 } from './router-stdin-helper.mjs';

/**
 * Builds an async iterable that yields the given chunks one at a time,
 * standing in for a stdin stream in these tests.
 *
 * @param {Iterable<Buffer>} buffers - Chunks to emit, in order.
 * @returns {AsyncGenerator<Buffer>} Async generator over the chunks.
 */
async function* fromBuffers(buffers) {
  for (const chunk of buffers) {
    yield chunk;
  }
}

describe('readStdinAsUtf8', () => {
  it('decodes UTF-8 cyrillic correctly across chunk boundaries', async () => {
    const text = 'посмотри сторожа достаточно ему информации?';
    const encoded = Buffer.from(text, 'utf-8');

    // Each of these Cyrillic letters encodes to 2 bytes in UTF-8, so one byte
    // past the end of 'посм' (8 + 1 = 9) lands inside the fifth letter 'о'.
    const mid = Buffer.byteLength('посм', 'utf-8') + 1;
    const chunks = [encoded.subarray(0, mid), encoded.subarray(mid)];

    const result = await readStdinAsUtf8(fromBuffers(chunks));

    expect(result).toBe(text);
  });

  it('handles ASCII without modification', async () => {
    const text = 'hello world';

    const result = await readStdinAsUtf8(fromBuffers([Buffer.from(text)]));

    expect(result).toBe(text);
  });

  it('returns empty string on empty stream', async () => {
    const result = await readStdinAsUtf8(fromBuffers([]));

    expect(result).toBe('');
  });

  it('does NOT mangle byte-level concatenation (regression guard)', async () => {
    // The bug being guarded against: `for await (const c of stdin) input += c`
    // stringifies every chunk on its own (Buffer#toString defaults to UTF-8),
    // so a multi-byte character split across two chunks is decoded as two
    // incomplete sequences and comes out as U+FFFD replacement characters.
    // The helper is expected to carry partial sequences over to the next
    // chunk (e.g. with a StringDecoder) instead.
    const cyrillic = 'тест';
    const encoded = Buffer.from(cyrillic, 'utf-8');

    // Split after the first byte, i.e. in the middle of the 2-byte 'т'.
    const chunks = [encoded.subarray(0, 1), encoded.subarray(1)];

    const result = await readStdinAsUtf8(fromBuffers(chunks));

    expect(result).toBe(cyrillic);
  });
});