Add splitText

This commit is contained in:
Sidharth Vinod 2023-06-09 11:06:45 +05:30
parent ac488dd800
commit c41df420d7
No known key found for this signature in database
GPG Key ID: FB5CCD378D3907CD
3 changed files with 173 additions and 1 deletions

View File

@ -0,0 +1,37 @@
import { splitTextToChars, splitLineToFitWidthLoop, type CheckFitFunction } from './splitText.js';
import { describe, it, expect } from 'vitest';
// Grapheme splitting: each ZWJ emoji sequence below is expected to come back as one element.
describe('splitText', () => {
  const graphemeCases: { str: string; split: string[] }[] = [
    { str: '', split: [] },
    { str: '🏳️‍⚧️🏳️‍🌈👩🏾‍❤️‍👨🏻', split: ['🏳️‍⚧️', '🏳️‍🌈', '👩🏾‍❤️‍👨🏻'] },
    { str: 'ok', split: ['o', 'k'] },
  ];

  it.each(graphemeCases)('should split $str into graphemes', (testCase) => {
    expect(splitTextToChars(testCase.str)).toEqual(testCase.split);
  });
});
// Line wrapping: the width in each case is a maximum number of graphemes per line.
describe('split lines', () => {
  interface LineCase {
    str: string;
    width: number;
    split: string[];
  }

  const lineCases: LineCase[] = [
    // empty string
    { str: '', width: 1, split: [''] },
    // Width >= Individual words
    { str: 'hello world', width: 5, split: ['hello', 'world'] },
    { str: 'hello world', width: 7, split: ['hello', 'world'] },
    // width > full line
    { str: 'hello world', width: 20, split: ['hello world'] },
    // width < individual word
    { str: 'hello world', width: 3, split: ['hel', 'lo', 'wor', 'ld'] },
    { str: 'hello 12 world', width: 4, split: ['hell', 'o 12', 'worl', 'd'] },
    { str: '🏳️‍⚧️🏳️‍🌈👩🏾‍❤️‍👨🏻', width: 1, split: ['🏳️‍⚧️', '🏳️‍🌈', '👩🏾‍❤️‍👨🏻'] },
    { str: 'Flag 🏳️‍⚧️ this 🏳️‍🌈', width: 6, split: ['Flag 🏳️‍⚧️', 'this 🏳️‍🌈'] },
  ];

  it.each(lineCases)(
    'should split $str into lines of $width characters',
    ({ str: input, width: maxGraphemes, split: expectedLines }) => {
      // A candidate fits when it is at most `maxGraphemes` graphemes long.
      const fitsWidth: CheckFitFunction = (candidate) =>
        splitTextToChars(candidate).length <= maxGraphemes;

      expect(splitLineToFitWidthLoop(input.split(' '), fitsWidth)).toEqual(expectedLines);
    }
  );
});

View File

@ -0,0 +1,135 @@
/**
 * Predicate supplied by the caller to decide whether a candidate piece of text fits.
 *
 * The splitting helpers in this module call it with the text they are about to emit
 * and keep extending that text for as long as it returns `true`.
 */
export type CheckFitFunction = (text: string) => boolean;
/**
 * Breaks `text` into its user-perceived characters.
 *
 * Uses `Intl.Segmenter` with its default options when the runtime provides it;
 * otherwise falls back to spreading the string, which yields one entry per code point.
 *
 * @param text - The string to break apart.
 * @returns The segments in their original order; an empty array for an empty string.
 */
export function splitTextToChars(text: string): string[] {
  if (!Intl.Segmenter) {
    return [...text];
  }
  const graphemes: string[] = [];
  for (const piece of new Intl.Segmenter().segment(text)) {
    graphemes.push(piece.segment);
  }
  return graphemes;
}
/**
 * Splits a single word into consecutive pieces that each satisfy `checkFit`.
 *
 * Pieces are built greedily, one grapheme at a time. A grapheme that does not fit even
 * on its own cannot be divided any further, so it is emitted as a piece of its own and
 * splitting continues with the graphemes after it. No empty pieces are produced.
 *
 * @param checkFit - Returns `true` when the given text fits the available width.
 * @param word - The word to split.
 * @returns The pieces in order; an empty array when `word` is empty.
 */
export function splitWordToFitWidth(checkFit: CheckFitFunction, word: string): string[] {
  const pieces: string[] = [];
  let current = '';

  for (const grapheme of splitTextToChars(word)) {
    if (checkFit(current + grapheme)) {
      current += grapheme;
      continue;
    }
    if (current === '') {
      // Even a lone grapheme is too wide; it cannot be split, so keep it whole.
      pieces.push(grapheme);
      continue;
    }
    // The current piece is full: close it and start the next one with this grapheme.
    pieces.push(current);
    if (checkFit(grapheme)) {
      current = grapheme;
    } else {
      pieces.push(grapheme);
      current = '';
    }
  }

  if (current !== '') {
    pieces.push(current);
  }
  return pieces;
}
/**
 * Cuts the longest leading part of `word` that satisfies `checkFit` and returns it
 * together with whatever is left over.
 *
 * At least one grapheme is always taken from a non-empty word, even when that grapheme
 * does not fit by itself, so repeatedly splitting the remainder always makes progress.
 *
 * @param checkFit - Returns `true` when the given text fits the available width.
 * @param word - The word to split.
 * @returns A `[head, rest]` pair; `['', '']` when `word` is empty, and `rest` is `''`
 * when the whole word fits.
 */
export function splitWordToFitWidth2(checkFit: CheckFitFunction, word: string): [string, string] {
  const graphemes = splitTextToChars(word);
  if (graphemes.length === 0) {
    return ['', ''];
  }

  // Count how many leading graphemes fit together.
  let fittingCount = 0;
  let candidate = '';
  for (const grapheme of graphemes) {
    if (!checkFit(candidate + grapheme)) {
      break;
    }
    candidate += grapheme;
    fittingCount++;
  }

  // A grapheme cannot be split further, so take the first one even if it is too wide.
  const takenCount = Math.max(fittingCount, 1);
  return [graphemes.slice(0, takenCount).join(''), graphemes.slice(takenCount).join('')];
}
/**
 * Greedily wraps `words` into lines that each satisfy `checkFit`.
 *
 * Words are joined with a single space. While the current candidate line does not fit,
 * its last word is deferred to the following line. A single word that does not fit on
 * its own is broken with `splitWordToFitWidth`; its first piece becomes a line and the
 * remainder is placed in front of the deferred words.
 *
 * @param words - Words to place, in order. The array is not modified.
 * @param checkFit - Returns `true` when the given text fits the available width.
 * @param lines - Accumulator that finished lines are appended to; this same array is returned.
 * @param popped - Words that come after `words` and still have to be placed. Not modified.
 * @returns `lines`, with the newly produced lines appended.
 */
export function splitLineToFitWidth(
  words: string[],
  checkFit: CheckFitFunction,
  lines: string[] = [],
  popped: string[] = []
): string[] {
  // Work on copies so the caller's arrays are left untouched.
  let current = [...words];
  let deferred = [...popped];

  while (current.length > 0 || deferred.length > 0) {
    if (current.length === 0) {
      // Nothing is pending for this line; continue with the deferred words.
      current = deferred;
      deferred = [];
      continue;
    }

    const candidate = current.join(' ');
    if (checkFit(candidate)) {
      lines.push(candidate);
      // Deferred words are consumed exactly once, so reset them after promoting.
      current = deferred;
      deferred = [];
      continue;
    }

    if (current.length > 1) {
      // Too long: move the last word to the front of the deferred words and retry.
      const lastWord = current.pop();
      if (lastWord !== undefined) {
        deferred.unshift(lastWord);
      }
      continue;
    }

    // A single word that does not fit: emit its first piece and re-queue everything
    // after it. Re-joining the remaining pieces keeps all of the word's text.
    const [head = '', ...tail] = splitWordToFitWidth(checkFit, current[0] ?? '');
    lines.push(head);
    const rest = tail.join('');
    current = rest ? [rest, ...deferred] : deferred;
    deferred = [];
  }

  return lines;
}
/**
 * Greedily wraps `words` into lines that each satisfy `checkFit`.
 *
 * Words are appended to the current line, joined by a single space, for as long as the
 * line still fits. A word that does not fit even on an empty line is broken with
 * `splitWordToFitWidth2`: its head becomes a line of its own and the rest is processed
 * next, so it may share a line with the words that follow it.
 *
 * @param words - Words to place, in order. The array is not modified.
 * @param checkFit - Returns `true` when the given text fits the available width.
 * @returns The wrapped lines; an empty array when `words` is empty.
 */
export function splitLineToFitWidthLoop(words: string[], checkFit: CheckFitFunction): string[] {
  const lines: string[] = [];
  // The queue is consumed and refilled below, so work on a copy of the caller's array.
  const pending = [...words];
  let currentLine: string[] = [];

  while (pending.length > 0) {
    const word = pending.shift() ?? '';

    if (checkFit([...currentLine, word].join(' '))) {
      currentLine.push(word);
    } else if (currentLine.length > 0) {
      // The line is full: flush it and retry the same word on a fresh line.
      lines.push(currentLine.join(' '));
      currentLine = [];
      pending.unshift(word);
    } else {
      // The word is too long for an empty line, so it has to be broken.
      const [head, rest] = splitWordToFitWidth2(checkFit, word);
      lines.push(head);
      if (rest) {
        pending.unshift(rest);
      }
    }
  }

  if (currentLine.length > 0) {
    lines.push(currentLine.join(' '));
  }
  return lines;
}

View File

@ -14,7 +14,7 @@
"target": "ES6" /* Set the JavaScript language version for emitted JavaScript and include compatible library declarations. */,
"lib": [
"DOM",
"ES2021"
"ES2022"
] /* Specify a set of bundled library declaration files that describe the target runtime environment. */,
// "jsx": "preserve", /* Specify what JSX code is generated. */
// "experimentalDecorators": true, /* Enable experimental support for TC39 stage 2 draft decorators. */