Handle proper formatting for markdown strings

This commit is contained in:
Sidharth Vinod 2023-07-06 20:34:17 +05:30
parent f5484636aa
commit 60a93f7377
No known key found for this signature in database
GPG Key ID: FB5CCD378D3907CD
6 changed files with 145 additions and 111 deletions

View File

@ -1,25 +1,17 @@
/* eslint-disable @typescript-eslint/no-explicit-any */
// @ts-nocheck TODO: Fix types
import { log } from '../logger.js';
import { decodeEntities } from '../mermaidAPI.js';
import { markdownToHTML, markdownToLines } from '../rendering-util/handle-markdown-text.js';
import { splitLineToFitWidth } from './splitText.js';
/**
* @param dom
* @param styleFn
*/
import { MarkdownLine, MarkdownWord } from './types.js';
/**
 * Sets the `style` attribute on `dom` when a style value is supplied.
 *
 * @param dom - Object exposing an `attr(name, value)` method
 *   (presumably a d3 selection — confirm against callers).
 * @param styleFn - Value handed to `attr('style', …)`; a falsy value leaves `dom` untouched.
 */
function applyStyle(dom, styleFn) {
  // Nothing to apply: bail out before touching the element.
  if (!styleFn) {
    return;
  }
  dom.attr('style', styleFn);
}
/**
* @param element
* @param {any} node
* @param width
* @param classes
* @param addBackground
* @returns {SVGForeignObjectElement} Node
*/
function addHtmlSpan(element, node, width, classes, addBackground = false) {
const fo = element.append('foreignObject');
// const newEl = document.createElementNS('http://www.w3.org/2000/svg', 'foreignObject');
@ -65,12 +57,12 @@ function addHtmlSpan(element, node, width, classes, addBackground = false) {
/**
* Creates a tspan element with the specified attributes for text positioning.
*
* @param {object} textElement - The parent text element to append the tspan element.
* @param {number} lineIndex - The index of the current line in the structuredText array.
* @param {number} lineHeight - The line height value for the text.
* @returns {object} The created tspan element.
* @param textElement - The parent text element to append the tspan element.
* @param lineIndex - The index of the current line in the structuredText array.
* @param lineHeight - The line height value for the text.
* @returns The created tspan element.
*/
function createTspan(textElement, lineIndex, lineHeight) {
function createTspan(textElement: any, lineIndex: number, lineHeight: number) {
return textElement
.append('tspan')
.attr('class', 'text-outer-tspan')
@ -79,55 +71,41 @@ function createTspan(textElement, lineIndex, lineHeight) {
.attr('dy', lineHeight + 'em');
}
/**
 * Measures how wide a line of markdown words is once rendered as SVG text.
 *
 * A temporary `<text>` element is appended to `parentNode`, filled through
 * `createTspan` / `updateTextContentAndStyles`, measured, and removed again.
 *
 * @param parentNode - Selection the temporary text element is appended to.
 * @param lineHeight - Line height handed to `createTspan`.
 * @param line - The words to measure.
 * @returns The value reported by `getComputedTextLength()` for the rendered line.
 */
function computeWidthOfText(parentNode: any, lineHeight: number, line: MarkdownLine): number {
  const testElement = parentNode.append('text');
  try {
    const testSpan = createTspan(testElement, 1, lineHeight);
    updateTextContentAndStyles(testSpan, line);
    return testSpan.node().getComputedTextLength();
  } finally {
    // Always detach the probe element, even if measuring throws, so no
    // stray <text> node is left behind in the label group.
    testElement.remove();
  }
}
/**
* Creates a formatted text element by breaking lines and applying styles based on
* the given structuredText.
*
* @param {number} width - The maximum allowed width of the text.
* @param {object} g - The parent group element to append the formatted text.
* @param {Array} structuredText - The structured text data to format.
* @param addBackground
*/
function createFormattedText(width, g, structuredText, addBackground = false) {
function createFormattedText(
width: number,
g: any,
structuredText: MarkdownWord[][],
addBackground = false
) {
const lineHeight = 1.1;
const labelGroup = g.append('g');
let bkg = labelGroup.insert('rect').attr('class', 'background');
const bkg = labelGroup.insert('rect').attr('class', 'background');
const textElement = labelGroup.append('text').attr('y', '-10.1');
let lineIndex = 0;
structuredText.forEach((line) => {
for (const line of structuredText) {
/**
* Preprocess raw string content of line data
* Creating an array of strings pre-split to satisfy width limit
*/
let fullStr = line.map((data) => data.content).join(' ');
const checkWidth = (str) => computeWidthOfText(labelGroup, lineHeight, str) <= width;
const linesUnderWidth = checkWidth(fullStr)
? [fullStr]
: splitLineToFitWidth(fullStr, checkWidth);
const checkWidth = (line: MarkdownLine) =>
computeWidthOfText(labelGroup, lineHeight, line) <= width;
const linesUnderWidth = checkWidth(line) ? [line] : splitLineToFitWidth(line, checkWidth);
/** Add each prepared line as a tspan to the parent node */
const preparedLines = linesUnderWidth.map((w) => ({ content: w, type: line.type }));
for (const preparedLine of preparedLines) {
let tspan = createTspan(textElement, lineIndex, lineHeight);
updateTextContentAndStyles(tspan, [preparedLine]);
for (const preparedLine of linesUnderWidth) {
const tspan = createTspan(textElement, lineIndex, lineHeight);
updateTextContentAndStyles(tspan, preparedLine);
lineIndex++;
}
});
}
if (addBackground) {
const bbox = textElement.node().getBBox();
const padding = 2;
@ -143,44 +121,25 @@ function createFormattedText(width, g, structuredText, addBackground = false) {
}
}
/**
 * Rebuilds the contents of an outer tspan from one line of markdown words.
 *
 * The tspan's existing text is cleared, then one inner tspan is appended per
 * word. A word of type `emphasis` is rendered italic, a word of type `strong`
 * is rendered bold, anything else uses normal style and weight.
 *
 * @param tspan - The outer tspan selection to fill.
 * @param wrappedLine - The words that make up this rendered line.
 */
function updateTextContentAndStyles(tspan: any, wrappedLine: MarkdownWord[]) {
  // Reset whatever text the tspan carried before.
  tspan.text('');

  wrappedLine.forEach((word, index) => {
    const innerTspan = tspan
      .append('tspan')
      .attr('font-style', word.type === 'emphasis' ? 'italic' : 'normal')
      .attr('class', 'text-inner-tspan')
      .attr('font-weight', word.type === 'strong' ? 'bold' : 'normal');

    if (index === 0) {
      innerTspan.text(word.content);
    } else {
      // TODO: check what joiner to use.
      // Every word after the first is separated from its predecessor by a space.
      innerTspan.text(' ' + word.content);
    }
  });
}
/**
*
* @param el
* @param {*} text
* @param {*} param1
* @param root0
* @param root0.style
* @param root0.isTitle
* @param root0.classes
* @param root0.useHtmlLabels
* @param root0.isNode
* @returns
*/
// Note: when calling this from flowcharts (converting them to this API), isNode means classes should be set accordingly. When using htmlLabels, set classes to 'nodeLabel' when isNode=true, otherwise to 'edgeLabel'.
// When not using htmlLabels, set classes to 'title-row' when isTitle=true, otherwise 'title-row'. NOTE(review): both cases name the same class — confirm the intended class for isTitle=false.
export const createText = (
@ -210,7 +169,7 @@ export const createText = (
),
labelStyle: style.replace('fill:', 'color:'),
};
let vertexNode = addHtmlSpan(el, node, width, classes, addSvgBackground);
const vertexNode = addHtmlSpan(el, node, width, classes, addSvgBackground);
return vertexNode;
} else {
const structuredText = markdownToLines(text);

View File

@ -152,9 +152,8 @@ test('markdownToLines - Only italic formatting', () => {
});
it('markdownToLines - Mixed formatting', () => {
const input = `*Italic* and **bold** formatting`;
const expectedOutput = [
let input = `*Italic* and **bold** formatting`;
let expected = [
[
{ content: 'Italic', type: 'emphasis' },
{ content: 'and', type: 'normal' },
@ -162,9 +161,21 @@ it('markdownToLines - Mixed formatting', () => {
{ content: 'formatting', type: 'normal' },
],
];
expect(markdownToLines(input)).toEqual(expected);
const output = markdownToLines(input);
expect(output).toEqual(expectedOutput);
input = `*Italic with space* and **bold ws** formatting`;
expected = [
[
{ content: 'Italic', type: 'emphasis' },
{ content: 'with', type: 'emphasis' },
{ content: 'space', type: 'emphasis' },
{ content: 'and', type: 'normal' },
{ content: 'bold', type: 'strong' },
{ content: 'ws', type: 'strong' },
{ content: 'formatting', type: 'normal' },
],
];
expect(markdownToLines(input)).toEqual(expected);
});
it('markdownToLines - Mixed formatting', () => {

View File

@ -1,6 +1,7 @@
import type { Content } from 'mdast';
import { fromMarkdown } from 'mdast-util-from-markdown';
import { dedent } from 'ts-dedent';
import { MarkdownLine, MarkdownWordType } from './types.js';
/**
* @param markdown - markdown to process
@ -17,13 +18,13 @@ function preprocessMarkdown(markdown: string): string {
/**
* @param markdown - markdown to split into lines
*/
export function markdownToLines(markdown: string) {
export function markdownToLines(markdown: string): MarkdownLine[] {
const preprocessedMarkdown = preprocessMarkdown(markdown);
const { children } = fromMarkdown(preprocessedMarkdown);
const lines: { content: string; type: string }[][] = [[]];
const lines: MarkdownLine[] = [[]];
let currentLine = 0;
function processNode(node: Content, parentType = 'normal') {
function processNode(node: Content, parentType: MarkdownWordType = 'normal') {
if (node.type === 'text') {
const textLines = node.value.split('\n');
textLines.forEach((textLine, index) => {

View File

@ -1,5 +1,6 @@
import { splitTextToChars, splitLineToFitWidth, type CheckFitFunction } from './splitText.js';
import { splitTextToChars, splitLineToFitWidth, splitLineToWords } from './splitText.js';
import { describe, it, expect } from 'vitest';
import type { CheckFitFunction, MarkdownLine, MarkdownWordType } from './types.js';
describe('splitText', () => {
it.each([
@ -13,12 +14,35 @@ describe('splitText', () => {
});
describe('split lines', () => {
/**
 * Creates a checkFunction for a given width
 * @param width - maximum number of characters (graphemes where available) that fit in a line
 * @returns checkFunction that reports whether a line's joined content fits within `width`
 */
const createCheckFn = (width: number): CheckFitFunction => {
  return (text: MarkdownLine) => {
    // Join all words into a single string; no separator is inserted between words
    const joinedContent = text.map((w) => w.content).join('');
    const characters = splitTextToChars(joinedContent);
    return characters.length <= width;
  };
};
it('should create valid checkFit function', () => {
const checkFit5 = createCheckFn(5);
expect(checkFit5([{ content: 'hello', type: 'normal' }])).toBe(true);
expect(
checkFit5([
{ content: 'hello', type: 'normal' },
{ content: 'world', type: 'normal' },
])
).toBe(false);
const checkFit1 = createCheckFn(1);
expect(checkFit1([{ content: 'A', type: 'normal' }])).toBe(true);
expect(checkFit1([{ content: '🏳️‍⚧️', type: 'normal' }])).toBe(true);
expect(checkFit1([{ content: '🏳️‍⚧️🏳️‍⚧️', type: 'normal' }])).toBe(false);
});
it.each([
// empty string
{ str: 'hello world', width: 7, split: ['hello', 'world'] },
@ -40,7 +64,10 @@ describe('split lines', () => {
'should split $str into lines of $width characters',
({ str, split, width }: { str: string; width: number; split: string[] }) => {
const checkFn = createCheckFn(width);
expect(splitLineToFitWidth(str, checkFn)).toEqual(split);
const line: MarkdownLine = getLineFromString(str);
expect(splitLineToFitWidth(line, checkFn)).toEqual(
split.map((str) => splitLineToWords(str).map((content) => ({ content, type: 'normal' })))
);
}
);
@ -48,8 +75,17 @@ describe('split lines', () => {
const checkFn: CheckFitFunction = createCheckFn(6);
const str = `Flag
🏳 this 🏳🌈`;
expect(() => splitLineToFitWidth(str, checkFn)).toThrowErrorMatchingInlineSnapshot(
expect(() =>
splitLineToFitWidth(getLineFromString(str), checkFn)
).toThrowErrorMatchingInlineSnapshot(
'"splitLineToFitWidth does not support newlines in the line"'
);
});
});
/**
 * Test helper: turns a plain string into a MarkdownLine.
 *
 * @param str - Text to split with `splitLineToWords`
 * @param type - Word type assigned to every resulting word
 * @returns One MarkdownWord per segment returned by `splitLineToWords`
 */
const getLineFromString = (str: string, type: MarkdownWordType = 'normal'): MarkdownLine => {
  const line: MarkdownLine = [];
  for (const content of splitLineToWords(str)) {
    line.push({ content, type });
  }
  return line;
};

View File

@ -1,4 +1,4 @@
export type CheckFitFunction = (text: string) => boolean;
import type { CheckFitFunction, MarkdownLine, MarkdownWord, MarkdownWordType } from './types.js';
/**
* Splits a string into graphemes if available, otherwise characters.
@ -13,7 +13,7 @@ export function splitTextToChars(text: string): string[] {
/**
* Splits a string into words.
*/
function splitLineToWords(text: string): string[] {
export function splitLineToWords(text: string): string[] {
if (Intl.Segmenter) {
return [...new Intl.Segmenter(undefined, { granularity: 'word' }).segment(text)].map(
(s) => s.segment
@ -34,46 +34,61 @@ function splitLineToWords(text: string): string[] {
* @param word - Word to split
* @returns [first part of word that fits, rest of word]
*/
export function splitWordToFitWidth(
  checkFit: CheckFitFunction,
  word: MarkdownWord
): [MarkdownWord, MarkdownWord] {
  // Work on graphemes/characters of the word's text; the word's type is
  // carried over unchanged to both halves of the result.
  const characters = splitTextToChars(word.content);
  if (characters.length === 0) {
    // Nothing to split: both parts are empty words of the same type.
    return [
      { content: '', type: word.type },
      { content: '', type: word.type },
    ];
  }
  return splitWordToFitWidthRecursion(checkFit, [], characters, word.type);
}
/**
 * Moves characters from `remainingChars` onto `usedChars` for as long as the
 * combined text still passes `checkFit`, then returns both halves as words.
 *
 * Neither input array is modified.
 *
 * @param checkFit - Predicate deciding whether a candidate line fits.
 * @param usedChars - Characters already accepted for the first part.
 * @param remainingChars - Characters still to be placed, in order.
 * @param type - Word type copied onto both returned words.
 * @returns `[part that fits, rest]`. When nothing has been accepted yet and
 *   not even the first remaining character fits, that character is placed in
 *   the first part anyway so that callers re-queuing the rest always make
 *   progress instead of looping on the same word.
 */
function splitWordToFitWidthRecursion(
  checkFit: CheckFitFunction,
  usedChars: string[],
  remainingChars: string[],
  type: MarkdownWordType
): [MarkdownWord, MarkdownWord] {
  const fitted = [...usedChars];
  let consumed = 0;

  for (const nextChar of remainingChars) {
    if (!checkFit([{ content: [...fitted, nextChar].join(''), type }])) {
      break;
    }
    fitted.push(nextChar);
    consumed++;
  }

  if (fitted.length === 0 && remainingChars.length > 0) {
    // The first character alone does not fit; split it off anyway, otherwise
    // the first part would be empty and the rest would equal the input.
    fitted.push(remainingChars[0]);
    consumed = 1;
  }

  return [
    { content: fitted.join(''), type },
    { content: remainingChars.slice(consumed).join(''), type },
  ];
}
/**
 * Splits a line of markdown words into several lines that each pass `checkFit`.
 *
 * @param line - Words of a single line; no word may contain a newline.
 * @param checkFit - Predicate deciding whether a candidate line fits.
 * @returns The resulting lines, in order.
 * @throws Error when any word's content contains `\n`.
 */
export function splitLineToFitWidth(
  line: MarkdownLine,
  checkFit: CheckFitFunction
): MarkdownLine[] {
  if (line.some(({ content }) => content.includes('\n'))) {
    throw new Error('splitLineToFitWidth does not support newlines in the line');
  }
  // The recursive helper consumes its `words` array with shift()/unshift(),
  // so hand it a shallow copy and leave the caller's line intact.
  return splitLineToFitWidthRecursion([...line], checkFit);
}
function splitLineToFitWidthRecursion(
words: string[],
words: MarkdownWord[],
checkFit: CheckFitFunction,
lines: string[] = [],
newLine = ''
): string[] {
lines: MarkdownLine[] = [],
newLine: MarkdownLine = []
): MarkdownLine[] {
// eslint-disable-next-line no-console
console.error({ words, lines, newLine });
// Return if there is nothing left to split
@ -82,17 +97,22 @@ function splitLineToFitWidthRecursion(
if (newLine.length > 0) {
lines.push(newLine);
}
return lines.length > 0 ? lines : [''];
return lines.length > 0 ? lines : [];
}
let joiner = '';
if (words[0] === ' ') {
if (words[0].content === ' ') {
joiner = ' ';
words.shift();
}
const nextWord = words.shift() ?? ' ';
const nextWord: MarkdownWord = words.shift() ?? { content: ' ', type: 'normal' };
// const nextWordWithJoiner: MarkdownWord = { ...nextWord, content: joiner + nextWord.content };
const lineWithNextWord: MarkdownLine = [...newLine];
if (joiner !== '') {
lineWithNextWord.push({ content: joiner, type: 'normal' });
}
lineWithNextWord.push(nextWord);
const nextWordWithJoiner = joiner + nextWord;
const lineWithNextWord = newLine ? `${newLine}${nextWordWithJoiner}` : nextWordWithJoiner;
if (checkFit(lineWithNextWord)) {
// nextWord fits, so we can add it to the new line and continue
return splitLineToFitWidthRecursion(words, checkFit, lines, lineWithNextWord);
@ -106,7 +126,7 @@ function splitLineToFitWidthRecursion(
} else {
// There was no text in newLine, so we need to split nextWord
const [line, rest] = splitWordToFitWidth(checkFit, nextWord);
lines.push(line);
lines.push([line]);
words.unshift(rest);
}
return splitLineToFitWidthRecursion(words, checkFit, lines);

View File

@ -0,0 +1,7 @@
/** Formatting category attached to a single word of parsed markdown. */
export type MarkdownWordType = 'normal' | 'strong' | 'emphasis';
/** One word of markdown text together with its formatting category. */
export interface MarkdownWord {
/** The word's text. */
content: string;
/** Formatting category of the word. */
type: MarkdownWordType;
}
/** A single line of text, expressed as its sequence of words. */
export type MarkdownLine = MarkdownWord[];
/** Predicate over a line; presumably returns true when the line fits the available width — confirm against callers. */
export type CheckFitFunction = (text: MarkdownLine) => boolean;