<\/p><\/td>\n)*\s*<\/tr>/g, '');
}
__match_filter_tags(markdown) {
const startTagRegex = /<(include|exclude) target="(.+?)"/gmi
const endTagRegex = /<\/(include|exclude)>/gmi
const matches = [... markdown.matchAll(startTagRegex)]
var returns = []
matches.forEach(match => {
var tag = match[1].toLowerCase()
var target = match[2].trim()
var rest = markdown.slice(match.index)
var closeTagRegex = new RegExp(`${tag}>`, 'gmi')
var closeTagMatch = [... rest.matchAll(closeTagRegex)]
var startIndex = match.index
var endIndex = -1
for (let i = 0; i < closeTagMatch.length; i++) {
var t = markdown.slice(startIndex, startIndex+closeTagMatch[i].index+closeTagMatch[i][0].length)
var startCount = t.match(startTagRegex) ? t.match(startTagRegex).length : 0
var endCount = t.match(endTagRegex) ? t.match(endTagRegex).length : 0
if (startCount === endCount) {
endIndex = startIndex + closeTagMatch[i].index + closeTagMatch[i][0].length
break
}
}
if (endIndex === -1) console.warn(`No matching end tag for ${tag} tag at index ${match.index}`)
returns.push({
tag: tag,
target: target,
startIndex: startIndex,
endIndex: endIndex
})
})
return returns
}
__extract_description(markdown) {
const content = markdown.split('\n')
const title = content.filter(line => line.startsWith('# '))
var description = "(placeholder)"
if (title.length > 0) {
description = content[content.indexOf(title[0])+2] ? content[content.indexOf(title[0])+2].trim() : "(placeholder)"
}
return description
}
async __write_page({title, suffix, slug, beta, notebook, addedSince, lastModified, deprecateSince, path, type, token, sidebar_position, sidebar_label, keywords, doc_card_list}) {
let markdown = await this.__markdown()
markdown = this.__filter_content(markdown, this.targets)
markdown = markdown.replace(/(\s*\n){3,}/g, '\n\n').replace(/(
){2,}/, "
").replace(/
/g, '
');
markdown = markdown.replace(/^[\||\s][\s|\||
]*\|\n/gm, '')
markdown = markdown.replace(/\s*
\n(\s*( )*<\/td>\n)*\s*<\/tr>/g, '')
markdown = this.__example_http_urls(markdown)
markdown = await this.__mdx_patches(markdown)
const description = this.__extract_description(markdown)
let front_matter = this.__front_matters(title, suffix, slug, beta, notebook, type, token, sidebar_position, sidebar_label, keywords, this.displayedSidebar, description)
let tabs = markdown.split('\n').filter(line => {
return line.trim().startsWith(" 0)
// add sidebar_key attribute to doc frontmatter
front_matter = front_matter.split('\n')
front_matter.splice(3, 0, `sidebar_key: ${slug}`)
front_matter = front_matter.join('\n')
if (doc_card_list) {
markdown += "\n\nimport DocCardList from '@theme/DocCardList';\n\n"
}
var file_path = `${path}/${slug}.md`
if (this.targets.split('.').includes('zilliz')) {
markdown = markdown.replace(/http:\/\/localhost:19530/g, 'YOUR_CLUSTER_ENDPOINT')
markdown = markdown.replace(/127.0.0.1:19530/g, 'YOUR_CLUSTER_ENDPOINT')
markdown = markdown.replace(/localhost:19530/g, 'YOUR_CLUSTER_ENDPOINT')
markdown = markdown.replace(/root:Milvus/g, 'YOUR_CLUSTER_TOKEN')
}
if (this.targets.split('.').includes('milvus')) {
markdown = markdown.replace(/YOUR_CLUSTER_ENDPOINT/g, 'http://localhost:19530')
markdown = markdown.replace(/YOUR_CLUSTER_TOKEN/g, 'root:Milvus')
}
if (slug === 'home') {
let description = this.__extract_description(markdown)
// remove title
markdown = markdown.split('\n').filter(line => line !== `# ${title}`).join('\n');
// remove description
markdown = markdown.split('\n').filter(line => line !== description).join('\n');
// add imports
imports = [...imports.split('\n'), ...[
"\n\nimport Hero from '@site/src/components/Hero';",
"\n\nimport Bars from '@site/src/components/Bars';",
"\n\nimport Blocks from '@site/src/components/Blocks';",
"\n\nimport Cards from '@site/src/components/Cards';",
"\n\nimport Stories from '@site/src/components/Stories';",
"\n\nimport Banner from '@site/src/components/Banner';"
]].join('\n');
}
if (markdown.match(/\ 0) {
const children = block['children'].map(child => this.__retrieve_block_by_id(child))
const found = this.__find_api_compose_block(children)
if (found) return found
}
}
return null
}
async __write_api_page({title, slug, beta, path, type, token, sidebar_position, sidebar_label, keywords, apiComposeBlock}) {
let recordData
try {
recordData = JSON.parse(apiComposeBlock['record'] || '{}')
} catch (e) {
console.error(`Failed to parse ApiCompose record for ${slug}: ${e.message}`)
return
}
const specs = recordData
const method = (specs.method || 'get').toLowerCase()
const endpoint = specs.endpoint || ''
const description = (specs.summary || title || '').replace(/"/g, '\\"')
const frontMatter = `---
displayed_sidebar: restfulSidebar
sidebar_position: ${sidebar_position || 1}
slug: /restful/${slug}
beta: ${beta ? 'TRUE' : 'FALSE'}
title: "${title || specs.summary || 'API'} | RESTful"
description: "${description} | RESTful"
hide_table_of_contents: true
sidebar_label: "${sidebar_label || title || specs.summary || 'API'}"
sidebar_custom_props: { badges: ['${method}'] }
${keywords ? 'keywords: \n - ' + keywords.split(',').map(k => k.trim()).join('\n - ') + '\n' : ''}---`
const specsJson = JSON.stringify(specs, null, 2)
const mdxBody = `# ${title || specs.summary || 'API'}
import RestSpecs from '@site/src/components/RestSpecs';
export const specs = ${specsJson}
export const endpoint = "${endpoint}"
export const method = "${method}"`
const file_path = `${path}/${slug}.mdx`
fs.writeFileSync(file_path, frontMatter + '\n\n' + mdxBody)
console.log(`Generated API doc: ${file_path}`)
}
__front_matters (title, suffix, slug, beta, notebook, type, token, sidebar_position=undefined, sidebar_label="", keywords="", displayed_sidebar=this.displayedSidebar, description="") {
let hide_title = '';
let hide_toc = '';
if (keywords !== "") {
// keywords = keywords + ',' + this.keyword_picker().join(',')
keywords = "keywords: \n - " + keywords.split(',').map(item => item.trim()).join('\n - ') + '\n'
}
if (displayed_sidebar === 'default') {
displayed_sidebar = ''
} else if (displayed_sidebar === 'agentsSidebar' ) {
displayed_sidebar = `displayed_sidebar: ${displayed_sidebar}\n`
} else {
slug = `${displayed_sidebar.replace('Sidebar', '').trim()}/${slug}`
displayed_sidebar = `displayed_sidebar: ${displayed_sidebar}\n`
}
if (description) {
description = description.trim().replace('\n', '|').replace(/\[(.*)\]\(.*\)/g, '$1').replace(':', '').replace(/\*+|_+/g, '').replace(/\"/g, "\\\"")
description = description.replace(/<\/?[^>]+>/g, '').trim()
if (description.length === 0) {
description = title
}
}
if (slug === 'home') {
hide_title = "hide_title: true";
hide_toc = "hide_table_of_contents: true";
}
let front_matter = '---\n' +
`title: "${title} | ${suffix}"` + '\n' +
`slug: /${slug}` + '\n' +
`sidebar_label: "${sidebar_label !== "" ? sidebar_label : title}"` + '\n' +
`beta: ${beta ? beta : 'FALSE'}` + '\n' +
`notebook: ${notebook ? notebook : 'FALSE'}` + '\n' +
`description: "${description} | ${suffix}"` + '\n' +
`type: ${type}` + '\n' +
`token: ${token}` + '\n' +
`sidebar_position: ${sidebar_position}` + '\n' +
keywords +
displayed_sidebar + '\n' +
`${hide_title ? hide_title + '\n' : ''}` +
`${hide_toc ? hide_toc + '\n' : ''}` +
'---'
return front_matter
}
__imports (cond=null) {
let block_types = this.blocks.map(block => {
return this.block_types[block.block_type - 1]
}).join('')
if (block_types.match(/(code){2,}/g) || cond) {
return ["import Admonition from '@theme/Admonition';", "import Tabs from '@theme/Tabs';",
"import TabItem from '@theme/TabItem';"].join('\n')
}
return "import Admonition from '@theme/Admonition';" + "\n"
}
async __markdown(blocks=null, indent=0) {
const markdown = [];
const idt = " ".repeat(indent);
if (blocks === null) {
blocks = this.blocks;
markdown.push(await this.__page(this.page_blocks[0]['page']));
}
for (let idx = 0; idx < blocks.length; idx++) {
const block = blocks[idx];
if (!block) {
continue;
}
console.log(block['block_id'], this.block_types[block['block_type']-1], block['block_type']);
const prev_block = idx > 0 ? blocks[idx-1] : null;
const next_block = idx < blocks.length-1 ? blocks[idx+1] : null;
if (this.block_types[block['block_type']-1] === undefined) {
markdown.push('[Unsupported block type]');
} else if (this.block_types[block['block_type']-1] === 'text') {
let content = await this.__text(block['text']);
if (content.trim().indexOf('\n') > 0) {
content = content.split('\n').map(line => idt + line).join('\n');
} else {
content = idt + content;
}
markdown.push(content);
} else if (this.block_types[block['block_type']-1].includes('heading')) {
const level = parseInt(this.block_types[block['block_type']-1].slice(-1));
markdown.push(idt + await this.__heading(block[`heading${level}`], level));
} else if (this.block_types[block['block_type']-1] === 'bullet') {
markdown.push(await this.__bullet(block, indent));
} else if (this.block_types[block['block_type']-1] === 'ordered') {
markdown.push(await this.__ordered(block, indent));
} else if (this.block_types[block['block_type']-1] === 'code') {
markdown.push(await this.__code(block['code'], indent, prev_block, next_block, blocks));
} else if (this.block_types[block['block_type']-1] === 'quote_container') {
markdown.push(await this.__quote(block, indent));
} else if (this.block_types[block['block_type']-1] === 'image') {
markdown.push(idt + (await this.__image(block['image'])));
} else if (this.block_types[block['block_type']-1] === 'iframe') {
markdown.push(idt + (await this.__iframe(block)));
} else if (this.block_types[block['block_type']-1] === 'table') {
markdown.push(await this.__table(block['table'], indent));
} else if (this.block_types[block['block_type']-1] === 'sheet') {
markdown.push(await this.__sheet(block['sheet'], indent));
} else if (this.block_types[block['block_type']-1] === 'callout') {
markdown.push(await this.__callout(block, indent));
} else if (this.block_types[block['block_type']-1] === 'board') {
markdown.push(await this.__board(block['board'], indent));
} else if (this.block_types[block['block_type']-1] === 'grid') {
markdown.push(await this.__grid(block, indent));
} else if (this.block_types[block['block_type']-1] === 'add_ons') {
// supademo add-ons
if (block['add_ons']['component_type_id'] === 'blk_682093ba9580c002363b9dc3') {
markdown.push(await this.__supademo(block['add_ons'], indent));
}
} else if (this.block_types[block['block_type']-1] === 'source_synced') {
markdown.push(await this.__source_synced(block, indent));
} else if (block['block_type'] === 999 && block['children']) {
const children = block['children'].map(child => {
return this.__retrieve_block_by_id(child)
})
markdown.push(await this.__markdown(children, indent));
} else {
console.log(`Unprocessed: ${block['block_id']}`);
}
}
return markdown.join('\n\n')
.replace(/(\s*\n){3,}/g, '\n\n')
.replace(/ /g, ' ')
.replace(/( ){2,}/g, ' ')
.replace(" ", "")
.replace(/\n\s*\n(\s*<\/p><\/td>\n)*\s*<\/tr>/g, '');
}
__example_http_urls(content) {
// Find all fenced code blocks and mark their ranges
const codeBlockRegex = /```[\s\S]*?```/g;
let codeBlocks = [];
let match;
while ((match = codeBlockRegex.exec(content)) !== null) {
codeBlocks.push({ start: match.index, end: match.index + match[0].length });
}
// Helper to check if a position is inside any code block
function isInCodeBlock(pos) {
return codeBlocks.some(block => pos >= block.start && pos < block.end);
}
// Match URLs, including those containing <, >, [, ], {, }
const urlRegex = /https?:\/\/[^\s'")]+/g;
let result = '';
let lastIndex = 0;
// Find all URLs and process those outside code blocks
while ((match = urlRegex.exec(content)) !== null) {
const urlStart = match.index;
const urlEnd = urlStart + match[0].length;
// Append content before the URL
result += content.slice(lastIndex, urlStart);
if (!isInCodeBlock(urlStart)) {
// Keep example URLs intact and let MDX patching handle escaping/safety.
result += match[0];
} else {
// Inside code block, leave as is
result += match[0];
}
lastIndex = urlEnd;
}
// Append remaining content
result += content.slice(lastIndex);
return result;
}
__escape_currency_dollars(content) {
// Replace currency $ with $ outside fenced code blocks and
// inline code spans, to prevent remark-math/KaTeX from treating them as math
// delimiters (which causes unicodeTextInMathMode warnings and broken rendering).
const lines = content.split('\n');
let inCodeBlock = false;
const result = [];
for (let line of lines) {
const stripped = line.trim();
if (stripped.startsWith('```') || stripped.startsWith('~~~')) {
inCodeBlock = !inCodeBlock;
}
if (!inCodeBlock) {
// Split by inline code spans; odd-indexed segments are inside backticks
const parts = line.split(/(`+[^`]+`+)/);
line = parts.map((part, i) => {
if (i % 2 === 0) {
// Outside inline code — replace $ with HTML entity
return part.replace(/\$(?=\d)/g, '$');
}
return part; // Inside inline code — leave unchanged
}).join('');
}
result.push(line);
}
return result.join('\n');
}
__escape_non_html_tags(content) {
// Escape any lowercase tag whose name is not a known HTML element or a content-filter
// tag used by this writer, outside fenced code blocks and inline code spans.
// Such tags are URL/API placeholder patterns (e.g. , ,
// , ) that MDX would otherwise parse as JSX elements.
// Both opening and closing forms are escaped (e.g. → \).
// PascalCase JSX components (Tabs, TabItem, Admonition…) are never matched because
// the regex anchors on a leading lowercase letter.
// / filter tags are added so their orphaned closing forms are not
// accidentally escaped (they are removed by __filter_content before this runs anyway).
const KNOWN_TAGS = new Set([
// Standard HTML elements
'a', 'abbr', 'address', 'area', 'article', 'aside', 'audio',
'b', 'base', 'bdi', 'bdo', 'blockquote', 'br', 'button',
'canvas', 'caption', 'cite', 'code', 'col', 'colgroup',
'data', 'datalist', 'dd', 'del', 'details', 'dfn', 'dialog', 'div', 'dl', 'dt',
'em', 'embed',
'fieldset', 'figcaption', 'figure', 'footer', 'form',
'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'head', 'header', 'hr', 'html',
'i', 'iframe', 'img', 'input', 'ins',
'kbd',
'label', 'legend', 'li', 'link',
'main', 'map', 'mark', 'menu', 'meta', 'meter',
'nav', 'noscript',
'object', 'ol', 'optgroup', 'option', 'output',
'p', 'picture', 'pre', 'progress',
'q',
'rp', 'rt', 'ruby',
's', 'samp', 'script', 'section', 'select', 'slot', 'small', 'source', 'span',
'strong', 'style', 'sub', 'summary', 'sup',
'table', 'tbody', 'td', 'template', 'textarea', 'tfoot', 'th', 'thead',
'time', 'title', 'tr', 'track',
'u', 'ul',
'var', 'video',
'wbr',
// Content-filter tags used by this writer (processed before MDX patching)
'include', 'exclude',
]);
// Structural pre-scan: build set of safe uppercase/PascalCase tag names.
// A tag is safe if it appears with a close tag, self-closing form, or attributes
// anywhere in the document. Combined with a KNOWN_JSX fallback whitelist as a
// safety net for legitimate components that may be orphaned in edge cases.
const safeUppercaseTags = new Set(KNOWN_JSX_TAGS);
const upperScanRegex = /[<]([A-Z][A-Za-z0-9]*)/g;
let upperMatch;
while ((upperMatch = upperScanRegex.exec(content)) !== null) {
const tn = upperMatch[1];
if (safeUppercaseTags.has(tn)) continue;
if (new RegExp(`<\\/${tn}>`).test(content) ||
new RegExp(`<${tn}\\s*\\/>`).test(content) ||
new RegExp(`<${tn}\\s+`).test(content)) {
safeUppercaseTags.add(tn);
}
}
const lines = content.split('\n');
let inCodeBlock = false;
const result = [];
for (let line of lines) {
const stripped = line.trim();
if (stripped.startsWith('```') || stripped.startsWith('~~~')) {
inCodeBlock = !inCodeBlock;
}
if (!inCodeBlock) {
// Split by inline code spans; odd-indexed segments are inside backticks
const parts = line.split(/(`+[^`]+`+)/);
line = parts.map((part, i) => {
if (i % 2 === 0) {
// Escape non-HTML lowercase placeholder tags (e.g. , ).
// Tags with attributes won't match because the regex only allows \s*\/?>
part = part.replace(/(?/g, (match, tagName) => {
return KNOWN_TAGS.has(tagName) ? match : '\\' + match;
});
// Escape uppercase/PascalCase tags not identified as real JSX components.
// Uses HTML entities so the angle brackets render correctly in the output.
part = part.replace(/(?/g, (match, tagName) => {
if (safeUppercaseTags.has(tagName)) return match;
return match.replace(//g, '>');
});
// Escape dotted-name PascalCase tags (e.g. ),
// which are Java/C# type references that MDX misparses as JSX member expressions.
// Backslash escaping does not suppress MDX JSX parsing for dotted names, so
// always convert to HTML entities, stripping any preceding backslash first.
part = part.replace(/\\?<\/?([A-Z][A-Za-z0-9]*(?:\.[A-Za-z][A-Za-z0-9]*)+)\s*\/?>/g, (match) => {
return match.replace(/^\\/, '').replace(//g, '>');
});
return part;
}
return part; // Inside inline code — leave unchanged
}).join('');
}
result.push(line);
}
return result.join('\n');
}
async __mdx_patches(content) {
try {
// Import MDX compiler dynamically as it's an ES module
const { compile } = await import('@mdx-js/mdx');
const remarkMath = (await import('remark-math')).default;
// Pre-process: fix translation/editor artefacts, then escape problem characters
let patchedContent = removeTabsHallucinations(content);
patchedContent = unescapeKnownJsxTags(patchedContent);
patchedContent = normalizeCodeTagContent(patchedContent);
patchedContent = this.__escape_currency_dollars(patchedContent);
patchedContent = this.__escape_non_html_tags(patchedContent);
let maxIterations = 50; // Prevent infinite loops
let iteration = 0;
const seenHashes = new Set();
while (iteration < maxIterations) {
// Cycle detection: stop if we've visited this exact content state before
let h = 5381;
for (let i = 0; i < patchedContent.length; i++) {
h = Math.imul(h, 33) ^ patchedContent.charCodeAt(i);
}
if (seenHashes.has(h)) {
console.warn('Cycle detected in MDX patch loop, stopping to prevent infinite iteration');
break;
}
seenHashes.add(h);
try {
// Try to compile the current content
await compile(patchedContent, { development: false, remarkPlugins: [remarkMath] });
console.log(`MDX compilation succeeded after ${iteration} fixes`);
return patchedContent; // If compilation succeeds, return the fixed content
} catch (error) {
console.log(`MDX compilation error detected (iteration ${iteration + 1}): ${error.message}`);
// console.log(error)
// Identify problematic characters based on the error
let madeChanges = false;
let line, column, offset;
switch (error.ruleId) {
case 'acorn':
line = error.place.line;
column = error.place.column;
offset = error.place.offset;
// console.log(patchedContent.split('\n')[line-1]);
if (offset !== undefined && offset > 0 && offset < patchedContent.length) {
for (let i = offset - 1; i >= 0; i--) {
if (patchedContent[i] === '{') {
patchedContent = patchedContent.slice(0, i) + '\\' + patchedContent.slice(i);
madeChanges = true;
break;
}
}
}
break;
case 'end-tag-mismatch': {
// Error: "Unexpected closing tag ``, expected corresponding closing tag for `` (line:col-line:col)"
// The position refers to the OPENING tag .
// Strategy: replace the wrong closing tag with the correct .
// Exception: if is a non-standard tag (contains _ or -) it is a URL/API
// placeholder, not a real element. Replacing the closing tag causes an
// oscillating loop; instead fall through to the fallback (escape opening tag).
const wrongClose = error.message.match(/Unexpected closing tag `<\/([^>]+)>`/)?.[1];
const expectedOpen = error.message.match(/closing tag for `<([A-Za-z][^>/ ]*)(?:\s[^>]*)?>?`/)?.[1];
const posMatch = error.message.match(/(\d+):(\d+)-(\d+):(\d+)/);
const isPlaceholder = expectedOpen && /[_-]/.test(expectedOpen);
if (!isPlaceholder && wrongClose && expectedOpen && wrongClose !== expectedOpen && posMatch) {
const openLine = parseInt(posMatch[1]) - 1; // 0-indexed
const wrongCloseTag = `${wrongClose}>`;
const correctCloseTag = `${expectedOpen}>`;
const lines = patchedContent.split('\n');
for (let i = openLine; i < lines.length; i++) {
const idx = lines[i].indexOf(wrongCloseTag);
if (idx !== -1) {
lines[i] = lines[i].slice(0, idx) + correctCloseTag + lines[i].slice(idx + wrongCloseTag.length);
madeChanges = true;
break;
}
}
if (madeChanges) {
patchedContent = lines.join('\n');
}
} else if (!wrongClose && expectedOpen && posMatch) {
// Variant: "Expected a closing tag for `` (line:col-line:col) before the end of `paragraph`"
// Skip known JSX components — escaping their opening tag causes a
// cascade: the orphaned is then deleted by unexpected-closing-slash,
// destroying the component structure. The real fix is inside the component
// (e.g. unescaped braces) which the acorn handler will address.
if (KNOWN_JSX_TAGS.has(expectedOpen)) {
break;
}
// The opening tag is not closed within its paragraph. Escape it with < so it
// renders as literal text instead of being treated as a JSX element.
const openLine = parseInt(posMatch[1]) - 1; // 0-indexed
const openCol = parseInt(posMatch[2]) - 1; // 0-indexed
const lines = patchedContent.split('\n');
if (openLine < lines.length && lines[openLine][openCol] === '<') {
lines[openLine] = lines[openLine].slice(0, openCol) + '<' + lines[openLine].slice(openCol + 1);
patchedContent = lines.join('\n');
madeChanges = true;
}
}
break;
}
case 'unexpected-closing-slash': {
// "Unexpected closing slash `/` in tag, expected an open tag first"
// The error offset points to the `/` inside the orphaned closing tag.
// Strategy: walk back to find `<`, forward to find `>`, then remove the entire tag.
const slashOffset = error.place?.offset;
if (slashOffset !== undefined) {
let tagStart = slashOffset - 1;
while (tagStart > 0 && patchedContent[tagStart] !== '<') tagStart--;
let tagEnd = slashOffset;
while (tagEnd < patchedContent.length && patchedContent[tagEnd] !== '>') tagEnd++;
if (patchedContent[tagStart] === '<' && tagEnd < patchedContent.length) {
const before = patchedContent.slice(0, tagStart);
let after = patchedContent.slice(tagEnd + 1);
if (after.startsWith('\n')) after = after.slice(1);
patchedContent = before + after;
madeChanges = true;
}
}
if (!madeChanges) {
// Fallback: remove erroneous closing tags via regex
const originalContent = patchedContent;
patchedContent = patchedContent.replace(/<\/(?:content|[\w\d]+)>\s*$/, '');
if (originalContent !== patchedContent) {
madeChanges = true;
} else {
patchedContent = patchedContent.replace(/<[/](\w+)>/g, (match, tagName) => {
const openingTagCount = (patchedContent.match(new RegExp(`<${tagName}(?:\\s|>|/>)`, 'g')) || []).length;
const closingTagCount = (patchedContent.match(new RegExp(`<\\/${tagName}>`, 'g')) || []).length;
if (closingTagCount > openingTagCount) {
return '';
}
return match;
});
if (originalContent !== patchedContent) {
madeChanges = true;
}
}
}
break;
}
case 'unexpected-character':
offset = error.place?.offset;
if (
(error.message.includes('U+003D') || /U\+003[0-9]/.test(error.message)) &&
offset !== undefined && offset > 0
) {
// `=` sign or a digit (0–9) unexpected — typically from `<=` or `<10` where
// `<` was parsed as a JSX tag opener but the following char is not a valid name start.
// Replace `<` with `<` (not `\`) so the entity renders correctly in HTML.
for (let i = offset - 1; i >= Math.max(0, offset - 10); i--) {
if (patchedContent[i] === '<') {
patchedContent = patchedContent.slice(0, i) + '<' + patchedContent.slice(i + 1);
madeChanges = true;
break;
}
}
} else if (
(error.message.includes('U+007C') || error.message.includes('U+0026')) &&
offset !== undefined && offset > 0
) {
// `|` (union types like ``) or `&` (HTML entities like `<`
// inside angle brackets like ``) unexpected in JSX tag.
// Walk backward to find `<` and replace with `<`.
for (let i = offset - 1; i >= Math.max(0, offset - 30); i--) {
if (patchedContent[i] === '<') {
patchedContent = patchedContent.slice(0, i) + '<' + patchedContent.slice(i + 1);
madeChanges = true;
break;
}
}
} else if (
(error.message.includes('U+002C') || error.message.includes('U+002A') || error.message.includes('U+3001')) &&
offset !== undefined && offset > 0 && offset < patchedContent.length
) {
// Comma, asterisk, or ideographic comma — escape the nearest preceding `<` with backslash
for (let i = offset - 1; i >= 0; i--) {
if (patchedContent[i] === '<') {
patchedContent = patchedContent.slice(0, i) + '\\' + patchedContent.slice(i);
madeChanges = true;
break;
}
}
}
break;
default:
madeChanges = false;
break;
}
if (!madeChanges) {
console.warn('No changes made to content, breaking loop to prevent infinite iteration');
break;
}
}
iteration++;
}
if (iteration >= maxIterations) {
console.warn(`Maximum MDX patch iterations (${maxIterations}) reached, returning last attempt`);
}
return patchedContent;
} catch (error) {
console.error('Failed to import MDX compiler:', error.message);
return content; // Return original content if compiler import fails
}
}
async __page(page) {
return '# ' + await this.__text_elements(page['elements']);
}
async __text(text) {
return await this.__text_elements(text['elements']);
}
async __heading(heading, level) {
let content = await this.__text_elements(heading['elements'])
content = this.__clean_headings(content)
if (content.length > 0) {
if (content.indexOf('{#') < 0) {
let slug = slugify(content.split('|')[0].trim(), {lower: true, strict: true})
return '#'.repeat(level) + ' ' + content + '{#'+slug+'}';
} else {
return '#'.repeat(level) + ' ' + content;
}
} else {
return '';
}
}
__clean_headings(content) {
// filter content
content = this.__filter_content(content, this.targets)
// remove html tags
content = content.replace(/<\/?[^>]+(>|$)/g, "")
// remove trailing and leading spaces
content = content.trim()
return content
}
async __bullet(block, indent) {
let children = ''
if (block.children) {
children = block.children.map(child => {
return this.__retrieve_block_by_id(child)
})
children = await this.__markdown(children, indent+4)
}
let content = await this.__text_elements(block['bullet']['elements'])
return ' '.repeat(indent) + '- ' + content + '\n\n' + children;
}
async __ordered(block, indent) {
let children = ''
if (block.children) {
children = block.children.map(child => {
return this.__retrieve_block_by_id(child)
})
children = await this.__markdown(children, indent+4)
}
let content = await this.__text_elements(block['ordered']['elements'])
return ' '.repeat(indent) + '1. ' + content + '\n\n' + children;
}
/**
* Convert showdown HTML to MDX-safe content for use inside JSX components.
* - Replaces blocks with markdown fenced code blocks
* - Escapes { and } outside inline spans
*/
__showdownToMdxSafe(html) {
// Escape { and } outside ... and ... spans first
// (before converting to fences, so code content is still protected)
const parts = html.split(/(<(?:code|pre)(?:\s[^>]*)?>[\s\S]*?<\/(?:code|pre)>)/g);
html = parts.map((part, i) => {
if (i % 2 === 0) {
return part.replace(/\{/g, '\\{').replace(/\}/g, '\\}');
}
return part;
}).join('');
// Convert ... to fenced code blocks
html = html.replace(/([\s\S]*?)<\/code><\/pre>/g, (match, classAttr, code) => {
let lang = '';
if (classAttr) {
const langMatch = classAttr.match(/(?:^|\s)language-(\S+)/);
lang = langMatch ? langMatch[1] : (classAttr.split(/\s+/)[0] || '');
}
const decoded = code.replace(/&/g, '&').replace(/</g, '<').replace(/>/g, '>').replace(/"/g, '"');
return '\n```' + lang + '\n' + decoded.replace(/^\n|\n$/g, '') + '\n```\n';
});
return html;
}
async __callout(block, indent) {
let children = []
if (block.children) {
children = block.children.map(child => {
return this.__retrieve_block_by_id(child)
})
children = await this.__markdown(children, indent)
children = this.__filter_content(children, this.targets)
children = children.split('\n')
}
let emoji = block['callout']['emoji_id']
let type;
switch (emoji) {
case 'blue_book':
type = ``
break;
case 'construction':
type = ``
break;
default:
type = ``
break;
}
const converter = new showdown.Converter()
let html = converter.makeHtml(children.slice(1).map(line => line.replace(/^\s*/g, '')).join('\n'))
html = this.__showdownToMdxSafe(html);
const raw = ' '.repeat(indent) + type + '\n\n' + ' '.repeat(indent) + html.split('\n').join('\n' + ' '.repeat(indent)) + '\n\n' + ' '.repeat(indent) + '';
return raw.replace(/(\s*\n){3,}/g, `\n${' '.repeat(indent)}\n`);
}
async __code(code, indent, prev, next, blocks) {
const valid_langs = ['Python', 'JavaScript', 'Java', 'Go', 'C++', 'Bash', 'Shell']
let lang = code.style.language ? this.code_langs[code['style']['language']] : 'plaintext'
let elements = (await Promise.all(code['elements'].map( async x => {
let content = await this.__text_run(x, code['elements'], true)
content = content.replaceAll('$', '$')
return content
}))).join('')
if (lang === 'C++') return; // to be removed once c++ is supported
if (valid_langs.includes(lang)) {
const prev_type = prev ? this.block_types[prev['block_type']-1] : null;
const next_type = next ? this.block_types[next['block_type']-1] : null;
const prev_lang = prev && prev_type === 'code' && prev['code']['style']['language'] ? this.code_langs[prev['code']['style']['language']] : null;
const next_lang = next && next_type === 'code' && next['code']['style']['language'] ? this.code_langs[next['code']['style']['language']] : null;
// first block
if ((!prev || (prev && prev_type !== 'code') ||
(prev && prev_type === 'code' && (!valid_langs.includes(prev_lang) || prev_lang === lang))) &&
(next && next_type === 'code' && valid_langs.includes(next_lang) && next_lang !== lang)
) {
console.log('first block')
const values = this.__code_tabs(code, prev, next, blocks)
.filter(tab => tab.value !== 'c++'); // to be removed once c++ is supported
return this.__code_block_split(elements, indent, lang, 'first', values);
}
// last block
if (prev && prev_type === 'code' && valid_langs.includes(prev_lang) && prev_lang !== lang &&
(!next || (next && next_type !== 'code') ||
(next && next_type === 'code' && (!valid_langs.includes(next_lang) || next_lang === lang)))) {
console.log('last block')
return this.__code_block_split(elements, indent, lang, 'last');
}
// middle block
if (prev && prev_type === 'code' && valid_langs.includes(next_lang) && prev_lang !== lang && next && next_type === 'code' && valid_langs.includes(next_lang) && next_lang !== code['style']['language']) {
console.log('middle block')
return this.__code_block_split(elements, indent, lang, 'middle');
}
// only block
if (!prev || (prev && prev_type !== 'code') ||
(prev && (prev_type === 'code' || !valid_langs.includes(prev_lang) || prev_lang === lang)) ||
(next && (next_type === 'code' || !valid_langs.includes(next_lang) || next_lang === lang)) ||
!next || (next && next_type !== 'code')
) {
console.log('only block')
return this.__code_block_split(elements, indent, lang);
}
} else {
console.log('not valid lang')
return this.__code_block_split(elements, indent, lang);
}
}
__code_block_split(elements, indent, lang, position, values=null) {
elements = elements.split('\n').map(line => line.replaceAll('`', '\\`'));
var divider = elements.indexOf(elements.filter(x => x.match(/^[#\/]\/* ==*/))[0]);
var tab_item_start = `${' '.repeat(indent)}\n`;
var tab_item_end = `${' '.repeat(indent)}`
var tabs_end = `${' '.repeat(indent)}`
if (divider === -1) {
elements = `${' '.repeat(indent)}\`\`\`${lang.toLowerCase()}\n${' '.repeat(indent) + elements.join('\n' + ' '.repeat(indent))}\n${' '.repeat(indent)}\`\`\`\n`
switch (position) {
case 'first':
var tabs_start = `${' '.repeat(indent)}`;
return [tabs_start, tab_item_start, elements, tab_item_end].join('\n');
case 'last':
return [tab_item_start, elements, tab_item_end, tabs_end].join('\n');
case 'middle':
return [tab_item_start, elements, tab_item_end].join('\n');
default:
return elements;
}
} else {
var comment_mark = lang === 'Python' || lang === 'Bash' ? '# ' : '// '
var half_1 = elements.slice(0, divider)
var half_1_label = half_1[0].replace(comment_mark, '')
var half_2 = elements.slice(divider)
var half_2_label = half_2[1].replace(comment_mark, '')
lang = lang.toLowerCase()
var inner_values = []
inner_values.push({label: half_1_label, value: lang})
inner_values.push({label: half_2_label, value: `${lang}_1`})
var inner_tabs_start = `${' '.repeat(indent)}`;
var inner_tab_item_start_1 = `${' '.repeat(indent)}\n`;
var inner_tab_item_start_2 = `${' '.repeat(indent)}\n`;
var inner_tab_item_end = `${' '.repeat(indent)}`
var inner_tabs_end = `${' '.repeat(indent)}`
half_1 = `${' '.repeat(indent)}\`\`\`${lang.toLowerCase()}\n${' '.repeat(indent) + half_1.slice(1).join('\n' + ' '.repeat(indent))}\n${' '.repeat(indent)}\`\`\`\n`
half_2 = `${' '.repeat(indent)}\`\`\`${lang.toLowerCase()}\n${' '.repeat(indent) + half_2.slice(3).join('\n' + ' '.repeat(indent))}\n${' '.repeat(indent)}\`\`\`\n`
switch (position) {
case 'first':
var tabs_start = `${' '.repeat(indent)}`;
return [tabs_start, tab_item_start, inner_tabs_start, inner_tab_item_start_1, half_1, inner_tab_item_end, inner_tab_item_start_2, half_2, inner_tab_item_end, inner_tabs_end, tab_item_end ].join('\n');
case 'last':
return [tab_item_start, inner_tabs_start, inner_tab_item_start_1, half_1, inner_tab_item_end, inner_tab_item_start_2, half_2, inner_tab_item_end, inner_tabs_end, tab_item_end, tabs_end].join('\n');
case 'middle':
return [tab_item_start, inner_tabs_start, inner_tab_item_start_1, half_1, inner_tab_item_end, inner_tab_item_start_2, half_2, inner_tab_item_end, inner_tabs_end, tab_item_end].join('\n');
default:
return [inner_tabs_start, inner_tab_item_start_1, half_1, inner_tab_item_end, inner_tab_item_start_2, half_2, inner_tab_item_end, inner_tabs_end].join('\n');
}
}
}
__code_tabs(code, prev, next, blocks) {
let values = [];
let lang = code.style.language ? this.code_langs[code.style.language] : 'plaintext'
if ((!prev || (prev && this.block_types[prev['block_type']-1] !== 'code')) && next && this.block_types[next['block_type']-1] === 'code') {
values.push({ label: get_label(lang), value: lang.toLowerCase() });
has_next_code(next, this.block_types, this.code_langs);
function has_next_code(next, block_types, code_langs) {
const next_lang = code_langs[next['code']['style']['language']];
values.push({ label: get_label(next_lang), value: next_lang.toLowerCase() });
try {
next = blocks[blocks.indexOf(next) + 1];
if (next && block_types[next['block_type']-1] === 'code') {
has_next_code(next, block_types, code_langs);
}
} catch {
// do nothing
}
}
function get_label(lang) {
let label;
switch (lang) {
case 'JavaScript':
label = 'NodeJS'
break;
case 'Bash':
label = 'cURL'
break;
case 'Shell':
label = 'Zilliz CLI'
break;
default:
label = lang
break;
}
return label;
}
}
return values;
}
async __quote(block, indent) {
let quotes = block['children'].map( (child) => {
return this.__retrieve_block_by_id(child)
});
let res = (await this.__markdown(quotes, indent)).split('\n');
let type = 'info Notes';
let possible_titles = ['Notes', 'Note', '说明', 'ノート', 'Warning', 'Warn', '警告']
let title = possible_titles.find((x, i) => res[0].includes(x));
if (title && ['Warning', 'Warn', '警告'].indexOf(title) == -1) {
type = `info 📘 ${title}`;
} else {
type = `caution 🚧 ${title}`;
}
type = ``;
res.splice(1, 0, "");
const converter = new showdown.Converter()
let html = converter.makeHtml(res.slice(1).map(line => line.replace(/^\s*/g, '')).join('\n'))
html = this.__showdownToMdxSafe(html);
const raw = ' '.repeat(indent) + type + '\n\n' + ' '.repeat(indent) + html.split('\n').join('\n' + ' '.repeat(indent)) + '\n\n' + ' '.repeat(indent) + '';
return raw.replace(/(\s*\n){3,}/g, '\n\n');
}
async __image(image) {
const root = this.upload_to_s3 ? IMAGE_BED_URL : `/${this.imageDir.replace(/^static\//g, '')}`
const caption = image.caption?.content ? image.caption.content.trim() : image.token;
const slug = slugify(caption, {lower: true, strict: true})
if (this.skip_image_download) {
return ``;
}
try {
const { buffer } = await this.downloader.__downloadImage(image.token)
if (this.upload_to_s3) {
await this.downloader.__uploadToS3(buffer, `${slug}.png`);
} else {
fs.writeFileSync(`${this.downloader.target_path}/${slug}.png`, buffer);
}
} catch (error) {
console.error(`Image ${image.token} error [${error.constructor.name}]: ${error.message}`)
}
return ``;
}
async __board(board, indent) {
const root = this.upload_to_s3 ? IMAGE_BED_URL : `/${ this.imageDir.replace(/^static\//g, '')}`
if (this.skip_image_download) {
return ' '.repeat(indent) + ``;
}
try {
const result = await this.downloader.__downloadBoardPreview(board.token)
if (!result.ok) {
console.error(`Board ${board.token} download failed: HTTP ${result.status} ${result.statusText}`)
} else {
const buffer = await result.buffer()
console.log(`Board ${board.token} buffer size: ${buffer.length} bytes`)
const trimmedBuffer = await this.__trim_white_borders(buffer);
if (this.upload_to_s3) {
await this.downloader.__uploadToS3(trimmedBuffer, `${board["token"]}.png`);
} else {
fs.writeFileSync(`${this.downloader.target_path}/${board["token"]}.png`, trimmedBuffer);
}
}
} catch (error) {
console.error(`Board ${board.token} error [${error.constructor.name}]: ${error.message}`)
}
return ' '.repeat(indent) + ``;
}
async __trim_white_borders(image) {
const sharp = require('sharp');
const timeout = new Promise((_, reject) =>
setTimeout(() => reject(new Error('sharp toBuffer() timed out after 30s')), 30000)
);
try {
console.log(`sharp trim: input ${image.length} bytes, magic ${image.slice(0,4).toString('hex')}`)
const trimmedImage = await sharp(image)
.trim({
background: { r: 255, g: 255, b: 255 },
threshold: 10
}).png()
console.log(`sharp trim: pipeline built, calling toBuffer()`)
const borderedImage = trimmedImage.extend({
top: 20,
bottom: 20,
left: 20,
right: 20,
background: { r: 255, g: 255, b: 255 }
});
const buffer = await Promise.race([borderedImage.toBuffer(), timeout]);
console.log(`sharp trim: output ${buffer.length} bytes`)
return buffer;
} catch (error) {
throw new Error(`Failed to trim image borders: ${error.message}`);
}
}
async __iframe(block) {
const root = this.upload_to_s3 ? IMAGE_BED_URL : `/${ this.imageDir.replace(/^static\//g, '')}`
const block_id = block['block_id'];
const iframe = block['iframe'];
const existing_iframe = this.iframes.find(x => x.block_id === block_id)
if (existing_iframe) {
return ``;
}
if (iframe['component']['iframe_type'] !== 8) {
return '';
} else if (this.skip_image_download) {
const url = new URL(decodeURIComponent(iframe.component.url))
const key = url.pathname.split('/')[2]
const node = url.searchParams.get('node-id').split('-').join(":")
const caption = (await this.downloader.__fetchCaption(key, node)).nodes[node].document.name;
this.iframes.push({
block_id,
caption
})
return ``;
} else {
try {
const url = new URL(decodeURIComponent(iframe.component.url))
const key = url.pathname.split('/')[2]
const node = url.searchParams.get('node-id').split('-').join(":")
const caption = (await this.downloader.__fetchCaption(key, node)).nodes[node].document.name;
const result = await this.downloader.__downloadIframe(key, node);
const buffer = await result.buffer();
if (this.upload_to_s3) {
await this.downloader.__uploadToS3(buffer, `${caption}.png`);
} else if (!fs.existsSync(`${this.downloader.target_path}/${caption}.png`)) {
result.body.pipe(fs.createWriteStream(`${this.downloader.target_path}/${caption}.png`));
this.iframes.push({
block_id,
caption
})
}
return ``;
} catch (error) {
console.log(error)
console.log("-------------- A retry is needed -----------------");
console.log("Sleeping for a minute")
await new Promise(resolve => setTimeout(resolve, 60000));
this.__iframe(block)
}
}
}
async __table(table, indent) {
const converter = new showdown.Converter({ underline: true })
const cells = table['cells'];
const cell_blocks = cells.map(cell => {
return this.__retrieve_block_by_id(cell).children
});
const cell_texts = await Promise.all(cell_blocks.map(async (cell) => {
let blocks = cell.map(block => this.__retrieve_block_by_id(block));
return (await this.__markdown(blocks, 1)).replace(/\n/g, ' ');
}));
const row_size = table['property']['row_size'];
const column_size = table['property']['column_size'];
var merge_info = table['property']['merge_info'];
merge_info = merge_info.map((merge, idx) => {
if (merge) {
for (var i = 1; i < merge.col_span; i++) {
merge_info[idx+i] = null;
}
for (var j = 1; j < merge.row_span; j++) {
merge_info[idx+j*column_size] = null;
}
}
return merge
})
var html = ' '.repeat(indent) + '\n';
for (var i = 0; i < row_size; i++) {
html += ' '.repeat(indent) +' \n';
for (var j = 0; j < column_size; j++) {
const cell_idx = i * column_size + j;
const merge = merge_info[cell_idx];
if (merge) {
const colspan = merge.col_span > 1 ? ` colspan="${merge.col_span}"` : "";
const rowspan = merge.row_span > 1 ? ` rowspan="${merge.row_span}"` : "";
let cell_text = this.__filter_content(cell_texts[cell_idx], this.targets).trim()
.replace(/^\n/, '')
.replace(/ /g, '\n\n');
// Escape solitary tildes to prevent MDX strikethrough parsing
cell_text = cell_text.replace(/(? wrapping
var admonitions = [];
cell_text = cell_text.replace(
/]*>[\s\S]*?<\/Admonition>/g,
(match) => {
admonitions.push(match);
return `%%ADMONITION_${admonitions.length - 1}%%`;
}
);
admonitions = admonitions.map(admonition => admonition.replace(/\n/g, ''));
cell_text = converter.makeHtml(cell_text)
.replace(/\n/g, '')
.replace(/&/g, '&')
.replace(/\*/g, '*');
admonitions = admonitions.map(admonition => admonition.replace(/\n/g, ''));
cell_text = converter.makeHtml(cell_text)
.replace(/\n/g, '')
.replace(/&/g, '&')
.replace(/\*/g, '*');
// Restore Admonition components (strip wrapper showdown added)
cell_text = cell_text.replace(
/ %%ADMONITION_(\d+)%%<\/p>/g,
(_, idx) => admonitions[parseInt(idx)]
);
// escape { and } for MDX
cell_text = cell_text.replace(/\{/g, '\\{').replace(/\}/g, '\\}');
if (i === 0) {
html += ` ${' '.repeat(indent)} ${cell_text} | \n`;
} else {
html += ` ${' '.repeat(indent)} ${cell_text} | \n`;
}
}
}
html += ' '.repeat(indent) +' \n';
}
html += ' '.repeat(indent) + ' \n';
return html;
}
async __sheet(sheet, indent) {
const converter = new showdown.Converter({ underline: true })
const merges = sheet.meta?.data.sheet.merges;
const values = sheet.values.data.valueRange.values;
var result = ' '.repeat(indent) + "" + "\n";
values.forEach((row, ridx) => {
result += ' '.repeat(indent) + ' ' + "" + "\n";
row.forEach((cell, cidx) => {
var colspan = "";
var rowspan = "";
if (merges) {
const match = merges.filter(merge => merge.start_row_index === ridx && merge.start_column_index === cidx);
if (match.length > 0) {
colspan = `colspan="${match[0].end_column_index -match[0].start_column_index + 1}"`;
rowspan = `rowspan="${match[0].end_row_index -match[0].start_row_index + 1}"`;
}
}
if (typeof cell ==='string') {
cell = cell.replace(/\n/g, ' ')
}
if (typeof cell === 'object') {
cell = this.__sheet_cell(cell)
}
if (typeof cell === 'number') {
cell = cell.toString()
}
cell = cell.trim().replace(/ /g, '\n\n');
if (ridx === 0) {
result += `${' '.repeat(indent) + ' '.repeat(2)}${converter.makeHtml(cell).replace(/\n/g, '')} | \n`
} else {
result += `${' '.repeat(indent) + ' '.repeat(2)}${converter.makeHtml(cell).replace(/\n/g, '')} | \n`
}
})
result += ' '.repeat(indent) + ' ' + " " + "\n"
});
result += ' '.repeat(indent) + " " + "\n";
return result.replace('"{', '"\\{');
}
__sheet_cell(cell) {
if (cell instanceof Array) {
return cell.map(block => {
if (block['type'] === 'text') {
return block['text']
}
if (block['type'] === 'url') {
return `${block['text']}`
}
}).join('')
} else {
console.log(cell)
return ''
}
}
async __supademo(addons, indent) {
const record = JSON.parse(addons['record']);
return ' '.repeat(indent) + ``;
}
async __grid(block, indent) {
const grid_columns = block.children.map(child => this.__retrieve_block_by_id(child));
const column_size = block.grid.column_size;
const width_ratios = grid_columns.map(column => column.grid_column.width_ratio);
// Await all columns' children markdown
const columnsContent = await Promise.all(
grid_columns.map(async column => {
const children = column.children.map(child => this.__retrieve_block_by_id(child));
// Join all children's markdown for this column
let childMarkdowns = await this.__markdown(children, indent + 8);
childMarkdowns = childMarkdowns.replace(/({#[0-9a-z-]+})/g, "\\$1")
return `${' '.repeat(indent + 4)}\n\n${' '.repeat(indent + 8)}${childMarkdowns.trim()}\n\n${' '.repeat(indent + 4)} `;
})
);
return (
`${' '.repeat(indent)}\n\n` +
columnsContent.join('\n\n') +
`\n\n${' '.repeat(indent)}\n`
);
}
async __source_synced(block, indent) {
let children = block.children.map(child => this.__retrieve_block_by_id(child));
let content = await this.__markdown(children, indent);
return content;
}
__retrieve_block_by_id(block_id) {
if (!this.page_blocks) {
throw new Error('Page blocks not found');
}
// console.log(this.page_blocks)
return this.page_blocks.find(x => x['block_id'] === block_id);
}
async __equation(element, elements, asis=false) {
let content = element['equation']['content'];
let style = element['equation']['text_element_style'];
let prev = elements[elements.indexOf(element) - 1] || null;
let prev_element_type = prev? prev['equation'] ? 'equation' : 'text_run' : null;
let next = elements[elements.indexOf(element) + 1] || null;
let next_element_type = next? next['equation'] ? 'equation' : 'text_run' : null;
let rip_off_line_breaks = false;
// separate single equation
if (!prev && !next) {
return `$$\n${content.trim()}\n$$\n`;
}
// inline single equation
if ((!prev || prev_element_type === 'text_run') && (!next ||next_element_type === 'text_run')) {
return `$${content.trim()}$`;
}
return content;
}
async __text_run(element, elements, asis=false) {
let content = element['text_run']['content'];
let style = element['text_run']['text_element_style'];
if (!content.match(/^\s+$/) && !asis) {
element['text_run']['content'] = content.replace(/\$/g, '$') // escape $ for markdown
.replace(/\*/g, '*') // escape * for markdown
if (style['inline_code']) {
content = this.__style_markdown(element, elements, 'inline_code', '`');
content = content.replaceAll('$', '$')
content = content.replaceAll('*', '*')
}
if (style['bold']) {
content = this.__style_markdown(element, elements, 'bold', '**');
}
if (style['italic']) {
content = this.__style_markdown(element, elements, 'italic', '*');
}
if (style['strikethrough']) {
content = this.__style_markdown(element, elements, 'strikethrough', '~~');
}
if ('link' in style) {
const url = await this.__convert_link(decodeURIComponent(style['link']['url']))
var prefix = [...content.matchAll(/(^\*\*|^\*|^~~)/g)]
var suffix = [...content.matchAll(/(\*\*$|\*$|~~$)/g)]
if (prefix.length > 0) {
prefix = prefix[0][0]
} else {
prefix = ''
}
if (suffix.length > 0) {
suffix = suffix[0][0]
} else {
suffix = ''
}
content = `${prefix}[${content.replace(prefix, '').replace(suffix, '')}](${url})${suffix}`;
}
}
return content;
}
__style_markdown(element, elements, style_name, decorator) {
let element_type = element['equation'] ? 'equation' : 'text_run';
let content = element[element_type]['content'];
let style = element[element_type]['text_element_style'];
let prev = elements[elements.indexOf(element) - 1] || null;
let prev_element_type = prev? prev['equation'] ? 'equation' : 'text_run' : null;
let next = elements[elements.indexOf(element) + 1] || null;
let next_element_type = next? next['equation'] ? 'equation' : 'text_run' : null;
if (!content.match(/^\s+$/)) {
// single element
if ((!prev || prev_element_type === 'equation' || (prev && !prev[prev_element_type]['text_element_style'][style_name])) && style[style_name] && (!next || next_element_type === 'equation' || (next && !next[next_element_type]['text_element_style'][style_name]))) {
let prefix_spaces = content.match(/^\s*/)[0];
let suffix_spaces = content.match(/\s*$/)[0];
content = `${prefix_spaces}${decorator}${content.trim()}${decorator}${suffix_spaces}`;
}
// first element
if ((!prev || prev_element_type === 'equation' || (prev && !prev[prev_element_type]['text_element_style'][style_name])) && style[style_name] && next && next_element_type === 'text_run' && next[next_element_type]['text_element_style'][style_name]) {
let prefix_spaces = content.match(/^\s*/)[0];
content = `${prefix_spaces}${decorator}${content.trimStart()}`;
}
// last element
if (prev && prev_element_type === 'text_run' && prev[prev_element_type]['text_element_style'][style_name] && style[style_name] && (!next || next_element_type === 'equation' || (next && !next[next_element_type]['text_element_style'][style_name]))) {
let suffix_spaces = content.match(/\s*$/)[0];
content = `${content.trimEnd()}${decorator}${suffix_spaces}`;
}
// middle element
if (prev && prev_element_type === 'text_run' && prev[prev_element_type]['text_element_style'][style_name] && style[style_name] && next && next_element_type === 'text_run' && next[next_element_type]['text_element_style'][style_name]) {
content = `${content}`;
}
}
return content;
}
async __mention_doc(element) {
let title = element['mention_doc']['title'];
let url = await this.__convert_link(decodeURIComponent(element['mention_doc']['url']));
if (url) {
return `[${title}](${url})`;
} else {
console.log(`Cannot find ${title}`)
return title;
}
}
async __convert_link(url) {
if (url.includes('zilliverse')) {
url = new URL(url);
const token = url.pathname.split('/').pop();
const header = url.hash.slice(1);
const isWikiUrl = url.pathname.split('/')[1] === 'wiki';
var page;
if (isWikiUrl) {
try {
page = this.__fetch_doc_source('node_token', token);
} catch (error) {
page = null;
}
if (!page) {
try {
page = this.__fetch_doc_source('origin_node_token', token);
} catch (error) {
page = null;
}
}
} else {
try {
page = this.__fetch_doc_source(['token', 'obj_token'], token);
} catch (error) {
page = null;
}
}
if (page) {
const title = page['title'];
// const meta = await this.__is_to_publish(title);
const slug = page['slug'];
// let newUrl = this.target === 'saas' ? `./${slug}` : `./byoc/${slug}`;
let newUrl = `./${slug}`;
if (header) {
let headerBlock;
try {
headerBlock = page['blocks']['items'].filter(x => x['block_id'] === header)[0];
} catch (error) {
throw new Error(`Header block ${header} not found in page ${title}, and the page token is ${token}`);
}
if (headerBlock) {
const blockType = this.block_types[headerBlock['block_type'] - 1];
if (parseInt(blockType.slice(-1)) <= 9) {
var content = await this.__text_elements(headerBlock[blockType]['elements']);
content = this.__filter_content(content, this.targets)
content = this.__clean_headings(content)
const slug = content.includes('{#') ? content.split('{#')[1].replace(/}$/, '') : slugify(content, {strict: true, lower: true});
newUrl += `#${slug}`;
}
}
}
console.log(newUrl)
url = newUrl.replace(/\/\//g, "/");
} else {
url = null;
}
}
if (url?.startsWith('https://docs.zilliz.com/')) {
url = url.replace('https://docs.zilliz.com/', '/');
}
return url;
}
async __text_elements(elements) {
let paragraph = "";
for (let element of elements) {
if ('text_run' in element) {
paragraph += await this.__text_run(element, elements);
}
if ('mention_doc' in element) {
paragraph += await this.__mention_doc(element);
}
if ('equation' in element) {
paragraph += await this.__equation(element, elements);
}
}
if (this.docs) {
paragraph = await this.__auto_link(paragraph, this.docs)
}
return paragraph;
}
async __auto_link(paragraph, docs) {
console.log(docs)
for (let doc of docs) {
paragraph = paragraph.replace(doc, `[${doc.title}](${doc.slug})`)
}
return paragraph
}
async __fetch_sdk_versions (url) {
const sdks = {
python: 'https://github.com/milvus-io/pymilvus/releases',
node: 'https://github.com/milvus-io/milvus-sdk-node/releases',
java: 'https://github.com/milvus-io/milvus-sdk-java/releases',
go: 'https://github.com/milvus-io/milvus-sdk-go/releases',
}
for (let key in sdks) {
if (sdks[key]) {
const res = await fetch(sdks[key])
const $ = cheerio.load(await res.text())
const version = $('section > h2').first().text().match(/\d+\.\d+\.\d+/)[0]
const released = $('section').first().find('relative-time').attr('datetime')
sdks[key] = {
version: version,
released: released
}
}
}
return sdks
}
__block_types() {
return [
"page",
"text",
"heading1",
"heading2",
"heading3",
"heading4",
"heading5",
"heading6",
"heading7",
"heading8",
"heading9",
"bullet",
"ordered",
"code",
"quote",
null,
"todo",
"bitable",
"callout",
"chat_card",
"diagram",
"divider",
"file",
"grid",
"grid_column",
"iframe",
"image",
"isv",
"mindnote",
"sheet",
"table",
"table_cell",
"view",
"quote_container",
"task",
"okr",
"okr_objective",
"okr_key_result",
"okr_progress",
"add_ons",
"jira_issue",
"wiki_catelog",
"board",
"agenda",
"agenda_item",
"agenda_item_title",
"agenda_item_content",
"link_preview",
"source_synced",
"reference_synced",
"sub_page_list",
"ai_template"
]
}
__code_langs() {
return [
null,
"PlainText",
"ABAP",
"Ada",
"Apache",
"Apex",
"Assembly",
"Bash",
"CSharp",
"C++",
"C",
"COBOL",
"CSS",
"CoffeeScript",
"D",
"Dart",
"Delphi",
"Django",
"Dockerfile",
"Erlang",
"Fortran",
"FoxPro",
"Go",
"Groovy",
"HTML",
"HTMLBars",
"HTTP",
"Haskell",
"JSON",
"Java",
"JavaScript",
"Julia",
"Kotlin",
"LateX",
"Lisp",
"Logo",
"Lua",
"MATLAB",
"Makefile",
"Markdown",
"Nginx",
"Objective",
"OpenEdgeABL",
"PHP",
"Perl",
"PostScript",
"Power",
"Prolog",
"ProtoBuf",
"Python",
"R",
"RPG",
"Ruby",
"Rust",
"SAS",
"SCSS",
"SQL",
"Scala",
"Scheme",
"Scratch",
"Shell",
"Swift",
"Thrift",
"TypeScript",
"VBScript",
"Visual",
"XML",
"YAML",
"CMake",
"Diff",
"Gherkin",
"GraphQL",
"OpenGL Shading Language",
"Properties",
"Solidity",
"TOML",
]
}
keyword_picker() {
const keywords = fs.readFileSync(node_path.join('plugins', 'lark-docs', 'meta', 'keywords.txt'), 'utf8').trim().split('\n')
const seed = Math.floor(Math.random() * keywords.length)
return [keywords[seed], keywords[(seed+1)%keywords.length], keywords[(seed+2)%keywords.length], keywords[(seed+3)%keywords.length]]
}
}
module.exports = larkDocWriter
|