chore: improve super markdown export when exporting nested nodes

2023-12-15 21:42:20 +05:30
parent 5dc1a401e8
commit 118421c5e0
4 changed files with 267 additions and 46 deletions
--- a/packages/web/src/javascripts/Components/SuperEditor/Lexical/Utils/MarkdownExport.ts
+++ b/packages/web/src/javascripts/Components/SuperEditor/Lexical/Utils/MarkdownExport.ts
@@ -0,0 +1,214 @@
+/**
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ *
+ * This source code is licensed under the MIT license found in the
+ * LICENSE file in the root directory of this source tree.
+ *
+ */
+
+/**
+ * Taken from https://github.com/facebook/lexical/blob/main/packages/lexical-markdown/src/MarkdownExport.ts
+ * but modified using changes from https://github.com/facebook/lexical/pull/4957 to make nested elements work
+ * better when exporting to markdown.
+ */
+
+import type { ElementTransformer, TextFormatTransformer, TextMatchTransformer, Transformer } from '@lexical/markdown'
+import {
+  ElementNode,
+  LexicalNode,
+  TextFormatType,
+  TextNode,
+  $getRoot,
+  $isDecoratorNode,
+  $isElementNode,
+  $isLineBreakNode,
+  $isTextNode,
+} from 'lexical'
+import { TRANSFORMERS, transformersByType } from './MarkdownImportExportUtils'
+
+/**
+ * Builds a markdown exporter bound to the given transformers.
+ *
+ * @param transformers - Element, text-format and text-match transformers to export with.
+ * @returns A function that exports the children of the node it is given (or of the
+ *   root returned by `$getRoot()` when called without one) and joins the exported
+ *   top-level results with a blank line. Children that export to `null` are skipped.
+ */
+export function createMarkdownExport(transformers: Array<Transformer>): (node?: ElementNode) => string {
+  const { element, textFormat, textMatch } = transformersByType(transformers)
+
+  // Only single-format transformers are kept for export: a combined tag such as
+  // *** (bold, italic) is dropped and the separate ** and * tags are used instead.
+  const singleFormatTransformers = textFormat.filter(({ format }) => format.length === 1)
+
+  return (node) => {
+    const exportRoot = node || $getRoot()
+    const blocks: string[] = []
+
+    exportRoot.getChildren().forEach((child) => {
+      const markdown = exportTopLevelElements(child, element, singleFormatTransformers, textMatch)
+
+      if (markdown != null) {
+        blocks.push(markdown)
+      }
+    })
+
+    return blocks.join('\n\n')
+  }
+}
+
+/**
+ * Exports a single direct child of the export root.
+ *
+ * Element transformers are tried in order and the first non-null result is returned.
+ * When none of them handles the node, element nodes fall back to exporting their
+ * children and decorator nodes fall back to their text content.
+ *
+ * @returns The markdown for `node`, or `null` when the node is neither handled by a
+ *   transformer nor an element or decorator node.
+ */
+function exportTopLevelElements(
+  node: LexicalNode,
+  elementTransformers: Array<ElementTransformer>,
+  textTransformersIndex: Array<TextFormatTransformer>,
+  textMatchTransformers: Array<TextMatchTransformer>,
+): string | null {
+  for (const transformer of elementTransformers) {
+    const markdown = transformer.export(node, (parent) =>
+      exportChildren(parent, elementTransformers, textTransformersIndex, textMatchTransformers),
+    )
+
+    if (markdown != null) {
+      return markdown
+    }
+  }
+
+  if ($isElementNode(node)) {
+    return exportChildren(node, elementTransformers, textTransformersIndex, textMatchTransformers)
+  }
+
+  return $isDecoratorNode(node) ? node.getTextContent() : null
+}
+
+/**
+ * Serializes the children of `node` into one markdown string; the pieces are
+ * concatenated without any separator.
+ *
+ * Each child is handled by the first step that applies:
+ *  1. Element children are offered to every element transformer. The first non-null
+ *     result is emitted, followed by a newline unless the child is the last one.
+ *  2. Every text-match transformer is tried; the first non-null result is emitted.
+ *  3. Fallbacks: line breaks become a newline, text nodes go through
+ *     `exportTextFormat`, remaining element nodes are exported recursively and
+ *     followed by a newline, decorator nodes contribute their text content.
+ * Any other node contributes nothing.
+ *
+ * @param node - Element whose children are exported.
+ * @param elementTransformers - Transformers tried on element children.
+ * @param textTransformersIndex - Text-format transformers handed to `exportTextFormat`.
+ * @param textMatchTransformers - Transformers tried on every child not handled in step 1.
+ * @returns The concatenated markdown of all children.
+ */
+function exportChildren(
+  node: ElementNode,
+  elementTransformers: Array<ElementTransformer>,
+  textTransformersIndex: Array<TextFormatTransformer>,
+  textMatchTransformers: Array<TextMatchTransformer>,
+): string {
+  const output: string[] = []
+  const children = node.getChildren()
+  const lastIndex = children.length - 1
+
+  // Created once per call rather than once per transformer attempt.
+  const exportNested = (parent: ElementNode): string =>
+    exportChildren(parent, elementTransformers, textTransformersIndex, textMatchTransformers)
+
+  mainLoop: for (const [index, child] of children.entries()) {
+    if ($isElementNode(child)) {
+      for (const transformer of elementTransformers) {
+        const result = transformer.export(child, exportNested)
+
+        if (result != null) {
+          output.push(result)
+          // Separate nested blocks with a newline but do not add one after the last
+          // child. The loop index is used instead of scanning with indexOf each time.
+          if (index !== lastIndex) {
+            output.push('\n')
+          }
+          continue mainLoop
+        }
+      }
+    }
+
+    for (const transformer of textMatchTransformers) {
+      const result = transformer.export(child, exportNested, (textNode, textContent) =>
+        exportTextFormat(textNode, textContent, textTransformersIndex),
+      )
+
+      if (result != null) {
+        output.push(result)
+        continue mainLoop
+      }
+    }
+
+    if ($isLineBreakNode(child)) {
+      output.push('\n')
+    } else if ($isTextNode(child)) {
+      output.push(exportTextFormat(child, child.getTextContent(), textTransformersIndex))
+    } else if ($isElementNode(child)) {
+      // Element that no transformer handled: export its content on a line of its own.
+      output.push(exportNested(child), '\n')
+    } else if ($isDecoratorNode(child)) {
+      output.push(child.getTextContent())
+    }
+  }
+
+  return output.join('')
+}
+
+/**
+ * Wraps the text of `node` in the markdown tags of every format the node carries.
+ *
+ * Surrounding whitespace is kept outside the tags: for a string like "   foo   " it
+ * would be invalid markdown to generate "**   foo   **", so the text is trimmed, the
+ * tags are applied, and the whitespace is put back, giving "   **foo**   ".
+ *
+ * @param node - Text node whose formats (and neighbouring text nodes) are inspected.
+ * @param textContent - The text to wrap.
+ * @param textTransformers - Text-format transformers; `format[0]` and `tag` of each are used.
+ * @returns `textContent` with its trimmed part replaced by the tagged version.
+ */
+function exportTextFormat(node: TextNode, textContent: string, textTransformers: Array<TextFormatTransformer>): string {
+  const frozenString = textContent.trim()
+  let output = frozenString
+
+  const applied = new Set<string>()
+
+  for (const transformer of textTransformers) {
+    const format = transformer.format[0]
+    const tag = transformer.tag
+
+    if (hasFormat(node, format) && !applied.has(format)) {
+      // Multiple tags might be used for the same format (*, _)
+      applied.add(format)
+
+      // Prevent adding an opening tag if it was already opened by the previous sibling
+      const previousNode = getTextSibling(node, true)
+
+      if (!hasFormat(previousNode, format)) {
+        output = tag + output
+      }
+
+      // Prevent adding a closing tag if the next sibling will do it
+      const nextNode = getTextSibling(node, false)
+
+      if (!hasFormat(nextNode, format)) {
+        output += tag
+      }
+    }
+  }
+
+  // Replace the trimmed version of textContent so surrounding whitespace is not modified.
+  // A replacer function is used on purpose: a replacement *string* has `$$`, `$&`, "$`"
+  // and `$'` expanded by String.prototype.replace, which corrupted text containing
+  // them (e.g. "a $$ b" was exported as "a $ b").
+  return textContent.replace(frozenString, () => output)
+}
+
+/**
+ * Finds the text node next to `node` in the given direction, looking through inline
+ * elements (e.g. links).
+ *
+ * When `node` has no sibling in that direction and its parent is inline, the search
+ * continues from the parent's sibling. From there:
+ *  - a text node is returned as is;
+ *  - an inline element yields its last (backward) or first (forward) descendant when
+ *    that descendant is a text node, otherwise the element is skipped;
+ *  - a non-inline element, or any other kind of node, ends the search.
+ *
+ * @param node - Text node to start from.
+ * @param backward - `true` to look at previous siblings, `false` to look at next ones.
+ * @returns The neighbouring text node, or `null` when there is none.
+ */
+function getTextSibling(node: TextNode, backward: boolean): TextNode | null {
+  let candidate = backward ? node.getPreviousSibling() : node.getNextSibling()
+
+  if (!candidate) {
+    const parent = node.getParentOrThrow()
+
+    if (parent.isInline()) {
+      candidate = backward ? parent.getPreviousSibling() : parent.getNextSibling()
+    }
+  }
+
+  while (candidate) {
+    if ($isTextNode(candidate)) {
+      return candidate
+    }
+
+    // Only inline elements are looked through; anything else stops the search.
+    if (!$isElementNode(candidate) || !candidate.isInline()) {
+      return null
+    }
+
+    const edgeDescendant = backward ? candidate.getLastDescendant() : candidate.getFirstDescendant()
+
+    if ($isTextNode(edgeDescendant)) {
+      return edgeDescendant
+    }
+
+    candidate = backward ? candidate.getPreviousSibling() : candidate.getNextSibling()
+  }
+
+  return null
+}
+
+/**
+ * Tells whether `node` is a text node with `format` applied.
+ * `null`, `undefined` and non-text nodes always give `false`.
+ */
+function hasFormat(node: LexicalNode | null | undefined, format: TextFormatType): boolean {
+  if (!$isTextNode(node)) {
+    return false
+  }
+
+  return node.hasFormat(format)
+}
+
+/**
+ * Exports `node` to markdown in one call; without a node, the root returned by
+ * `$getRoot()` is exported.
+ *
+ * @param transformers - Transformers to export with; defaults to `TRANSFORMERS`.
+ * @param node - Optional element whose children are exported.
+ * @returns The markdown string produced by `createMarkdownExport`.
+ */
+export function $convertToMarkdownString(transformers: Array<Transformer> = TRANSFORMERS, node?: ElementNode): string {
+  return createMarkdownExport(transformers)(node)
+}
--- a/packages/web/src/javascripts/Components/SuperEditor/Lexical/Utils/MarkdownImport.ts
+++ b/packages/web/src/javascripts/Components/SuperEditor/Lexical/Utils/MarkdownImport.ts
@@ -6,16 +6,13 @@
 *
 */

+/**
+ * Taken from https://github.com/facebook/lexical/blob/main/packages/lexical-markdown/src/MarkdownImport.ts
+ * but modified to allow keeping new lines when importing markdown.
+ */
+
 import { CodeNode, $createCodeNode } from '@lexical/code'
-import {
-  ElementTransformer,
-  TextFormatTransformer,
-  TextMatchTransformer,
-  Transformer,
-  ELEMENT_TRANSFORMERS,
-  TEXT_FORMAT_TRANSFORMERS,
-  TEXT_MATCH_TRANSFORMERS,
-} from '@lexical/markdown'
+import { ElementTransformer, TextFormatTransformer, TextMatchTransformer, Transformer } from '@lexical/markdown'

 import { $isListItemNode, $isListNode } from '@lexical/list'
 import { $isQuoteNode } from '@lexical/rich-text'
@@ -33,39 +30,10 @@ import {
  ElementNode,
 } from 'lexical'
 import { IS_APPLE_WEBKIT, IS_IOS, IS_SAFARI } from '../Shared/environment'
+import { TRANSFORMERS, transformersByType } from './MarkdownImportExportUtils'

 const PUNCTUATION_OR_SPACE = /[!-/:-@[-`{-~\s]/

-function indexBy<T>(list: Array<T>, callback: (arg0: T) => string): Readonly<Record<string, Array<T>>> {
-  const index: Record<string, Array<T>> = {}
-
-  for (const item of list) {
-    const key = callback(item)
-
-    if (index[key]) {
-      index[key].push(item)
-    } else {
-      index[key] = [item]
-    }
-  }
-
-  return index
-}
-
-function transformersByType(transformers: Array<Transformer>): Readonly<{
-  element: Array<ElementTransformer>
-  textFormat: Array<TextFormatTransformer>
-  textMatch: Array<TextMatchTransformer>
-}> {
-  const byType = indexBy(transformers, (t) => t.type)
-
-  return {
-    element: (byType.element || []) as Array<ElementTransformer>,
-    textFormat: (byType['text-format'] || []) as Array<TextFormatTransformer>,
-    textMatch: (byType['text-match'] || []) as Array<TextMatchTransformer>,
-  }
-}
-
 const MARKDOWN_EMPTY_LINE_REG_EXP = /^\s{0,3}$/
 const CODE_BLOCK_REG_EXP = /^```(\w{1,10})?\s?$/
 type TextFormatTransformersIndex = Readonly<{
@@ -392,12 +360,6 @@ function createTextFormatTransformersIndex(
  }
 }

-const TRANSFORMERS: Array<Transformer> = [
-  ...ELEMENT_TRANSFORMERS,
-  ...TEXT_FORMAT_TRANSFORMERS,
-  ...TEXT_MATCH_TRANSFORMERS,
-]
-
 export function $convertFromMarkdownString(
  markdown: string,
  transformers: Array<Transformer> = TRANSFORMERS,
--- a/packages/web/src/javascripts/Components/SuperEditor/Lexical/Utils/MarkdownImportExportUtils.ts
+++ b/packages/web/src/javascripts/Components/SuperEditor/Lexical/Utils/MarkdownImportExportUtils.ts
@@ -0,0 +1,45 @@
+import {
+  ElementTransformer,
+  TextFormatTransformer,
+  TextMatchTransformer,
+  Transformer,
+  ELEMENT_TRANSFORMERS,
+  TEXT_FORMAT_TRANSFORMERS,
+  TEXT_MATCH_TRANSFORMERS,
+} from '@lexical/markdown'
+
+/**
+ * Groups the items of `list` by the key `callback` returns for each of them.
+ *
+ * @param list - Items to group.
+ * @param callback - Computes the group key of an item.
+ * @returns An object mapping each key to its items, in the order they appear in `list`.
+ */
+function indexBy<T>(list: Array<T>, callback: (arg0: T) => string): Readonly<Record<string, Array<T>>> {
+  const groups: Record<string, Array<T>> = {}
+
+  list.forEach((item) => {
+    const key = callback(item)
+    const bucket = groups[key]
+
+    if (bucket) {
+      bucket.push(item)
+      return
+    }
+
+    groups[key] = [item]
+  })
+
+  return groups
+}
+
+/**
+ * Splits a mixed list of transformers into one list per transformer `type`.
+ *
+ * @param transformers - Transformers to split.
+ * @returns The transformers whose type is `element`, `text-format` and `text-match`
+ *   respectively; a type with no transformers gives an empty array.
+ */
+export function transformersByType(transformers: Array<Transformer>): Readonly<{
+  element: Array<ElementTransformer>
+  textFormat: Array<TextFormatTransformer>
+  textMatch: Array<TextMatchTransformer>
+}> {
+  const grouped = indexBy(transformers, (transformer) => transformer.type)
+
+  const element = (grouped.element || []) as Array<ElementTransformer>
+  const textFormat = (grouped['text-format'] || []) as Array<TextFormatTransformer>
+  const textMatch = (grouped['text-match'] || []) as Array<TextMatchTransformer>
+
+  return { element, textFormat, textMatch }
+}
+
+/**
+ * Default transformer set shared by markdown import and export: the element
+ * transformers first, then the text-format ones, then the text-match ones.
+ */
+export const TRANSFORMERS: Array<Transformer> = new Array<Transformer>().concat(
+  ELEMENT_TRANSFORMERS,
+  TEXT_FORMAT_TRANSFORMERS,
+  TEXT_MATCH_TRANSFORMERS,
+)
--- a/packages/web/src/javascripts/Components/SuperEditor/Tools/HeadlessSuperConverter.tsx
+++ b/packages/web/src/javascripts/Components/SuperEditor/Tools/HeadlessSuperConverter.tsx
@@ -1,5 +1,4 @@
 import { createHeadlessEditor } from '@lexical/headless'
-import { $convertToMarkdownString } from '@lexical/markdown'
 import { FileItem, PrefKey, PrefValue, SuperConverterServiceInterface } from '@standardnotes/snjs'
 import {
  $createParagraphNode,
@@ -18,6 +17,7 @@ import { FileNode } from '../Plugins/EncryptedFilePlugin/Nodes/FileNode'
 import { $createFileExportNode } from '../Lexical/Nodes/FileExportNode'
 import { $createInlineFileNode } from '../Plugins/InlineFilePlugin/InlineFileNode'
 import { $convertFromMarkdownString } from '../Lexical/Utils/MarkdownImport'
+import { $convertToMarkdownString } from '../Lexical/Utils/MarkdownExport'
 export class HeadlessSuperConverter implements SuperConverterServiceInterface {
  private importEditor: LexicalEditor
  private exportEditor: LexicalEditor