chore: keep new lines when converting plaintext/markdown to Super

potential todo: open a PR to add this option to upstream
2023-11-21 18:44:09 +05:30
parent 6c835b9aab
commit f2ad03e16c
2 changed files with 411 additions and 2 deletions
--- a/packages/web/src/javascripts/Components/SuperEditor/Lexical/Utils/MarkdownImport.ts
+++ b/packages/web/src/javascripts/Components/SuperEditor/Lexical/Utils/MarkdownImport.ts
@@ -0,0 +1,409 @@
+/**
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ *
+ * This source code is licensed under the MIT license found in the
+ * LICENSE file in the root directory of this source tree.
+ *
+ */
+
+import { CodeNode, $createCodeNode } from '@lexical/code'
+import {
+  ElementTransformer,
+  TextFormatTransformer,
+  TextMatchTransformer,
+  Transformer,
+  ELEMENT_TRANSFORMERS,
+  TEXT_FORMAT_TRANSFORMERS,
+  TEXT_MATCH_TRANSFORMERS,
+} from '@lexical/markdown'
+
+import { $isListItemNode, $isListNode } from '@lexical/list'
+import { $isQuoteNode } from '@lexical/rich-text'
+import { $findMatchingParent } from '@lexical/utils'
+import {
+  LexicalNode,
+  TextNode,
+  $createLineBreakNode,
+  $createParagraphNode,
+  $createTextNode,
+  $getRoot,
+  $getSelection,
+  $isParagraphNode,
+  $isTextNode,
+  ElementNode,
+} from 'lexical'
+import { IS_APPLE_WEBKIT, IS_IOS, IS_SAFARI } from '../Shared/environment'
+
+const PUNCTUATION_OR_SPACE = /[!-/:-@[-`{-~\s]/
+
+function indexBy<T>(list: Array<T>, callback: (arg0: T) => string): Readonly<Record<string, Array<T>>> {
+  const index: Record<string, Array<T>> = {}
+
+  for (const item of list) {
+    const key = callback(item)
+
+    if (index[key]) {
+      index[key].push(item)
+    } else {
+      index[key] = [item]
+    }
+  }
+
+  return index
+}
+
+function transformersByType(transformers: Array<Transformer>): Readonly<{
+  element: Array<ElementTransformer>
+  textFormat: Array<TextFormatTransformer>
+  textMatch: Array<TextMatchTransformer>
+}> {
+  const byType = indexBy(transformers, (t) => t.type)
+
+  return {
+    element: (byType.element || []) as Array<ElementTransformer>,
+    textFormat: (byType['text-format'] || []) as Array<TextFormatTransformer>,
+    textMatch: (byType['text-match'] || []) as Array<TextMatchTransformer>,
+  }
+}
+
+const MARKDOWN_EMPTY_LINE_REG_EXP = /^\s{0,3}$/
+const CODE_BLOCK_REG_EXP = /^```(\w{1,10})?\s?$/
+type TextFormatTransformersIndex = Readonly<{
+  fullMatchRegExpByTag: Readonly<Record<string, RegExp>>
+  openTagsRegExp: RegExp
+  transformersByTag: Readonly<Record<string, TextFormatTransformer>>
+}>
+
+function createMarkdownImport(
+  transformers: Array<Transformer>,
+): (markdownString: string, node?: ElementNode, keepNewLines?: boolean) => void {
+  const byType = transformersByType(transformers)
+  const textFormatTransformersIndex = createTextFormatTransformersIndex(byType.textFormat)
+
+  return (markdownString, node, keepNewLines = false) => {
+    const lines = markdownString.split('\n')
+    const linesLength = lines.length
+    const root = node || $getRoot()
+    root.clear()
+
+    for (let i = 0; i < linesLength; i++) {
+      const lineText = lines[i]
+      // Codeblocks are processed first as anything inside such block
+      // is ignored for further processing
+      // TODO:
+      // Abstract it to be dynamic as other transformers (add multiline match option)
+      const [codeBlockNode, shiftedIndex] = importCodeBlock(lines, i, root)
+
+      if (codeBlockNode != null) {
+        i = shiftedIndex
+        continue
+      }
+
+      importBlocks(lineText, root, byType.element, textFormatTransformersIndex, byType.textMatch)
+    }
+
+    if (!keepNewLines) {
+      // Removing empty paragraphs as md does not really
+      // allow empty lines and uses them as dilimiter
+      const children = root.getChildren()
+      for (const child of children) {
+        if (isEmptyParagraph(child)) {
+          child.remove()
+        }
+      }
+    }
+
+    if ($getSelection() !== null) {
+      root.selectEnd()
+    }
+  }
+}
+
+function isEmptyParagraph(node: LexicalNode): boolean {
+  if (!$isParagraphNode(node)) {
+    return false
+  }
+
+  const firstChild = node.getFirstChild()
+  return (
+    firstChild == null ||
+    (node.getChildrenSize() === 1 &&
+      $isTextNode(firstChild) &&
+      MARKDOWN_EMPTY_LINE_REG_EXP.test(firstChild.getTextContent()))
+  )
+}
+
+function importBlocks(
+  lineText: string,
+  rootNode: ElementNode,
+  elementTransformers: Array<ElementTransformer>,
+  textFormatTransformersIndex: TextFormatTransformersIndex,
+  textMatchTransformers: Array<TextMatchTransformer>,
+) {
+  const lineTextTrimmed = lineText.trim()
+  const textNode = $createTextNode(lineTextTrimmed)
+  const elementNode = $createParagraphNode()
+  elementNode.append(textNode)
+  rootNode.append(elementNode)
+
+  for (const { regExp, replace } of elementTransformers) {
+    const match = lineText.match(regExp)
+
+    if (match) {
+      textNode.setTextContent(lineText.slice(match[0].length))
+      replace(elementNode, [textNode], match, true)
+      break
+    }
+  }
+
+  importTextFormatTransformers(textNode, textFormatTransformersIndex, textMatchTransformers)
+
+  // If no transformer found and we left with original paragraph node
+  // can check if its content can be appended to the previous node
+  // if it's a paragraph, quote or list
+  if (elementNode.isAttached() && lineTextTrimmed.length > 0) {
+    const previousNode = elementNode.getPreviousSibling()
+    if ($isParagraphNode(previousNode) || $isQuoteNode(previousNode) || $isListNode(previousNode)) {
+      let targetNode: LexicalNode | null = previousNode
+
+      if ($isListNode(previousNode)) {
+        const lastDescendant = previousNode.getLastDescendant()
+        if (lastDescendant == null) {
+          targetNode = null
+        } else {
+          targetNode = $findMatchingParent(lastDescendant, $isListItemNode)
+        }
+      }
+
+      if (targetNode != null && targetNode.getTextContentSize() > 0) {
+        targetNode.splice(targetNode.getChildrenSize(), 0, [$createLineBreakNode(), ...elementNode.getChildren()])
+        elementNode.remove()
+      }
+    }
+  }
+}
+
+function importCodeBlock(
+  lines: Array<string>,
+  startLineIndex: number,
+  rootNode: ElementNode,
+): [CodeNode | null, number] {
+  const openMatch = lines[startLineIndex].match(CODE_BLOCK_REG_EXP)
+
+  if (openMatch) {
+    let endLineIndex = startLineIndex
+    const linesLength = lines.length
+
+    while (++endLineIndex < linesLength) {
+      const closeMatch = lines[endLineIndex].match(CODE_BLOCK_REG_EXP)
+
+      if (closeMatch) {
+        const codeBlockNode = $createCodeNode(openMatch[1])
+        const textNode = $createTextNode(lines.slice(startLineIndex + 1, endLineIndex).join('\n'))
+        codeBlockNode.append(textNode)
+        rootNode.append(codeBlockNode)
+        return [codeBlockNode, endLineIndex]
+      }
+    }
+  }
+
+  return [null, startLineIndex]
+}
+
+// Processing text content and replaces text format tags.
+// It takes outermost tag match and its content, creates text node with
+// format based on tag and then recursively executed over node's content
+//
+// E.g. for "*Hello **world**!*" string it will create text node with
+// "Hello **world**!" content and italic format and run recursively over
+// its content to transform "**world**" part
+function importTextFormatTransformers(
+  textNode: TextNode,
+  textFormatTransformersIndex: TextFormatTransformersIndex,
+  textMatchTransformers: Array<TextMatchTransformer>,
+) {
+  const textContent = textNode.getTextContent()
+  const match = findOutermostMatch(textContent, textFormatTransformersIndex)
+
+  if (!match) {
+    // Once text format processing is done run text match transformers, as it
+    // only can span within single text node (unline formats that can cover multiple nodes)
+    importTextMatchTransformers(textNode, textMatchTransformers)
+    return
+  }
+
+  let currentNode, remainderNode, leadingNode
+
+  // If matching full content there's no need to run splitText and can reuse existing textNode
+  // to update its content and apply format. E.g. for **_Hello_** string after applying bold
+  // format (**) it will reuse the same text node to apply italic (_)
+  if (match[0] === textContent) {
+    currentNode = textNode
+  } else {
+    const startIndex = match.index || 0
+    const endIndex = startIndex + match[0].length
+
+    if (startIndex === 0) {
+      ;[currentNode, remainderNode] = textNode.splitText(endIndex)
+    } else {
+      ;[leadingNode, currentNode, remainderNode] = textNode.splitText(startIndex, endIndex)
+    }
+  }
+
+  currentNode.setTextContent(match[2])
+  const transformer = textFormatTransformersIndex.transformersByTag[match[1]]
+
+  if (transformer) {
+    for (const format of transformer.format) {
+      if (!currentNode.hasFormat(format)) {
+        currentNode.toggleFormat(format)
+      }
+    }
+  }
+
+  // Recursively run over inner text if it's not inline code
+  if (!currentNode.hasFormat('code')) {
+    importTextFormatTransformers(currentNode, textFormatTransformersIndex, textMatchTransformers)
+  }
+
+  // Run over leading/remaining text if any
+  if (leadingNode) {
+    importTextFormatTransformers(leadingNode, textFormatTransformersIndex, textMatchTransformers)
+  }
+
+  if (remainderNode) {
+    importTextFormatTransformers(remainderNode, textFormatTransformersIndex, textMatchTransformers)
+  }
+}
+
+function importTextMatchTransformers(textNode_: TextNode, textMatchTransformers: Array<TextMatchTransformer>) {
+  let textNode = textNode_
+
+  mainLoop: while (textNode) {
+    for (const transformer of textMatchTransformers) {
+      const match = textNode.getTextContent().match(transformer.importRegExp)
+
+      if (!match) {
+        continue
+      }
+
+      const startIndex = match.index || 0
+      const endIndex = startIndex + match[0].length
+      let replaceNode, leftTextNode, rightTextNode
+
+      if (startIndex === 0) {
+        ;[replaceNode, textNode] = textNode.splitText(endIndex)
+      } else {
+        ;[leftTextNode, replaceNode, rightTextNode] = textNode.splitText(startIndex, endIndex)
+      }
+      if (leftTextNode) {
+        importTextMatchTransformers(leftTextNode, textMatchTransformers)
+      }
+      if (rightTextNode) {
+        textNode = rightTextNode
+      }
+      transformer.replace(replaceNode, match)
+      continue mainLoop
+    }
+
+    break
+  }
+}
+
+// Finds first "<tag>content<tag>" match that is not nested into another tag
+function findOutermostMatch(
+  textContent: string,
+  textTransformersIndex: TextFormatTransformersIndex,
+): RegExpMatchArray | null {
+  const openTagsMatch = textContent.match(textTransformersIndex.openTagsRegExp)
+
+  if (openTagsMatch == null) {
+    return null
+  }
+
+  for (const match of openTagsMatch) {
+    // Open tags reg exp might capture leading space so removing it
+    // before using match to find transformer
+    const tag = match.replace(/^\s/, '')
+    const fullMatchRegExp = textTransformersIndex.fullMatchRegExpByTag[tag]
+    if (fullMatchRegExp == null) {
+      continue
+    }
+
+    const fullMatch = textContent.match(fullMatchRegExp)
+    const transformer = textTransformersIndex.transformersByTag[tag]
+    if (fullMatch != null && transformer != null) {
+      if (transformer.intraword !== false) {
+        return fullMatch
+      }
+
+      // For non-intraword transformers checking if it's within a word
+      // or surrounded with space/punctuation/newline
+      const { index = 0 } = fullMatch
+      const beforeChar = textContent[index - 1]
+      const afterChar = textContent[index + fullMatch[0].length]
+
+      if (
+        (!beforeChar || PUNCTUATION_OR_SPACE.test(beforeChar)) &&
+        (!afterChar || PUNCTUATION_OR_SPACE.test(afterChar))
+      ) {
+        return fullMatch
+      }
+    }
+  }
+
+  return null
+}
+
+function createTextFormatTransformersIndex(
+  textTransformers: Array<TextFormatTransformer>,
+): TextFormatTransformersIndex {
+  const transformersByTag: Record<string, TextFormatTransformer> = {}
+  const fullMatchRegExpByTag: Record<string, RegExp> = {}
+  const openTagsRegExp = []
+  const escapeRegExp = '(?<![\\\\])'
+
+  for (const transformer of textTransformers) {
+    const { tag } = transformer
+    transformersByTag[tag] = transformer
+    const tagRegExp = tag.replace(/(\*|\^|\+)/g, '\\$1')
+    openTagsRegExp.push(tagRegExp)
+
+    if (IS_SAFARI || IS_IOS || IS_APPLE_WEBKIT) {
+      fullMatchRegExpByTag[tag] = new RegExp(
+        `(${tagRegExp})(?![${tagRegExp}\\s])(.*?[^${tagRegExp}\\s])${tagRegExp}(?!${tagRegExp})`,
+      )
+    } else {
+      fullMatchRegExpByTag[tag] = new RegExp(
+        `(?<![\\\\${tagRegExp}])(${tagRegExp})((\\\\${tagRegExp})?.*?[^${tagRegExp}\\s](\\\\${tagRegExp})?)((?<!\\\\)|(?<=\\\\\\\\))(${tagRegExp})(?![\\\\${tagRegExp}])`,
+      )
+    }
+  }
+
+  return {
+    // Reg exp to find open tag + content + close tag
+    fullMatchRegExpByTag,
+    // Reg exp to find opening tags
+    openTagsRegExp: new RegExp(
+      (IS_SAFARI || IS_IOS || IS_APPLE_WEBKIT ? '' : `${escapeRegExp}`) + '(' + openTagsRegExp.join('|') + ')',
+      'g',
+    ),
+    transformersByTag,
+  }
+}
+
+const TRANSFORMERS: Array<Transformer> = [
+  ...ELEMENT_TRANSFORMERS,
+  ...TEXT_FORMAT_TRANSFORMERS,
+  ...TEXT_MATCH_TRANSFORMERS,
+]
+
+export function $convertFromMarkdownString(
+  markdown: string,
+  transformers: Array<Transformer> = TRANSFORMERS,
+  node?: ElementNode,
+  keepNewLines = false,
+): void {
+  const importMarkdown = createMarkdownImport(transformers)
+  return importMarkdown(markdown, node, keepNewLines)
+}
--- a/packages/web/src/javascripts/Components/SuperEditor/Plugins/ImportPlugin/ImportPlugin.tsx
+++ b/packages/web/src/javascripts/Components/SuperEditor/Plugins/ImportPlugin/ImportPlugin.tsx
@@ -1,11 +1,11 @@
 import { useLexicalComposerContext } from '@lexical/react/LexicalComposerContext'
 import { useEffect } from 'react'
-import { $convertFromMarkdownString } from '@lexical/markdown'
 import { $createParagraphNode, $createRangeSelection, LexicalEditor } from 'lexical'
 import { handleEditorChange } from '../../Utils'
 import { SuperNotePreviewCharLimit } from '../../SuperEditor'
 import { $generateNodesFromDOM } from '@lexical/html'
 import { MarkdownTransformers } from '../../MarkdownTransformers'
+import { $convertFromMarkdownString } from '../../Lexical/Utils/MarkdownImport'

 /** Note that markdown conversion does not insert new lines. See: https://github.com/facebook/lexical/issues/2815 */
 export default function ImportPlugin({
@@ -34,7 +34,7 @@ export default function ImportPlugin({

    editor.update(() => {
      if (format === 'md') {
-        $convertFromMarkdownString(text, MarkdownTransformers)
+        $convertFromMarkdownString(text, MarkdownTransformers, undefined, true)
      } else {
        const parser = new DOMParser()
        const dom = parser.parseFromString(text, 'text/html')