fix: Fixed issue with some Evernote imports getting cut off

2024-04-19 16:55:45 +05:30
parent 6e282c1c9a
commit 2bc7d57235
4 changed files with 38 additions and 25 deletions
--- a/packages/files/src/Domain/Service/SuperConverterServiceInterface.ts
+++ b/packages/files/src/Domain/Service/SuperConverterServiceInterface.ts
@@ -1,5 +1,9 @@
 import { FileItem, PrefKey, PrefValue } from '@standardnotes/models'

+export type SuperConverterHTMLOptions = {
+  addLineBreaks?: boolean
+}
+
 export interface SuperConverterServiceInterface {
  isValidSuperString(superString: string): boolean
  convertSuperStringToOtherFormat: (
@@ -18,9 +22,7 @@ export interface SuperConverterServiceInterface {
    otherFormatString: string,
    fromFormat: 'txt' | 'md' | 'html' | 'json',
    options?: {
-      html?: {
-        addLineBreaks?: boolean
-      }
+      html?: SuperConverterHTMLOptions
    },
  ) => string
  getEmbeddedFileIDsFromSuperString(superString: string): string[]
--- a/packages/ui-services/src/Import/Converter.ts
+++ b/packages/ui-services/src/Import/Converter.ts
@@ -1,6 +1,7 @@
 import { NoteType } from '@standardnotes/features'
 import { DecryptedItemInterface, FileItem, ItemContent, NoteContent, SNNote, SNTag } from '@standardnotes/models'
 import { ConversionResult } from './ConversionResult'
+import { SuperConverterHTMLOptions } from '@standardnotes/snjs'

 export interface Converter {
  getImportType(): string
@@ -18,7 +19,7 @@ export interface Converter {
      canUploadFiles: boolean
      uploadFile: UploadFileFn
      canUseSuper: boolean
-      convertHTMLToSuper: (html: string) => string
+      convertHTMLToSuper: (html: string, options?: SuperConverterHTMLOptions) => string
      convertMarkdownToSuper: (markdown: string) => string
      readFileAsText: (file: File) => Promise<string>
      linkItems(
--- a/packages/ui-services/src/Import/EvernoteConverter/EvernoteConverter.ts
+++ b/packages/ui-services/src/Import/EvernoteConverter/EvernoteConverter.ts
@@ -75,13 +75,15 @@ export class EvernoteConverter implements Converter {
          .filter(Boolean) as EvernoteResource[]

        const contentNode = xmlNote.getElementsByTagName('content')[0]
-        const contentXmlString = this.getXmlStringFromContentElement(contentNode)
+        let contentXmlString = this.getXmlStringFromContentElement(contentNode)
        if (!contentXmlString) {
          continue
        }
-        const contentXml = this.loadXMLString(contentXmlString, 'html')
+        // Convert any en-media self-closing tags to normal closing tags
+        contentXmlString = contentXmlString.replace(/<((en-media)[^<>]+)\/>/g, '<$1></$2>')
+        const content = this.loadXMLString(contentXmlString, 'html')

-        const noteElement = contentXml.getElementsByTagName('en-note')[0] as HTMLElement
+        const noteElement = content.getElementsByTagName('en-note')[0] as HTMLElement

        const unorderedLists = Array.from(noteElement.getElementsByTagName('ul'))

@@ -92,16 +94,7 @@ export class EvernoteConverter implements Converter {
        }

        this.removeEmptyAndOrphanListElements(noteElement)
-        this.removeUnnecessaryTopLevelBreaks(noteElement)
-
-        const mediaElements = Array.from(noteElement.getElementsByTagName('en-media'))
-        const { uploadedFiles } = await this.replaceMediaElementsWithResources(
-          mediaElements,
-          resources,
-          canUploadFiles,
-          uploadFile,
-        )
-        filesToPotentiallyCleanup.push(...uploadedFiles)
+        this.unwrapTopLevelBreaks(noteElement)

        // Some notes have <font> tags that contain separate <span> tags with text
        // which causes broken paragraphs in the note.
@@ -113,13 +106,26 @@ export class EvernoteConverter implements Converter {
          fontElement.innerText = fontElement.textContent || ''
        }

+        const mediaElements = Array.from(noteElement.getElementsByTagName('en-media'))
+        const { uploadedFiles } = await this.replaceMediaElementsWithResources(
+          mediaElements,
+          resources,
+          canUploadFiles,
+          uploadFile,
+        )
+        filesToPotentiallyCleanup.push(...uploadedFiles)
+
        let contentHTML = noteElement.innerHTML
        if (!canUseSuper) {
          contentHTML = contentHTML.replace(/<\/div>/g, '</div>\n')
          contentHTML = contentHTML.replace(/<li[^>]*>/g, '\n')
          contentHTML = contentHTML.trim()
        }
-        const text = !canUseSuper ? this.stripHTML(contentHTML) : convertHTMLToSuper(contentHTML)
+        const text = !canUseSuper
+          ? this.stripHTML(contentHTML)
+          : convertHTMLToSuper(contentHTML, {
+              addLineBreaks: false,
+            })

        const createdAtDate = created ? dayjs.utc(created, dateFormat).toDate() : new Date()
        const updatedAtDate = updated ? dayjs.utc(updated, dateFormat).toDate() : createdAtDate
@@ -285,11 +291,13 @@ export class EvernoteConverter implements Converter {
    })
  }

-  removeUnnecessaryTopLevelBreaks(noteElement: HTMLElement) {
-    Array.from(noteElement.querySelectorAll('* > p > br')).forEach((br) => {
+  unwrapTopLevelBreaks(noteElement: HTMLElement) {
+    Array.from(noteElement.querySelectorAll('* > p > br, * > div > br')).forEach((br) => {
      const parent = br.parentElement!
-      if (parent.children.length === 1) {
-        parent.remove()
+      const children = Array.from(parent.children)
+      const isEveryChildBR = children.every((child) => child.tagName === 'BR')
+      if (isEveryChildBR) {
+        parent.replaceWith(children[0])
      }
    })
  }
--- a/packages/ui-services/src/Import/Importer.ts
+++ b/packages/ui-services/src/Import/Importer.ts
@@ -27,7 +27,7 @@ import { HTMLConverter } from './HTMLConverter/HTMLConverter'
 import { SuperConverter } from './SuperConverter/SuperConverter'
 import { CleanupItemsFn, Converter, InsertNoteFn, InsertTagFn, LinkItemsFn, UploadFileFn } from './Converter'
 import { ConversionResult } from './ConversionResult'
-import { FilesClientInterface, SuperConverterServiceInterface } from '@standardnotes/files'
+import { FilesClientInterface, SuperConverterHTMLOptions, SuperConverterServiceInterface } from '@standardnotes/files'
 import { ContentType } from '@standardnotes/domain-core'

 const BytesInOneMegabyte = 1_000_000
@@ -207,12 +207,14 @@ export class Importer {
    )
  }

-  convertHTMLToSuper = (html: string): string => {
+  convertHTMLToSuper = (html: string, options?: SuperConverterHTMLOptions): string => {
    if (!this.canUseSuper()) {
      return html
    }

-    return this.superConverterService.convertOtherFormatToSuperString(html, 'html')
+    return this.superConverterService.convertOtherFormatToSuperString(html, 'html', {
+      html: options,
+    })
  }

  convertMarkdownToSuper = (markdown: string): string => {