fix: Fixed issue with some Evernote imports getting cut off

This commit is contained in:
Aman Harwara
2024-04-19 16:55:45 +05:30
parent 6e282c1c9a
commit 2bc7d57235
4 changed files with 38 additions and 25 deletions

View File

@@ -1,5 +1,9 @@
import { FileItem, PrefKey, PrefValue } from '@standardnotes/models'
/**
 * Options for converting HTML input. This is the type of the `html` entry in
 * the options object accepted when converting another format to a Super string.
 */
export type SuperConverterHTMLOptions = {
// Optional flag passed through to the HTML conversion.
// NOTE(review): its exact effect is defined by the converter implementation, which is not visible here — confirm before relying on a default.
addLineBreaks?: boolean
}
export interface SuperConverterServiceInterface {
isValidSuperString(superString: string): boolean
convertSuperStringToOtherFormat: (
@@ -18,9 +22,7 @@ export interface SuperConverterServiceInterface {
otherFormatString: string,
fromFormat: 'txt' | 'md' | 'html' | 'json',
options?: {
html?: {
addLineBreaks?: boolean
}
html?: SuperConverterHTMLOptions
},
) => string
getEmbeddedFileIDsFromSuperString(superString: string): string[]

View File

@@ -1,6 +1,7 @@
import { NoteType } from '@standardnotes/features'
import { DecryptedItemInterface, FileItem, ItemContent, NoteContent, SNNote, SNTag } from '@standardnotes/models'
import { ConversionResult } from './ConversionResult'
import { SuperConverterHTMLOptions } from '@standardnotes/snjs'
export interface Converter {
getImportType(): string
@@ -18,7 +19,7 @@ export interface Converter {
canUploadFiles: boolean
uploadFile: UploadFileFn
canUseSuper: boolean
convertHTMLToSuper: (html: string) => string
convertHTMLToSuper: (html: string, options?: SuperConverterHTMLOptions) => string
convertMarkdownToSuper: (markdown: string) => string
readFileAsText: (file: File) => Promise<string>
linkItems(

View File

@@ -75,13 +75,15 @@ export class EvernoteConverter implements Converter {
.filter(Boolean) as EvernoteResource[]
const contentNode = xmlNote.getElementsByTagName('content')[0]
const contentXmlString = this.getXmlStringFromContentElement(contentNode)
let contentXmlString = this.getXmlStringFromContentElement(contentNode)
if (!contentXmlString) {
continue
}
const contentXml = this.loadXMLString(contentXmlString, 'html')
// Convert any en-media self-closing tags to normal closing tags
contentXmlString = contentXmlString.replace(/<((en-media)[^<>]+)\/>/g, '<$1></$2>')
const content = this.loadXMLString(contentXmlString, 'html')
const noteElement = contentXml.getElementsByTagName('en-note')[0] as HTMLElement
const noteElement = content.getElementsByTagName('en-note')[0] as HTMLElement
const unorderedLists = Array.from(noteElement.getElementsByTagName('ul'))
@@ -92,16 +94,7 @@ export class EvernoteConverter implements Converter {
}
this.removeEmptyAndOrphanListElements(noteElement)
this.removeUnnecessaryTopLevelBreaks(noteElement)
const mediaElements = Array.from(noteElement.getElementsByTagName('en-media'))
const { uploadedFiles } = await this.replaceMediaElementsWithResources(
mediaElements,
resources,
canUploadFiles,
uploadFile,
)
filesToPotentiallyCleanup.push(...uploadedFiles)
this.unwrapTopLevelBreaks(noteElement)
// Some notes have <font> tags that contain separate <span> tags with text
// which causes broken paragraphs in the note.
@@ -113,13 +106,26 @@ export class EvernoteConverter implements Converter {
fontElement.innerText = fontElement.textContent || ''
}
const mediaElements = Array.from(noteElement.getElementsByTagName('en-media'))
const { uploadedFiles } = await this.replaceMediaElementsWithResources(
mediaElements,
resources,
canUploadFiles,
uploadFile,
)
filesToPotentiallyCleanup.push(...uploadedFiles)
let contentHTML = noteElement.innerHTML
if (!canUseSuper) {
contentHTML = contentHTML.replace(/<\/div>/g, '</div>\n')
contentHTML = contentHTML.replace(/<li[^>]*>/g, '\n')
contentHTML = contentHTML.trim()
}
const text = !canUseSuper ? this.stripHTML(contentHTML) : convertHTMLToSuper(contentHTML)
const text = !canUseSuper
? this.stripHTML(contentHTML)
: convertHTMLToSuper(contentHTML, {
addLineBreaks: false,
})
const createdAtDate = created ? dayjs.utc(created, dateFormat).toDate() : new Date()
const updatedAtDate = updated ? dayjs.utc(updated, dateFormat).toDate() : createdAtDate
@@ -285,11 +291,13 @@ export class EvernoteConverter implements Converter {
})
}
/**
 * Replaces every <p> or <div> that holds nothing but line breaks with a single <br>.
 *
 * A wrapper is unwrapped only when all of its element children are <br> AND it
 * carries no non-whitespace text. `Element.children` lists element nodes only,
 * so checking it alone would treat `<div>text<br></div>` as "only breaks" and
 * throw the text away.
 *
 * @param noteElement Element whose descendants are scanned; it is mutated in place.
 */
unwrapTopLevelBreaks(noteElement: HTMLElement) {
  Array.from(noteElement.querySelectorAll('* > p > br, * > div > br')).forEach((br) => {
    const parent = br.parentElement
    // A wrapper with several <br> children is visited once per <br>; after the
    // first visit it has been replaced and is detached, so there is nothing left to do.
    if (!parent || !parent.parentNode) {
      return
    }
    const children = Array.from(parent.children)
    const isEveryChildBR = children.every((child) => child.tagName === 'BR')
    // Text nodes are not part of `children`, so check for real text separately.
    const hasText = (parent.textContent || '').trim().length > 0
    if (isEveryChildBR && !hasText) {
      parent.replaceWith(children[0])
    }
  })
}

View File

@@ -27,7 +27,7 @@ import { HTMLConverter } from './HTMLConverter/HTMLConverter'
import { SuperConverter } from './SuperConverter/SuperConverter'
import { CleanupItemsFn, Converter, InsertNoteFn, InsertTagFn, LinkItemsFn, UploadFileFn } from './Converter'
import { ConversionResult } from './ConversionResult'
import { FilesClientInterface, SuperConverterServiceInterface } from '@standardnotes/files'
import { FilesClientInterface, SuperConverterHTMLOptions, SuperConverterServiceInterface } from '@standardnotes/files'
import { ContentType } from '@standardnotes/domain-core'
const BytesInOneMegabyte = 1_000_000
@@ -207,12 +207,14 @@ export class Importer {
)
}
/**
 * Converts an HTML string to a Super string through the Super converter service.
 *
 * The lines below show the pre-change and post-change forms of the same
 * definition: the first signature/return pair is the old one, the second pair
 * adds the optional `options` argument and forwards it as the `html` entry of
 * the converter options.
 *
 * Returns the input HTML unchanged when `canUseSuper()` is false.
 */
convertHTMLToSuper = (html: string): string => {
convertHTMLToSuper = (html: string, options?: SuperConverterHTMLOptions): string => {
if (!this.canUseSuper()) {
// No conversion is attempted in this case; the caller gets the HTML back as-is.
return html
}
return this.superConverterService.convertOtherFormatToSuperString(html, 'html')
return this.superConverterService.convertOtherFormatToSuperString(html, 'html', {
html: options,
})
}
convertMarkdownToSuper = (markdown: string): string => {