feat: Notes from Evernote ENEX files are now correctly imported as Super notes with attachments (#2467)

This commit is contained in:
Aman Harwara
2023-08-30 00:32:54 +05:30
committed by GitHub
parent 631972ca9c
commit 600afa5382
11 changed files with 730 additions and 93 deletions

View File

@@ -4,10 +4,11 @@
import { ContentType } from '@standardnotes/domain-core'
import { DecryptedTransferPayload, NoteContent, TagContent } from '@standardnotes/models'
import { EvernoteConverter } from './EvernoteConverter'
import data from './testData'
import { EvernoteConverter, EvernoteResource } from './EvernoteConverter'
import { createTestResourceElement, enex, enexWithNoNoteOrTag } from './testData'
import { PureCryptoInterface } from '@standardnotes/sncrypto-common'
import { GenerateUuid } from '@standardnotes/services'
import { SuperConverterServiceInterface } from '@standardnotes/files'
// Mock dayjs so dayjs.extend() doesn't throw an error in EvernoteConverter.ts
jest.mock('dayjs', () => {
@@ -22,52 +23,167 @@ jest.mock('dayjs', () => {
}
})
describe('EvernoteConverter', () => {
const crypto = {
generateUUID: () => String(Math.random()),
} as unknown as PureCryptoInterface
const superConverterService: SuperConverterServiceInterface = {
isValidSuperString: () => true,
convertOtherFormatToSuperString: (data: string) => data,
convertSuperStringToOtherFormat: (data: string) => data,
}
const generateUuid = new GenerateUuid(crypto)
it('should parse and strip html', () => {
const converter = new EvernoteConverter(generateUuid)
it('should throw error if DOMParser is not available', () => {
const converter = new EvernoteConverter(superConverterService, generateUuid)
const result = converter.parseENEXData(data, true)
const originalDOMParser = window.DOMParser
// @ts-ignore
window.DOMParser = undefined
expect(() => converter.parseENEXData(enex)).toThrowError()
window.DOMParser = originalDOMParser
})
it('should throw error if no note or tag in enex', () => {
const converter = new EvernoteConverter(superConverterService, generateUuid)
expect(() => converter.parseENEXData(enexWithNoNoteOrTag)).toThrowError()
})
it('should parse and strip html', () => {
const converter = new EvernoteConverter(superConverterService, generateUuid)
const result = converter.parseENEXData(enex, false)
expect(result).not.toBeNull()
expect(result?.length).toBe(3)
expect(result?.[0].content_type).toBe(ContentType.TYPES.Note)
expect((result?.[0] as DecryptedTransferPayload<NoteContent>).content.text).toBe('This is a test.')
expect((result?.[0] as DecryptedTransferPayload<NoteContent>).content.text).toBe('This is a test.\nh e ')
expect(result?.[1].content_type).toBe(ContentType.TYPES.Note)
expect((result?.[1] as DecryptedTransferPayload<NoteContent>).content.text).toBe(
'Lorem ipsum dolor sit amet, consectetur adipiscing elit.',
)
expect(result?.[2].content_type).toBe(ContentType.TYPES.Tag)
expect((result?.[2] as DecryptedTransferPayload<TagContent>).content.title).toBe('evernote')
expect((result?.[2] as DecryptedTransferPayload<TagContent>).content.title).toBe('distant reading')
expect((result?.[2] as DecryptedTransferPayload<TagContent>).content.references.length).toBe(2)
expect((result?.[2] as DecryptedTransferPayload<TagContent>).content.references[0].uuid).toBe(result?.[0].uuid)
expect((result?.[2] as DecryptedTransferPayload<TagContent>).content.references[1].uuid).toBe(result?.[1].uuid)
})
it('should parse and not strip html', () => {
const converter = new EvernoteConverter(generateUuid)
const converter = new EvernoteConverter(superConverterService, generateUuid)
const result = converter.parseENEXData(data, false)
const result = converter.parseENEXData(enex, true)
expect(result).not.toBeNull()
expect(result?.length).toBe(3)
expect(result?.[0].content_type).toBe(ContentType.TYPES.Note)
expect((result?.[0] as DecryptedTransferPayload<NoteContent>).content.text).toBe('<div>This is a test.</div>')
expect((result?.[0] as DecryptedTransferPayload<NoteContent>).content.text).toBe(
'<div>This is a test.</div><font><span>h </span><span>e </span></font>',
)
expect(result?.[1].content_type).toBe(ContentType.TYPES.Note)
expect((result?.[1] as DecryptedTransferPayload<NoteContent>).content.text).toBe(
'<div>Lorem ipsum dolor sit amet, consectetur adipiscing elit.</div>',
)
expect(result?.[2].content_type).toBe(ContentType.TYPES.Tag)
expect((result?.[2] as DecryptedTransferPayload<TagContent>).content.title).toBe('evernote')
expect((result?.[2] as DecryptedTransferPayload<TagContent>).content.title).toBe('distant reading')
expect((result?.[2] as DecryptedTransferPayload<TagContent>).content.references.length).toBe(2)
expect((result?.[2] as DecryptedTransferPayload<TagContent>).content.references[0].uuid).toBe(result?.[0].uuid)
expect((result?.[2] as DecryptedTransferPayload<TagContent>).content.references[1].uuid).toBe(result?.[1].uuid)
})
it('should convert lists to super format if applicable', () => {
const unorderedList1 = document.createElement('ul')
unorderedList1.style.setProperty('--en-todo', 'true')
const listItem1 = document.createElement('li')
listItem1.style.setProperty('--en-checked', 'true')
const listItem2 = document.createElement('li')
listItem2.style.setProperty('--en-checked', 'false')
unorderedList1.appendChild(listItem1)
unorderedList1.appendChild(listItem2)
const unorderedList2 = document.createElement('ul')
const array = [unorderedList1, unorderedList2]
const converter = new EvernoteConverter(superConverterService, generateUuid)
converter.convertListsToSuperFormatIfApplicable(array)
expect(unorderedList1.getAttribute('__lexicallisttype')).toBe('check')
expect(listItem1.getAttribute('aria-checked')).toBe('true')
expect(listItem2.getAttribute('aria-checked')).toBe('false')
expect(unorderedList2.getAttribute('__lexicallisttype')).toBeFalsy()
})
it('should replace media elements with resources', () => {
const resources: EvernoteResource[] = [
{
hash: 'hash1',
mimeType: 'image/png',
data: 'data1',
fileName: 'file1',
},
]
const parentElement = document.createElement('div')
const mediaElement1 = document.createElement('en-media')
mediaElement1.setAttribute('hash', 'hash1')
const mediaElement2 = document.createElement('en-media')
mediaElement2.setAttribute('hash', 'hash2')
const mediaElement3 = document.createElement('en-media')
mediaElement3.setAttribute('hash', 'hash1')
parentElement.appendChild(mediaElement1)
parentElement.appendChild(mediaElement2)
const array = [mediaElement1, mediaElement2, mediaElement3]
const converter = new EvernoteConverter(superConverterService, generateUuid)
const replacedCount = converter.replaceMediaElementsWithResources(array, resources)
expect(replacedCount).toBe(1)
})
describe('getResourceFromElement', () => {
const converter = new EvernoteConverter(superConverterService, generateUuid)
it('should return undefined if no mime type is present', () => {
const resourceElementWithoutMimeType = createTestResourceElement(false)
converter.getMD5HashFromBase64 = jest.fn().mockReturnValue('hash')
expect(converter.getResourceFromElement(resourceElementWithoutMimeType)).toBeUndefined()
})
it('should generate md5 hash from base64 data if no source url is present', () => {
const resourceElementWithoutSourceUrl = createTestResourceElement(true, false)
converter.getMD5HashFromBase64 = jest.fn().mockReturnValue('hash')
const resourceWithoutSourceUrl = converter.getResourceFromElement(resourceElementWithoutSourceUrl)
expect(resourceWithoutSourceUrl).toBeDefined()
expect(converter.getMD5HashFromBase64).toHaveBeenCalled()
})
it('should not generate md5 hash from base64 data if source url is present', () => {
const resourceElementWithSourceUrl = createTestResourceElement(true, true)
converter.getMD5HashFromBase64 = jest.fn().mockReturnValue('hash')
const resourceWithSourceUrl = converter.getResourceFromElement(resourceElementWithSourceUrl)
expect(resourceWithSourceUrl).toBeDefined()
expect(converter.getMD5HashFromBase64).not.toHaveBeenCalled()
})
it('should return undefined if no data is present', () => {
const resourceElementWithoutData = createTestResourceElement(true, false, true, false)
converter.getMD5HashFromBase64 = jest.fn().mockReturnValue('hash')
const resourceWithoutData = converter.getResourceFromElement(resourceElementWithoutData)
expect(resourceWithoutData).toBeUndefined()
})
it('should return undefined if no source url and encoding is not base64', () => {
const resourceElementWithoutSourceOrBase64 = createTestResourceElement(true, false, true, true, 'hex')
converter.getMD5HashFromBase64 = jest.fn().mockReturnValue('hash')
const resourceWithoutSourceOrBase64 = converter.getResourceFromElement(resourceElementWithoutSourceOrBase64)
expect(resourceWithoutSourceOrBase64).toBeUndefined()
})
})
})

View File

@@ -5,46 +5,41 @@ import customParseFormat from 'dayjs/plugin/customParseFormat'
import utc from 'dayjs/plugin/utc'
import { ContentType } from '@standardnotes/domain-core'
import { GenerateUuid } from '@standardnotes/services'
import { SuperConverterServiceInterface } from '@standardnotes/files'
import { NativeFeatureIdentifier, NoteType } from '@standardnotes/features'
import MD5 from 'crypto-js/md5'
import Base64 from 'crypto-js/enc-base64'
dayjs.extend(customParseFormat)
dayjs.extend(utc)
const dateFormat = 'YYYYMMDDTHHmmss'
export class EvernoteConverter {
constructor(private _generateUuid: GenerateUuid) {}
/**
 * An attachment extracted from a `<resource>` element of an ENEX note.
 */
export type EvernoteResource = {
/** Identifier compared against the `hash` attribute of `<en-media>` elements to find the matching attachment. */
hash: string
/** Data URI assembled from the resource's mime type, its `encoding` attribute and its newline-stripped content. */
data: string
/** Text of the resource's `<file-name>` element, or a generated UUID when that is absent or empty. */
fileName: string
/** Text of the resource's `<mime>` element. */
mimeType: string
}
async convertENEXFileToNotesAndTags(file: File, stripHTML: boolean): Promise<DecryptedTransferPayload[]> {
export class EvernoteConverter {
constructor(
private superConverterService: SuperConverterServiceInterface,
private _generateUuid: GenerateUuid,
) {}
async convertENEXFileToNotesAndTags(file: File, isEntitledToSuper: boolean): Promise<DecryptedTransferPayload[]> {
const content = await readFileAsText(file)
const notesAndTags = this.parseENEXData(content, stripHTML)
const notesAndTags = this.parseENEXData(content, isEntitledToSuper)
return notesAndTags
}
parseENEXData(data: string, stripHTML = false, defaultTagName = 'evernote') {
parseENEXData(data: string, isEntitledToSuper = false) {
const xmlDoc = this.loadXMLString(data, 'xml')
const xmlNotes = xmlDoc.getElementsByTagName('note')
const notes: DecryptedTransferPayload<NoteContent>[] = []
const tags: DecryptedTransferPayload<TagContent>[] = []
let defaultTag: DecryptedTransferPayload<TagContent> | undefined
if (defaultTagName) {
const now = new Date()
defaultTag = {
created_at: now,
created_at_timestamp: now.getTime(),
updated_at: now,
updated_at_timestamp: now.getTime(),
uuid: this._generateUuid.execute().getValue(),
content_type: ContentType.TYPES.Tag,
content: {
title: defaultTagName,
expanded: false,
iconString: '',
references: [],
},
}
}
function findTag(title: string | null) {
return tags.filter(function (tag) {
@@ -58,31 +53,70 @@ export class EvernoteConverter {
for (const [index, xmlNote] of Array.from(xmlNotes).entries()) {
const title = xmlNote.getElementsByTagName('title')[0].textContent
const created = xmlNote.getElementsByTagName('created')[0].textContent
const created = xmlNote.getElementsByTagName('created')[0]?.textContent
const updatedNodes = xmlNote.getElementsByTagName('updated')
const updated = updatedNodes.length ? updatedNodes[0].textContent : null
const resources = Array.from(xmlNote.getElementsByTagName('resource'))
.map(this.getResourceFromElement)
.filter(Boolean) as EvernoteResource[]
const contentNode = xmlNote.getElementsByTagName('content')[0]
let contentXmlString
/** Find the node with the content */
for (const node of Array.from(contentNode.childNodes)) {
if (node instanceof CDATASection) {
contentXmlString = node.nodeValue
break
}
}
const contentXmlString = this.getXmlStringFromContentElement(contentNode)
if (!contentXmlString) {
continue
}
const contentXml = this.loadXMLString(contentXmlString, 'html')
let contentHTML = contentXml.getElementsByTagName('en-note')[0].innerHTML
if (stripHTML) {
const noteElement = contentXml.getElementsByTagName('en-note')[0]
const unorderedLists = Array.from(noteElement.getElementsByTagName('ul'))
if (isEntitledToSuper) {
this.convertListsToSuperFormatIfApplicable(unorderedLists)
}
// Remove empty lists and orphan list items
Array.from(noteElement.getElementsByTagName('ul')).forEach((ul) => {
if (ul.children.length === 0) {
ul.remove()
}
})
Array.from(noteElement.getElementsByTagName('ol')).forEach((ol) => {
if (ol.children.length === 0) {
ol.remove()
}
})
Array.from(noteElement.getElementsByTagName('li')).forEach((li) => {
if (li.children.length === 0 || li.closest('ul, ol') === null) {
li.remove()
}
})
const mediaElements = Array.from(noteElement.getElementsByTagName('en-media'))
this.replaceMediaElementsWithResources(mediaElements, resources)
// Some notes have <font> tags that contain separate <span> tags with text
// which causes broken paragraphs in the note.
const fontElements = Array.from(noteElement.getElementsByTagName('font'))
for (const fontElement of fontElements) {
fontElement.childNodes.forEach((childNode) => {
childNode.textContent += ' '
})
fontElement.innerText = fontElement.textContent || ''
}
let contentHTML = noteElement.innerHTML
if (!isEntitledToSuper) {
contentHTML = contentHTML.replace(/<\/div>/g, '</div>\n')
contentHTML = contentHTML.replace(/<li[^>]*>/g, '\n')
contentHTML = contentHTML.trim()
}
const text = stripHTML ? this.stripHTML(contentHTML) : contentHTML
const text = !isEntitledToSuper
? this.stripHTML(contentHTML)
: this.superConverterService.convertOtherFormatToSuperString(contentHTML, 'html')
const createdAtDate = created ? dayjs.utc(created, dateFormat).toDate() : new Date()
const updatedAtDate = updated ? dayjs.utc(updated, dateFormat).toDate() : createdAtDate
const note: DecryptedTransferPayload<NoteContent> = {
created_at: createdAtDate,
created_at_timestamp: createdAtDate.getTime(),
@@ -94,16 +128,15 @@ export class EvernoteConverter {
title: !title ? `Imported note ${index + 1} from Evernote` : title,
text,
references: [],
...(isEntitledToSuper
? {
noteType: NoteType.Super,
editorIdentifier: NativeFeatureIdentifier.TYPES.SuperEditor,
}
: {}),
},
}
if (defaultTag) {
defaultTag.content.references.push({
content_type: ContentType.TYPES.Note,
uuid: note.uuid,
})
}
const xmlTags = xmlNote.getElementsByTagName('tag')
for (const tagXml of Array.from(xmlTags)) {
const tagName = tagXml.childNodes[0].nodeValue
@@ -138,13 +171,125 @@ export class EvernoteConverter {
if (allItems.length === 0) {
throw new Error('Could not parse any notes or tags from Evernote file.')
}
if (defaultTag) {
allItems.push(defaultTag)
}
return allItems
}
/**
 * Returns the value of the first CDATA section found among the direct
 * children of `contentElement`.
 *
 * @param contentElement Element whose child nodes are searched.
 * @returns The CDATA node's `nodeValue`, or `undefined` when no direct child
 * is a CDATA section.
 */
getXmlStringFromContentElement(contentElement: Element) {
  const firstCdataChild = Array.from(contentElement.childNodes).find((child) => child instanceof CDATASection)
  return firstCdataChild?.nodeValue
}
/**
 * Computes the MD5 digest of base64-encoded data.
 *
 * @param b64Data Base64 text; it is decoded with crypto-js before hashing.
 * @returns The digest converted to a string via `toString()`.
 */
getMD5HashFromBase64(b64Data: string) {
  return MD5(Base64.parse(b64Data)).toString()
}
/**
 * Builds an {@link EvernoteResource} from a `<resource>` element.
 *
 * Declared as an arrow-function property so it can be passed directly as a
 * callback (e.g. to `Array.prototype.map`) without losing `this`.
 *
 * @param element The `<resource>` element to read.
 * @returns The resource, or `undefined` when the element has no mime type,
 * no `<data>` element, no data content, or when no hash can be determined
 * for it.
 */
getResourceFromElement = (element: Element): EvernoteResource | undefined => {
  const mimeType = element.getElementsByTagName('mime')[0]?.textContent
  if (!mimeType) {
    return
  }

  // `<resource-attributes>` may be missing entirely. In that case there is no
  // source url and the file name falls back to a generated UUID, instead of
  // throwing on an undefined element.
  const attributes = element.getElementsByTagName('resource-attributes')[0]
  const sourceUrl = attributes?.getElementsByTagName('source-url')[0]?.textContent
  const fileName =
    attributes?.getElementsByTagName('file-name')[0]?.textContent || this._generateUuid.execute().getValue()

  // A resource without a `<data>` element carries nothing to import.
  const dataElement = element.getElementsByTagName('data')[0]
  if (!dataElement) {
    return
  }

  const encoding = dataElement.getAttribute('encoding')
  const dataContentWithoutNewLines = dataElement.textContent?.replace(/\n/g, '')
  if (!dataContentWithoutNewLines) {
    return
  }

  const data = 'data:' + mimeType + ';' + encoding + ',' + dataContentWithoutNewLines

  let hash: string | undefined
  if (sourceUrl && sourceUrl.startsWith('en-cache')) {
    // The hash is taken from the second-to-last '+'-separated segment of an
    // `en-cache` source url.
    const splitSourceUrl = sourceUrl.split('+')
    hash = splitSourceUrl[splitSourceUrl.length - 2]
  } else if (encoding === 'base64') {
    hash = this.getMD5HashFromBase64(dataContentWithoutNewLines)
  }

  // Without a hash the resource cannot be matched to any `<en-media>` element.
  if (!hash) {
    return
  }

  return {
    hash,
    data,
    fileName,
    mimeType,
  }
}
/**
 * Marks Evernote to-do lists so they can be recognised as check lists.
 *
 * A list whose `--en-todo` style property equals `'true'` receives
 * `__lexicallisttype="check"`, and every `<li>` inside it receives an
 * `aria-checked` attribute copied from its own `--en-checked` style property.
 * All other lists are left untouched.
 *
 * @param unorderedLists The `<ul>` elements to inspect; they are mutated in place.
 */
convertListsToSuperFormatIfApplicable(unorderedLists: HTMLUListElement[]) {
  unorderedLists.forEach((list) => {
    const isTodoList = list.style.getPropertyValue('--en-todo') === 'true'
    if (!isTodoList) {
      return
    }

    list.setAttribute('__lexicallisttype', 'check')

    Array.from(list.getElementsByTagName('li')).forEach((item) => {
      const checkedState = item.style.getPropertyValue('--en-checked')
      item.setAttribute('aria-checked', checkedState)
    })
  })
}
/**
 * Swaps `<en-media>` placeholders for elements that embed the matching resource.
 *
 * Images become `<img>`, audio and video become `<audio>`/`<video>` with a
 * nested `<source>`, and every other mime type becomes `<object>`. Each
 * replacement carries the resource's file name in `data-filename`.
 *
 * @param mediaElements Placeholder elements; each is matched to a resource by its `hash` attribute.
 * @param resources Resources available for the note.
 * @returns The number of placeholders actually replaced. Placeholders with no
 * matching resource or no parent node are skipped and not counted.
 */
replaceMediaElementsWithResources(mediaElements: Element[], resources: EvernoteResource[]): number {
  // Audio and video share the same shape: a player with controls wrapping one <source>.
  const buildPlayer = (tagName: 'audio' | 'video', resource: EvernoteResource): HTMLElement => {
    const player = document.createElement(tagName)
    player.setAttribute('controls', 'controls')
    const source = document.createElement('source')
    source.setAttribute('src', resource.data)
    source.setAttribute('type', resource.mimeType)
    player.appendChild(source)
    return player
  }

  const buildReplacement = (resource: EvernoteResource): HTMLElement => {
    let replacement: HTMLElement
    if (resource.mimeType.startsWith('image/')) {
      replacement = document.createElement('img')
      replacement.setAttribute('src', resource.data)
      replacement.setAttribute('data-mime-type', resource.mimeType)
    } else if (resource.mimeType.startsWith('audio/')) {
      replacement = buildPlayer('audio', resource)
    } else if (resource.mimeType.startsWith('video/')) {
      replacement = buildPlayer('video', resource)
    } else {
      // Anything else is embedded generically.
      replacement = document.createElement('object')
      replacement.setAttribute('type', resource.mimeType)
      replacement.setAttribute('data', resource.data)
    }
    replacement.setAttribute('data-filename', resource.fileName)
    return replacement
  }

  let replacedCount = 0

  mediaElements.forEach((placeholder) => {
    const wantedHash = placeholder.getAttribute('hash')
    const match = resources.find((candidate) => candidate && candidate.hash === wantedHash)
    if (!match) {
      return
    }

    const replacement = buildReplacement(match)

    const parent = placeholder.parentNode
    if (!parent) {
      return
    }

    parent.replaceChild(replacement, placeholder)
    replacedCount += 1
  })

  return replacedCount
}
loadXMLString(string: string, type: 'html' | 'xml') {
let xmlDoc
if (window.DOMParser) {

View File

@@ -1,15 +1,28 @@
export default `<?xml version="1.0" encoding="UTF-8"?>
export const enex = `<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE en-export SYSTEM "http://xml.evernote.com/pub/evernote-export3.dtd">
<en-export export-date="20210408T052957Z" application="Evernote" version="10.8.5">
<note>
<title>Testing 1</title>
<created>20210308T051614Z</created>
<updated>20210308T051855Z</updated>
<tag>distant reading</tag>
<note-attributes>
</note-attributes>
<content>
<![CDATA[<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!DOCTYPE en-note SYSTEM "http://xml.evernote.com/pub/enml2.dtd"><en-note><div>This is a test.</div></en-note> ]]>
<!DOCTYPE en-note SYSTEM "http://xml.evernote.com/pub/enml2.dtd"><en-note><div>This is a test.</div><ul></ul><li></li><ol></ol><font><span>h</span><span>e</span></font></en-note> ]]>
</content>
</note>
<note>
<title></title>
<created>20200508T234829Z</created>
<updated>20200508T235233Z</updated>
<tag>distant reading</tag>
<note-attributes>
</note-attributes>
<content>
<![CDATA[<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!DOCTYPE en-note SYSTEM "http://xml.evernote.com/pub/enml2.dtd"><en-note><div>Lorem ipsum dolor sit amet, consectetur adipiscing elit.</div></en-note> ]]>
</content>
</note>
<note>
@@ -19,8 +32,53 @@ export default `<?xml version="1.0" encoding="UTF-8"?>
<note-attributes>
</note-attributes>
<content>
<![CDATA[<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!DOCTYPE en-note SYSTEM "http://xml.evernote.com/pub/enml2.dtd"><en-note><div>Lorem ipsum dolor sit amet, consectetur adipiscing elit.</div></en-note> ]]>
</content>
</note>
</en-export>`
export const enexWithNoNoteOrTag = `<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE en-export SYSTEM "http://xml.evernote.com/pub/evernote-export3.dtd">
<en-export export-date="20210408T052957Z" application="Evernote" version="10.8.5">
</en-export>`
/**
 * Builds a `<resource>` element for tests, shaped like the resources the
 * Evernote converter reads: an optional `<mime>`, a `<resource-attributes>`
 * wrapper with optional `<source-url>` and `<file-name>`, and a `<data>`
 * element.
 *
 * @param shouldHaveMimeType Add a `<mime>` child containing `image/png`.
 * @param shouldHaveSourceUrl Add an `en-cache://` `<source-url>` to the attributes.
 * @param shouldHaveFileName Add a `<file-name>` of `image.png` to the attributes.
 * @param shouldHaveData Give the `<data>` element an `encoding` attribute and a
 * payload; when false the `<data>` element is still appended but left empty.
 * @param encoding Value of the `encoding` attribute on `<data>`.
 * @returns The assembled `<resource>` element.
 */
export function createTestResourceElement(
  shouldHaveMimeType = true,
  shouldHaveSourceUrl = false,
  shouldHaveFileName = true,
  shouldHaveData = true,
  encoding = 'base64',
): Element {
  const resourceElement = document.createElement('resource')

  if (shouldHaveMimeType) {
    const mimeTypeElement = document.createElement('mime')
    mimeTypeElement.textContent = 'image/png'
    resourceElement.appendChild(mimeTypeElement)
  }

  const attributesElement = document.createElement('resource-attributes')

  if (shouldHaveSourceUrl) {
    const sourceUrlElement = document.createElement('source-url')
    sourceUrlElement.textContent =
      'en-cache://tokenKey%3D%22AuthToken%3AUser%3A212093785%22+8596a26a-92b0-4dd8-9ded-16266ccbf3f3+8eb2fb2aeb08edb45f78512f3b8e9d35+https://www.evernote.com/shard/s609/res/e8cf9bb5-90b7-440c-a333-c2910afaa65b'
    attributesElement.appendChild(sourceUrlElement)
  }

  if (shouldHaveFileName) {
    const fileNameElement = document.createElement('file-name')
    fileNameElement.textContent = 'image.png'
    attributesElement.appendChild(fileNameElement)
  }

  resourceElement.appendChild(attributesElement)

  const dataElement = document.createElement('data')
  if (shouldHaveData) {
    dataElement.setAttribute('encoding', encoding)
    // Raw payload only: the converter prepends `data:<mime>;<encoding>,` itself,
    // so a full data URI here would yield a doubly-prefixed, non-base64 value.
    dataElement.textContent = 'SAo='
  }
  resourceElement.appendChild(dataElement)

  return resourceElement
}

View File

@@ -40,7 +40,7 @@ export class Importer {
this.googleKeepConverter = new GoogleKeepConverter(this.superConverterService, _generateUuid)
this.simplenoteConverter = new SimplenoteConverter(_generateUuid)
this.plaintextConverter = new PlaintextConverter(_generateUuid)
this.evernoteConverter = new EvernoteConverter(_generateUuid)
this.evernoteConverter = new EvernoteConverter(this.superConverterService, _generateUuid)
this.htmlConverter = new HTMLConverter(this.superConverterService, _generateUuid)
this.superConverter = new SuperConverter(this.superConverterService, _generateUuid)
}
@@ -108,7 +108,7 @@ export class Importer {
} else if (type === 'simplenote') {
return await this.simplenoteConverter.convertSimplenoteBackupFileToNotes(file)
} else if (type === 'evernote') {
return await this.evernoteConverter.convertENEXFileToNotesAndTags(file, false)
return await this.evernoteConverter.convertENEXFileToNotesAndTags(file, isEntitledToSuper)
} else if (type === 'plaintext') {
return [await this.plaintextConverter.convertPlaintextFileToNote(file)]
} else if (type === 'html') {