Merge pull request gram-js#15 from watzon/master

Add HTMLParser and more tests
brilacasck · Jan 5, 2020 · bf96832
2 parents e3d7a5e + d637087
commit bf96832
Show file tree

Hide file tree

Showing 15 changed files with 2,077 additions and 452 deletions.
diff --git a/.gitignore b/.gitignore
@@ -9,6 +9,7 @@
 /gramjs/tl/AllTLObjects.js
 /gramjs/errors/RPCErrorList.js
 /dist/
+/coverage/
 
 # User session
 *.session

diff --git a/__tests__/AES.spec.js → __tests__/crypto/AES.spec.js b/__tests__/AES.spec.js → __tests__/crypto/AES.spec.js
@@ -1,5 +1,5 @@
-const AES = require('../gramjs/crypto/AES')
-const AESModeCTR = require('../gramjs/crypto/AESCTR')
+const AES = require('../../gramjs/crypto/AES')
+const AESModeCTR = require('../../gramjs/crypto/AESCTR')
 describe('IGE encrypt function', () => {
     test('it should return 4a657a834edc2956ec95b2a42ec8c1f2d1f0a6028ac26fd830ed23855574b4e69dd1a2be2ba18a53a49b879b2' +
         '45e1065e14b6e8ac5ba9b24befaff3209b77b5f', () => {

diff --git a/__tests__/calcKey.spec.js → __tests__/crypto/calcKey.spec.js b/__tests__/calcKey.spec.js → __tests__/crypto/calcKey.spec.js
@@ -1,4 +1,4 @@
-const MTProtoState = require('../gramjs/network/MTProtoState')
+const MTProtoState = require('../../gramjs/network/MTProtoState')
 
 describe('calcKey function', () => {
     test('it should return 0x93355e3f1f50529b6fb93eaf97f29b69c16345f53621e9d45cd9a11ddfbebac9 and' +

diff --git a/__tests__/factorizator.spec.js → __tests__/crypto/factorizator.spec.js b/__tests__/factorizator.spec.js → __tests__/crypto/factorizator.spec.js
@@ -1,4 +1,4 @@
-const Factorizator = require('../gramjs/crypto/Factorizator')
+const Factorizator = require('../../gramjs/crypto/Factorizator')
 
 describe('calcKey function', () => {
     test('it should return 0x20a13b25e1726bfc', () => {

diff --git a/__tests__/readBuffer.spec.js → __tests__/crypto/readBuffer.spec.js b/__tests__/readBuffer.spec.js → __tests__/crypto/readBuffer.spec.js
@@ -1,4 +1,4 @@
-const Helpers = require('../gramjs/Helpers')
+const Helpers = require('../../gramjs/Helpers')
 
 describe('readBufferFromBigInt 8 bytes function', () => {
     test('it should return 0x20a13b25e1726bfc', () => {

diff --git a/__tests__/extensions/HTML.spec.js b/__tests__/extensions/HTML.spec.js
@@ -0,0 +1,107 @@
+const { HTMLParser } = require('../../gramjs/extensions/HTML')
+const types = require('../../gramjs/tl/types')
+
+describe('HTMLParser', () => {
+    test('it should construct a new HTMLParser', () => {
+        const parser = new HTMLParser('Hello world')
+        expect(parser.text).toEqual('')
+        expect(parser.entities).toEqual([])
+    })
+
+    describe('.parse', () => {
+        test('it should parse bold entities', () => {
+            const parser = new HTMLParser('Hello <strong>world</strong>')
+            const [text, entities] = parser.parse()
+            expect(text).toEqual('Hello world')
+            expect(entities.length).toEqual(1)
+            expect(entities[0]).toBeInstanceOf(types.MessageEntityBold)
+        })
+
+        test('it should parse italic entities', () => {
+            const parser = new HTMLParser('Hello <em>world</em>')
+            const [text, entities] = parser.parse()
+            expect(text).toEqual('Hello world')
+            expect(entities.length).toEqual(1)
+            expect(entities[0]).toBeInstanceOf(types.MessageEntityItalic)
+        })
+
+        test('it should parse code entities', () => {
+            const parser = new HTMLParser('Hello <code>world</code>')
+            const [text, entities] = parser.parse()
+            expect(text).toEqual('Hello world')
+            expect(entities.length).toEqual(1)
+            expect(entities[0]).toBeInstanceOf(types.MessageEntityCode)
+        })
+
+        test('it should parse pre entities', () => {
+            const parser = new HTMLParser('Hello <pre>world</pre>')
+            const [text, entities] = parser.parse()
+            expect(text).toEqual('Hello world')
+            expect(entities.length).toEqual(1)
+            expect(entities[0]).toBeInstanceOf(types.MessageEntityPre)
+        })
+
+        test('it should parse strike entities', () => {
+            const parser = new HTMLParser('Hello <del>world</del>')
+            const [text, entities] = parser.parse()
+            expect(text).toEqual('Hello world')
+            expect(entities.length).toEqual(1)
+            expect(entities[0]).toBeInstanceOf(types.MessageEntityStrike)
+        })
+
+        test('it should parse link entities', () => {
+            const parser = new HTMLParser('Hello <a href="https://hello.world">world</a>')
+            const [text, entities] = parser.parse()
+            expect(text).toEqual('Hello world')
+            expect(entities.length).toEqual(1)
+            expect(entities[0]).toBeInstanceOf(types.MessageEntityTextUrl)
+            expect(entities[0].url).toEqual('https://hello.world')
+        })
+
+        test('it should parse nested entities', () => {
+            const parser = new HTMLParser('Hello <strong><em>world</em></strong>')
+            const [text, entities] = parser.parse()
+            expect(text).toEqual('Hello world')
+            expect(entities.length).toEqual(2)
+            expect(entities[0]).toBeInstanceOf(types.MessageEntityItalic)
+            expect(entities[1]).toBeInstanceOf(types.MessageEntityBold)
+        })
+
+        test('it should parse multiple entities', () => {
+            const parser = new HTMLParser('<em>Hello</em> <strong>world</strong>')
+            const [text, entities] = parser.parse()
+            expect(text).toEqual('Hello world')
+            expect(entities.length).toEqual(2)
+            expect(entities[0]).toBeInstanceOf(types.MessageEntityItalic)
+            expect(entities[1]).toBeInstanceOf(types.MessageEntityBold)
+        })
+    })
+
+    describe('.unparse', () => {
+        test('it should create an HTML string from raw text and entities', () => {
+            const unparsed = '<strong>hello</strong> <em>hello</em> <del>hello</del> <code>hello</code> <pre>hello</pre> <a href="https://hello.world">hello</a>'
+            const strippedText = 'hello hello hello hello hello hello'
+            const rawEntities = [
+                new types.MessageEntityBold({ offset: 0, length: 5 }),
+                new types.MessageEntityItalic({ offset: 6, length: 5 }),
+                new types.MessageEntityStrike({ offset: 12, length: 5 }),
+                new types.MessageEntityCode({ offset: 18, length: 5 }),
+                new types.MessageEntityPre({ offset: 24, length: 5 }),
+                new types.MessageEntityTextUrl({ offset: 30, length: 5, url: 'https://hello.world' }),
+            ]
+            const text = HTMLParser.unparse(strippedText, rawEntities)
+            expect(text).toEqual(unparsed)
+        })
+
+        test('it should unparse nested entities', () => {
+            const unparsed = '<strong><em>Hello world</em></strong>'
+            const strippedText = 'Hello world'
+            const rawEntities = [
+                new types.MessageEntityBold({ offset: 0, length: 11 }),
+                new types.MessageEntityItalic({ offset: 0, length: 11 }),
+            ]
+            const text = HTMLParser.unparse(strippedText, rawEntities)
+            expect(text).toEqual(unparsed)
+        })
+    })
+})
diff --git a/__tests__/extensions/Markdown.spec.js b/__tests__/extensions/Markdown.spec.js
@@ -0,0 +1,95 @@
+const { MarkdownParser } = require('../../gramjs/extensions/Markdown')
+const types = require('../../gramjs/tl/types')
+
+describe('MarkdownParser', () => {
+    test('it should construct a new MarkdownParser', () => {
+        const parser = new MarkdownParser('Hello world')
+        expect(parser.text).toEqual('')
+        expect(parser.entities).toEqual([])
+    })
+
+    describe('.parse', () => {
+        test('it should parse bold entities', () => {
+            const parser = new MarkdownParser('Hello **world**')
+            const [text, entities] = parser.parse()
+            expect(text).toEqual('Hello world')
+            expect(entities.length).toEqual(1)
+            expect(entities[0]).toBeInstanceOf(types.MessageEntityBold)
+        })
+
+        test('it should parse italic entities', () => {
+            const parser = new MarkdownParser('Hello __world__')
+            const [text, entities] = parser.parse()
+            expect(text).toEqual('Hello world')
+            expect(entities.length).toEqual(1)
+            expect(entities[0]).toBeInstanceOf(types.MessageEntityItalic)
+        })
+
+        test('it should parse code entities', () => {
+            const parser = new MarkdownParser('Hello `world`')
+            const [text, entities] = parser.parse()
+            expect(text).toEqual('Hello world')
+            expect(entities.length).toEqual(1)
+            expect(entities[0]).toBeInstanceOf(types.MessageEntityCode)
+        })
+
+        test('it should parse pre entities', () => {
+            const parser = new MarkdownParser('Hello ```world```')
+            const [text, entities] = parser.parse()
+            expect(text).toEqual('Hello world')
+            expect(entities.length).toEqual(1)
+            expect(entities[0]).toBeInstanceOf(types.MessageEntityPre)
+        })
+
+        test('it should parse strike entities', () => {
+            const parser = new MarkdownParser('Hello ~~world~~')
+            const [text, entities] = parser.parse()
+            expect(text).toEqual('Hello world')
+            expect(entities.length).toEqual(1)
+            expect(entities[0]).toBeInstanceOf(types.MessageEntityStrike)
+        })
+
+        test('it should parse link entities', () => {
+            const parser = new MarkdownParser('Hello [world](https://hello.world)')
+            const [text, entities] = parser.parse()
+            expect(text).toEqual('Hello world')
+            expect(entities.length).toEqual(1)
+            expect(entities[0]).toBeInstanceOf(types.MessageEntityTextUrl)
+            expect(entities[0].url).toEqual('https://hello.world')
+        })
+
+        test('it should not parse nested entities', () => {
+            const parser = new MarkdownParser('Hello **__world__**')
+            const [text, entities] = parser.parse()
+            expect(text).toEqual('Hello __world__')
+            expect(entities.length).toEqual(1)
+            expect(entities[0]).toBeInstanceOf(types.MessageEntityBold)
+        })
+
+        test('it should parse multiple entities', () => {
+            const parser = new MarkdownParser('__Hello__ **world**')
+            const [text, entities] = parser.parse()
+            expect(text).toEqual('Hello world')
+            expect(entities.length).toEqual(2)
+            expect(entities[0]).toBeInstanceOf(types.MessageEntityItalic)
+            expect(entities[1]).toBeInstanceOf(types.MessageEntityBold)
+        })
+    })
+
+    describe('.unparse', () => {
+        test('it should create a markdown string from raw text and entities', () => {
+            const unparsed = '**hello** __hello__ ~~hello~~ `hello` ```hello``` [hello](https://hello.world)'
+            const strippedText = 'hello hello hello hello hello hello'
+            const rawEntities = [
+                new types.MessageEntityBold({ offset: 0, length: 5 }),
+                new types.MessageEntityItalic({ offset: 6, length: 5 }),
+                new types.MessageEntityStrike({ offset: 12, length: 5 }),
+                new types.MessageEntityCode({ offset: 18, length: 5 }),
+                new types.MessageEntityPre({ offset: 24, length: 5 }),
+                new types.MessageEntityTextUrl({ offset: 30, length: 5, url: 'https://hello.world' }),
+            ]
+            const text = MarkdownParser.unparse(strippedText, rawEntities)
+            expect(text).toEqual(unparsed)
+        })
+    })
+})
diff --git a/__tests__/extensions/Scanner.spec.js b/__tests__/extensions/Scanner.spec.js
@@ -0,0 +1,100 @@
+const Scanner = require('../../gramjs/extensions/Scanner')
+
+const helloScanner = new Scanner('Hello world')
+
+describe('Scanner', () => {
+    beforeEach(() => helloScanner.reset())
+
+    test('it should construct a new Scanner', () => {
+        expect(helloScanner.str).toEqual('Hello world')
+        expect(helloScanner.pos).toEqual(0)
+        expect(helloScanner.lastMatch).toBeNull()
+    })
+
+    describe('.chr', () => {
+        test('it should return the character at the current pos', () => {
+            expect(helloScanner.chr).toEqual('H')
+        })
+    })
+
+    describe('.peek', () => {
+        test('it should return the character at the current pos', () => {
+            expect(helloScanner.peek()).toEqual('H')
+        })
+
+        test('it should return the next n characters', () => {
+            expect(helloScanner.peek(3)).toEqual('Hel')
+            expect(helloScanner.peek(5)).toEqual('Hello')
+        })
+    })
+
+    describe('.consume', () => {
+        test('it should consume the current character', () => {
+            const char = helloScanner.consume()
+            expect(char).toEqual('H')
+            expect(helloScanner.pos).toEqual(1)
+        })
+
+        test('it should consume the next n characters', () => {
+            const chars = helloScanner.consume(5)
+            expect(chars).toEqual('Hello')
+            expect(helloScanner.pos).toEqual(5)
+        })
+    })
+
+    describe('.reverse', () => {
+        test('it should set pos back n characters', () => {
+            helloScanner.consume(5)
+            helloScanner.reverse(5)
+            expect(helloScanner.pos).toEqual(0)
+        })
+
+        test('it should not go back further than 0', () => {
+            helloScanner.reverse(10)
+            expect(helloScanner.pos).toEqual(0)
+        })
+    })
+
+    describe('.scanUntil', () => {
+        test('it should scan the string for a regular expression starting at the current pos', () => {
+            helloScanner.scanUntil(/w/)
+            expect(helloScanner.pos).toEqual(6)
+        })
+
+        test('it should do nothing if the pattern is not found', () => {
+            helloScanner.scanUntil(/G/)
+            expect(helloScanner.pos).toEqual(0)
+        })
+    })
+
+    describe('.rest', () => {
+        test('it should return the unconsumed input', () => {
+            helloScanner.consume(6)
+            expect(helloScanner.rest).toEqual('world')
+        })
+    })
+
+    describe('.reset', () => {
+        test('it should reset the pos to 0', () => {
+            helloScanner.consume(5)
+            helloScanner.reset()
+            expect(helloScanner.pos).toEqual(0)
+        })
+    })
+
+    describe('.eof', () => {
+        test('it should return true if the scanner has reached the end of the input', () => {
+            expect(helloScanner.eof()).toBe(false)
+            helloScanner.consume(11)
+            expect(helloScanner.eof()).toBe(true)
+        })
+    })
+
+    describe('.bof', () => {
+        test('it should return true if pos is 0', () => {
+            expect(helloScanner.bof()).toBe(true)
+            helloScanner.consume(11)
+            expect(helloScanner.bof()).toBe(false)
+        })
+    })
+})
diff --git a/gramjs/Utils.js b/gramjs/Utils.js
@@ -1,7 +1,7 @@
 const path = require('path')
 const mime = require('mime-types')
 const struct = require('python-struct')
-const { MarkdownParser, HTMLParser } = require('./extensions')
+const { markdown, html } = require('./extensions')
 const { types } = require('./tl')
 
 const USERNAME_RE = new RegExp('@|(?:https?:\\/\\/)?(?:www\\.)?' +
@@ -892,10 +892,10 @@ function sanitizeParseMode(mode) {
         switch (mode.toLowerCase()) {
         case 'md':
         case 'markdown':
-            return MarkdownParser
+            return markdown
         case 'htm':
         case 'html':
-            return HTMLParser
+            return html
         default:
             throw new Error(`Unknown parse mode ${mode}`)
         }