Skip to content

Commit

Permalink
Merge pull request gram-js#15 from watzon/master
Browse files Browse the repository at this point in the history
Add HTMLParser and more tests
  • Loading branch information
painor authored Jan 5, 2020
2 parents e3d7a5e + d637087 commit bf96832
Show file tree
Hide file tree
Showing 15 changed files with 2,077 additions and 452 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
/gramjs/tl/AllTLObjects.js
/gramjs/errors/RPCErrorList.js
/dist/
/coverage/

# User session
*.session
Expand Down
4 changes: 2 additions & 2 deletions __tests__/AES.spec.js → __tests__/crypto/AES.spec.js
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
const AES = require('../gramjs/crypto/AES')
const AESModeCTR = require('../gramjs/crypto/AESCTR')
const AES = require('../../gramjs/crypto/AES')
const AESModeCTR = require('../../gramjs/crypto/AESCTR')
describe('IGE encrypt function', () => {
test('it should return 4a657a834edc2956ec95b2a42ec8c1f2d1f0a6028ac26fd830ed23855574b4e69dd1a2be2ba18a53a49b879b2' +
'45e1065e14b6e8ac5ba9b24befaff3209b77b5f', () => {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
const MTProtoState = require('../gramjs/network/MTProtoState')
const MTProtoState = require('../../gramjs/network/MTProtoState')

describe('calcKey function', () => {
test('it should return 0x93355e3f1f50529b6fb93eaf97f29b69c16345f53621e9d45cd9a11ddfbebac9 and' +
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
const Factorizator = require('../gramjs/crypto/Factorizator')
const Factorizator = require('../../gramjs/crypto/Factorizator')

describe('calcKey function', () => {
test('it should return 0x20a13b25e1726bfc', () => {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
const Helpers = require('../gramjs/Helpers')
const Helpers = require('../../gramjs/Helpers')

describe('readBufferFromBigInt 8 bytes function', () => {
test('it should return 0x20a13b25e1726bfc', () => {
Expand Down
107 changes: 107 additions & 0 deletions __tests__/extensions/HTML.spec.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
const { HTMLParser } = require('../../gramjs/extensions/HTML')
const types = require('../../gramjs/tl/types')

/**
 * Specs for HTMLParser.
 *
 * `.parse` cases build a parser from an HTML snippet and check the
 * `[text, entities]` pair returned by `parse()`: the text with the tags
 * removed and one message entity per tag.
 * `.unparse` cases call the static `HTMLParser.unparse(text, entities)` and
 * check the HTML string it produces.
 */
describe('HTMLParser', () => {
    test('it should construct a new HTMLParser', () => {
        // A freshly constructed parser has not produced any output yet.
        const parser = new HTMLParser('Hello world')
        expect(parser.text).toEqual('')
        expect(parser.entities).toEqual([])
    })

    describe('.parse', () => {
        test('it should parse bold entities', () => {
            const parser = new HTMLParser('Hello <strong>world</strong>')
            const [text, entities] = parser.parse()
            expect(text).toEqual('Hello world')
            expect(entities.length).toEqual(1)
            expect(entities[0]).toBeInstanceOf(types.MessageEntityBold)
        })

        test('it should parse italic entities', () => {
            const parser = new HTMLParser('Hello <em>world</em>')
            const [text, entities] = parser.parse()
            expect(text).toEqual('Hello world')
            expect(entities.length).toEqual(1)
            expect(entities[0]).toBeInstanceOf(types.MessageEntityItalic)
        })

        test('it should parse code entities', () => {
            const parser = new HTMLParser('Hello <code>world</code>')
            const [text, entities] = parser.parse()
            expect(text).toEqual('Hello world')
            expect(entities.length).toEqual(1)
            expect(entities[0]).toBeInstanceOf(types.MessageEntityCode)
        })

        test('it should parse pre entities', () => {
            const parser = new HTMLParser('Hello <pre>world</pre>')
            const [text, entities] = parser.parse()
            expect(text).toEqual('Hello world')
            expect(entities.length).toEqual(1)
            expect(entities[0]).toBeInstanceOf(types.MessageEntityPre)
        })

        test('it should parse strike entities', () => {
            const parser = new HTMLParser('Hello <del>world</del>')
            const [text, entities] = parser.parse()
            expect(text).toEqual('Hello world')
            expect(entities.length).toEqual(1)
            expect(entities[0]).toBeInstanceOf(types.MessageEntityStrike)
        })

        test('it should parse link entities', () => {
            const parser = new HTMLParser('Hello <a href="https://hello.world">world</a>')
            const [text, entities] = parser.parse()
            expect(text).toEqual('Hello world')
            expect(entities.length).toEqual(1)
            expect(entities[0]).toBeInstanceOf(types.MessageEntityTextUrl)
            // The href attribute must be carried over onto the entity.
            expect(entities[0].url).toEqual('https://hello.world')
        })

        test('it should parse nested entities', () => {
            const parser = new HTMLParser('Hello <strong><em>world</em></strong>')
            const [text, entities] = parser.parse()
            expect(text).toEqual('Hello world')
            expect(entities.length).toEqual(2)
            // The inner (<em>) entity is expected before the outer (<strong>) one.
            expect(entities[0]).toBeInstanceOf(types.MessageEntityItalic)
            expect(entities[1]).toBeInstanceOf(types.MessageEntityBold)
        })

        test('it should parse multiple entities', () => {
            const parser = new HTMLParser('<em>Hello</em> <strong>world</strong>')
            const [text, entities] = parser.parse()
            expect(text).toEqual('Hello world')
            expect(entities.length).toEqual(2)
            // Sibling entities are expected in document order.
            expect(entities[0]).toBeInstanceOf(types.MessageEntityItalic)
            expect(entities[1]).toBeInstanceOf(types.MessageEntityBold)
        })
    })

    describe('.unparse', () => {
        // Title fixed: this spec checks HTML output, not markdown.
        test('it should create an HTML string from raw text and entities', () => {
            const unparsed = '<strong>hello</strong> <em>hello</em> <del>hello</del> <code>hello</code> <pre>hello</pre> <a href="https://hello.world">hello</a>'
            const strippedText = 'hello hello hello hello hello hello'
            // Each "hello" is 5 characters followed by a space, so offsets step by 6.
            const rawEntities = [
                new types.MessageEntityBold({ offset: 0, length: 5 }),
                new types.MessageEntityItalic({ offset: 6, length: 5 }),
                new types.MessageEntityStrike({ offset: 12, length: 5 }),
                new types.MessageEntityCode({ offset: 18, length: 5 }),
                new types.MessageEntityPre({ offset: 24, length: 5 }),
                new types.MessageEntityTextUrl({ offset: 30, length: 5, url: 'https://hello.world' }),
            ]
            const text = HTMLParser.unparse(strippedText, rawEntities)
            expect(text).toEqual(unparsed)
        })

        test('it should unparse nested entities', () => {
            const unparsed = '<strong><em>Hello world</em></strong>'
            const strippedText = 'Hello world'
            // Both entities cover the whole text; the first one listed becomes the outer tag.
            const rawEntities = [
                new types.MessageEntityBold({ offset: 0, length: 11 }),
                new types.MessageEntityItalic({ offset: 0, length: 11 }),
            ]
            const text = HTMLParser.unparse(strippedText, rawEntities)
            expect(text).toEqual(unparsed)
        })
    })
})
95 changes: 95 additions & 0 deletions __tests__/extensions/Markdown.spec.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
const { MarkdownParser } = require('../../gramjs/extensions/Markdown')
const types = require('../../gramjs/tl/types')

/**
 * Specs for MarkdownParser.
 *
 * `.parse` cases feed a markdown snippet through a new parser and inspect the
 * `[text, entities]` pair returned by `parse()`.
 * `.unparse` rebuilds a markdown string from plain text plus entities via the
 * static `MarkdownParser.unparse(text, entities)`.
 */
describe('MarkdownParser', () => {
    // Builds a parser for `source` and returns whatever `parse()` yields.
    const parseMarkdown = (source) => new MarkdownParser(source).parse()

    // Asserts the entity count and then the class of each entity, in order.
    const expectEntityTypes = (entities, expectedTypes) => {
        expect(entities.length).toEqual(expectedTypes.length)
        expectedTypes.forEach((EntityType, index) => {
            expect(entities[index]).toBeInstanceOf(EntityType)
        })
    }

    test('it should construct a new MarkdownParser', () => {
        const freshParser = new MarkdownParser('Hello world')
        expect(freshParser.text).toEqual('')
        expect(freshParser.entities).toEqual([])
    })

    describe('.parse', () => {
        test('it should parse bold entities', () => {
            const [plain, found] = parseMarkdown('Hello **world**')
            expect(plain).toEqual('Hello world')
            expectEntityTypes(found, [types.MessageEntityBold])
        })

        test('it should parse italic entities', () => {
            const [plain, found] = parseMarkdown('Hello __world__')
            expect(plain).toEqual('Hello world')
            expectEntityTypes(found, [types.MessageEntityItalic])
        })

        test('it should parse code entities', () => {
            const [plain, found] = parseMarkdown('Hello `world`')
            expect(plain).toEqual('Hello world')
            expectEntityTypes(found, [types.MessageEntityCode])
        })

        test('it should parse pre entities', () => {
            const [plain, found] = parseMarkdown('Hello ```world```')
            expect(plain).toEqual('Hello world')
            expectEntityTypes(found, [types.MessageEntityPre])
        })

        test('it should parse strike entities', () => {
            const [plain, found] = parseMarkdown('Hello ~~world~~')
            expect(plain).toEqual('Hello world')
            expectEntityTypes(found, [types.MessageEntityStrike])
        })

        test('it should parse link entities', () => {
            const [plain, found] = parseMarkdown('Hello [world](https://hello.world)')
            expect(plain).toEqual('Hello world')
            expectEntityTypes(found, [types.MessageEntityTextUrl])
            // The link target is expected on the entity itself.
            expect(found[0].url).toEqual('https://hello.world')
        })

        test('it should not parse nested entities', () => {
            const [plain, found] = parseMarkdown('Hello **__world__**')
            // Only the outer delimiter is consumed; the inner one stays as literal text.
            expect(plain).toEqual('Hello __world__')
            expectEntityTypes(found, [types.MessageEntityBold])
        })

        test('it should parse multiple entities', () => {
            const [plain, found] = parseMarkdown('__Hello__ **world**')
            expect(plain).toEqual('Hello world')
            expectEntityTypes(found, [
                types.MessageEntityItalic,
                types.MessageEntityBold,
            ])
        })
    })

    describe('.unparse', () => {
        test('it should create a markdown string from raw text and entities', () => {
            const plainText = 'hello hello hello hello hello hello'
            // One entity per word: each word is 5 characters plus a separating space.
            const entityList = [
                new types.MessageEntityBold({ offset: 0, length: 5 }),
                new types.MessageEntityItalic({ offset: 6, length: 5 }),
                new types.MessageEntityStrike({ offset: 12, length: 5 }),
                new types.MessageEntityCode({ offset: 18, length: 5 }),
                new types.MessageEntityPre({ offset: 24, length: 5 }),
                new types.MessageEntityTextUrl({ offset: 30, length: 5, url: 'https://hello.world' }),
            ]
            const expectedMarkdown = '**hello** __hello__ ~~hello~~ `hello` ```hello``` [hello](https://hello.world)'

            const rebuilt = MarkdownParser.unparse(plainText, entityList)

            expect(rebuilt).toEqual(expectedMarkdown)
        })
    })
})
100 changes: 100 additions & 0 deletions __tests__/extensions/Scanner.spec.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
const Scanner = require('../../gramjs/extensions/Scanner')

// Shared fixture: one scanner over a fixed input, rewound before every test.
const INPUT = 'Hello world'
const scanner = new Scanner(INPUT)

/**
 * Specs for Scanner, covering its cursor (`pos`), the read helpers
 * (`chr`, `peek`, `consume`, `rest`), the movement helpers (`reverse`,
 * `scanUntil`, `reset`) and the boundary predicates (`eof`, `bof`).
 */
describe('Scanner', () => {
    beforeEach(() => scanner.reset())

    test('it should construct a new Scanner', () => {
        expect(scanner.str).toEqual('Hello world')
        expect(scanner.pos).toEqual(0)
        expect(scanner.lastMatch).toBeNull()
    })

    describe('.chr', () => {
        test('it should return the character at the current pos', () => {
            const current = scanner.chr
            expect(current).toEqual('H')
        })
    })

    describe('.peek', () => {
        test('it should return the character at the current pos', () => {
            const peeked = scanner.peek()
            expect(peeked).toEqual('H')
        })

        test('it should return the next n characters', () => {
            // Peeking twice from the same position shows it does not advance pos.
            const firstThree = scanner.peek(3)
            expect(firstThree).toEqual('Hel')
            const firstFive = scanner.peek(5)
            expect(firstFive).toEqual('Hello')
        })
    })

    describe('.consume', () => {
        test('it should consume the current character', () => {
            const consumed = scanner.consume()
            expect(consumed).toEqual('H')
            expect(scanner.pos).toEqual(1)
        })

        test('it should consume the next n characters', () => {
            const consumed = scanner.consume(5)
            expect(consumed).toEqual('Hello')
            expect(scanner.pos).toEqual(5)
        })
    })

    describe('.reverse', () => {
        test('it should set pos back n characters', () => {
            const steps = 5
            scanner.consume(steps)
            scanner.reverse(steps)
            expect(scanner.pos).toEqual(0)
        })

        test('it should not go back further than 0', () => {
            // Already at the start, so reversing must clamp instead of going negative.
            scanner.reverse(10)
            expect(scanner.pos).toEqual(0)
        })
    })

    describe('.scanUntil', () => {
        test('it should scan the string for a regular expression starting at the current pos', () => {
            const pattern = /w/
            scanner.scanUntil(pattern)
            // 'w' sits at index 6 of 'Hello world'.
            expect(scanner.pos).toEqual(6)
        })

        test('it should do nothing if the pattern is not found', () => {
            const pattern = /G/
            scanner.scanUntil(pattern)
            expect(scanner.pos).toEqual(0)
        })
    })

    describe('.rest', () => {
        test('it should return the unconsumed input', () => {
            scanner.consume(6)
            const remaining = scanner.rest
            expect(remaining).toEqual('world')
        })
    })

    describe('.reset', () => {
        test('it should reset the pos to 0', () => {
            scanner.consume(5)
            scanner.reset()
            expect(scanner.pos).toEqual(0)
        })
    })

    describe('.eof', () => {
        test('it should return true if the scanner has reached the end of the input', () => {
            const atEndBefore = scanner.eof()
            expect(atEndBefore).toBe(false)
            // The input is 11 characters long, so this consumes all of it.
            scanner.consume(11)
            const atEndAfter = scanner.eof()
            expect(atEndAfter).toBe(true)
        })
    })

    describe('.bof', () => {
        test('it should return true if pos is 0', () => {
            const atStartBefore = scanner.bof()
            expect(atStartBefore).toBe(true)
            scanner.consume(11)
            const atStartAfter = scanner.bof()
            expect(atStartAfter).toBe(false)
        })
    })
})
6 changes: 3 additions & 3 deletions gramjs/Utils.js
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
const path = require('path')
const mime = require('mime-types')
const struct = require('python-struct')
const { MarkdownParser, HTMLParser } = require('./extensions')
const { markdown, html } = require('./extensions')
const { types } = require('./tl')

const USERNAME_RE = new RegExp('@|(?:https?:\\/\\/)?(?:www\\.)?' +
Expand Down Expand Up @@ -892,10 +892,10 @@ function sanitizeParseMode(mode) {
switch (mode.toLowerCase()) {
case 'md':
case 'markdown':
return MarkdownParser
return markdown
case 'htm':
case 'html':
return HTMLParser
return html
default:
throw new Error(`Unknown parse mode ${mode}`)
}
Expand Down
Loading

0 comments on commit bf96832

Please sign in to comment.