Skip to content

Commit

Permalink
Make determining whether a code point represents a combining mark fas…
Browse files Browse the repository at this point in the history
…ter (#1719)
  • Loading branch information
p-e-w authored Jun 12, 2020
1 parent efb38b8 commit 5ce26cc
Show file tree
Hide file tree
Showing 3 changed files with 29 additions and 9 deletions.
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ bench-compare:
for i in 1 2 3; do \
go test -bench=. ./internal/...; \
done > benchmark_results
benchstat benchmark_results_baseline benchmark_results
benchstat -alpha 0.15 benchmark_results_baseline benchmark_results

clean:
rm -f micro
18 changes: 14 additions & 4 deletions internal/util/unicode.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,16 @@ import (
// For rendering, micro will display the combining characters. It's not perfect
// but it's pretty good.

// minMark is the low end of the first 16-bit range in unicode.Mark. Range
// tables are kept in sorted order, so no mark has a smaller code point.
var minMark = rune(unicode.Mark.R16[0].Lo)

// isMark reports whether r belongs to the Unicode Mark category (M), i.e.
// whether it is a combining character.
func isMark(r rune) bool {
	// Anything below the first mark is rejected by a single comparison,
	// so the range-table lookup only runs for code points that could match.
	return r >= minMark && unicode.IsMark(r)
}

// DecodeCharacter returns the next character from an array of bytes
// A character is a rune along with any accompanying combining runes
func DecodeCharacter(b []byte) (rune, []rune, int) {
Expand All @@ -24,7 +34,7 @@ func DecodeCharacter(b []byte) (rune, []rune, int) {
c, s := utf8.DecodeRune(b)

var combc []rune
for unicode.In(c, unicode.Mark) {
for isMark(c) {
combc = append(combc, c)
size += s

Expand All @@ -43,7 +53,7 @@ func DecodeCharacterInString(str string) (rune, []rune, int) {
c, s := utf8.DecodeRuneInString(str)

var combc []rune
for unicode.In(c, unicode.Mark) {
for isMark(c) {
combc = append(combc, c)
size += s

Expand All @@ -61,7 +71,7 @@ func CharacterCount(b []byte) int {

for len(b) > 0 {
r, size := utf8.DecodeRune(b)
if !unicode.In(r, unicode.Mark) {
if !isMark(r) {
s++
}

Expand All @@ -77,7 +87,7 @@ func CharacterCountInString(str string) int {
s := 0

for _, r := range str {
if !unicode.In(r, unicode.Mark) {
if !isMark(r) {
s++
}
}
Expand Down
18 changes: 14 additions & 4 deletions pkg/highlight/unicode.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,16 @@ import (
"unicode/utf8"
)

// minMark holds the smallest code point covered by unicode.Mark: the lower
// bound of the first entry in its sorted 16-bit range table.
var minMark = rune(unicode.Mark.R16[0].Lo)

// isMark reports whether r is a Unicode mark (category M), the kind of rune
// that combines with a preceding base character.
func isMark(r rune) bool {
	if r >= minMark {
		// Only runes at or above the first mark need the table lookup.
		return unicode.Is(unicode.Mark, r)
	}
	// Fast path: every rune below the first mark is a non-mark.
	return false
}

// DecodeCharacter returns the next character from an array of bytes
// A character is a rune along with any accompanying combining runes
func DecodeCharacter(b []byte) (rune, []rune, int) {
Expand All @@ -13,7 +23,7 @@ func DecodeCharacter(b []byte) (rune, []rune, int) {
c, s := utf8.DecodeRune(b)

var combc []rune
for unicode.In(c, unicode.Mark) {
for isMark(c) {
combc = append(combc, c)
size += s

Expand All @@ -32,7 +42,7 @@ func DecodeCharacterInString(str string) (rune, []rune, int) {
c, s := utf8.DecodeRuneInString(str)

var combc []rune
for unicode.In(c, unicode.Mark) {
for isMark(c) {
combc = append(combc, c)
size += s

Expand All @@ -50,7 +60,7 @@ func CharacterCount(b []byte) int {

for len(b) > 0 {
r, size := utf8.DecodeRune(b)
if !unicode.In(r, unicode.Mark) {
if !isMark(r) {
s++
}

Expand All @@ -66,7 +76,7 @@ func CharacterCountInString(str string) int {
s := 0

for _, r := range str {
if !unicode.In(r, unicode.Mark) {
if !isMark(r) {
s++
}
}
Expand Down

0 comments on commit 5ce26cc

Please sign in to comment.