Skip to content

Commit

Permalink
Return correct byte offset from IndexIn
Browse files Browse the repository at this point in the history
According to the [LSP Spec](https://microsoft.github.io/language-server-protocol/specification#textDocuments)
Character Offsets are based on UTF-16 string representations. The
current IndexIn assumes the Character is a byte offset leading to
incorrect state in the Language Server.
  • Loading branch information
Shezad Khan committed Aug 18, 2021
1 parent 98d1a22 commit 69f9b44
Showing 1 changed file with 49 additions and 1 deletion.
50 changes: 49 additions & 1 deletion protocol_3_16/base-structures.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package protocol
import (
"encoding/json"
"strings"
"unicode/utf8"
)

// https://microsoft.github.io/language-server-protocol/specification#uri
Expand Down Expand Up @@ -52,6 +53,15 @@ type Position struct {
}

func (self Position) IndexIn(content string) int {
// This code is modified from the gopls implementation found:
// https://cs.opensource.google/go/x/tools/+/refs/tags/v0.1.5:internal/span/utf16.go;l=70

// In accordance with the LSP Spec:
// https://microsoft.github.io/language-server-protocol/specification#textDocuments
// self.Character represents utf-16 code units, not bytes and so we need to
// convert utf-16 code units to a byte offset.

// Find the byte offset for the line
index := 0
for row := UInteger(0); row < self.Line; row++ {
content_ := content[index:]
Expand All @@ -61,7 +71,45 @@ func (self Position) IndexIn(content string) int {
return 0
}
}
return index + int(self.Character)

// The index represents the byte offset from the beginning of the line
// count self.Character utf-16 code units from the index byte offset.

byteOffset := index
remains := content[index:]
chr := int(self.Character)

for count := 1; count <= chr; count++ {

if len(remains) <= 0 {
// char goes past content
// this a error
return 0
}

r, w := utf8.DecodeRuneInString(remains)
if r == '\n' {
// Per the LSP spec:
//
// > If the character value is greater than the line length it
// > defaults back to the line length.
break
}

remains = remains[w:]
if r >= 0x10000 {
// a two point rune
count++
// if we finished in a two point rune, do not advance past the first
if count > chr {
break
}
}
byteOffset += w

}

return byteOffset
}

func (self Position) EndOfLineIn(content string) Position {
Expand Down

0 comments on commit 69f9b44

Please sign in to comment.