Skip to content

Commit

Permalink
textencoding: generate table for WinAnsi encoding from CP1252
Browse files Browse the repository at this point in the history
  • Loading branch information
Denys Smirnov committed Jan 1, 2019
1 parent ac76966 commit 622ae56
Show file tree
Hide file tree
Showing 6 changed files with 292 additions and 228 deletions.
19 changes: 18 additions & 1 deletion Gopkg.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,5 @@ module github.com/unidoc/unidoc
require (
github.com/boombuler/barcode v1.0.0
golang.org/x/image v0.0.0-20181116024801-cd38e8056d9b
golang.org/x/text v0.3.0
)
2 changes: 2 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,5 @@ github.com/boombuler/barcode v1.0.0 h1:s1TvRnXwL2xJRaccrdcBQMZxq6X7DvsMogtmJeHDd
github.com/boombuler/barcode v1.0.0/go.mod h1:paBWMcWSl3LHKBqUq+rly7CNSldXjb2rDl3JlRe0mD8=
golang.org/x/image v0.0.0-20181116024801-cd38e8056d9b h1:VHyIDlv3XkfCa5/a81uzaoDkHH4rr81Z62g+xlnO8uM=
golang.org/x/image v0.0.0-20181116024801-cd38e8056d9b/go.mod h1:ux5Hcp/YLpHSI86hEcLt0YII63i6oz57MZXIpbrjZUs=
golang.org/x/text v0.3.0 h1:g61tztE5qeGQ89tm6NTjjM9VPIm088od1l6aSorWRWg=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
227 changes: 0 additions & 227 deletions pdf/internal/textencoding/simple.go
Original file line number Diff line number Diff line change
Expand Up @@ -283,233 +283,6 @@ func ToFontDifferences(differences map[CharCode]GlyphName) *core.PdfObjectArray
return core.MakeArray(diffList...)
}

// winAnsiEncoding maps every WinAnsiEncoding character code in 0x20-0xff to a
// rune (224 entries in total).
//
// Codes 0x20-0x7e and 0xa1-0xff map to the rune with the same numeric value,
// with the single exception of 0xad, which maps to '-' ("hyphen"). Codes
// 0x7f-0xa0 have no such regularity and are spelled out one by one, each
// annotated with its glyph name.
var winAnsiEncoding = func() map[CharCode]rune {
	// Start from the irregular entries.
	table := map[CharCode]rune{
		0x7f: 0x2022, // • "bullet"
		0x80: 0x20ac, // € "Euro"
		0x81: 0x2022, // • "bullet"
		0x82: 0x201a, // ‚ "quotesinglbase"
		0x83: 0x0192, // ƒ "florin"
		0x84: 0x201e, // „ "quotedblbase"
		0x85: 0x2026, // … "ellipsis"
		0x86: 0x2020, // † "dagger"
		0x87: 0x2021, // ‡ "daggerdbl"
		0x88: 0x02c6, // ˆ "circumflex"
		0x89: 0x2030, // ‰ "perthousand"
		0x8a: 0x0160, // Š "Scaron"
		0x8b: 0x2039, // ‹ "guilsinglleft"
		0x8c: 0x0152, // Œ "OE"
		0x8d: 0x2022, // • "bullet"
		0x8e: 0x017d, // Ž "Zcaron"
		0x8f: 0x2022, // • "bullet"
		0x90: 0x2022, // • "bullet"
		0x91: 0x2018, // ‘ "quoteleft"
		0x92: 0x2019, // ’ "quoteright"
		0x93: 0x201c, // “ "quotedblleft"
		0x94: 0x201d, // ” "quotedblright"
		0x95: 0x2022, // • "bullet"
		0x96: 0x2013, // – "endash"
		0x97: 0x2014, // — "emdash"
		0x98: 0x02dc, // ˜ "tilde"
		0x99: 0x2122, // ™ "trademark"
		0x9a: 0x0161, // š "scaron"
		0x9b: 0x203a, // › "guilsinglright"
		0x9c: 0x0153, // œ "oe"
		0x9d: 0x2022, // • "bullet"
		0x9e: 0x017e, // ž "zcaron"
		0x9f: 0x0178, // Ÿ "Ydieresis"
		0xa0: 0x0020, // "space"
		0xad: 0x002d, // - "hyphen"
	}

	// 0x20 ("space") through 0x7e ("asciitilde") map to themselves.
	for code := 0x20; code <= 0x7e; code++ {
		table[CharCode(code)] = rune(code)
	}

	// 0xa1 ("exclamdown") through 0xff ("ydieresis") map to themselves,
	// apart from 0xad which was already set to "hyphen" above.
	for code := 0xa1; code <= 0xff; code++ {
		if code == 0xad {
			continue
		}
		table[CharCode(code)] = rune(code)
	}

	return table
}()

// simpleEncodings is a map of the standard 8 bit character encodings.
var simpleEncodings = map[string]map[CharCode]rune{
"MacExpertEncoding": { // 165 entries
Expand Down
35 changes: 35 additions & 0 deletions pdf/internal/textencoding/winansi.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,41 @@

package textencoding

import "golang.org/x/text/encoding/charmap"

// winAnsiEncoding maps character codes to runes for WinAnsiEncoding. It is
// allocated empty here (with a size hint of 256) and is populated by the init
// function in this file.
var winAnsiEncoding = make(map[CharCode]rune, 256)

// init populates winAnsiEncoding for every character code from ' ' (0x20)
// through 0xff.
//
// WinAnsiEncoding is also known as CP1252, so each code is first decoded with
// the Windows-1252 charmap. A handful of codes are then overridden: in
// WinAnsiEncoding all unused and non-visual codes map to the '•' character,
// and two codes are replaced by typographically similar ASCII characters.
func init() {
	const bullet = '•'
	cp1252 := charmap.Windows1252

	for code := int(' '); code <= 0xff; code++ {
		ch := byte(code)
		decoded := cp1252.DecodeByte(ch)

		switch ch {
		case 127: // DEL
			decoded = bullet
		case 129, 141, 143, 144, 157: // unused
			decoded = bullet
		case 160: // non-breaking space -> space
			decoded = ' '
		case 173: // soft hyphen -> hyphen
			decoded = '-'
		}

		winAnsiEncoding[CharCode(ch)] = decoded
	}
}

// NewWinAnsiTextEncoder returns a SimpleEncoder that implements WinAnsiEncoding.
func NewWinAnsiTextEncoder() *SimpleEncoder {
const baseName = "WinAnsiEncoding"
Expand Down
Loading

0 comments on commit 622ae56

Please sign in to comment.