diff --git a/commands/hugo.go b/commands/hugo.go
index b0e4964c4de..718a3e6f66e 100644
--- a/commands/hugo.go
+++ b/commands/hugo.go
@@ -168,6 +168,7 @@ func LoadDefaultSettings() {
viper.SetDefault("RSSUri", "index.xml")
viper.SetDefault("SectionPagesMenu", "")
viper.SetDefault("DisablePathToLower", false)
+ viper.SetDefault("HasCJKLanguage", false)
}
// InitializeConfig initializes a config file with sensible default configuration flags.
diff --git a/helpers/content.go b/helpers/content.go
index 8c5c9cc7b2e..847d4dcbc28 100644
--- a/helpers/content.go
+++ b/helpers/content.go
@@ -19,9 +19,9 @@ package helpers
import (
"bytes"
- "unicode/utf8"
"html/template"
"os/exec"
+ "unicode/utf8"
"github.com/miekg/mmark"
"github.com/russross/blackfriday"
@@ -178,7 +178,6 @@ func GetHTMLRenderer(defaultFlags int, ctx *RenderingContext) blackfriday.Render
}
}
-
func getMarkdownExtensions(ctx *RenderingContext) int {
flags := 0 | blackfriday.EXTENSION_NO_INTRA_EMPHASIS |
blackfriday.EXTENSION_TABLES | blackfriday.EXTENSION_FENCED_CODE |
@@ -385,61 +384,63 @@ func TruncateWords(s string, max int) string {
return strings.Join(words[:max], " ")
}
-// TruncateWordsToWholeSentence takes content and an int
-// and returns entire sentences from content, delimited by the int
-// and whether it's truncated or not.
-func TruncateWordsToWholeSentence(words []string, max int) (string, bool) {
+// TruncateWordsByRune joins words with single spaces, keeping at most max
+// counted units, and reports whether anything was cut off. A word made up
+// solely of single-byte runes counts as one unit; any other word counts one
+// unit per rune and may therefore be cut in the middle.
+// The words slice itself is never modified.
+func TruncateWordsByRune(words []string, max int) (string, bool) {
count := 0
- index, word := 0, ""
- truncated := false
-
- for index, word = range words {
+ for index, word := range words {
+ if count >= max {
+ return strings.Join(words[:index], " "), true
+ }
runeCount := utf8.RuneCountInString(word)
if len(word) == runeCount {
- count++;
+ count++
+ } else if count+runeCount < max {
+ count += runeCount
} else {
- if count + runeCount <= max {
- count += runeCount
- } else {
- offset := 0
- for count < max {
- _, width := utf8.DecodeRuneInString(word[offset:])
- offset += width
+ // Walk the word rune by rune so it can be cut exactly at the limit.
+ for ri := range word {
+ if count >= max {
+ // Cap the prefix at words[:index:index] so that append copies it
+ // instead of overwriting words[index] in the caller's slice.
+ truncatedWords := append(words[:index:index], word[:ri])
+ return strings.Join(truncatedWords, " "), true
+ }
count++
}
- words[index] = word[:offset]
- truncated = true
- }
- }
-
- if count >= max {
- if index < len(words) - 1 {
- truncated = true
}
- break
}
- }
-
- index += 1
-
- if index < len(words) {
- for counter, word := range words[index:] {
- if len(word) != utf8.RuneCountInString(word) {
- break
- }
- if strings.HasSuffix(word, ".") ||
- strings.HasSuffix(word, "?") ||
- strings.HasSuffix(word, ".\"") ||
- strings.HasSuffix(word, "!") {
- upper := index + counter + 1
- return strings.Join(words[:upper], " "), (upper < len(words))
- }
+
+ return strings.Join(words, " "), false
+}
+
+// TruncateWordsToWholeSentence returns the first max words joined by single
+// spaces, plus the words that follow up to and including the first of them
+// that ends a sentence (a word ending in ".", "?", "!" or `."`). If none does,
+// exactly max words are returned. The bool reports whether words were left out.
+func TruncateWordsToWholeSentence(words []string, max int) (string, bool) {
+ // A negative max would make words[max:] panic; treat it as zero.
+ if max < 0 {
+ max = 0
+ }
+ if max >= len(words) {
+ return strings.Join(words, " "), false
+ }
+
+ for counter, word := range words[max:] {
+ if strings.HasSuffix(word, ".") ||
+ strings.HasSuffix(word, "?") ||
+ strings.HasSuffix(word, ".\"") ||
+ strings.HasSuffix(word, "!") {
+ upper := max + counter + 1
+ return strings.Join(words[:upper], " "), (upper < len(words))
}
- } else if index > len(words) {
- return strings.Join(words, " "), truncated
}
-
- return strings.Join(words[:index], " "), truncated
+
+ return strings.Join(words[:max], " "), true
}
// GetAsciidocContent calls asciidoctor or asciidoc as an external helper
diff --git a/helpers/content_test.go b/helpers/content_test.go
index f614011c0e6..f0d76b6cea6 100644
--- a/helpers/content_test.go
+++ b/helpers/content_test.go
@@ -1,10 +1,11 @@
package helpers
import (
- "github.com/stretchr/testify/assert"
"html/template"
"strings"
"testing"
+
+ "github.com/stretchr/testify/assert"
)
const tstHTMLContent = "
content foobar. Follow upThis is some text.
And some more.
"
@@ -54,8 +55,6 @@ func TestTruncateWordsToWholeSentence(t *testing.T) {
{"a b c", "a b c", 12, false},
{"a b c", "a b c", 3, false},
{"a", "a", 1, false},
- {"Hello 中国", "Hello 中", 2, true},
- {"Hello 中国", "Hello 中国", 3, false},
{"This is a sentence.", "This is a sentence.", 5, false},
{"This is also a sentence!", "This is also a sentence!", 1, false},
{"To be. Or not to be. That's the question.", "To be.", 1, true},
@@ -72,3 +71,37 @@ func TestTruncateWordsToWholeSentence(t *testing.T) {
}
}
}
+
+// TestTruncateWordsByRune checks the joined output and the truncated flag of
+// TruncateWordsByRune for ASCII-only, CJK-only and mixed input.
+func TestTruncateWordsByRune(t *testing.T) {
+ cases := []struct {
+ input, expected string
+ max int
+ truncated bool
+ }{
+ {"", "", 1, false},
+ {"a b c", "a b c", 12, false},
+ {"a b c", "a b c", 3, false},
+ {"a", "a", 1, false},
+ {"Hello 中国", "", 0, true},
+ {"这是中文,全中文。", "这是中文,", 5, true},
+ {"Hello 中国", "Hello 中", 2, true},
+ {"Hello 中国", "Hello 中国", 3, false},
+ {"Hello中国 Good 好的", "Hello中国 Good 好", 9, true},
+ {"This is a sentence.", "This is", 2, true},
+ {"This is also a sentence!", "This", 1, true},
+ {"To be. Or not to be. That's the question.", "To be. Or not", 4, true},
+ {" \nThis is not a sentence\n ", "This is not", 3, true},
+ }
+ for idx, tc := range cases {
+ got, cut := TruncateWordsByRune(strings.Fields(tc.input), tc.max)
+ if got != tc.expected {
+ t.Errorf("Test %d failed. Expected %q got %q", idx, tc.expected, got)
+ }
+
+ if cut != tc.truncated {
+ t.Errorf("Test %d failed. Expected truncated=%t got %t", idx, tc.truncated, cut)
+ }
+ }
+}
diff --git a/hugolib/page.go b/hugolib/page.go
index c50e2da18b2..e08e764af95 100644
--- a/hugolib/page.go
+++ b/hugolib/page.go
@@ -28,6 +28,7 @@ import (
"net/url"
"path"
"path/filepath"
+ "regexp"
"strings"
"sync"
"time"
@@ -42,6 +43,9 @@ import (
"github.com/spf13/viper"
)
+// cjk matches a single rune from the Han, Hangul, Hiragana or Katakana scripts.
+var cjk = regexp.MustCompile(`\p{Han}|\p{Hangul}|\p{Hiragana}|\p{Katakana}`)
+
type Page struct {
Params map[string]interface{}
Content template.HTML
@@ -67,7 +72,6 @@ type Page struct {
contentShortCodes map[string]string
plain string // TODO should be []byte
plainWords []string
- plainRuneCount int
plainInit sync.Once
plainSecondaryInit sync.Once
renderingConfig *helpers.Blackfriday
@@ -78,6 +82,7 @@ type Page struct {
Node
pageMenus PageMenus
pageMenusInit sync.Once
+ isCJKLanguage bool
}
type Source struct {
@@ -111,12 +116,6 @@ func (p *Page) PlainWords() []string {
return p.plainWords
}
-// RuneCount returns the rune count, excluding any whitespace, of the plain content.
-func (p *Page) RuneCount() int {
- p.initPlainSecondary()
- return p.plainRuneCount
-}
-
func (p *Page) initPlain() {
p.plainInit.Do(func() {
p.plain = helpers.StripHTML(string(p.Content))
@@ -125,20 +124,6 @@ func (p *Page) initPlain() {
})
}
-func (p *Page) initPlainSecondary() {
- p.plainSecondaryInit.Do(func() {
- p.initPlain()
- runeCount := 0
- for _, r := range p.plain {
- if !helpers.IsWhitespace(r) {
- runeCount++
- }
- }
- p.plainRuneCount = runeCount
- return
- })
-}
-
func (p *Page) IsNode() bool {
return false
}
@@ -218,7 +203,13 @@ func (p *Page) setSummary() {
} else {
// If hugo defines split:
// render, strip html, then split
- summary, truncated := helpers.TruncateWordsToWholeSentence(p.PlainWords(), helpers.SummaryLength)
+ var summary string
+ var truncated bool
+ if p.isCJKLanguage {
+ summary, truncated = helpers.TruncateWordsByRune(p.PlainWords(), helpers.SummaryLength)
+ } else {
+ summary, truncated = helpers.TruncateWordsToWholeSentence(p.PlainWords(), helpers.SummaryLength)
+ }
p.Summary = template.HTML(summary)
p.Truncated = truncated
@@ -363,18 +354,29 @@ func (p *Page) ReadFrom(buf io.Reader) (int64, error) {
}
+// analyzePage fills in WordCount, FuzzyWordCount and ReadingTime from the
+// page's plain words. For a CJK page, a word holding any multi-byte rune
+// counts once per rune; otherwise every entry of PlainWords counts once.
+// FuzzyWordCount is the smallest multiple of 100 strictly above WordCount.
func (p *Page) analyzePage() {
- p.WordCount = 0
- for _, word := range p.PlainWords() {
- runeCount := utf8.RuneCountInString(word)
- if len(word) == runeCount {
- p.WordCount++
- } else {
- p.WordCount += runeCount
+ // Words covered by one ReadingTime unit; CJK pages use a higher rate.
+ readingSpeed := 213
+ if p.isCJKLanguage {
+ readingSpeed = 501
+ p.WordCount = 0
+ for _, word := range p.PlainWords() {
+ if runes := utf8.RuneCountInString(word); runes == len(word) {
+ p.WordCount++
+ } else {
+ p.WordCount += runes
+ }
}
+ } else {
+ p.WordCount = len(p.PlainWords())
}
-
+
p.FuzzyWordCount = int((p.WordCount+100)/100) * 100
- p.ReadingTime = int((p.WordCount + 212) / 213)
+ // Ceiling division: a partly used unit counts as a whole one.
+ p.ReadingTime = int((p.WordCount + readingSpeed - 1) / readingSpeed)
}
func (p *Page) permalink() (*url.URL, error) {
@@ -481,7 +481,7 @@ func (p *Page) update(f interface{}) error {
}
m := f.(map[string]interface{})
var err error
- var draft, published *bool
+ var draft, published, isCJKLanguage *bool
for k, v := range m {
loki := strings.ToLower(k)
switch loki {
@@ -542,6 +542,9 @@ func (p *Page) update(f interface{}) error {
p.Status = cast.ToString(v)
case "sitemap":
p.Sitemap = parseSitemap(cast.ToStringMap(v))
+ case "iscjklanguage":
+ isCJKLanguage = new(bool)
+ *isCJKLanguage = cast.ToBool(v)
default:
// If not one of the explicit values, store in Params
switch vv := v.(type) {
@@ -596,6 +599,16 @@ func (p *Page) update(f interface{}) error {
p.Lastmod = p.Date
}
+ if isCJKLanguage != nil {
+ p.isCJKLanguage = *isCJKLanguage
+ } else if viper.GetBool("HasCJKLanguage") {
+ if cjk.Match(p.rawContent) {
+ p.isCJKLanguage = true
+ } else {
+ p.isCJKLanguage = false
+ }
+ }
+
return nil
}
@@ -766,6 +779,8 @@ func (p *Page) parse(reader io.Reader) error {
p.renderable = psr.IsRenderable()
p.frontmatter = psr.FrontMatter()
+ p.rawContent = psr.Content()
+
meta, err := psr.Metadata()
if meta != nil {
if err != nil {
@@ -778,8 +793,6 @@ func (p *Page) parse(reader io.Reader) error {
}
}
- p.rawContent = psr.Content()
-
return nil
}
diff --git a/hugolib/page_test.go b/hugolib/page_test.go
index c3506d48d4b..9134ba6c644 100644
--- a/hugolib/page_test.go
+++ b/hugolib/page_test.go
@@ -146,16 +146,67 @@ Summary Same Line
Some more text
`
- SIMPLE_PAGE_WITH_FIVE_MULTIBYTE_UFT8_RUNES = `---
+ SIMPLE_PAGE_WITH_ALL_CJK_RUNES = `---
title: Simple
---
€ € € € €
+你好
+도형이
+カテゴリー
`
+ SIMPLE_PAGE_WITH_MAIN_ENGLISH_WITH_CJK_RUNES = `---
+title: Simple
+---
+
+
+In Chinese, 好 means good. In Chinese, 好 means good.
+In Chinese, 好 means good. In Chinese, 好 means good.
+In Chinese, 好 means good. In Chinese, 好 means good.
+In Chinese, 好 means good. In Chinese, 好 means good.
+In Chinese, 好 means good. In Chinese, 好 means good.
+In Chinese, 好 means good. In Chinese, 好 means good.
+In Chinese, 好 means good. In Chinese, 好 means good.
+More then 70 words.
+
+
+`
+ SIMPLE_PAGE_WITH_MAIN_ENGLISH_WITH_CJK_RUNES_SUMMARY = "In Chinese, 好 means good. In Chinese, 好 means good. " +
+ "In Chinese, 好 means good. In Chinese, 好 means good. " +
+ "In Chinese, 好 means good. In Chinese, 好 means good. " +
+ "In Chinese, 好 means good. In Chinese, 好 means good. " +
+ "In Chinese, 好 means good. In Chinese, 好 means good. " +
+ "In Chinese, 好 means good. In Chinese, 好 means good. " +
+ "In Chinese, 好 means good. In Chinese, 好 means good."
+
+ SIMPLE_PAGE_WITH_ISCJKLANGUAGE_FALSE = `---
+title: Simple
+isCJKLanguage: false
+---
+
+In Chinese, 好的啊 means good. In Chinese, 好的呀 means good.
+In Chinese, 好的啊 means good. In Chinese, 好的呀 means good.
+In Chinese, 好的啊 means good. In Chinese, 好的呀 means good.
+In Chinese, 好的啊 means good. In Chinese, 好的呀 means good.
+In Chinese, 好的啊 means good. In Chinese, 好的呀 means good.
+In Chinese, 好的啊 means good. In Chinese, 好的呀 means good.
+In Chinese, 好的啊 means good. In Chinese, 好的呀呀 means good enough.
+More then 70 words.
+
+
+`
+ SIMPLE_PAGE_WITH_ISCJKLANGUAGE_FALSE_SUMMARY = "In Chinese, 好的啊 means good. In Chinese, 好的呀 means good. " +
+ "In Chinese, 好的啊 means good. In Chinese, 好的呀 means good. " +
+ "In Chinese, 好的啊 means good. In Chinese, 好的呀 means good. " +
+ "In Chinese, 好的啊 means good. In Chinese, 好的呀 means good. " +
+ "In Chinese, 好的啊 means good. In Chinese, 好的呀 means good. " +
+ "In Chinese, 好的啊 means good. In Chinese, 好的呀 means good. " +
+ "In Chinese, 好的啊 means good. In Chinese, 好的呀呀 means good enough."
+
SIMPLE_PAGE_WITH_LONG_CONTENT = `---
title: Simple
---
@@ -584,18 +635,95 @@ func TestPageWithDate(t *testing.T) {
checkPageDate(t, p, d)
}
-func TestRuneCount(t *testing.T) {
+// TestWordCountWithAllCJKRunesWithoutHasCJKLanguage expects a word count of 8
+// for the all-CJK fixture when HasCJKLanguage has not been set.
+func TestWordCountWithAllCJKRunesWithoutHasCJKLanguage(t *testing.T) {
+ viper.Reset()
+
p, _ := NewPage("simple.md")
- _, err := p.ReadFrom(strings.NewReader(SIMPLE_PAGE_WITH_FIVE_MULTIBYTE_UFT8_RUNES))
+ _, err := p.ReadFrom(strings.NewReader(SIMPLE_PAGE_WITH_ALL_CJK_RUNES))
- p.Convert()
- p.analyzePage()
if err != nil {
t.Fatalf("Unable to create a page with frontmatter and body content: %s", err)
}
+ // Only process the page once it is known to have been read successfully.
+ p.Convert()
+ p.analyzePage()
- if p.RuneCount() != 5 {
- t.Fatalf("incorrect rune count for content '%s'. expected %v, got %v", p.plain, 5, p.RuneCount())
+ if p.WordCount != 8 {
+ t.Fatalf("incorrect word count for content '%s'. expected %v, got %v", p.plain, 8, p.WordCount)
+ }
+}
+
+// TestWordCountWithAllCJKRunesHasCJKLanguage expects a word count of 15 for
+// the all-CJK fixture when HasCJKLanguage is enabled.
+func TestWordCountWithAllCJKRunesHasCJKLanguage(t *testing.T) {
+ viper.Reset()
+ defer viper.Reset()
+
+ viper.Set("HasCJKLanguage", true)
+
+ p, _ := NewPage("simple.md")
+ _, err := p.ReadFrom(strings.NewReader(SIMPLE_PAGE_WITH_ALL_CJK_RUNES))
+ if err != nil {
+ t.Fatalf("Unable to create a page with frontmatter and body content: %s", err)
+ }
+ p.Convert()
+ p.analyzePage()
+
+ if p.WordCount != 15 {
+ t.Fatalf("incorrect word count for content '%s'. expected %v, got %v", p.plain, 15, p.WordCount)
+ }
+}
+
+// TestWordCountWithMainEnglishWithCJKRunes expects a word count of 74 and the
+// matching summary fixture for mostly-English content when HasCJKLanguage is enabled.
+func TestWordCountWithMainEnglishWithCJKRunes(t *testing.T) {
+ viper.Reset()
+ defer viper.Reset()
+
+ viper.Set("HasCJKLanguage", true)
+
+ p, _ := NewPage("simple.md")
+ _, err := p.ReadFrom(strings.NewReader(SIMPLE_PAGE_WITH_MAIN_ENGLISH_WITH_CJK_RUNES))
+ if err != nil {
+ t.Fatalf("Unable to create a page with frontmatter and body content: %s", err)
+ }
+ p.Convert()
+ p.analyzePage()
+
+ if p.WordCount != 74 {
+ t.Fatalf("incorrect word count for content '%s'. expected %v, got %v", p.plain, 74, p.WordCount)
+ }
+
+ if p.Summary != SIMPLE_PAGE_WITH_MAIN_ENGLISH_WITH_CJK_RUNES_SUMMARY {
+ t.Fatalf("incorrect Summary for content '%s'. expected %v, got %v", p.plain,
+ SIMPLE_PAGE_WITH_MAIN_ENGLISH_WITH_CJK_RUNES_SUMMARY, p.Summary)
+ }
+}
+
+// TestWordCountWithIsCJKLanguageFalse expects a word count of 75 and the matching
+// summary fixture when front matter sets isCJKLanguage: false while HasCJKLanguage is enabled.
+func TestWordCountWithIsCJKLanguageFalse(t *testing.T) {
+ viper.Reset()
+ defer viper.Reset()
+
+ viper.Set("HasCJKLanguage", true)
+
+ p, _ := NewPage("simple.md")
+ _, err := p.ReadFrom(strings.NewReader(SIMPLE_PAGE_WITH_ISCJKLANGUAGE_FALSE))
+ if err != nil {
+ t.Fatalf("Unable to create a page with frontmatter and body content: %s", err)
+ }
+ p.Convert()
+ p.analyzePage()
+
+ if p.WordCount != 75 {
+ t.Fatalf("incorrect word count for content '%s'. expected %v, got %v", p.plain, 75, p.WordCount)
+ }
+ if p.Summary != SIMPLE_PAGE_WITH_ISCJKLANGUAGE_FALSE_SUMMARY {
+ t.Fatalf("incorrect Summary for content '%s'. expected %v, got %v", p.plain,
+ SIMPLE_PAGE_WITH_ISCJKLANGUAGE_FALSE_SUMMARY, p.Summary)
}
}