Skip to content

Commit

Permalink
Adds a link fixer/checker to mungedocs.
Browse files Browse the repository at this point in the history
Links that don't work yet can be prefixed with "TODO:" to avoid the check.
  • Loading branch information
lavalamp committed Jul 10, 2015
1 parent affba42 commit c4aab16
Show file tree
Hide file tree
Showing 4 changed files with 189 additions and 36 deletions.
143 changes: 143 additions & 0 deletions cmd/mungedocs/links.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,143 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package main

import (
"fmt"
"net/url"
"os"
"path"
"regexp"
"strings"
)

var (
	// altTextRE separates a link target from a trailing quoted alt-text.
	// Group 1 is the target; group 2 is the alt-text, including the space
	// that precedes its opening quote.
	altTextRE = regexp.MustCompile(`(.*)( ".*")`)
	// linkRE matches markdown links of the form [foo](bar "alt-text").
	// Group 1 is the visible text; group 2 is everything between the
	// parentheses (target plus optional alt-text).
	linkRE = regexp.MustCompile(`\[([^]]*)\]\(([^)]*)\)`)
)

// checkLinks assumes fileBytes has links in markdown syntax, and verifies that
// any relative links actually point to files that exist.
//
// Every match of linkRE is examined:
//   - Links whose target cannot be parsed as a URL are left untouched and
//     reported as an error.
//   - Links with a host component are left untouched.
//   - Links with a non-empty path that are not prefixed with "TODO:" are
//     resolved with checkPath; a missing target is reported as an error, and
//     the link target is rewritten to the path checkPath returned.
//   - Visible text that looks like a file name (contains ".md" or "/" and no
//     space, quote or backtick) is replaced with the corrected path.
//
// It returns the rewritten document together with an error that aggregates
// every problem found, or a nil error if there were none. The rewritten
// document is returned even when the error is non-nil.
func checkLinks(filePath string, fileBytes []byte) ([]byte, error) {
	dir := path.Dir(filePath)
	var errs []string

	output := linkRE.ReplaceAllFunc(fileBytes, func(in []byte) []byte {
		match := linkRE.FindSubmatch(in)
		// match[0] is the entire expression; [1] is the visible text and [2] is the link text.
		visibleText := string(match[1])
		linkText := string(match[2])
		altText := ""
		if parts := altTextRE.FindStringSubmatch(linkText); parts != nil {
			linkText = parts[1]
			altText = parts[2]
		}

		// Clean up stray whitespace found around link targets in the docs.
		linkText = strings.Trim(linkText, " ")
		linkText = strings.Trim(linkText, "\n")
		linkText = strings.Trim(linkText, " ")

		u, err := url.Parse(linkText)
		if err != nil {
			errs = append(
				errs,
				fmt.Sprintf("%v, link %q is unparsable: %v", filePath, linkText, err),
			)
			return in
		}

		if u.Host != "" {
			// We only care about relative links.
			return in
		}

		suggestedVisibleText := visibleText
		if u.Path != "" && !strings.HasPrefix(linkText, "TODO:") {
			newPath, targetExists := checkPath(filePath, path.Clean(u.Path))
			if !targetExists {
				errs = append(
					errs,
					fmt.Sprintf("%v, %q: target not found\n", filePath, linkText),
				)
			}
			u.Path = newPath
			// Make the visible text show the absolute path if it's
			// not nested in or beneath the current directory.
			if strings.HasPrefix(u.Path, "..") {
				suggestedVisibleText = makeRepoRelative(path.Join(dir, u.Path))
			} else {
				suggestedVisibleText = u.Path
			}

			// Remove %28 type stuff, be nice to humans.
			// And don't fight with the toc generator.
			// Fall back to the escaped form when unescaping fails, so a
			// malformed escape never wipes out the link target.
			// NOTE(review): QueryUnescape also turns '+' into a space; confirm
			// that is acceptable for the link targets in the docs tree.
			linkText = u.String()
			if unescaped, err := url.QueryUnescape(linkText); err == nil {
				linkText = unescaped
			}
		}
		// If the current visible text is trying to be a file name, use
		// the correct file name.
		if (strings.Contains(visibleText, ".md") || strings.Contains(visibleText, "/")) && !strings.ContainsAny(visibleText, ` '"`+"`") {
			visibleText = suggestedVisibleText
		}

		return []byte(fmt.Sprintf("[%s](%s)", visibleText, linkText+altText))
	})

	var err error
	if len(errs) != 0 {
		err = fmt.Errorf("%s", strings.Join(errs, "\n"))
	}
	return output, err
}

// makeRepoRelative strips everything up to and including the
// "github.com/GoogleCloudPlatform/kubernetes/" marker from path, removing the
// part that is specific to the local filesystem. Paths that do not contain the
// marker are returned unchanged.
func makeRepoRelative(path string) string {
	const repoMarker = "github.com/GoogleCloudPlatform/kubernetes/"

	segments := strings.Split(path, repoMarker)
	if len(segments) < 2 {
		// Marker not present: nothing to strip.
		return path
	}
	return segments[1]
}

// checkPath looks for the file or directory that linkPath refers to, starting
// in the directory containing filePath and then retrying with up to four
// additional leading ".." components.
//
// A leading "/" is dropped from linkPath unless the path starts with
// "/GoogleCloudPlatform", since other absolute paths are considered wrong.
//
// On success it returns the (possibly ".."-prefixed) path that was found, with
// a trailing "/" appended for directories, and true. If nothing is found it
// returns linkPath (after the leading-slash adjustment) and false.
func checkPath(filePath, linkPath string) (newPath string, ok bool) {
	// Any absolute paths that aren't relative to github.com are wrong.
	// Try to fix them by treating them as relative.
	if strings.HasPrefix(linkPath, "/") && !strings.HasPrefix(linkPath, "/GoogleCloudPlatform") {
		linkPath = linkPath[1:]
	}

	const maxAttempts = 5
	baseDir := path.Dir(filePath)
	candidate := linkPath
	for attempt := 0; attempt < maxAttempts; attempt++ {
		info, err := os.Stat(path.Join(baseDir, candidate))
		if err != nil {
			// Not here; look one directory further up.
			candidate = path.Join("..", candidate)
			continue
		}
		if info.IsDir() {
			return candidate + "/", true
		}
		return candidate, true
	}
	return linkPath, false
}
78 changes: 44 additions & 34 deletions cmd/mungedocs/mungedocs.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,52 +33,62 @@ var (
rootDir = flag.String("root-dir", "", "Root directory containing documents to be processed.")

ErrChangesNeeded = errors.New("mungedocs: changes required")

// TODO: allow selection from command line. (e.g., just check links in the examples directory.)
mungesToMake = munges{
munger(updateTOC),
munger(checkLinks),
}
)

func visitAndVerify(path string, i os.FileInfo, e error) error {
return visitAndChangeOrVerify(path, i, e, false)
}
// munger processes the document stored at filePath, whose current contents are
// passed in 'before'. Its contract is to return an updated document xor an
// error.
// A munger is NOT allowed to mutate 'before'; if changes are needed it must
// copy the data into a new byte array.
type munger func(filePath string, before []byte) (after []byte, err error)

type munges []munger

func visitAndChange(path string, i os.FileInfo, e error) error {
return visitAndChangeOrVerify(path, i, e, true)
// fileProcessor applies a configured list of munge functions to ".md" files
// through its visit method.
type fileProcessor struct {
// munges lists the munge functions to call. visit runs them in order,
// feeding each one the output of the previous one.
munges munges

// verifyOnly, when true, means files must not be modified: visit returns
// ErrChangesNeeded as soon as a munger's output differs from its input.
// When false, visit writes the final result back to the file if anything
// changed.
verifyOnly bool
}

// Either change a file or verify that it needs no changes (according to modify argument)
func visitAndChangeOrVerify(path string, i os.FileInfo, e error, modify bool) error {
func (f fileProcessor) visit(path string, i os.FileInfo, e error) error {
if !strings.HasSuffix(path, ".md") {
return nil
}
file, err := os.Open(path)
if err != nil {
return err
}
defer file.Close()

before, err := ioutil.ReadAll(file)
fileBytes, err := ioutil.ReadFile(path)
if err != nil {
return err
}

after, err := updateTOC(before)
if err != nil {
return err
}
if modify {
// Write out new file with any changes.
if !bytes.Equal(after, before) {
file.Close()
ioutil.WriteFile(path, after, 0644)
modificationsMade := false
for _, munge := range f.munges {
after, err := munge(path, fileBytes)
if err != nil {
return err
}
} else {
// Just verify that there are no changes.
if !bytes.Equal(after, before) {
return ErrChangesNeeded
if !modificationsMade {
if !bytes.Equal(after, fileBytes) {
modificationsMade = true
if f.verifyOnly {
// We're not allowed to make changes.
return ErrChangesNeeded
}
}
}
fileBytes = after
}

// TODO(erictune): more types of passes, such as:
// Linkify terms
// Verify links point to files.
// Write out new file with any changes.
if modificationsMade {
ioutil.WriteFile(path, fileBytes, 0644)
}

return nil
}
Expand All @@ -91,19 +101,19 @@ func main() {
os.Exit(1)
}

fp := fileProcessor{
munges: mungesToMake,
verifyOnly: *verify,
}

// For each markdown file under source docs root, process the doc.
// If any error occurs, will exit with failure.
// If verify is true, then status is 0 for no changes needed, 1 for changes needed
// and >1 for an error during processing.
// If verify is false, then status is 0 if changes successfully made or no changes needed,
// 1 if changes were needed but require human intervention, and >1 for an unexpected
// error during processing.
var err error
if *verify {
err = filepath.Walk(*rootDir, visitAndVerify)
} else {
err = filepath.Walk(*rootDir, visitAndChange)
}
err := filepath.Walk(*rootDir, fp.visit)
if err != nil {
if err == ErrChangesNeeded {
if *verify {
Expand Down
2 changes: 1 addition & 1 deletion cmd/mungedocs/toc.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ import (
// the ToC, thereby updating any previously inserted ToC.
//
// TODO(erictune): put this in own package with tests
func updateTOC(markdown []byte) ([]byte, error) {
func updateTOC(filePath string, markdown []byte) ([]byte, error) {
toc, err := buildTOC(markdown)
if err != nil {
return nil, err
Expand Down
2 changes: 1 addition & 1 deletion cmd/mungedocs/toc_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ func Test_updateTOC(t *testing.T) {
"# Title\nLorem ipsum \n**table of contents**\n<!-- BEGIN GENERATED TOC -->\n- [Title](#title)\n - [Section Heading](#section-heading)\n\n<!-- END GENERATED TOC -->\n## Section Heading\ndolor sit amet\n"},
}
for _, c := range cases {
actual, err := updateTOC([]byte(c.in))
actual, err := updateTOC("filename.md", []byte(c.in))
assert.NoError(t, err)
if c.out != string(actual) {
t.Errorf("Expected TOC '%v' but got '%v'", c.out, string(actual))
Expand Down

0 comments on commit c4aab16

Please sign in to comment.