diff --git a/cmd/mungedocs/links.go b/cmd/mungedocs/links.go
new file mode 100644
index 0000000000000..1ecdebf6418ee
--- /dev/null
+++ b/cmd/mungedocs/links.go
@@ -0,0 +1,152 @@
+/*
+Copyright 2015 The Kubernetes Authors All rights reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package main
+
+import (
+	"fmt"
+	"net/url"
+	"os"
+	"path"
+	"regexp"
+	"strings"
+)
+
+var (
+	// Finds markdown links of the form [foo](bar "alt-text").
+	linkRE = regexp.MustCompile(`\[([^]]*)\]\(([^)]*)\)`)
+	// Splits the link target into link target and alt-text.
+	altTextRE = regexp.MustCompile(`(.*)( ".*")`)
+)
+
+// checkLinks assumes fileBytes has links in markdown syntax, and verifies that
+// any relative links actually point to files that exist.
+func checkLinks(filePath string, fileBytes []byte) ([]byte, error) {
+	dir := path.Dir(filePath)
+	errors := []string{}
+
+	output := linkRE.ReplaceAllFunc(fileBytes, func(in []byte) (out []byte) {
+		match := linkRE.FindSubmatch(in)
+		// match[0] is the entire expression; [1] is the visible text and [2] is the link text.
+		visibleText := string(match[1])
+		linkText := string(match[2])
+		altText := ""
+		if parts := altTextRE.FindStringSubmatch(linkText); parts != nil {
+			linkText = parts[1]
+			altText = parts[2]
+		}
+
+		// clean up some random garbage I found in our docs.
+		linkText = strings.Trim(linkText, " ")
+		linkText = strings.Trim(linkText, "\n")
+		linkText = strings.Trim(linkText, " ")
+
+		u, err := url.Parse(linkText)
+		if err != nil {
+			errors = append(
+				errors,
+				fmt.Sprintf("%v, link %q is unparsable: %v", filePath, linkText, err),
+			)
+			return in
+		}
+
+		if u.Host != "" {
+			// We only care about relative links.
+			return in
+		}
+
+		suggestedVisibleText := visibleText
+		if u.Path != "" && !strings.HasPrefix(linkText, "TODO:") {
+			newPath, targetExists := checkPath(filePath, path.Clean(u.Path))
+			if !targetExists {
+				errors = append(
+					errors,
+					fmt.Sprintf("%v, %q: target not found", filePath, linkText),
+				)
+			}
+			u.Path = newPath
+			// Make the visible text show the absolute path if it's
+			// not nested in or beneath the current directory.
+			if strings.HasPrefix(u.Path, "..") {
+				suggestedVisibleText = makeRepoRelative(path.Join(dir, u.Path))
+			} else {
+				suggestedVisibleText = u.Path
+			}
+			if unescaped, err := url.QueryUnescape(u.String()); err == nil {
+				// Remove %28 type stuff, be nice to humans.
+				// And don't fight with the toc generator.
+				linkText = unescaped
+			} else {
+				// Unescaping failed; keep the escaped form rather than
+				// replacing the link with an empty string.
+				linkText = u.String()
+			}
+		}
+		// If the current visible text is trying to be a file name, use
+		// the correct file name.
+		if (strings.Contains(visibleText, ".md") || strings.Contains(visibleText, "/")) && !strings.ContainsAny(visibleText, ` '"`+"`") {
+			visibleText = suggestedVisibleText
+		}
+
+		return []byte(fmt.Sprintf("[%s](%s)", visibleText, linkText+altText))
+	})
+	err := error(nil)
+	if len(errors) != 0 {
+		err = fmt.Errorf("%s", strings.Join(errors, "\n"))
+	}
+	return output, err
+}
+
+// makeRepoRelative strips everything up to and including the
+// "github.com/GoogleCloudPlatform/kubernetes/" prefix from path, if present,
+// so that the result does not depend on the local checkout location.
+func makeRepoRelative(path string) string {
+	parts := strings.Split(path, "github.com/GoogleCloudPlatform/kubernetes/")
+	if len(parts) > 1 {
+		// Take out anything that is specific to the local filesystem.
+		return parts[1]
+	}
+	return path
+}
+
+// checkPath looks for linkPath relative to the directory containing filePath.
+// If it is not found there, it retries with up to four extra "../" prefixes.
+// It returns the first path that exists (with a trailing "/" for directories)
+// and true, or the (possibly de-rooted) linkPath and false if nothing matched.
+func checkPath(filePath, linkPath string) (newPath string, ok bool) {
+	dir := path.Dir(filePath)
+	if strings.HasPrefix(linkPath, "/") {
+		if !strings.HasPrefix(linkPath, "/GoogleCloudPlatform") {
+			// Any absolute paths that aren't relative to github.com are wrong.
+			// Try to fix.
+			linkPath = linkPath[1:]
+		}
+	}
+
+	newPath = linkPath
+	for i := 0; i < 5; i++ {
+		// The file must exist.
+		target := path.Join(dir, newPath)
+		if info, err := os.Stat(target); err == nil {
+			if info.IsDir() {
+				return newPath + "/", true
+			}
+			return newPath, true
+		}
+		newPath = path.Join("..", newPath)
+	}
+	return linkPath, false
+}
diff --git a/cmd/mungedocs/mungedocs.go b/cmd/mungedocs/mungedocs.go
index ef0e0943a4f25..60f05849c7e99 100644
--- a/cmd/mungedocs/mungedocs.go
+++ b/cmd/mungedocs/mungedocs.go
@@ -33,52 +33,62 @@ var (
 	rootDir = flag.String("root-dir", "", "Root directory containing documents to be processed.")
 
 	ErrChangesNeeded = errors.New("mungedocs: changes required")
+
+	// TODO: allow selection from command line. (e.g., just check links in the examples directory.)
+	mungesToMake = munges{
+		munger(updateTOC),
+		munger(checkLinks),
+	}
 )
 
-func visitAndVerify(path string, i os.FileInfo, e error) error {
-	return visitAndChangeOrVerify(path, i, e, false)
-}
+// Munger processes a document, returning an updated document xor an error.
+// Munger is NOT allowed to mutate 'before', if changes are needed it must copy
+// data into a new byte array.
+type munger func(filePath string, before []byte) (after []byte, err error)
+
+type munges []munger
 
-func visitAndChange(path string, i os.FileInfo, e error) error {
-	return visitAndChangeOrVerify(path, i, e, true)
+type fileProcessor struct {
+	// Which munge functions should we call?
+	munges munges
+
+	// Are we allowed to make changes?
+	verifyOnly bool
 }
 
-// Either change a file or verify that it needs no changes (according to modify argument)
-func visitAndChangeOrVerify(path string, i os.FileInfo, e error, modify bool) error {
+// visit munges one markdown file: it rewrites the file when changes are needed, or returns ErrChangesNeeded instead when f.verifyOnly is set.
+func (f fileProcessor) visit(path string, i os.FileInfo, e error) error {
 	if !strings.HasSuffix(path, ".md") {
 		return nil
 	}
 
-	file, err := os.Open(path)
-	if err != nil {
-		return err
-	}
-	defer file.Close()
-	before, err := ioutil.ReadAll(file)
+	fileBytes, err := ioutil.ReadFile(path)
 	if err != nil {
 		return err
 	}
 
-	after, err := updateTOC(before)
-	if err != nil {
-		return err
-	}
-	if modify {
-		// Write out new file with any changes.
-		if !bytes.Equal(after, before) {
-			file.Close()
-			ioutil.WriteFile(path, after, 0644)
+	modificationsMade := false
+	for _, munge := range f.munges {
+		after, err := munge(path, fileBytes)
+		if err != nil {
+			return err
 		}
-	} else {
-		// Just verify that there are no changes.
-		if !bytes.Equal(after, before) {
-			return ErrChangesNeeded
+		if !modificationsMade {
+			if !bytes.Equal(after, fileBytes) {
+				modificationsMade = true
+				if f.verifyOnly {
+					// We're not allowed to make changes.
+					return ErrChangesNeeded
+				}
+			}
 		}
+		fileBytes = after
 	}
 
-	// TODO(erictune): more types of passes, such as:
-	// Linkify terms
-	// Verify links point to files.
+	// Write out new file with any changes.
+	if modificationsMade {
+		return ioutil.WriteFile(path, fileBytes, 0644)
+	}
 	return nil
 }
 
@@ -91,6 +101,11 @@ func main() {
 		os.Exit(1)
 	}
 
+	fp := fileProcessor{
+		munges:     mungesToMake,
+		verifyOnly: *verify,
+	}
+
 	// For each markdown file under source docs root, process the doc.
 	// If any error occurs, will exit with failure.
 	// If verify is true, then status is 0 for no changes needed, 1 for changes needed
@@ -98,12 +113,7 @@ func main() {
 	// If verify is false, then status is 0 if changes successfully made or no changes needed,
 	// 1 if changes were needed but require human intervention, and >1 for an unexpected
 	// error during processing.
-	var err error
-	if *verify {
-		err = filepath.Walk(*rootDir, visitAndVerify)
-	} else {
-		err = filepath.Walk(*rootDir, visitAndChange)
-	}
+	err := filepath.Walk(*rootDir, fp.visit)
 	if err != nil {
 		if err == ErrChangesNeeded {
 			if *verify {
diff --git a/cmd/mungedocs/toc.go b/cmd/mungedocs/toc.go
index c5de6f98bb18f..0636c8ba76a07 100644
--- a/cmd/mungedocs/toc.go
+++ b/cmd/mungedocs/toc.go
@@ -30,7 +30,7 @@ import (
 // the ToC, thereby updating any previously inserted ToC.
 //
 // TODO(erictune): put this in own package with tests
-func updateTOC(markdown []byte) ([]byte, error) {
+func updateTOC(filePath string, markdown []byte) ([]byte, error) {
 	toc, err := buildTOC(markdown)
 	if err != nil {
 		return nil, err
diff --git a/cmd/mungedocs/toc_test.go b/cmd/mungedocs/toc_test.go
index e7d502b508884..f52df24df75b1 100644
--- a/cmd/mungedocs/toc_test.go
+++ b/cmd/mungedocs/toc_test.go
@@ -92,7 +92,7 @@ func Test_updateTOC(t *testing.T) {
 			"# Title\nLorem ipsum \n**table of contents**\n\n- [Title](#title)\n - [Section Heading](#section-heading)\n\n\n## Section Heading\ndolor sit amet\n"},
 	}
 	for _, c := range cases {
-		actual, err := updateTOC([]byte(c.in))
+		actual, err := updateTOC("filename.md", []byte(c.in))
 		assert.NoError(t, err)
 		if c.out != string(actual) {
 			t.Errorf("Expected TOC '%v' but got '%v'", c.out, string(actual))