-
-
Notifications
You must be signed in to change notification settings - Fork 19
/
Copy pathcmd_html2text.go
92 lines (71 loc) · 1.84 KB
/
cmd_html2text.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
package main
import (
"bufio"
"fmt"
"os"
"strings"
"github.com/skx/subcommands"
"golang.org/x/net/html"
)
// html2TextCommand holds the options and state for the `html2text`
// subcommand.
type html2TextCommand struct {
// We embed the NoFlags option, because we accept no command-line flags.
subcommands.NoFlags
}
// Info returns the name of this subcommand, followed by the help text
// that describes it.
func (h2t *html2TextCommand) Info() (string, string) {
	const name = "html2text"

	// The help text is a raw string, so it is reproduced exactly as written.
	const help = `HTML to text conversion.
This command converts the contents of STDIN, or the named files,
from HTML to text, and prints them to the console.
Examples:
$ curl --silent https://steve.fi/ | sysbox html2text | less
$ sysbox html2text /usr/share/doc/gdisk/gdisk.html |less
`

	return name, help
}
// process tokenizes the HTML read from reader and prints every non-empty
// run of text to STDOUT, one run per line, with surrounding whitespace
// trimmed.
//
// Text inside <script> and <style> elements is not printed.  Tokenizing
// stops at the first error token, which is how the tokenizer reports the
// end of the input; other read errors end processing in the same way and
// are not reported.
func (h2t *html2TextCommand) process(reader *bufio.Reader) {
	tokenizer := html.NewTokenizer(reader)

	// skip is true while we are inside a <script> or <style> element.
	skip := false

	for {
		switch tokenizer.Next() {
		case html.ErrorToken:
			// End of the document, done
			return

		case html.StartTagToken:
			name, _ := tokenizer.TagName()
			skip = string(name) == "script" || string(name) == "style"

		case html.EndTagToken:
			// The contents of <script> and <style> are raw text, so the
			// first end tag seen while skipping closes that element.
			// Without this reset, text following `</script>` would be
			// dropped until the next start tag appeared.
			skip = false

		case html.TextToken:
			if skip {
				continue
			}

			// Text() already returns unescaped text; unescaping it a
			// second time would corrupt input such as `&amp;lt;`.
			text := strings.TrimSpace(string(tokenizer.Text()))
			if text != "" {
				fmt.Println(text)
			}
		}
	}
}
// Execute is invoked if the user specifies `html2text` as the subcommand.
//
// With no arguments the HTML is read from STDIN; otherwise each argument
// is treated as a file name and the files are converted in the order
// given.  It returns 0 on success, or 1 as soon as a file cannot be
// opened.
func (h2t *html2TextCommand) Execute(args []string) int {

	//
	// Read from STDIN
	//
	if len(args) == 0 {
		h2t.process(bufio.NewReader(os.Stdin))
		return 0
	}

	//
	// Otherwise each named file
	//
	for _, file := range args {

		handle, err := os.Open(file)
		if err != nil {
			fmt.Printf("error opening %s : %s\n", file, err.Error())
			return 1
		}

		h2t.process(bufio.NewReader(handle))

		// Close each file as soon as it has been processed, so that a
		// long argument list does not exhaust file descriptors.  A
		// deferred close would only run when Execute returns.  The file
		// was opened read-only, so a failure to close it is ignored.
		_ = handle.Close()
	}

	return 0
}