forked from liuzl/et
-
Notifications
You must be signed in to change notification settings - Fork 0
/
types.go
43 lines (38 loc) · 1.22 KB
/
types.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
package et
import (
"golang.org/x/net/html"
)
// Rule describes how the value for a single key is extracted. The steps are
// applied sequentially: xpath first, then the regular expressions, then js.
// Five types are supported for now: url, dom, text, html and attr.
type Rule struct {
	// Type is the rule type; expected to be one of url, dom, text, html or attr.
	Type string `json:"type"`
	// Key names the value this rule extracts.
	Key string `json:"key"`
	// Xpath is the XPath expression used in the first extraction step.
	Xpath string `json:"xpath"`
	// Re lists the regular expressions used after the XPath step.
	// NOTE(review): how multiple entries are combined is not visible here.
	Re []string `json:"re"`
	// Js is the js code used as the final extraction step.
	Js string `json:"js"`
}
// Parser bundles a set of cascaded rules together with an optional piece of
// js code; both are used to parse the corresponding HTML pages.
type Parser struct {
	// Name identifies the parser.
	// NOTE(review): presumably the value UrlTask.ParserName refers to - confirm.
	Name string `json:"name"`
	// DefaultFields is a boolean switch serialized as "default_fields".
	// NOTE(review): presumably enables a set of default output fields - confirm.
	DefaultFields bool `json:"default_fields"`
	// ZipContent is a boolean switch serialized as "zip_content".
	// NOTE(review): presumably enables compression of page content - confirm.
	ZipContent bool `json:"zip_content"`
	// ExampleUrl is an example URL for this parser.
	ExampleUrl string `json:"example_url"`
	// UA is serialized as "ua".
	// NOTE(review): presumably the User-Agent used when fetching - confirm.
	UA string `json:"ua"`
	// Urls is a list of URL strings.
	// NOTE(review): likely the URLs or URL patterns this parser handles - confirm.
	Urls []string `json:"urls"`
	// Rules maps a name to the list of rules registered under it.
	// NOTE(review): the meaning of the map key is not visible here.
	Rules map[string][]*Rule `json:"rules"`
	// Js is the optional js code of the parser.
	Js string `json:"js"`
}
// DomNode is meant for internal usage only. It groups a name, an HTML node
// and a generic key/value item.
type DomNode struct {
	// Name is the label attached to this node.
	Name string
	// Node points to the underlying parsed HTML node.
	Node *html.Node
	// Item holds arbitrary values keyed by string.
	// NOTE(review): presumably the data extracted for this node - confirm.
	Item map[string]interface{}
}
// UrlTask is a crawling task: Url is the page to fetch and ParserName names
// the parser that should parse it.
type UrlTask struct {
	// ParserName is the name of the parser that should handle Url.
	ParserName string `json:"parser_name"`
	// Url is the URL of the task.
	Url string `json:"url"`
	// TaskName is the name of the task, serialized as "task_name".
	TaskName string `json:"task_name"`
	// Ext carries an arbitrary extra value alongside the task.
	// NOTE(review): its shape is defined by callers and is not visible here.
	Ext interface{} `json:"ext"`
}