diff --git a/extractors/jwplayer/jwplayer.go b/extractors/jwplayer/jwplayer.go
new file mode 100644
index 0000000..4b55870
--- /dev/null
+++ b/extractors/jwplayer/jwplayer.go
@@ -0,0 +1,194 @@
+package jwplayer
+
+import (
+	"bytes"
+	"encoding/json"
+	"errors"
+	"fmt"
+	"io"
+	"mime/multipart"
+	"net/http"
+	"net/textproto"
+	"net/url"
+	"regexp"
+	"strings"
+
+	"github.com/gan-of-culture/get-sauce/parsers/hls"
+	"github.com/gan-of-culture/get-sauce/request"
+	"github.com/gan-of-culture/get-sauce/static"
+)
+
+// mediaData is the JSON document the player API answers with.
+type mediaData struct {
+	Status bool `json:"status"`
+	Data   struct {
+		Image   string `json:"image"`
+		Mosaic  string `json:"mosaic"`
+		Sources []struct {
+			Src   string `json:"src"`
+			Type  string `json:"type"`
+			Label string `json:"label"`
+		} `json:"sources"`
+	} `json:"data"`
+}
+
+var reJWPlayerURL = regexp.MustCompile(`[^"]+/wp-content/plugins/player-logic/player\.php[^"]+`)
+var reMultiPartParams = regexp.MustCompile(`append\('([abc])', ?([^\)]*)`) //1=a : some string b : some other string
+var reAPIURL = regexp.MustCompile("fetch\\(['`\"]([^'`\"]+api\\.php)")
+var reVariable = regexp.MustCompile(`\$\{\w+\}`)
+
+// variablePattern is the regexp template used to look up the string literal
+// assigned to a JS variable; %s is replaced with the escaped variable name.
+const variablePattern = `var %s = '([^']+)`
+
+type extractor struct{}
+
+// New returns a jwplayer extractor
+func New() static.Extractor {
+	return &extractor{}
+}
+
+// Extract from URL
+func (e *extractor) Extract(URL string) ([]*static.Data, error) {
+
+	htmlString, err := request.Get(URL)
+	if err != nil {
+		return nil, err
+	}
+
+	u, err := url.Parse(URL)
+	if err != nil {
+		return nil, err
+	}
+
+	site := fmt.Sprintf("https://%s/", u.Host)
+
+	matchedAPIURL := reAPIURL.FindStringSubmatch(htmlString)
+	if len(matchedAPIURL) < 2 {
+		return nil, static.ErrURLParseFailed
+	}
+
+	// Only placeholders that occur in the matched URL matter, so resolve the
+	// ${name} entries found there instead of the first one anywhere in the page.
+	apiURL := matchedAPIURL[1]
+	for _, variable := range reVariable.FindAllString(apiURL, -1) {
+		variableValue, err := findVariable(variable, &htmlString)
+		if err != nil {
+			return nil, err
+		}
+
+		apiURL = strings.ReplaceAll(apiURL, variable, variableValue)
+	}
+
+	body := &bytes.Buffer{}
+	writer := multipart.NewWriter(body)
+
+	vals := [][]string{{"", "action", "zarat_get_data_player_ajax"}}
+	vals = append(vals, reMultiPartParams.FindAllStringSubmatch(htmlString, -1)...)
+
+	for _, v := range vals {
+		mimeHeader := textproto.MIMEHeader{}
+		mimeHeader.Set("Content-Disposition", fmt.Sprintf("form-data; name=\"%s\"", v[1]))
+		part, err := writer.CreatePart(mimeHeader)
+		if err != nil {
+			return nil, err
+		}
+
+		// The captured value is sent as-is unless it names a JS variable that
+		// can be resolved, so a failed lookup is deliberately not an error.
+		if variableValue, err := findVariable(v[2], &htmlString); err == nil {
+			v[2] = variableValue
+		}
+
+		if _, err := part.Write([]byte(v[2])); err != nil {
+			return nil, err
+		}
+	}
+	if err := writer.Close(); err != nil {
+		return nil, err
+	}
+	// --- End of multipart creation
+	// This will create something like this
+	//------WebKitFormBoundaryDyxVGG0MJMgqpBFh
+	//Content-Disposition: form-data; name="action"
+	//
+	//zarat_get_data_player_ajax
+	//------WebKitFormBoundaryDyxVGG0MJMgqpBFh
+	//Content-Disposition: form-data; name="a"
+	//
+	//NaRHayKOyzVTAkNnrg9SLSoYh2BTyYfgWfGO2jWz0NrecL/Vo55dZ8aXX9VztkUcSl8qKRd6GF/8SFfC47WyQEi+Z/Ii4n2FzPzmJwKlefvLxcLZBAJfopxo8M1XfEljw5E9fNOaL/5KMklhF+zwWOvI+lfu0A/hT2Sv5jFPn3k=
+	//------WebKitFormBoundaryDyxVGG0MJMgqpBFh
+	//Content-Disposition: form-data; name="b"
+	//
+	//RklZWG9ub0hiWnl5VUR2Y2tSYUpMdz09
+	//------WebKitFormBoundaryDyxVGG0MJMgqpBFh--
+
+	res, err := request.Request(http.MethodPost, apiURL, map[string]string{
+		"Content-Type": writer.FormDataContentType(),
+	}, body)
+	if err != nil {
+		return nil, err
+	}
+	defer res.Body.Close()
+
+	respBody, err := io.ReadAll(res.Body)
+	if err != nil {
+		return nil, err
+	}
+
+	sources := mediaData{}
+	err = json.Unmarshal(respBody, &sources)
+	if err != nil {
+		return nil, err
+	}
+
+	if !sources.Status {
+		return nil, fmt.Errorf("the jwplayer api request for the streams did not return successful for %s", URL)
+	}
+
+	// Guard the index below: the decoded source list may be empty.
+	if len(sources.Data.Sources) == 0 {
+		return nil, errors.New("the jwplayer api response did not contain any sources")
+	}
+
+	streams, err := hls.Extract(sources.Data.Sources[0].Src, nil)
+	if err != nil {
+		return nil, err
+	}
+
+	for _, stream := range streams {
+		stream.Ext = "mp4"
+	}
+
+	return []*static.Data{
+		{
+			Site:    site,
+			Title:   "jwplayer video",
+			Type:    static.DataTypeVideo,
+			Streams: streams,
+		},
+	}, nil
+}
+
+// FindJWPlayerURL in HTML page
+func FindJWPlayerURL(htmlString *string) string {
+	return reJWPlayerURL.FindString(*htmlString)
+}
+
+// findVariable returns the string literal assigned to a JS variable in
+// htmlString. variable may be a bare name or a ${name} placeholder.
+func findVariable(variable string, htmlString *string) (string, error) {
+	variable = strings.NewReplacer("$", "", "{", "", "}", "").Replace(variable)
+
+	// QuoteMeta keeps arbitrary page text from being read as regexp syntax.
+	re, err := regexp.Compile(fmt.Sprintf(variablePattern, regexp.QuoteMeta(variable)))
+	if err != nil {
+		return "", err
+	}
+	matchedVariable := re.FindStringSubmatch(*htmlString)
+	if len(matchedVariable) < 2 {
+		return "", fmt.Errorf("could not match any for variable '%s'", variable)
+	}
+
+	return matchedVariable[1], nil
+}
diff --git a/extractors/jwplayer/jwplayer_test.go b/extractors/jwplayer/jwplayer_test.go
new file mode 100644
index 0000000..aa39355
--- /dev/null
+++ b/extractors/jwplayer/jwplayer_test.go
@@ -0,0 +1,54 @@
+package jwplayer
+
+import (
+	"testing"
+
+	"github.com/gan-of-culture/get-sauce/test"
+)
+
+func TestExtract(t *testing.T) {
+	tests := []struct {
+		Name string
+		Args test.Args
+	}{
+		{
+			Name: "Single Episode hentaihaven.com",
+			Args: test.Args{
+				URL:     "https://hentaihaven.com/wp-content/plugins/player-logic/player.php?data=RWsra254MXRSelpiUFRnNmNESjJ5ZXg2QWlHQ0xGOE9TVHRrcU9LeTUxQXN3NmJ5NVNZaXNlb015OVQ5THBjb1M0eGc2dFVOR2hHdDNNRjJFVnd6KzFrUVJGTlBkOG9hWHNlUU1nTStJRFI3QkF4ZFNxaFY1QVFTandLWGo2ZXVZeXpkR1JPMTMxcFdVRkp2TWIwTGMwZTBCZW55M053aGtkTEtUcVNFWmtCK25DYmRIc0hmcHF6ZTQvRXQ5WHNIRkQ5RGlMOEdVdS85cXBKK3ZkQVZ6VkZDbE1Vd0o2bU9pZmJhVzY1UHdJVDdjL2I4ZThWdVhNUWllUEgya29IY3c0aGQvWkpYYlhMQXdPTlduRjJ3UFE9PTp8Ojp8OjJrTG42T2ZSbEM5L3FiUnp6Mmphb2c9PQ==",
+				Title:   "jwplayer video",
+				Quality: "1920x1080",
+				Size:    465565080,
+			},
+		},
+	}
+	for _, tt := range tests {
+		t.Run(tt.Name, func(t *testing.T) {
+			data, err := New().Extract(tt.Args.URL)
+			test.CheckError(t, err)
+			test.Check(t, tt.Args, data[0])
+		})
+	}
+}
+
+// TestFindJWPlayerURL checks that the player URL is cut out of a quoted HTML attribute.
+func TestFindJWPlayerURL(t *testing.T) {
+	tests := []struct {
+		Name string
+		URL  string
+		Want string
+	}{
+		{
+			Name: "HTML string",
+			URL:  `<iframe src="https://hentaihaven.com/wp-content/plugins/player-logic/player.php?data=RWsra254MXRSelpiUFRnNmNESjJ5ZXg2QWlHQ0xGOE9TVHRrcU9LeTUxQXN3NmJ5NVNZaXNlb015OVQ5THBjb1M0eGc2dFVOR2hHdDNNRjJFVnd6KzFrUVJGTlBkOG9hWHNlUU1nTStJRFI3QkF4ZFNxaFY1QVFTandLWGo2ZXVZeXpkR1JPMTMxcFdVRkp2TWIwTGMwZTBCZW55M053aGtkTEtUcVNFWmtCK25DYmRIc0hmcHF6ZTQvRXQ5WHNIRkQ5RGlMOEdVdS85cXBKK3ZkQVZ6VkZDbE1Vd0o2bU9pZmJhVzY1UHdJVDdjL2I4ZThWdVhNUWllUEgya29IY3c0aGQvWkpYYlhMQXdPTlduRjJ3UFE9PTp8Ojp8OjJrTG42T2ZSbEM5L3FiUnp6Mmphb2c9PQ==" frameborder="0"></iframe>`,
+			Want: "https://hentaihaven.com/wp-content/plugins/player-logic/player.php?data=RWsra254MXRSelpiUFRnNmNESjJ5ZXg2QWlHQ0xGOE9TVHRrcU9LeTUxQXN3NmJ5NVNZaXNlb015OVQ5THBjb1M0eGc2dFVOR2hHdDNNRjJFVnd6KzFrUVJGTlBkOG9hWHNlUU1nTStJRFI3QkF4ZFNxaFY1QVFTandLWGo2ZXVZeXpkR1JPMTMxcFdVRkp2TWIwTGMwZTBCZW55M053aGtkTEtUcVNFWmtCK25DYmRIc0hmcHF6ZTQvRXQ5WHNIRkQ5RGlMOEdVdS85cXBKK3ZkQVZ6VkZDbE1Vd0o2bU9pZmJhVzY1UHdJVDdjL2I4ZThWdVhNUWllUEgya29IY3c0aGQvWkpYYlhMQXdPTlduRjJ3UFE9PTp8Ojp8OjJrTG42T2ZSbEM5L3FiUnp6Mmphb2c9PQ==",
+		},
+	}
+	for _, tt := range tests {
+		t.Run(tt.Name, func(t *testing.T) {
+			u := FindJWPlayerURL(&tt.URL)
+			if u != tt.Want {
+				t.Errorf("Got: %v - Want: %v", u, tt.Want)
+			}
+		})
+	}
+}
diff --git a/go.mod b/go.mod
index 89bf473..a3aafa9 100755
--- a/go.mod
+++ b/go.mod
@@ -8,8 +8,6 @@ require (
 	github.com/schollz/progressbar/v3 v3.13.1
 )
 
-require github.com/klauspost/compress v1.17.9 // indirect
-
 require (
 	github.com/mattn/go-runewidth v0.0.15 // indirect
 	github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db // indirect
diff --git a/request/request.go b/request/request.go
index f6cf6cc..a5b6b0c 100755
--- a/request/request.go
+++ b/request/request.go
@@ -14,7 +14,6 @@ import (
 	"github.com/andybalholm/brotli"
 	"github.com/gan-of-culture/get-sauce/config"
 	"github.com/gan-of-culture/get-sauce/utils"
-	"github.com/klauspost/compress/zstd"
 	"github.com/pkg/errors"
 )
 
@@ -392,14 +391,8 @@ func DecompressHttpResponse(body io.ReadCloser, contentEncoding string) (io.Read
 		}
 	case "deflate":
 		reader = flate.NewReader(body)
-	case "zstd":
-		//TODO: replace with impl of standard lib when change is landed (https://github.com/golang/go/issues/62513)
-		d, err := zstd.NewReader(body)
-		if err != nil {
-			return nil, errors.WithStack(err)
-		}
-
-		reader = io.NopCloser(d)
+	//case "zstd":
+	//TODO: impl of standard lib when change is landed (https://github.com/golang/go/issues/62513)
 	default:
 		reader = body
 	}