Skip to content

Commit

Permalink
refactor: streamline file service retrieval and enhance spider templa…
Browse files Browse the repository at this point in the history
…te handling

- Replaced direct calls to getBaseFileFsSvc in base_file.go with the new function fs.GetBaseFileFsSvc (defined in core/fs/utils.go) for improved clarity and maintainability.
- Introduced SpiderTemplateService interface and implemented registry service for managing spider templates, enhancing template handling in the spider controller.
- Added template-related fields to the Spider model to support template functionality.
- Created utility functions for string case conversions in utils/string.go to facilitate consistent formatting across the codebase.
- Updated environment configuration to retrieve the Python path dynamically, improving flexibility in the task runner's setup.
  • Loading branch information
Marvin Zhang committed Jan 6, 2025
1 parent f5d9ccf commit 8d8b47e
Show file tree
Hide file tree
Showing 9 changed files with 110 additions and 28 deletions.
34 changes: 10 additions & 24 deletions core/controllers/base_file.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,19 +4,16 @@ import (
"errors"
"fmt"
"github.com/crawlab-team/crawlab/core/fs"
"github.com/crawlab-team/crawlab/core/interfaces"
"github.com/crawlab-team/crawlab/core/utils"
"github.com/gin-gonic/gin"
"io"
"os"
"path/filepath"
"sync"
)

func GetBaseFileListDir(rootPath string, c *gin.Context) {
path := c.Query("path")

fsSvc, err := getBaseFileFsSvc(rootPath)
fsSvc, err := fs.GetBaseFileFsSvc(rootPath)
if err != nil {
HandleErrorBadRequest(c, err)
return
Expand All @@ -36,7 +33,7 @@ func GetBaseFileListDir(rootPath string, c *gin.Context) {
func GetBaseFileFile(rootPath string, c *gin.Context) {
path := c.Query("path")

fsSvc, err := getBaseFileFsSvc(rootPath)
fsSvc, err := fs.GetBaseFileFsSvc(rootPath)
if err != nil {
HandleErrorBadRequest(c, err)
return
Expand All @@ -54,7 +51,7 @@ func GetBaseFileFile(rootPath string, c *gin.Context) {
func GetBaseFileFileInfo(rootPath string, c *gin.Context) {
path := c.Query("path")

fsSvc, err := getBaseFileFsSvc(rootPath)
fsSvc, err := fs.GetBaseFileFsSvc(rootPath)
if err != nil {
HandleErrorBadRequest(c, err)
return
Expand All @@ -70,7 +67,7 @@ func GetBaseFileFileInfo(rootPath string, c *gin.Context) {
}

func PostBaseFileSaveFile(rootPath string, c *gin.Context) {
fsSvc, err := getBaseFileFsSvc(rootPath)
fsSvc, err := fs.GetBaseFileFsSvc(rootPath)
if err != nil {
HandleErrorInternalServerError(c, err)
return
Expand Down Expand Up @@ -120,7 +117,7 @@ func PostBaseFileSaveFile(rootPath string, c *gin.Context) {
}

func PostBaseFileSaveFiles(rootPath string, c *gin.Context) {
fsSvc, err := getBaseFileFsSvc(rootPath)
fsSvc, err := fs.GetBaseFileFsSvc(rootPath)
if err != nil {
HandleErrorInternalServerError(c, err)
return
Expand Down Expand Up @@ -181,7 +178,7 @@ func PostBaseFileSaveDir(rootPath string, c *gin.Context) {
return
}

fsSvc, err := getBaseFileFsSvc(rootPath)
fsSvc, err := fs.GetBaseFileFsSvc(rootPath)
if err != nil {
HandleErrorBadRequest(c, err)
return
Expand All @@ -205,7 +202,7 @@ func PostBaseFileRenameFile(rootPath string, c *gin.Context) {
return
}

fsSvc, err := getBaseFileFsSvc(rootPath)
fsSvc, err := fs.GetBaseFileFsSvc(rootPath)
if err != nil {
HandleErrorBadRequest(c, err)
return
Expand All @@ -229,7 +226,7 @@ func DeleteBaseFileFile(rootPath string, c *gin.Context) {
payload.Path = "."
}

fsSvc, err := getBaseFileFsSvc(rootPath)
fsSvc, err := fs.GetBaseFileFsSvc(rootPath)
if err != nil {
HandleErrorBadRequest(c, err)
return
Expand Down Expand Up @@ -257,7 +254,7 @@ func PostBaseFileCopyFile(rootPath string, c *gin.Context) {
return
}

fsSvc, err := getBaseFileFsSvc(rootPath)
fsSvc, err := fs.GetBaseFileFsSvc(rootPath)
if err != nil {
HandleErrorBadRequest(c, err)
return
Expand All @@ -272,7 +269,7 @@ func PostBaseFileCopyFile(rootPath string, c *gin.Context) {
}

func PostBaseFileExport(rootPath string, c *gin.Context) {
fsSvc, err := getBaseFileFsSvc(rootPath)
fsSvc, err := fs.GetBaseFileFsSvc(rootPath)
if err != nil {
HandleErrorBadRequest(c, err)
return
Expand All @@ -289,14 +286,3 @@ func PostBaseFileExport(rootPath string, c *gin.Context) {
c.Header("Content-Disposition", fmt.Sprintf("attachment; filename=%s", zipFilePath))
c.File(zipFilePath)
}

// GetBaseFileFsSvc is the exported entry point for obtaining the file-system
// service for rootPath. It forwards to getBaseFileFsSvc and returns that
// call's results unchanged.
func GetBaseFileFsSvc(rootPath string) (svc interfaces.FsService, err error) {
return getBaseFileFsSvc(rootPath)
}

// getBaseFileFsSvc builds a file-system service whose root is rootPath joined
// onto the workspace directory reported by utils.GetWorkspace.
// The returned error is always nil.
func getBaseFileFsSvc(rootPath string) (svc interfaces.FsService, err error) {
	baseDir := filepath.Join(utils.GetWorkspace(), rootPath)
	return fs.NewFsService(baseDir), nil
}
12 changes: 12 additions & 0 deletions core/controllers/spider.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import (
"github.com/crawlab-team/crawlab/core/constants"
"github.com/crawlab-team/crawlab/core/models/models"
mongo2 "github.com/crawlab-team/crawlab/core/mongo"
"github.com/crawlab-team/crawlab/core/spider"
"math"
"os"
"path/filepath"
Expand Down Expand Up @@ -293,6 +294,17 @@ func PostSpider(c *gin.Context) {
return
}

// create template if available
if utils.IsPro() && s.Template != "" {
if templateSvc := spider.GetSpiderTemplateRegistryService(); templateSvc != nil {
err = templateSvc.CreateTemplate(s.Id)
if err != nil {
HandleErrorInternalServerError(c, err)
return
}
}
}

HandleSuccessWithData(c, s)
}

Expand Down
14 changes: 14 additions & 0 deletions core/fs/utils.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
package fs

import (
"github.com/crawlab-team/crawlab/core/interfaces"
"github.com/crawlab-team/crawlab/core/utils"
"path/filepath"
)

// GetBaseFileFsSvc returns a file-system service rooted at rootPath, resolved
// relative to the workspace directory from utils.GetWorkspace.
// The error result is always nil.
func GetBaseFileFsSvc(rootPath string) (svc interfaces.FsService, err error) {
	root := filepath.Join(utils.GetWorkspace(), rootPath)
	return NewFsService(root), nil
}
7 changes: 7 additions & 0 deletions core/interfaces/spider_template_service.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
package interfaces

import "go.mongodb.org/mongo-driver/bson/primitive"

// SpiderTemplateService is the contract for services that create spider
// templates.
type SpiderTemplateService interface {
// CreateTemplate creates the template for the spider identified by id and
// reports any failure through err.
// NOTE(review): what "create" involves is decided by the implementation,
// which is not visible here — confirm against the implementing type.
CreateTemplate(id primitive.ObjectID) (err error)
}
6 changes: 6 additions & 0 deletions core/models/models/spider.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,12 @@ type Spider struct {
GitId primitive.ObjectID `json:"git_id" bson:"git_id"` // related Git.Id
GitRootPath string `json:"git_root_path" bson:"git_root_path"`
Git *Git `json:"git,omitempty" bson:"-"`
Template string `json:"template,omitempty" bson:"template,omitempty"` // spider template
TemplateParams *struct {
SpiderName string `json:"spider_name,omitempty" bson:"spider_name,omitempty"`
StartUrls string `json:"start_urls,omitempty" bson:"start_urls,omitempty"`
Domains string `json:"domains,omitempty" bson:"domains,omitempty"`
} `json:"template_params,omitempty" bson:"template_params,omitempty"`

// stats
Stat *SpiderStat `json:"stat,omitempty" bson:"-"`
Expand Down
13 changes: 13 additions & 0 deletions core/spider/registry_service.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
package spider

import "github.com/crawlab-team/crawlab/core/interfaces"

// templateSvcInstance holds the SpiderTemplateService registered through
// SetSpiderTemplateRegistryService; it is nil until one has been registered.
var templateSvcInstance interfaces.SpiderTemplateService

// SetSpiderTemplateRegistryService registers svc as the package-wide spider
// template service, replacing any previously registered value.
// NOTE(review): the assignment is not synchronized; presumably this is called
// once during startup before any reader runs — confirm.
func SetSpiderTemplateRegistryService(svc interfaces.SpiderTemplateService) {
templateSvcInstance = svc
}

// GetSpiderTemplateRegistryService returns the currently registered spider
// template service. The result is nil when nothing has been registered, so
// callers must check for nil before use.
func GetSpiderTemplateRegistryService() interfaces.SpiderTemplateService {
return templateSvcInstance
}
20 changes: 17 additions & 3 deletions core/task/handler/runner.go
Original file line number Diff line number Diff line change
Expand Up @@ -340,6 +340,19 @@ func (r *Runner) startHealthCheck() {
}
}

// configurePythonPath exposes the pyenv installation to the current process:
// it exports PYENV_ROOT and prepends pyenv's shims and bin directories to PATH.
// The pyenv root directory is taken from utils.GetPyenvPath.
//
// NOTE(review): every call prepends to the process-wide PATH again, so PATH
// grows if this runs more than once per process — confirm whether that matters.
func (r *Runner) configurePythonPath() {
	root := utils.GetPyenvPath()

	// Setenv errors are explicitly discarded; this setup is best-effort.
	_ = os.Setenv("PYENV_ROOT", root)

	// Prepend shims first and bin second, so bin ends up ahead of shims in PATH.
	for _, sub := range []string{"/shims", "/bin"} {
		_ = os.Setenv("PATH", root+sub+":"+os.Getenv("PATH"))
	}
}

// configureNodePath sets up the Node.js environment paths, handling both nvm and default installations
func (r *Runner) configureNodePath() {
// Configure nvm-based Node.js paths
Expand All @@ -366,7 +379,10 @@ func (r *Runner) configureGoPath() {
// - Crawlab-specific variables
// - Global environment variables from the system
func (r *Runner) configureEnv() {
// Configure Node.js paths
// Configure Python path
r.configurePythonPath()

// Configure Node.js path
r.configureNodePath()

// Configure Go path
Expand All @@ -375,8 +391,6 @@ func (r *Runner) configureEnv() {
// Default envs
r.cmd.Env = os.Environ()
r.cmd.Env = append(r.cmd.Env, "CRAWLAB_TASK_ID="+r.tid.Hex())
r.cmd.Env = append(r.cmd.Env, "PYENV_ROOT="+utils.PyenvRoot)
r.cmd.Env = append(r.cmd.Env, "PATH="+os.Getenv("PATH")+":"+utils.PyenvRoot+"/shims:"+utils.PyenvRoot+"/bin")

// Global environment variables
envs, err := client.NewModelService[models.Environment]().GetMany(nil, nil)
Expand Down
9 changes: 8 additions & 1 deletion core/utils/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ const (
DefaultInstallRoot = "/app/install"
MetadataConfigDirName = ".crawlab"
MetadataConfigName = "config.json"
PyenvRoot = "/root/.pyenv"
DefaultPyenvPath = "/root/.pyenv"
DefaultNodeModulesPath = "/usr/lib/node_modules"
DefaultGoPath = "/root/go"
)
Expand Down Expand Up @@ -250,6 +250,13 @@ func GetInstallRoot() string {
return DefaultInstallRoot
}

// GetPyenvPath returns the pyenv root directory: the value of the
// "install.pyenv.path" configuration key when it is non-empty, otherwise
// DefaultPyenvPath.
func GetPyenvPath() string {
	configured := viper.GetString("install.pyenv.path")
	if configured == "" {
		return DefaultPyenvPath
	}
	return configured
}

func GetNodeModulesPath() string {
if res := viper.GetString("install.node.path"); res != "" {
return res
Expand Down
23 changes: 23 additions & 0 deletions core/utils/string.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
package utils

import (
"golang.org/x/text/cases"
"golang.org/x/text/language"
"strings"
)

// ToSnakeCase trims surrounding whitespace from s, lowercases it, and turns
// every space and hyphen into an underscore. It does not split camelCase
// words and does not collapse runs of separators.
func ToSnakeCase(s string) string {
	lowered := strings.ToLower(strings.TrimSpace(s))
	// One pass over the string replaces both separator characters.
	return strings.NewReplacer(" ", "_", "-", "_").Replace(lowered)
}

// ToPascalCase trims surrounding whitespace from s, treats underscores as word
// separators, title-cases the result with English rules, and then removes the
// spaces so the words are joined together.
//
// NOTE(review): cases.Title without the NoLower option also lowercases the
// non-leading letters of each word, and hyphens are not treated as separators
// here — confirm both are intended.
func ToPascalCase(s string) string {
	spaced := strings.ReplaceAll(strings.TrimSpace(s), "_", " ")
	titled := cases.Title(language.English).String(spaced)
	return strings.ReplaceAll(titled, " ", "")
}

0 comments on commit 8d8b47e

Please sign in to comment.