Inoreader to Miniflux/v2

Backup

In Inoreader, go to Preferences -> API access.

Copy the App ID and App Key into the client_id and client_secret variables in backup.py.
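For example, after creating an app on the API access page, the top of backup.py would look like this (the values here are made up):

# credentials from Preferences -> API access (hypothetical values)
client_id = "1000001234"
client_secret = "AbCdEfGhIjKlMnOpQrStUvWx"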

rye pin 3.11
rye add requests
rye sync
rye run python backup.py

The OAuth2 flow is hand-rolled, so you have to authorize manually in a browser. The token and the subscription list are cached on disk (token.pickle and subscription.pickle); if a run fails, clear those cache files by hand.
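A minimal sketch for clearing that cached state after a failed run (the file names are the ones backup.py below uses):

from pathlib import Path

# remove the cached OAuth token and subscription list so the next run starts fresh
for cache in ("token.pickle", "subscription.pickle"):
    Path(cache).unlink(missing_ok=True)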

Restore

The restore script is written in Go and depends on miniflux/v2. It cannot be pulled in as a regular Go dependency, because miniflux/v2 declares its module name as miniflux.app, so importing github.com/miniflux/v2 directly fails. Instead, copy restore.go into the root of the cloned repository and run it from there.

git clone https://github.com/miniflux/v2.git
cd v2
git checkout 2.0.43  # match the version running in production
cp ../restore.go .   # copy restore.go into the repository root
sudo -u postgres pg_dump -F c miniflux > miniflux  # back up the database first
# sudo -u postgres pg_restore -d miniflux miniflux  # restore the database from the dump
go run restore.go <backup> <username>  # needs read access to /etc/miniflux.conf
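For reference, <backup> is the backup/ directory that backup.py produces: one folder per subscription, named after the URL-encoded stream id (the feed below is made up):

backup/
  feed%2Fhttps%3A%2F%2Fexample.com%2Ffeed.xml/
    info.json       # subscription metadata
    icon.png        # feed icon
    content_0.json  # first page of stream contents
    content_1.json  # next page, fetched with the previous page's continuation token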
backup.py

import os
import pickle
import json
import threading
import uuid
from http.server import BaseHTTPRequestHandler, ThreadingHTTPServer
from pathlib import Path
from urllib.parse import parse_qs, urlsplit

from requests import Session, post, utils

host = "https://www.inoreader.com"
client_id = "[client_id]"
client_secret = "[client_secret]"
csrf_protection = str(uuid.uuid4())
redirect_uri = utils.quote("http://localhost:6894/oauth/redirect", safe="")
def redirect_server(result):
    PORT = 6894

    def handle_request(s):
        nonlocal httpd, result
        query = urlsplit(s.path).query
        params = parse_qs(query)
        # only accept the callback if the state parameter matches our CSRF token
        if "state" in params and params["state"][0] == csrf_protection:
            result["code"] = params["code"][0]
            s.send_response(200)
            s.send_header("Content-type", "text/plain")
            s.end_headers()
            s.wfile.write(bytes("Success!", encoding="utf8"))
            httpd.shutdown()
        else:
            s.send_response(400)
            s.send_header("Content-type", "text/plain")
            s.end_headers()

    Handler = type("", (BaseHTTPRequestHandler,), {"do_GET": handle_request})
    with ThreadingHTTPServer(("", PORT), Handler) as httpd:
        print("serving on port", PORT)
        httpd.serve_forever()
def request_code():
    url = (
        f"{host}/oauth2/auth"
        f"?client_id={client_id}&redirect_uri={redirect_uri}"
        f"&response_type=code&scope=read&state={csrf_protection}"
    )
    print("Navigate to the following URL in your browser:")
    print(url)
    result = {}
    # block until the local redirect server has received the authorization code
    t = threading.Thread(target=redirect_server, args=(result,), daemon=True)
    t.start()
    t.join()
    return result["code"]
def restore_token():
    p_file = "token.pickle"
    if os.path.exists(p_file):
        with open(p_file, "rb") as f:
            return pickle.load(f)
    # fetch a fresh token, then cache it; writing only after success avoids
    # leaving an empty pickle behind when the OAuth flow fails
    code = request_code()
    token = get_token(code)
    with open(p_file, "wb") as f:
        pickle.dump(token, f)
    return token
def get_token(code):
    url = f"{host}/oauth2/token"
    response = post(
        url,
        data={
            "code": code,
            "redirect_uri": redirect_uri,
            "client_id": client_id,
            "client_secret": client_secret,
            "scope": "read",
            "grant_type": "authorization_code",
        },
    )
    return response.json()
def user_info(s):
    url = f"{host}/reader/api/0/user-info"
    rep = s.get(url)
    return rep.json()


def subscription_list(s):
    url = f"{host}/reader/api/0/subscription/list"
    rep = s.get(url)
    return rep.json()


def backup_subscription(s):
    # the subscription list is cached so that repeated runs resume the same backup
    p_file = "subscription.pickle"
    if os.path.exists(p_file):
        with open(p_file, "rb") as f:
            data = pickle.load(f)
    else:
        with open(p_file, "wb") as f:
            data = subscription_list(s)
            pickle.dump(data, f)
    for sub in data["subscriptions"]:
        process_single_sub(s, sub)
def process_single_sub(s, sub):
    f_name = utils.quote(sub["id"], safe="")
    folder = Path(f"backup/{f_name}")
    info = folder.joinpath("info.json")
    if not info.exists():
        folder.mkdir(parents=True, exist_ok=True)
        with info.open("w") as f:
            json.dump(sub, f)
    icon = folder.joinpath("icon.png")
    if not icon.exists():
        download_file(sub["iconUrl"], s, local_filename=str(icon))
    contents = list(folder.glob("content_*.json"))
    if l := len(contents):
        c_f = sorted(contents, key=str)[-1]  # string sort; assumes fewer than 10 chunks
        with c_f.open("r") as f:
            content = json.load(f)
        # keep paginating only if the last chunk carries a continuation token
        if "continuation" in content:
            stream(s, sub["id"], folder.joinpath(f"content_{l}.json"), cont=content["continuation"])
    else:
        stream(s, sub["id"], folder.joinpath(f"content_{l}.json"))
def stream(s, stream_id, content, cont=None):
    stream_id = utils.quote(stream_id, safe="")
    url = f"{host}/reader/api/0/stream/contents/{stream_id}"
    data = {
        "n": 2000,  # number of items to return (the API docs give a [20, 1000] range)
        "annotations": 1,
    }
    if cont:
        data.update({"c": cont})  # continuation token from the previous page
    download_file(url, s, data=data, local_filename=str(content))
def download_file(url, session, data=None, local_filename=None):
    local_filename = local_filename or url.split("/")[-1]
    with session.get(url, params=data, stream=True) as r:
        r.raise_for_status()
        print(f"{url} {r.status_code}")
        with open(local_filename, "wb") as f:
            for chunk in r.iter_content(chunk_size=8192):
                f.write(chunk)
    return local_filename


if __name__ == "__main__":
    token = restore_token()
    s = Session()
    s.headers.update({"Authorization": f"Bearer {token['access_token']}"})
    backup_subscription(s)
restore.go

package main

import (
	"database/sql"
	"encoding/json"
	"fmt"
	"os"
	"path/filepath"
	"strconv"
	"strings"
	"time"

	"github.com/lib/pq"

	"miniflux.app/config"
	"miniflux.app/crypto"
	"miniflux.app/database"
	"miniflux.app/logger"
	"miniflux.app/model"
	"miniflux.app/storage"
)
// jsonFeedInfo mirrors the subscription metadata saved in info.json.
type jsonFeedInfo struct {
	ID         string `json:"id"`
	FeedType   string `json:"feedType"`
	Title      string `json:"title"`
	Categories []struct {
		ID    string `json:"id"`
		Label string `json:"label"`
	} `json:"categories"`
	SortID        string `json:"sortid"`
	FirstItemMsec int64  `json:"firstitemmsec"`
	URL           string `json:"url"`
	HTMLURL       string `json:"htmlUrl"`
	IconURL       string `json:"iconUrl"`
}

// jsonFeed and jsonItem mirror an Inoreader stream-contents response (content_*.json).
type jsonFeed struct {
	Direction   string `json:"direction"`
	ID          string `json:"id"`
	Title       string `json:"title"`
	Description string `json:"description"`
	Self        struct {
		Href string `json:"href"`
	} `json:"self"`
	Updated     int64      `json:"updated"`
	UpdatedUsec string     `json:"updatedUsec"`
	Items       []jsonItem `json:"items"`
}

type jsonItem struct {
	CrawlTimeMsec string   `json:"crawlTimeMsec"`
	TimestampUsec string   `json:"timestampUsec"`
	ID            string   `json:"id"`
	Categories    []string `json:"categories"`
	Title         string   `json:"title"`
	Published     int64    `json:"published"`
	Updated       int64    `json:"updated"`
	Canonical     []struct {
		Href string `json:"href"`
	} `json:"canonical"`
	Alternate []struct {
		Href string `json:"href"`
		Type string `json:"type"`
	} `json:"alternate"`
	Summary struct {
		Direction string `json:"direction"`
		Content   string `json:"content"`
	} `json:"summary"`
	Author      string     `json:"author"`
	LikingUsers []struct{} `json:"likingUsers"`
	Comments    []struct{} `json:"comments"`
	CommentsNum int        `json:"commentsNum"`
	Annotations []struct {
		ID             int64  `json:"id"`
		Start          int    `json:"start"`
		End            int    `json:"end"`
		AddedOn        int64  `json:"added_on"`
		Text           string `json:"text"`
		Note           string `json:"note"`
		UserID         int    `json:"user_id"`
		UserName       string `json:"user_name"`
		UserProfilePic string `json:"user_profile_picture"`
	} `json:"annotations"`
	Origin struct {
		StreamID string `json:"streamId"`
		Title    string `json:"title"`
		HTMLUrl  string `json:"htmlUrl"`
	} `json:"origin"`
}
// GetCreateAt converts the microsecond timestamp into a time.Time.
func (j *jsonItem) GetCreateAt() time.Time {
	timestampUsec, _ := strconv.ParseInt(j.TimestampUsec, 10, 64)
	sec := timestampUsec / 1e6
	usec := timestampUsec % 1e6
	return time.Unix(sec, usec*1000)
}

func (j *jsonItem) GetHash() string {
	for _, value := range []string{j.ID} {
		if value != "" {
			return crypto.Hash(value)
		}
	}
	return ""
}

func (j *jsonItem) GetContent() string {
	return j.Summary.Content
}

// Transform maps an Inoreader item onto a Miniflux entry.
func (j *jsonItem) Transform() *model.Entry {
	entry := new(model.Entry)
	entry.URL = j.Canonical[0].Href
	entry.Date = time.Unix(j.Published, 0)
	entry.CreatedAt = j.GetCreateAt()
	entry.Author = j.Author
	entry.Hash = j.GetHash()
	entry.Content = j.GetContent()
	entry.Title = strings.TrimSpace(j.Title)
	// entry.Tags = j.Categories
	return entry
}

func findByFeedURL(feeds model.Feeds, url string) *model.Feed {
	for i := range feeds {
		if feeds[i].FeedURL == url {
			return feeds[i]
		}
	}
	return nil
}
func restoreBackup(feeds model.Feeds, s *storage.Storage, db *sql.DB, root string) {
	entries, err := os.ReadDir(root)
	if err != nil {
		fmt.Println("Error:", err)
		return
	}
	for _, entry := range entries {
		if entry.IsDir() {
			fmt.Println("Directory:", entry.Name())
			path := filepath.Join(root, entry.Name(), "info.json")
			file, err := os.Open(path)
			if err != nil {
				fmt.Println("failed to open file:", err)
				return
			}
			defer file.Close()
			feedInfo := new(jsonFeedInfo)
			decoder := json.NewDecoder(file)
			err = decoder.Decode(&feedInfo)
			if err != nil {
				fmt.Println("failed to decode JSON:", err)
			} else {
				fmt.Println("Title: " + feedInfo.Title)
				fmt.Println("URL: " + feedInfo.URL)
				// match the backed-up feed against an existing Miniflux feed by URL
				feed := findByFeedURL(feeds, feedInfo.URL)
				if feed != nil {
					fmt.Println("Feed ID: " + strconv.Itoa(int(feed.ID)))
					restoreEntries(feed, filepath.Join(root, entry.Name()), s, db)
				} else {
					fmt.Println("Ignore: feed does not exist")
				}
			}
		}
	}
}
func restoreEntries(feed *model.Feed, root string, s *storage.Storage, db *sql.DB) {
	files, err := filepath.Glob(filepath.Join(root, "content_*.json"))
	if err != nil {
		fmt.Println("failed to list content files:", err)
		return
	}
	for _, file := range files {
		content, err := os.Open(file)
		if err != nil {
			fmt.Println("failed to open file:", err)
			return
		}
		defer content.Close()
		decoder := json.NewDecoder(content)
		jsonfeed := new(jsonFeed)
		err = decoder.Decode(&jsonfeed)
		if err != nil {
			fmt.Println("failed to decode JSON:", err)
		} else {
			fmt.Println("Restore: " + file)
			var entries model.Entries = []*model.Entry{}
			urls := []string{}
			for _, item := range jsonfeed.Items {
				entry := item.Transform()
				entries = append(entries, entry)
				urls = append(urls, entry.URL)
			}
			// skip the whole chunk if every URL is already in the database
			filter, existAll := entriesFilter(feed.ID, urls, db)
			if !existAll {
				tx, err := db.Begin()
				if err != nil {
					logger.Fatal(`store: unable to start transaction: %v`, err)
					return
				}
				for _, entry := range entries {
					if !filter[entry.URL] {
						fmt.Println("Insert: " + entry.Title)
						entry.FeedID = feed.ID
						entry.UserID = feed.UserID
						err := createEntry(s, tx, entry)
						if err != nil {
							tx.Rollback()
							logger.Fatal("%v", err)
							return
						}
					} else {
						fmt.Println("Exist: " + entry.Title)
					}
				}
				if err := tx.Commit(); err != nil {
					logger.Fatal(`store: unable to commit transaction: %v`, err)
					return
				}
			} else {
				fmt.Println("Ignore: all entries exist")
			}
		}
	}
}
// entriesFilter reports, for each URL, whether an entry with that URL already
// exists for the feed; the second return value is true when all of them exist.
func entriesFilter(feedID int64, urls []string, db *sql.DB) (map[string]bool, bool) {
	query := "SELECT url, (SELECT COUNT(*) FROM entries WHERE feed_id = $1 AND url = urls.url) > 0 AS exists FROM (SELECT unnest($2::text[]) AS url) AS urls"
	type Exist struct {
		url string
		b   bool
	}
	exists := []Exist{}
	rows, err := db.Query(query, feedID, pq.Array(urls))
	if err != nil {
		panic(err)
	}
	defer rows.Close()
	for rows.Next() {
		var e Exist
		if err := rows.Scan(&e.url, &e.b); err != nil {
			panic(err)
		}
		exists = append(exists, e)
	}
	if err := rows.Err(); err != nil {
		panic(err)
	}
	existMap := make(map[string]bool)
	existAll := true
	for _, e := range exists {
		existMap[e.url] = e.b
		existAll = existAll && e.b
	}
	return existMap, existAll
}
// createEntry adds a new entry.
func createEntry(s *storage.Storage, tx *sql.Tx, entry *model.Entry) error {
	query := `
		INSERT INTO entries
		(
			title,
			hash,
			url,
			comments_url,
			published_at,
			content,
			author,
			user_id,
			feed_id,
			reading_time,
			changed_at,
			document_vectors,
			tags
		)
		VALUES
		(
			$1,
			$2,
			$3,
			$4,
			$5,
			$6,
			$7,
			$8,
			$9,
			$10,
			now(),
			setweight(to_tsvector(left(coalesce($1, ''), 500000)), 'A') || setweight(to_tsvector(left(coalesce($6, ''), 500000)), 'B'),
			$11
		)
		RETURNING
			id, status
	`
	err := tx.QueryRow(
		query,
		entry.Title,
		entry.Hash,
		entry.URL,
		entry.CommentsURL,
		entry.Date,
		entry.Content,
		entry.Author,
		entry.UserID,
		entry.FeedID,
		entry.ReadingTime,
		pq.Array(entry.Tags),
	).Scan(&entry.ID, &entry.Status)
	if err != nil {
		return fmt.Errorf(`store: unable to create entry %q (feed #%d): %v`, entry.URL, entry.FeedID, err)
	}
	return nil
}
func initFeeds(s *storage.Storage, username string) model.Feeds {
	fmt.Println(username)
	user, err := s.UserByUsername(username)
	if err != nil {
		logger.Fatal("%v", err)
	}
	fmt.Println(user)
	feeds, err := s.Feeds(user.ID)
	if err != nil {
		logger.Fatal("%v", err)
	}
	return feeds
}
func main() {
	flagConfigFile := "/etc/miniflux.conf"
	var err error
	cfg := config.NewParser()
	config.Opts, err = cfg.ParseFile(flagConfigFile)
	if err != nil {
		logger.Fatal("%v", err)
	}
	config.Opts, err = cfg.ParseEnvironmentVariables()
	if err != nil {
		logger.Fatal("%v", err)
	}
	db, err := database.NewConnectionPool(
		config.Opts.DatabaseURL(),
		config.Opts.DatabaseMinConns(),
		config.Opts.DatabaseMaxConns(),
		config.Opts.DatabaseConnectionLifetime(),
	)
	if err != nil {
		logger.Fatal("Unable to initialize database connection pool: %v", err)
	}
	defer db.Close()
	store := storage.NewStorage(db)
	if err := store.Ping(); err != nil {
		logger.Fatal("Unable to connect to the database: %v", err)
	}
	// usage: go run restore.go <backup> <username>
	username := os.Args[len(os.Args)-1]
	root := os.Args[len(os.Args)-2]
	feeds := initFeeds(store, username)
	restoreBackup(feeds, store, db, root)
}