Skip to content

Commit

Permalink
add toc
Browse files Browse the repository at this point in the history
  • Loading branch information
fenixsoft committed Dec 26, 2021
1 parent 7855a57 commit 3a92c28
Show file tree
Hide file tree
Showing 3 changed files with 286 additions and 0 deletions.
174 changes: 174 additions & 0 deletions .vuepress/dist/pdf/generate_pdf_with_toc.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,174 @@
import json
import toml
from pdftocgen.recipe import extract_toc, Recipe
from fitzutils import ToCEntry, dump_toc, open_pdf
from pdftocio.tocio import write_toc

# requirements
# ----------------------------
# requires `pdf.tocgen`
# https://github.com/Krasjet/pdf.tocgen
# pip install -U pdf.tocgen

# usage
# ----------------------------
# 1. make sure `pdf.tocgen` is installed
# 2. put `exportPages.json`, `sidebar.json`, `the-fenix-project.pdf` into the folder containing the script
# 3. run the script
# 4. 2 files will be generated: `the-fenix-project-with-toc.pdf` and `toc.txt`
# to apply the toc, use `pdftocio {path_to_pdf} < toc.txt`

# prepare `exportPages.json` & `sidebar.json`
#
# add the following lines after line 93 in `.vuepress/plugins/export/index.js`,
# replacing {path_to_folder} with the path to the folder containing this script:
#
# fs.writeFileSync("{path_to_folder}" + "/sidebar.json", JSON.stringify(sidebar));
# fs.writeFileSync("{path_to_folder}" + "/exportPages.json", JSON.stringify(exportPages));
# return


# constants & paths
# ----------------------------
# Input PDF; opened both when extracting headings and when writing the ToC.
pdf_path = "the-fenix-project.pdf"
# Output PDF: the input document saved again after the ToC has been written.
toc_pdf_path = "the-fenix-project-with-toc.pdf"
# Text file receiving the dumped ToC.
final_toc_path = "toc.txt"


# TOML recipe parsed with `toml.loads` and handed to pdf.tocgen's `Recipe` /
# `extract_toc` to locate headings in the PDF.
# NOTE(review): presumably selects level-1 headings by font size (within the
# given tolerance) -- confirm against the pdf.tocgen recipe documentation.
recipe_str = """[[heading]]
# 前端工程
level = 1
greedy = true
font.size = 26.411094665527344
font.size_tolerance = 1"""

# helpers
# -----------------------------
def remove_multiple_suffix(s, suffixes):
    """Strip every trailing occurrence of the given suffixes from ``s``.

    Suffixes are tried in the order given. As soon as one matches it is
    removed and the scan restarts from the first suffix; this repeats until
    no suffix matches any more.

    Args:
        s: The string to trim.
        suffixes: Iterable of suffix strings to remove. Empty strings are
            ignored.

    Returns:
        ``s`` with all matching trailing suffixes removed.
    """
    # An empty suffix matches every string, and ``s[:-0]`` equals ``s[:0]``,
    # so it would wipe the string and then spin forever. Drop such entries.
    candidates = [suffix for suffix in suffixes if suffix]

    triggered = True
    while triggered:
        triggered = False
        for suffix in candidates:
            if s.endswith(suffix):
                triggered = True
                s = s[:-len(suffix)]
                break
    return s

def normalize_path(path):
    """Return ``path`` lower-cased with trailing "/", ".md" and ".html" removed.

    Used to build comparable keys from page URLs and sidebar paths.
    """
    trailing_noise = ["/", ".md", ".html"]
    return remove_multiple_suffix(path.lower(), trailing_noise)


def walk_tree(item, level, path_title_level_list):
    """Flatten a sidebar tree into ``[path, title, level]`` records.

    Records are appended to ``path_title_level_list`` in depth-first order.

    Args:
        item: A sidebar node. A string is recorded as a path with no title,
            a dict is recorded with its "title" (and "path" when present)
            before its "children" are visited, and a list has each of its
            elements visited. Any other type is ignored.
        level: Depth of ``item``; nested nodes are recorded one level deeper.
        path_title_level_list: Output list, mutated in place.
    """
    if isinstance(item, str):
        # Bare path: the title is unknown at this point.
        path_title_level_list.append([item, None, level])
        return

    if isinstance(item, list):
        nested = item
    elif isinstance(item, dict):
        # Entries without a "path" key are recorded with a path of None.
        path_title_level_list.append([item.get("path"), item["title"], level])
        nested = item.get("children", ())
    else:
        return

    for child in nested:
        walk_tree(child, level + 1, path_title_level_list)

# steps
# -----------------------------

def generate_hierarchy():
    """Build the chapter hierarchy from the JSON files exported by the site.

    Reads ``exportPages.json`` (page URLs and titles) and ``sidebar.json``
    (nesting) from the current working directory.

    Returns:
        A list of ``[path, title, level]`` records in sidebar order. Records
        that carried no title in the sidebar get the title of the exported
        page with the same normalized URL.

    Raises:
        KeyError: If a sidebar path has no matching exported page.
    """
    # Page URL -> title lookup, keyed by normalized URL.
    with open("exportPages.json", "r", encoding="u8") as pages_file:
        pages = json.load(pages_file)

    # The first exported page is given an empty title.
    pages[0]["title"] = ""

    title_by_url = {}
    for page in pages:
        key = normalize_path(page["url"])
        title_by_url[key] = page["title"]

    # The sidebar supplies the nesting of the chapters.
    with open("sidebar.json", "r", encoding="u8") as sidebar_file:
        sidebar_tree = json.load(sidebar_file)

    path_title_level_list = []
    walk_tree(sidebar_tree, 0, path_title_level_list)

    # Fill in titles for sidebar entries that were given as bare paths.
    for record in path_title_level_list:
        if record[1] is None:
            record[1] = title_by_url[normalize_path(record[0])]

    print("load from website, length", len(path_title_level_list))
    return path_title_level_list


def find_title_pages():
    """Extract the headings described by ``recipe_str`` from the input PDF.

    Returns:
        The value returned by ``extract_toc`` for ``pdf_path``; its length is
        printed before returning.
    """
    parsed_recipe = toml.loads(recipe_str)

    with open_pdf(pdf_path) as document:
        headings = extract_toc(document, Recipe(parsed_recipe))

    print("load from pdf, length", len(headings))
    return headings

def check_toc_length(path_title_level_list, toc):
    """Warn when the PDF heading count differs from the site's page count.

    Only records with a non-None path are counted on the site side. Nothing
    is returned; a warning in English and Chinese is printed on mismatch.
    """
    found = len(toc)
    expected = sum(1 for record in path_title_level_list if record[0] is not None)
    if found == expected:
        return

    print("WARNING: missing some chapters, the PDF provided might not be the most up-to-date version.")
    print("警告:部分存在于网站中的章节不存在于 PDF 中。这可能是因为 PDF 构建后网站中增加了新章节。重新构建 PDF 可以解决这一问题。")



def build_final_toc(path_title_level_list, toc):
    """Merge the site hierarchy with the page numbers found in the PDF.

    The two sequences are walked in parallel. A record with a path must match
    the next unconsumed PDF heading (titles are compared with all whitespace
    removed); on a match it takes that heading's page number and the heading
    is consumed, otherwise the record is reported as missing and skipped.
    A record without a path (a sidebar group) is always kept and takes the
    page number of the next unconsumed heading without consuming it.

    Args:
        path_title_level_list: ``[path, title, level]`` records in order.
        toc: Headings extracted from the PDF, each with ``title`` and
            ``pagenum`` attributes.

    Returns:
        A list of ``ToCEntry(level, title, pagenum)``.
    """
    def squash(text):
        # Ignore whitespace differences between site titles and PDF text.
        return "".join(text.split())

    final_toc = []
    toc_idx = 0
    last_page_num = 1
    for path, title, level in path_title_level_list:
        # Once every PDF heading is consumed there is nothing left to match;
        # indexing `toc` unconditionally would raise IndexError here.
        heading = toc[toc_idx] if toc_idx < len(toc) else None

        if path is None:
            # Group entry: borrow the page of the upcoming heading, or fall
            # back to the last page used when no heading remains.
            page_num = last_page_num if heading is None else heading.pagenum
            final_toc.append(ToCEntry(level, title, page_num))
            last_page_num = page_num
            continue

        if heading is not None and squash(title) == squash(heading.title):
            final_toc.append(ToCEntry(level, title, heading.pagenum))
            last_page_num = heading.pagenum
            toc_idx += 1
        else:
            print("missing chapter: ", title)

    return final_toc

def save_toc(final_toc):
    """Write the ToC into a copy of the PDF and dump it as text.

    The input PDF at ``pdf_path`` gets ``final_toc`` applied and is saved to
    ``toc_pdf_path``; the same ToC is dumped to ``final_toc_path``.

    Args:
        final_toc: List of ``ToCEntry`` objects to write.
    """
    with open_pdf(pdf_path) as doc:
        write_toc(doc, final_toc)
        doc.save(toc_pdf_path)

    # Chapter titles are non-ASCII. Without an explicit encoding the file is
    # written in the platform's locale encoding, which yields a dump that is
    # unreadable on other machines; always write UTF-8.
    # NOTE(review): when feeding this file to `pdftocio` via shell
    # redirection, make sure its stdin is decoded as UTF-8 as well.
    with open(final_toc_path, "w", encoding="utf-8") as f:
        f.write(dump_toc(final_toc))

# main
# -----------------------------

def main():
    """Run the whole pipeline: load hierarchy, locate headings, write the ToC."""
    site_records = generate_hierarchy()
    pdf_headings = find_title_pages()

    # Only warns on a mismatch; processing continues regardless.
    check_toc_length(site_records, pdf_headings)

    save_toc(build_final_toc(site_records, pdf_headings))


# Run the pipeline only when this file is executed as a script.
if __name__ == "__main__":
    main()

1 change: 1 addition & 0 deletions .vuepress/dist/pdf/tips.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# 生成PDF耗时较长,请在必要时候执行npm run export来更新
111 changes: 111 additions & 0 deletions .vuepress/dist/pdf/toc.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
"Ŀ¼" 6
"ǰ��" 11
"��������" 11
"����ֽ����" 13
"ʲô�ǡ���˼ܹ���" 15
"̽����" 20
"�Ķ�ָ��" 20
"������־" 20
"��ο�ʼ" 27
"������ʾ����" 32
"ǰ�˹���" 33
"����ܹ���Spring Boot" 39
"΢����Spring Cloud" 44
"΢����Kubernetes" 51
"��������Istio" 57
"�޷���AWS Lambda" 64
"�ݽ��еļܹ�" 68
"����ܹ��ݽ�ʷ" 68
"ԭʼ�ֲ�ʽʱ��" 69
"����ϵͳʱ��" 73
"SOA ʱ��" 78
"΢����ʱ��" 83
"��΢����ʱ��" 88
"�޷���ʱ��" 93
"�ܹ�ʦ���ӽ�" 97
"����Զ�̷���" 97
"Զ�̷������" 98
"REST ��Ʒ��" 110
"������" 128
"��������" 130
"ȫ������" 140
"��������" 146
"�ֲ�ʽ����" 148
"͸���༶����ϵͳ" 161
"�ͻ��˻���" 163
"��������" 169
"������·" 173
"���ݷַ�����" 184
"���ؾ���" 192
"����˻���" 204
"�ܹ���ȫ��" 219
"��֤" 220
"��Ȩ" 232
"ƾ֤" 245
"����" 254
"����" 260
"��֤" 273
"�ֲ�ʽ�Ļ�ʯ" 279
"�ֲ�ʽ��ʶ�㷨" 279
"Paxos" 282
"Multi Paxos" 291
"Gossip ��" 296
"����⵽����" 299
"������" 301
"����·��" 308
"�ͻ��˸��ؾ���" 314
"��������" 322
"�����ݴ�" 323
"��������" 337
"�ɿ�ͨѶ" 346
"����������" 347
"����ȫ" 353
"�ɹ۲���" 363
"�¼���־" 368
"��·׷��" 375
"�ۺ϶���" 383
"���ɱ������ʩ" 392
"��΢������ԭ��" 392
"���⻯����" 394
"����������" 397
"����������ϵͳ" 410
"��Ӧ��Ϊ���ĵķ�װ" 421
"����������" 436
"Linux �������⻯" 437
"������������̬" 459
"�־û��洢" 468
"Kubernetes �洢���" 469
"�����洢����̬" 482
"��Դ�����" 495
"��������" 507
"͸��ͨ�ŵ�����" 509
"������������̬" 526
"����������" 534
"��΢��������" 534
"Ŀ�ģ�΢�����������" 535
"ǰ�᣺΢������Ҫ������" 538
"�߽磺΢���������" 544
"����������ϵͳ������" 548
"�������" 555
"2020��" 555
"Graal VM" 555
"��һ����ʱ������" 558
"��ԭ������" 560
"û��������� Java" 564
"Spring over Graal" 567
"QCon2020 �����ݽ�����ԭ��ʱ����Java ��Σ���" 575
"2021��" 587
"OpenJDK with CLion ���˰�" 587
"����Ա֮·" 594
"Fenix-CLI������ʽ��ԭ���ͻ���" 600
"ArchSummit2021�����ݽ�������������ʷ���ܹ���δ��" 610
"��¼" 617
"���𻷾�" 617
"���� Docker CE ��������" 618
"����Kubernetes��Ⱥ" 627
"ʹ�� Kubeadm ����" 628
"ʹ�� Rancher ����" 639
"ʹ�� Minikube ����" 646
"���� Istio" 649
"���� Elastic Stack" 653
"���� Prometheus" 658

0 comments on commit 3a92c28

Please sign in to comment.