add toc

zhangqinghua-fork · Dec 26, 2021 · 3a92c28 · 3a92c28
1 parent 7855a57
commit 3a92c28
Show file tree

Hide file tree

Showing 3 changed files with 286 additions and 0 deletions.
diff --git a/.vuepress/dist/pdf/generate_pdf_with_toc.py b/.vuepress/dist/pdf/generate_pdf_with_toc.py
@@ -0,0 +1,174 @@
+import json
+import toml
+from pdftocgen.recipe import extract_toc, Recipe
+from fitzutils import ToCEntry, dump_toc, open_pdf
+from pdftocio.tocio import write_toc
+
+# requirements
+# ----------------------------
+# requires `pdf.tocgen`
+# https://github.com/Krasjet/pdf.tocgen
+# pip install -U pdf.tocgen
+
+# usage
+# ----------------------------
+# 1. make sure `pdf.tocgen` is installed
+# 2. put `exportPages.json`, `sidebar.json`, `the-fenix-project.pdf` into the folder containing the script
+# 3. run the script
+# 4. 2 files will be generated: `the-fenix-project-with-toc.pdf` and `toc.txt`
+# to apply the toc, use `pdftocio {path_to_pdf} < toc.txt`
+
+# prepare `exportPages.json` & `sidebar.json`
+#
+# add the following line after line 93 in `.vuepress/plugins/export/index.js`
+# replace {path_to_folder} with the path to the folder containing the script!
+#
+#    fs.writeFileSync("{path_to_folder}" + "/sidebar.json", JSON.stringify(sidebar));
+#    fs.writeFileSync("{path_to_folder}" + "/exportPages.json", JSON.stringify(exportPages));
+#    return
+
+
+# constants & paths
+# ----------------------------
+# Input PDF; opened by find_title_pages() and save_toc().
+pdf_path = "the-fenix-project.pdf"
+# Output PDF; save_toc() saves the document here after writing the ToC.
+toc_pdf_path = "the-fenix-project-with-toc.pdf"
+# Text dump of the final ToC, written by save_toc().
+final_toc_path = "toc.txt"
+
+
+# TOML recipe parsed with toml.loads() in find_title_pages() and passed to
+# pdftocgen's Recipe to locate headings in the PDF.
+# NOTE(review): font.size is presumably the rendered size of chapter titles in
+# the exported PDF, and the `#` line inside the string a TOML comment naming a
+# sample heading -- confirm against the PDF / pdf.tocgen recipe docs.
+recipe_str = """[[heading]]
+# 前端工程
+level = 1
+greedy = true
+font.size = 26.411094665527344
+font.size_tolerance = 1"""
+
+# helpers
+# -----------------------------
+def remove_multiple_suffix(s, suffixes):
+    """Repeatedly strip any of ``suffixes`` from the end of ``s``.
+
+    On every pass the first suffix (in the order given) that ``s`` ends with
+    is removed; passes repeat until no suffix matches any more.
+
+    Args:
+        s: The string to trim.
+        suffixes: Iterable of suffix strings. Empty strings are ignored.
+
+    Returns:
+        ``s`` with all trailing suffixes removed.
+    """
+    # An empty suffix matches every string yet removes nothing useful
+    # (``s[:-0]`` is ``""``), so the loop would never terminate; drop them.
+    candidates = tuple(suffix for suffix in suffixes if suffix)
+
+    while s.endswith(candidates):
+        for suffix in candidates:
+            if s.endswith(suffix):
+                s = s[:-len(suffix)]
+                break
+    return s
+
+def normalize_path(path):
+    """Return ``path`` lower-cased with trailing "/", ".md" and ".html" removed.
+
+    Used to bring sidebar paths and exported page urls into one comparable
+    form.
+    """
+    return remove_multiple_suffix(path.lower(), ["/", ".md", ".html"])
+
+
+def walk_tree(item, level, path_title_level_list):
+    """Flatten a sidebar tree into ``[path, title, level]`` records.
+
+    Records are appended to ``path_title_level_list`` in depth-first order.
+
+    Args:
+        item: A sidebar node: a list of nodes, a dict with a "title" and
+            optional "path" / "children" keys, or a plain path string.
+            Any other type is ignored.
+        level: Depth of ``item``; children are recorded at ``level + 1``.
+        path_title_level_list: Output list, mutated in place.
+    """
+    if isinstance(item, str):
+        # A bare string is a page path; its title is resolved later.
+        path_title_level_list.append([item, None, level])
+        return
+
+    if isinstance(item, list):
+        for child in item:
+            walk_tree(child, level + 1, path_title_level_list)
+        return
+
+    if not isinstance(item, dict):
+        return
+
+    # Groups without a "path" key are recorded with a path of None.
+    path_title_level_list.append([item.get("path"), item["title"], level])
+    if "children" in item:
+        for child in item["children"]:
+            walk_tree(child, level + 1, path_title_level_list)
+
+# steps
+# -----------------------------
+
+def generate_hierarchy():
+    """Build the website outline as a list of ``[path, title, level]`` records.
+
+    Reads ``exportPages.json`` (page urls and titles) and ``sidebar.json``
+    (the nesting) from the current working directory, flattens the sidebar
+    with ``walk_tree`` and fills in the title of every record that has none
+    by looking up its normalized path among the exported pages.
+
+    Returns:
+        The flattened outline, in sidebar order.
+
+    Raises:
+        KeyError: If a sidebar path has no matching exported page url.
+    """
+    with open("exportPages.json", "r", encoding="u8") as pages_file:
+        pages = json.load(pages_file)
+
+    # The first exported page is given an empty title.
+    pages[0]["title"] = ""
+
+    # Normalized url -> page title.
+    title_by_url = {}
+    for page in pages:
+        url_key = normalize_path(page["url"])
+        title_by_url[url_key] = page["title"]
+
+    # The sidebar supplies the hierarchy.
+    with open("sidebar.json", "r", encoding="u8") as sidebar_file:
+        sidebar_tree = json.load(sidebar_file)
+
+    outline = []
+    walk_tree(sidebar_tree, 0, outline)
+
+    # Sidebar children given as bare paths carry no title yet.
+    for record in outline:
+        if record[1] is None:
+            record[1] = title_by_url[normalize_path(record[0])]
+
+    print("load from website, length", len(outline))
+
+    return outline
+
+
+def find_title_pages():
+    """Extract the headings described by ``recipe_str`` from the PDF.
+
+    Returns:
+        The heading list produced by ``extract_toc`` for ``pdf_path``.
+    """
+    recipe_dict = toml.loads(recipe_str)
+
+    with open_pdf(pdf_path) as doc:
+        heading_recipe = Recipe(recipe_dict)
+        headings = extract_toc(doc, heading_recipe)
+
+    print("load from pdf, length", len(headings))
+
+    return headings
+
+def check_toc_length(path_title_level_list, toc):
+    """Print a warning when the PDF heading count differs from the site's.
+
+    Only outline records that have a path (index 0 is not None) are counted
+    on the website side; nothing is printed when the two counts agree.
+    """
+    found = len(toc)
+    expected = sum(1 for record in path_title_level_list if record[0] is not None)
+    if found == expected:
+        return
+
+    print("WARNING: missing some chapters, the PDF provided might not be the most up-to-date version.")
+    print("警告：部分存在于网站中的章节不存在于 PDF 中。这可能是因为 PDF 构建后网站中增加了新章节。重新构建 PDF 可以解决这一问题。")
+
+
+
+def _strip_whitespace(text):
+    """Return ``text`` with all whitespace removed, for tolerant comparison."""
+    return "".join(text.split())
+
+
+def build_final_toc(path_title_level_list, toc):
+    """Merge the website outline with the headings found in the PDF.
+
+    Both inputs are walked in order. A record without a path (a sidebar
+    group) has no page of its own and is placed on the page of the next
+    unmatched PDF heading. A record with a path is kept only when its title
+    equals the next unmatched PDF heading (ignoring whitespace); otherwise it
+    is reported as missing and skipped.
+
+    Args:
+        path_title_level_list: ``[path, title, level]`` records in outline
+            order, with every title filled in.
+        toc: Headings extracted from the PDF, in page order; each must expose
+            ``title`` and ``pagenum``.
+
+    Returns:
+        A list of ``ToCEntry(level, title, pagenum)``.
+    """
+    final_toc = []
+    toc_idx = 0
+    last_page_num = 1
+
+    for path, title, level in path_title_level_list:
+        # Once every PDF heading has been consumed (or the PDF yielded none)
+        # there is nothing left to compare against; indexing would raise.
+        heading = toc[toc_idx] if toc_idx < len(toc) else None
+
+        if path is None:
+            # Fall back to the last matched page when no heading remains.
+            page_num = heading.pagenum if heading is not None else last_page_num
+            final_toc.append(ToCEntry(level, title, page_num))
+            continue
+
+        if heading is not None and _strip_whitespace(title) == _strip_whitespace(heading.title):
+            final_toc.append(ToCEntry(level, title, heading.pagenum))
+            last_page_num = heading.pagenum
+            toc_idx += 1
+        else:
+            print("missing chapter: ", title)
+
+    return final_toc
+
+def save_toc(final_toc):
+    """Write ``final_toc`` into a copy of the PDF and dump it as text.
+
+    The PDF at ``pdf_path`` is opened, given the table of contents and saved
+    to ``toc_pdf_path``; the textual form from ``dump_toc`` is written to
+    ``final_toc_path``.
+
+    Args:
+        final_toc: List of ``ToCEntry`` as returned by ``build_final_toc``.
+    """
+    with open_pdf(pdf_path) as doc:
+        write_toc(doc, final_toc)
+        doc.save(toc_pdf_path)
+
+    # Titles are mostly non-ASCII. Without an explicit encoding the file is
+    # written in the platform's locale encoding, so the result differs between
+    # machines; use UTF-8 like the JSON inputs this script reads.
+    with open(final_toc_path, "w", encoding="utf-8") as f:
+        f.write(dump_toc(final_toc))
+
+# main
+# -----------------------------
+
+def main():
+    """Run the pipeline: outline, PDF headings, sanity check, merge, save."""
+    outline = generate_hierarchy()
+    pdf_headings = find_title_pages()
+    check_toc_length(outline, pdf_headings)
+    save_toc(build_final_toc(outline, pdf_headings))
+
+
+if __name__ == "__main__":
+    main()
+
diff --git a/.vuepress/dist/pdf/tips.txt b/.vuepress/dist/pdf/tips.txt
@@ -0,0 +1 @@
+# 生成PDF耗时较长，请在必要时候执行npm run export来更新
diff --git a/.vuepress/dist/pdf/toc.txt b/.vuepress/dist/pdf/toc.txt
@@ -0,0 +1,111 @@
+"Ŀ¼" 6
+"ǰ��" 11
+    "��������" 11
+    "����ֽ����" 13
+    "ʲô�ǡ���˼ܹ���" 15
+"̽����" 20
+    "�Ķ�ָ��" 20
+        "������־" 20
+        "��ο�ʼ" 27
+    "������ʾ����" 32
+        "ǰ�˹���" 33
+        "����ܹ���Spring Boot" 39
+        "΢����Spring Cloud" 44
+        "΢����Kubernetes" 51
+        "��������Istio" 57
+        "�޷���AWS Lambda" 64
+"�ݽ��еļܹ�" 68
+    "����ܹ��ݽ�ʷ" 68
+        "ԭʼ�ֲ�ʽʱ��" 69
+        "����ϵͳʱ��" 73
+        "SOA ʱ��" 78
+        "΢����ʱ��" 83
+        "��΢����ʱ��" 88
+        "�޷���ʱ��" 93
+"�ܹ�ʦ���ӽ�" 97
+    "����Զ�̷���" 97
+        "Զ�̷������" 98
+        "REST ��Ʒ��" 110
+    "������" 128
+        "��������" 130
+        "ȫ������" 140
+        "��������" 146
+        "�ֲ�ʽ����" 148
+    "͸���༶����ϵͳ" 161
+        "�ͻ��˻���" 163
+        "��������" 169
+        "������·" 173
+        "���ݷַ�����" 184
+        "���ؾ���" 192
+        "����˻���" 204
+    "�ܹ���ȫ��" 219
+        "��֤" 220
+        "��Ȩ" 232
+        "ƾ֤" 245
+        "����" 254
+        "����" 260
+        "��֤" 273
+"�ֲ�ʽ�Ļ�ʯ" 279
+    "�ֲ�ʽ��ʶ�㷨" 279
+        "Paxos" 282
+        "Multi Paxos" 291
+        "Gossip Э��" 296
+    "����⵽����" 299
+        "������" 301
+        "����·��" 308
+        "�ͻ��˸��ؾ���" 314
+    "��������" 322
+        "�����ݴ�" 323
+        "��������" 337
+    "�ɿ�ͨѶ" 346
+        "����������" 347
+        "����ȫ" 353
+    "�ɹ۲���" 363
+        "�¼���־" 368
+        "��·׷��" 375
+        "�ۺ϶���" 383
+"���ɱ������ʩ" 392
+    "��΢������ԭ��" 392
+    "���⻯����" 394
+        "����������" 397
+        "����������ϵͳ" 410
+        "��Ӧ��Ϊ���ĵķ�װ" 421
+    "����������" 436
+        "Linux �������⻯" 437
+        "������������̬" 459
+    "�־û��洢" 468
+        "Kubernetes �洢���" 469
+        "�����洢����̬" 482
+    "��Դ�����" 495
+    "��������" 507
+        "͸��ͨ�ŵ�����" 509
+        "������������̬" 526
+"����������" 534
+    "��΢��������" 534
+        "Ŀ�ģ�΢�����������" 535
+        "ǰ�᣺΢������Ҫ������" 538
+        "�߽磺΢���������" 544
+        "����������ϵͳ������" 548
+"�������" 555
+    "2020��" 555
+        "Graal VM" 555
+            "��һ����ʱ������" 558
+            "��ԭ������" 560
+            "û��������� Java" 564
+            "Spring over Graal" 567
+        "QCon2020 �����ݽ�����ԭ��ʱ����Java ��Σ���" 575
+    "2021��" 587
+        "OpenJDK with CLion ���˰�" 587
+        "����Ա֮·" 594
+        "Fenix-CLI������ʽ��ԭ���ͻ���" 600
+        "ArchSummit2021�����ݽ�������������ʷ���ܹ���δ��" 610
+"��¼" 617
+    "���𻷾�" 617
+        "���� Docker CE ��������" 618
+        "����Kubernetes��Ⱥ" 627
+            "ʹ�� Kubeadm ����" 628
+            "ʹ�� Rancher ����" 639
+            "ʹ�� Minikube ����" 646
+    "���� Istio" 649
+    "���� Elastic Stack" 653
+    "���� Prometheus" 658
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		# 生成PDF耗时较长，请在必要时候执行npm run export来更新