Change the default pdf_images DPI from 300 to 200

CosmosShadow · Embracex1998 · Sep 20, 2024 · Sep 20, 2024 · Sep 26, 2024 · Sep 26, 2024
commit b71bb7d922e09e0751e8286d6517326854cd7ac2
diff --git a/gptpdf/parse.py b/gptpdf/parse.py
@@ -130,7 +130,6 @@ def _parse_rects(page: fitz.Page) -> List[Tuple[float, float, float, float]]:
 
     return [rect.bounds for rect in merged_rects]
 
-#增加了PDF图片解析的dpi参数，默认值设为300，根据需求设置300-1000可以满足清晰度需求
 def _parse_pdf_to_images(pdf_path: str,dpi:int,output_dir: str = './') -> List[Tuple[str, List[str]]]:
     """
     Parse PDF to images and save to output_dir.
@@ -242,7 +241,7 @@ def _process_page(index: int, image_info: Tuple[str, List[str]]) -> Tuple[int, s
 
     return '\n\n'.join(contents)
 
-
+# Added a dpi parameter for rendering PDF pages to images; the default is 200. Set it to 300-1000 when sharper images are needed.
 def parse_pdf(
         pdf_path: str,
         output_dir: str = './',
@@ -252,7 +251,7 @@ def parse_pdf(
         model: str = 'gpt-4o',
         verbose: bool = False,
         gpt_worker: int = 1,
-        dpi:int = 300,
+        dpi:int = 200,
         **args
 ) -> Tuple[str, List[str]]:
     """

diff --git a/test/test.py b/test/test.py
@@ -50,6 +50,7 @@ def test_qwen_vl_max():
     base_url = "https://dashscope.aliyuncs.com/compatible-mode/v1"
     # Refer to: https://help.aliyun.com/zh/dashscope/developer-reference/compatibility-of-openai-with-dashscope
     model  = 'qwen-vl-max'
+    # parse_pdf now accepts a dpi parameter (default 200); this call overrides it with dpi=1000.
     content, image_paths = parse_pdf(pdf_path, output_dir=output_dir, api_key=api_key, base_url=base_url, model=model, verbose=True, temperature=0.5, max_tokens=1000, top_p=0.9, frequency_penalty=1,dpi=1000)
     print(content)
     print(image_paths)