We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
GEOSException Traceback (most recent call last) in <cell line: 2>() 1 # @title 🚀 Define vars & Kickoff ----> 2 result = parse_pdf( 3 pdf_path=pdf_file_path, 4 output_dir="./output", 5 api_key="<REDACTED_API_KEY>", # @param {type:"string"} - String Field
9 frames in parse_pdf(pdf_path, output_dir, prompt, api_key, base_url, model, verbose, gpt_worker, **args) 260 os.makedirs(output_dir) 261 --> 262 image_infos = _parse_pdf_to_images(pdf_path, output_dir=output_dir) 263 content = _gpt_parse_images( 264 image_infos=image_infos,
in _parse_pdf_to_images(pdf_path, output_dir) 145 logging.info(f'parse page: {page_index}') 146 rect_images = [] --> 147 rects = _parse_rects(page) 148 for index, rect in enumerate(rects): 149 fitz_rect = fitz.Rect(rect)
in _parse_rects(page) 115 rect_list += image_rects 116 --> 117 merged_rects = _merge_rects(rect_list, distance=10, horizontal_distance=100) 118 merged_rects = [rect for rect in merged_rects if explain_validity(rect) == 'Valid Geometry'] 119
in _merge_rects(rect_list, distance, horizontal_distance) 66 if _is_near(rect, other_rect, distance) or ( 67 horizontal_distance and _is_horizontal_near(rect, other_rect, horizontal_distance)): ---> 68 rect = _union_rects(rect, other_rect) 69 rect_list.remove(other_rect) 70 merged = True
in _union_rects(rect1, rect2) 49 Union two rectangles. 50 """ ---> 51 return sg.box(*(rect1.union(rect2).bounds)) 52 53
/usr/local/lib/python3.10/dist-packages/shapely/geometry/geo.py in box(minx, miny, maxx, maxy, ccw) 51 if not ccw: 52 coords = coords[::-1] ---> 53 return Polygon(coords) 54 55
/usr/local/lib/python3.10/dist-packages/shapely/geometry/polygon.py in __new__(self, shell, holes) 228 return shell 229 else: --> 230 shell = LinearRing(shell) 231 232 if holes is not None:
/usr/local/lib/python3.10/dist-packages/shapely/geometry/polygon.py in __new__(self, coordinates) 102 return shapely.from_wkt("LINEARRING EMPTY") 103 --> 104 geom = shapely.linearrings(coordinates) 105 if not isinstance(geom, LinearRing): 106 raise ValueError("Invalid values passed to LinearRing constructor")
/usr/local/lib/python3.10/dist-packages/shapely/decorators.py in wrapped(*args, **kwargs) 75 for arr in array_args: 76 arr.flags.writeable = False ---> 77 return func(*args, **kwargs) 78 finally: 79 for arr, old_flag in zip(array_args, old_flags):
/usr/local/lib/python3.10/dist-packages/shapely/creation.py in linearrings(coords, y, z, indices, out, **kwargs) 169 coords = _xyz_to_coords(coords, y, z) 170 if indices is None: --> 171 return lib.linearrings(coords, out=out, **kwargs) 172 else: 173 return simple_geometries_1d(coords, indices, GeometryType.LINEARRING, out=out)
GEOSException: IllegalArgumentException: Points of LinearRing do not form a closed linestring
The text was updated successfully, but these errors were encountered:
No branches or pull requests
环境:colab
补充下处理的目标文件
https://drive.google.com/file/d/1uQSPYKJu-J-_NerHZE-C524pR_IgT-_K/view?usp=sharing
GEOSException Traceback (most recent call last)
in <cell line: 2>()
1 # @title 🚀 Define vars & Kickoff
----> 2 result = parse_pdf(
3 pdf_path=pdf_file_path,
4 output_dir="./output",
5 api_key="<REDACTED_API_KEY>", # @param {type:"string"} - String Field
9 frames
in parse_pdf(pdf_path, output_dir, prompt, api_key, base_url, model, verbose, gpt_worker, **args)
260 os.makedirs(output_dir)
261
--> 262 image_infos = _parse_pdf_to_images(pdf_path, output_dir=output_dir)
263 content = _gpt_parse_images(
264 image_infos=image_infos,
in _parse_pdf_to_images(pdf_path, output_dir)
145 logging.info(f'parse page: {page_index}')
146 rect_images = []
--> 147 rects = _parse_rects(page)
148 for index, rect in enumerate(rects):
149 fitz_rect = fitz.Rect(rect)
in _parse_rects(page)
115 rect_list += image_rects
116
--> 117 merged_rects = _merge_rects(rect_list, distance=10, horizontal_distance=100)
118 merged_rects = [rect for rect in merged_rects if explain_validity(rect) == 'Valid Geometry']
119
in _merge_rects(rect_list, distance, horizontal_distance)
66 if _is_near(rect, other_rect, distance) or (
67 horizontal_distance and _is_horizontal_near(rect, other_rect, horizontal_distance)):
---> 68 rect = _union_rects(rect, other_rect)
69 rect_list.remove(other_rect)
70 merged = True
in _union_rects(rect1, rect2)
49 Union two rectangles.
50 """
---> 51 return sg.box(*(rect1.union(rect2).bounds))
52
53
/usr/local/lib/python3.10/dist-packages/shapely/geometry/geo.py in box(minx, miny, maxx, maxy, ccw)
51 if not ccw:
52 coords = coords[::-1]
---> 53 return Polygon(coords)
54
55
/usr/local/lib/python3.10/dist-packages/shapely/geometry/polygon.py in __new__(self, shell, holes)
228 return shell
229 else:
--> 230 shell = LinearRing(shell)
231
232 if holes is not None:
/usr/local/lib/python3.10/dist-packages/shapely/geometry/polygon.py in __new__(self, coordinates)
102 return shapely.from_wkt("LINEARRING EMPTY")
103
--> 104 geom = shapely.linearrings(coordinates)
105 if not isinstance(geom, LinearRing):
106 raise ValueError("Invalid values passed to LinearRing constructor")
/usr/local/lib/python3.10/dist-packages/shapely/decorators.py in wrapped(*args, **kwargs)
75 for arr in array_args:
76 arr.flags.writeable = False
---> 77 return func(*args, **kwargs)
78 finally:
79 for arr, old_flag in zip(array_args, old_flags):
/usr/local/lib/python3.10/dist-packages/shapely/creation.py in linearrings(coords, y, z, indices, out, **kwargs)
169 coords = _xyz_to_coords(coords, y, z)
170 if indices is None:
--> 171 return lib.linearrings(coords, out=out, **kwargs)
172 else:
173 return simple_geometries_1d(coords, indices, GeometryType.LINEARRING, out=out)
GEOSException: IllegalArgumentException: Points of LinearRing do not form a closed linestring
The text was updated successfully, but these errors were encountered: