diff --git a/cv2ext.py b/cv2ext.py index 023651f..c60c08a 100644 --- a/cv2ext.py +++ b/cv2ext.py @@ -136,17 +136,9 @@ def remove_border_in_contours( height, width = get_hw(image) for cnt in contours: for contour in cnt: - if contour[0, 0] - border_size < -1: - raise ValueError("Contour should not be inside the border.") - if contour[0, 0] - border_size > width: - raise ValueError("Contour should not be inside the border.") contour[0, 0] = compute.clamp( contour[0, 0] - border_size, 0, width - 1 ) - if contour[0, 1] - border_size < -1: - raise ValueError("Contour should not be inside the border.") - if contour[0, 1] - border_size > height: - raise ValueError("Contour should not be inside the border.") contour[0, 1] = compute.clamp( contour[0, 1] - border_size, 0, height - 1 ) @@ -290,6 +282,7 @@ def __insert_border_in_mask( epsilon: Tuple[int, float], page_angle: float, ) -> None: + __pourcentage_white_allowed__ = 0.01 epsilon_border, epsilon_angle = epsilon height, width = get_hw(threshold2) cnt2 = cnt[cnt[:, 0, 0] > epsilon_border] @@ -377,9 +370,11 @@ def __insert_border_in_mask( ) mask = cv2.drawContours(mask, [pts], 0, 255, -1) histogram = cv2.calcHist([threshold2], [0], mask, [2], [0, 256]) - if 0.1 * histogram[0] > sum(histogram[1:]) or 0.1 * histogram[ - -1 - ] > sum(histogram[:-1]): + if __pourcentage_white_allowed__ * histogram[0] > sum( + histogram[1:] + ) or __pourcentage_white_allowed__ * histogram[-1] > sum( + histogram[:-1] + ): mask_border_only = cv2.drawContours( mask_border_only, [pts], 0, (0), -1 ) @@ -408,9 +403,11 @@ def __insert_border_in_mask( ) mask = cv2.drawContours(mask, [pts], 0, 255, -1) histogram = cv2.calcHist([threshold2], [0], mask, [2], [0, 256]) - if 0.1 * histogram[0] > sum(histogram[1:]) or 0.1 * histogram[ - -1 - ] > sum(histogram[:-1]): + if __pourcentage_white_allowed__ * histogram[0] > sum( + histogram[1:] + ) or __pourcentage_white_allowed__ * histogram[-1] > sum( + histogram[:-1] + ): mask_border_only = cv2.drawContours( mask_border_only, [pts], 0, (0), -1 ) @@ -419,14 +416,13 @@ def __insert_border_in_mask( def remove_black_border_in_image( gray_bordered: Any, page_angle: float, enable_debug: Optional[str] ) -> Any: - gray_bordered2 = cv2.bitwise_not(gray_bordered) - write_image_if(gray_bordered2, enable_debug, "_2a.png") - _, threshold = cv2.threshold(gray_bordered, 15, 255, cv2.THRESH_BINARY) - write_image_if(threshold, enable_debug, "_2b.png") - threshold2 = cv2.bitwise_not(threshold) - write_image_if(threshold2, enable_debug, "_2b2.png") + thresholdi = threshold_from_gaussian_histogram_black(gray_bordered) + _, threshold = cv2.threshold( + gray_bordered, thresholdi, 255, cv2.THRESH_BINARY_INV + ) + write_image_if(threshold, enable_debug, "_2b2.png") contours, _ = cv2.findContours( - threshold2, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE + threshold, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE ) if enable_debug is not None: image_contours = cv2.drawContours( @@ -453,53 +449,88 @@ def remove_black_border_in_image( ): __insert_border_in_mask( cnt, - threshold2, + threshold, mask_border_only, (__epsilon__, __angle_tolerance__), page_angle, ) - # Borders are in black. + # Borders are in black in mask. write_image_if(mask_border_only, enable_debug, "_2c.png") - res = cv2.bitwise_and( - gray_bordered2, gray_bordered2, mask=mask_border_only + return mask_border_only + + +def apply_mask(image: Any, mask: Any) -> Any: + gray_bordered2 = cv2.bitwise_not(image) + gray_bordered3 = cv2.bitwise_and( + gray_bordered2, gray_bordered2, mask=mask ) - write_image_if(res, enable_debug, "_2d.png") - gray_bordered2 = cv2.bitwise_not(res) - write_image_if(gray_bordered2, enable_debug, "_2e.png") - return gray_bordered2 + gray_bordered4 = cv2.bitwise_not(gray_bordered3) + # Borders are in white in original image. + return gray_bordered4 def erode_and_dilate( - image: Any, size: Tuple[int, int], iterations: int + image: Any, size: Tuple[int, int], iterations: int, reverse: bool = False ) -> Any: - eroded = cv2.erode( - image, - cv2.getStructuringElement(cv2.MORPH_ELLIPSE, size), - iterations=iterations, - ) - dilate = cv2.dilate( - eroded, - cv2.getStructuringElement(cv2.MORPH_ELLIPSE, size), - iterations=iterations, - ) - return dilate + start = int(reverse) + img = image + for i in range(2): + if (i + start) % 2 == 0: + img = cv2.erode( + img, + cv2.getStructuringElement(cv2.MORPH_ELLIPSE, size), + iterations=iterations, + ) + else: + img = cv2.dilate( + img, + cv2.getStructuringElement(cv2.MORPH_ELLIPSE, size), + iterations=iterations, + ) + return img -def threshold_from_gaussian_histogram( - image: Any, pourcentage: float = 0.8 -) -> Any: +def threshold_from_gaussian_histogram_white( + image: Any, pourcentage: float = 0.2, blur_kernel_size: int = 31 +) -> int: histogram = cv2.calcHist([image], [0], None, [256], [0, 256]) + histogram_blur = cv2.GaussianBlur( + histogram, + (1, blur_kernel_size), + blur_kernel_size, + borderType=cv2.BORDER_REPLICATE, + ) i = 255 - for j in range(255, 0, -1): - if histogram[j][0] == 0: + extreme_min = histogram_blur[255][0] + for j in range(254, 0, -1): + if histogram_blur[j][0] < extreme_min: + extreme_min = histogram_blur[j][0] + else: i = j break + limit = extreme_min * (1 + pourcentage) for j in range(i, 0, -1): - if histogram[j][0] != 0: + if histogram_blur[j][0] > limit: i = j break - return int(pourcentage * i) + return i + + +def threshold_from_gaussian_histogram_black( + image: Any, blur_kernel_size: int = 31 +) -> int: + histogram = cv2.calcHist([image], [0], None, [256], [0, 256]) + histogram_blur = cv2.GaussianBlur( + histogram, + (1, blur_kernel_size), + blur_kernel_size, + borderType=cv2.BORDER_REPLICATE, + ) + for i in range(1, 256): + if histogram_blur[i][0] < histogram_blur[i + 1][0]: + return i + return 255 def gaussian_blur_wrap(histogram: Any, kernel_size: int) -> Any: @@ -637,3 +668,19 @@ def bounding_rectangle( [[rectangle[0] + rectangle[2], rectangle[1]]], ] ) + + +def is_line_not_cross_images( + line: Tuple[int, int, int, int], images_mask: Any +) -> bool: + line_x1, line_y1, line_x2, line_y2 = line + image_line = np.zeros(images_mask.shape, np.uint8) + cv2.line( + image_line, + (line_x1, line_y1), + (line_x2, line_y2), + (255, 255, 255), + 1, + ) + image_line = cv2.bitwise_and(images_mask, image_line) + return cv2.countNonZero(image_line) == 0 diff --git a/page/crop.py b/page/crop.py index af0c32b..1abe20d 100644 --- a/page/crop.py +++ b/page/crop.py @@ -58,11 +58,10 @@ class Impl(types.SimpleNamespace): 1, np.pi / (180 * 20), 30, 100, 30 ) find_images: FindImageParameters = FindImageParameters( - 0.005, + 5, (10, 10), (10, 10), (10, 10), - 0.005, 0.01, ) @@ -130,11 +129,13 @@ class CropAroundDataInPageParameters: class Impl(types.SimpleNamespace): found_data_try1: FoundDataTry1Parameters = FoundDataTry1Parameters() found_data_try2: FoundDataTry2Parameters = FoundDataTry2Parameters() - dilate_size: Tuple[int, int] = (2, 2) + dilate_size: Tuple[int, int] = (5, 5) threshold2: int = 200 - contour_area_min: float = 0.002 * 0.002 - contour_area_max: float = 0.5 * 0.5 + contour_area_min: float = 0.01 * 0.01 + contour_area_max: float = 1.0 border: int = 10 + skip_rectangle_closed_to_line: float = 5.0 + closed_to_edge: float = 0.02 def __init__(self) -> None: self.__param = CropAroundDataInPageParameters.Impl() @@ -191,6 +192,22 @@ def border(self) -> int: def border(self, val: int) -> None: self.__param.border = val + @property + def skip_rectangle_closed_to_line(self) -> float: + return self.__param.skip_rectangle_closed_to_line + + @skip_rectangle_closed_to_line.setter + def skip_rectangle_closed_to_line(self, val: float) -> None: + self.__param.skip_rectangle_closed_to_line = val + + @property + def closed_to_edge(self) -> float: + return self.__param.closed_to_edge + + @closed_to_edge.setter + def closed_to_edge(self, val: float) -> None: + self.__param.closed_to_edge = val + def init_default_values( self, key: str, @@ -711,9 +728,8 @@ def crop_around_data( gray = cv2ext.convertion_en_niveau_de_gris(page_gauche_0) - dilated = cv2.dilate( - gray, - cv2.getStructuringElement(cv2.MORPH_ELLIPSE, parameters.dilate_size), + dilated = cv2ext.erode_and_dilate( + gray, parameters.dilate_size, parameters.dilate_size[0] ) cv2ext.write_image_if(dilated, enable_debug, "_" + str(n_page) + "_7.png") @@ -726,19 +742,53 @@ def crop_around_data( cv2ext.write_image_if( threshold, enable_debug, "_" + str(n_page) + "_8.png" ) - contours, _ = cv2.findContours( - threshold, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE + threshold2 = cv2.copyMakeBorder( + threshold, + 1, + 1, + 1, + 1, + cv2.BORDER_CONSTANT, + value=[255], ) + contours, hierarchy = cv2.findContours( + threshold2, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE + ) + cv2ext.remove_border_in_contours(contours, 1, threshold) if enable_debug is not None: image2222 = cv2ext.convertion_en_couleur(page_gauche_0) ncontour_good_size = False + first_cnt_all = int(cv2.contourArea(contours[0])) == (imgh - 1) * ( + imgw - 1 + ) + + def is_border(contour: Any) -> bool: + rectangle = cv2.boundingRect(contour) + ratio = rectangle[3] / rectangle[2] + if ratio > parameters.skip_rectangle_closed_to_line and ( + rectangle[0] < parameters.closed_to_edge * imgw + or rectangle[0] > (1 - parameters.closed_to_edge) * imgw + ): + return True + if ratio < 1 / parameters.skip_rectangle_closed_to_line and ( + rectangle[1] < parameters.closed_to_edge * imgh + or rectangle[1] > (1 - parameters.closed_to_edge) * imgh + ): + return True + return False + contours_listered = filter( lambda x: parameters.contour_area_min * imgh * imgw - < cv2.contourArea(x) - < parameters.contour_area_max * imgh * imgw, - contours, + < cv2.contourArea(x[0]) + < parameters.contour_area_max * imgh * imgw + and ( + (x[1][3] == -1 and not first_cnt_all) + or (x[1][3] == 0 and first_cnt_all) + ) + and not is_border(x[0]), + zip(contours, hierarchy[0]), ) - for cnt in contours_listered: + for cnt, _ in contours_listered: (point_x, point_y, width, height) = cv2.boundingRect(cnt) if enable_debug is not None: cv2.drawContours(image2222, [cnt], -1, (0, 0, 255), 3) diff --git a/page/find_images.py b/page/find_images.py index 903a5c9..1047e85 100644 --- a/page/find_images.py +++ b/page/find_images.py @@ -9,38 +9,35 @@ class FindImageParameters: class Impl(types.SimpleNamespace): - erode_pourcent_size: float + erode_size: int kernel_blur_size: Tuple[int, int] kernel_morphology_size: Tuple[int, int] blur_black_white: Tuple[int, int] - epsilon_approx_poly: float min_area: float def __init__( # pylint: disable=too-many-arguments self, - erode_pourcent_size: float, + erode_size: int, kernel_blur_size: Tuple[int, int], kernel_morphology_size: Tuple[int, int], blur_black_white: Tuple[int, int], - epsilon_approx_poly: float, min_area: float, ): self.__param = FindImageParameters.Impl( - erode_pourcent_size=erode_pourcent_size, + erode_size=erode_size, kernel_blur_size=kernel_blur_size, kernel_morphology_size=kernel_morphology_size, blur_black_white=blur_black_white, - epsilon_approx_poly=epsilon_approx_poly, min_area=min_area, ) @property - def erode_pourcent_size(self) -> float: - return self.__param.erode_pourcent_size + def erode_size(self) -> int: + return self.__param.erode_size - @erode_pourcent_size.setter - def erode_pourcent_size(self, val: float) -> None: - self.__param.erode_pourcent_size = val + @erode_size.setter + def erode_size(self, val: int) -> None: + self.__param.erode_size = val @property def kernel_blur_size(self) -> Tuple[int, int]: @@ -66,14 +63,6 @@ def blur_black_white(self) -> Tuple[int, int]: def blur_black_white(self, val: Tuple[int, int]) -> None: self.__param.blur_black_white = val - @property - def epsilon_approx_poly(self) -> float: - return self.__param.epsilon_approx_poly - - @epsilon_approx_poly.setter - def epsilon_approx_poly(self, val: float) -> None: - self.__param.epsilon_approx_poly = val - @property def min_area(self) -> float: return self.__param.min_area @@ -90,25 +79,31 @@ def find_images( enable_debug: Optional[str], ) -> Any: __internal_border__ = 20 + xxx = 7 cv2ext.write_image_if(image, enable_debug, "_1a.png") - gray = cv2ext.force_image_to_be_grayscale( - image, param.blur_black_white, True - ) + gray = cv2ext.force_image_to_be_grayscale(image, (xxx, xxx), True) cv2ext.write_image_if(gray, enable_debug, "_1b.png") - blurimg_bc = cv2ext.apply_brightness_contrast(gray, -96, 64) + blurimg_bc = cv2ext.erode_and_dilate(gray, (xxx, xxx), xxx) cv2ext.write_image_if(blurimg_bc, enable_debug, "_1c.png") if page_angle is not None: - gray_no_border = cv2ext.remove_black_border_in_image( + mask = cv2ext.remove_black_border_in_image( blurimg_bc, page_angle, enable_debug ) + image_no_border = cv2ext.apply_mask(image, mask) + cv2ext.write_image_if(image_no_border, enable_debug, "_2d.png") + gray2 = cv2ext.force_image_to_be_grayscale( + image_no_border, (xxx, xxx), True + ) + cv2ext.write_image_if(gray2, enable_debug, "_2e.png") + blurimg_bc2 = cv2ext.erode_and_dilate(gray2, (xxx, xxx), xxx, True) + cv2ext.write_image_if(blurimg_bc2, enable_debug, "_2f.png") + gray_no_border = blurimg_bc2 else: gray_no_border = blurimg_bc - gray_equ = cv2.equalizeHist(gray_no_border) - cv2ext.write_image_if(gray_equ, enable_debug, "_3a.png") gray_bordered = cv2.copyMakeBorder( - gray_equ, + gray_no_border, __internal_border__, __internal_border__, __internal_border__, @@ -119,17 +114,17 @@ def find_images( cv2ext.write_image_if(gray_bordered, enable_debug, "_3b.png") dilated = cv2.dilate( gray_bordered, - cv2.getStructuringElement(cv2.MORPH_ELLIPSE, param.kernel_blur_size), + cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (xxx, xxx)), ) cv2ext.write_image_if(dilated, enable_debug, "_3c.png") - thresholdi = cv2ext.threshold_from_gaussian_histogram(dilated) - _, threshold1 = cv2.threshold(dilated, thresholdi, 255, cv2.THRESH_BINARY) - cv2ext.write_image_if(threshold1, enable_debug, "_4.png") - _, threshold2 = cv2.threshold(threshold1, 0, 255, cv2.THRESH_BINARY_INV) - cv2ext.write_image_if(threshold2, enable_debug, "_5.png") + thresholdi = cv2ext.threshold_from_gaussian_histogram_white(dilated) + _, threshold = cv2.threshold( + dilated, thresholdi, 255, cv2.THRESH_BINARY_INV + ) + cv2ext.write_image_if(threshold, enable_debug, "_4.png") morpho1 = cv2.morphologyEx( - threshold2, + threshold, cv2.MORPH_CLOSE, cv2.getStructuringElement( cv2.MORPH_ELLIPSE, param.kernel_morphology_size @@ -161,9 +156,7 @@ def find_images( contours, ) all_polygon = map( - lambda cnt: cv2.approxPolyDP( - cnt, param.epsilon_approx_poly * cv2ext.get_hw(image)[0], True - ), + lambda cnt: cv2.approxPolyDP(cnt, param.erode_size, True), big_images, ) @@ -175,18 +168,13 @@ def find_images( debug_image_mask = cv2.drawContours( debug_image_mask, [contour], -1, (255, 0, 0), -1 ) - img_mask_erodei = np.zeros(image.shape, np.uint8) img_mask_erodei = cv2.drawContours( - img_mask_erodei, [contour], -1, (255, 0, 0), -1 + np.zeros(image.shape, np.uint8), [contour], -1, (255, 0, 0), -1 ) - bounding_rect = cv2.boundingRect(contour) img_mask_erodei = cv2.erode( img_mask_erodei, cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3, 3)), - iterations=int( - np.minimum(bounding_rect[2], bounding_rect[3]) - * param.erode_pourcent_size - ), + iterations=param.erode_size, ) img_mask_erode = cv2.bitwise_or(img_mask_erode, img_mask_erodei) diff --git a/page/split.py b/page/split.py index ca751f5..99be607 100644 --- a/page/split.py +++ b/page/split.py @@ -301,12 +301,12 @@ class Impl(types.SimpleNamespace): blur_size: Tuple[int, int] = (10, 10) canny: CannyParameters = CannyParameters(25, 255, 5) hough_lines: HoughLinesParameters = HoughLinesParameters( - 1, np.pi / (180 * 40), 150, 200, 150 + 1, np.pi / (180 * 20), 150, 200, 150 ) delta_rho: int = 200 delta_tetha: float = 20.0 find_images: FindImageParameters = FindImageParameters( - 0.02, (10, 10), (10, 10), (10, 10), 0.005, 0.01 + 5, (10, 10), (10, 10), (10, 10), 0.01 ) find_candidates: FindCandidatesSplitLineWithWaveParameters = ( FindCandidatesSplitLineWithWaveParameters( @@ -395,33 +395,30 @@ def init_default_values( def __found_candidates_split_line_with_line( image: Any, + images_mask: Any, param: FindCandidatesSplitLineWithLineParameters, enable_debug: Optional[str] = None, ) -> List[Tuple[int, int, int, int]]: - blurimg = cv2ext.force_image_to_be_grayscale(image, param.blur_size, False) - cv2ext.write_image_if(blurimg, enable_debug, "_2_2a.png") - blurimg_bc = cv2ext.apply_brightness_contrast(blurimg, -96, 64) - cv2ext.write_image_if(blurimg_bc, enable_debug, "_2_2b.png") - threshold = blurimg_bc & 0b11000000 - cv2ext.write_image_if(threshold, enable_debug, "_2_3.png") - erode_dilate = cv2ext.erode_and_dilate( - threshold, param.erode.size, param.erode.iterations - ) - cv2ext.write_image_if(erode_dilate, enable_debug, "_2_4.png") + xxx = 7 + blurimg = cv2ext.force_image_to_be_grayscale(image, (xxx, xxx), True) + cv2ext.write_image_if(blurimg, enable_debug, "_2_1.png") + erode_dilate = cv2ext.erode_and_dilate(blurimg, (xxx, xxx), xxx) + cv2ext.write_image_if(erode_dilate, enable_debug, "_2_2.png") canny = cv2.Canny( erode_dilate, param.canny.minimum, param.canny.maximum, apertureSize=param.canny.aperture_size, ) - cv2ext.write_image_if(canny, enable_debug, "_2_5.png") + cv2ext.write_image_if(canny, enable_debug, "_2_3.png") + list_lines_p = cv2.HoughLinesP( canny, param.hough_lines.delta_rho, param.hough_lines.delta_tetha, param.hough_lines.threshold, minLineLength=param.hough_lines.min_line_length, - maxLineGap=param.hough_lines.max_line_gap, + maxLineGap=xxx ** 2, ) if enable_debug is not None: cv2.imwrite( @@ -449,7 +446,21 @@ def __found_candidates_split_line_with_line( image, lines_valid, (0, 0, 255), 1 ), ) - return list(map(lambda p: p[0], lines_valid)) + lines_valid2 = list( + filter( + lambda x: cv2ext.is_line_not_cross_images(x[0], images_mask), + lines_valid, + ) + ) + if enable_debug is not None: + cv2.imwrite( + enable_debug + "_2_8.png", + cv2ext.draw_lines_from_hough_lines( + image, lines_valid2, (0, 0, 255), 1 + ), + ) + + return list(map(lambda p: p[0], lines_valid2)) def __loop_to_find_best_mean_angle_pos( @@ -458,6 +469,9 @@ def __loop_to_find_best_mean_angle_pos( ecart: int, valid_lines: List[Tuple[int, int, int, int]], ) -> Tuple[float, int]: + if ecart % 2 == 0: + ecart = ecart + 1 + histogram_posx_length = histogram_posx_maxy - histogram_posx_miny histogram_posx_length_blur = cv2.GaussianBlur( @@ -523,6 +537,7 @@ def __best_candidates_split_line_with_line( def found_split_line_with_line( image: Any, + images_found: Any, param: FoundSplitLineWithLineParameters, enable_debug: Optional[str] = None, ) -> Tuple[float, int]: @@ -533,6 +548,7 @@ def found_split_line_with_line( valid_lines.extend( __found_candidates_split_line_with_line( image, + images_found, FindCandidatesSplitLineWithLineParameters( param.blur_size, param.canny, @@ -684,26 +700,11 @@ def found_split_line_with_wave( page_angle: Optional[float], enable_debug: Optional[str] = None, ) -> Tuple[float, int]: - cv2ext.write_image_if(image, enable_debug, "_1.png") - blurimg = cv2ext.force_image_to_be_grayscale( - image, parameters.blur_size, False - ) - cv2ext.write_image_if(blurimg, enable_debug, "_2a.png") - blurimg_bc = cv2ext.apply_brightness_contrast(blurimg, -96, 64) - cv2ext.write_image_if(blurimg_bc, enable_debug, "_2b.png") - blurimg_equ = cv2.equalizeHist(blurimg_bc) - cv2ext.write_image_if(blurimg_equ, enable_debug, "_2c.png") - _, threshold = cv2.threshold( - blurimg_equ, - cv2ext.threshold_from_gaussian_histogram(blurimg_equ), - 255, - cv2.THRESH_BINARY, - ) - cv2ext.write_image_if(threshold, enable_debug, "_3.png") - erode_dilate = cv2ext.erode_and_dilate( - threshold, parameters.erode.size, parameters.erode.iterations - ) - cv2ext.write_image_if(erode_dilate, enable_debug, "_4.png") + xxx = 7 + blurimg = cv2ext.force_image_to_be_grayscale(image, (xxx, xxx), True) + cv2ext.write_image_if(blurimg, enable_debug, "_0.png") + erode_dilate = cv2ext.erode_and_dilate(blurimg, (xxx, xxx), xxx) + cv2ext.write_image_if(erode_dilate, enable_debug, "_1.png") size_border = 20 eroded_bordered = cv2.copyMakeBorder( erode_dilate, @@ -714,11 +715,19 @@ def found_split_line_with_wave( cv2.BORDER_CONSTANT, value=[0], ) - cv2ext.write_image_if(eroded_bordered, enable_debug, "_4a.png") + cv2ext.write_image_if(eroded_bordered, enable_debug, "_2.png") + dilated = cv2.dilate( + eroded_bordered, + cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (xxx, xxx)), + ) + cv2ext.write_image_if(dilated, enable_debug, "_3.png") + thresholdi = cv2ext.threshold_from_gaussian_histogram_white(dilated) + _, threshold1 = cv2.threshold(dilated, thresholdi, 255, cv2.THRESH_BINARY) + cv2ext.write_image_if(threshold1, enable_debug, "_4.png") # On cherche tous les contours contours, _ = cv2.findContours( - eroded_bordered, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE + threshold1, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE ) # pour ne garder que le plus grand. Normalement, cela doit ĂȘtre celui # qui fait le contour des pages diff --git a/page/unskew.py b/page/unskew.py index 2f6af32..8912f88 100644 --- a/page/unskew.py +++ b/page/unskew.py @@ -18,11 +18,10 @@ class Impl(types.SimpleNamespace): 1, np.pi / (180 * 20), 70, 300, 90 ) find_images: FindImageParameters = FindImageParameters( - 0.005, + 5, (10, 10), (10, 10), (10, 10), - 8, 0.01, ) @@ -82,10 +81,11 @@ def found_angle_unskew_page( histogram_blur_top_sorted = sorted( histogram_blur_top, key=lambda x: ( - 1. - - 2. + 1.0 + - 2.0 * np.absolute( - compute.norm_cdf(x * delta_angle, approximate_angle, 10.) - 0.5 + compute.norm_cdf(x * delta_angle, approximate_angle, 10.0) + - 0.5 ) ) * histogram_blur[x], @@ -173,19 +173,11 @@ def find_rotation( ) cv2.imwrite(enable_debug + "_" + str(n_page) + "_4bbbb.png", img) - lines_filtered = [] - for line_x1, line_y1, line_x2, line_y2 in lines: - image_line = np.zeros(images_mask.shape, np.uint8) - cv2.line( - image_line, - (line_x1, line_y1), - (line_x2, line_y2), - (255, 255, 255), - 1, + lines_filtered = list( + filter( + lambda x: cv2ext.is_line_not_cross_images(x, images_mask), lines ) - image_line = cv2.bitwise_and(images_mask, image_line) - if cv2.countNonZero(image_line) == 0: - lines_filtered.append((line_x1, line_y1, line_x2, line_y2)) + ) if enable_debug is not None: image_with_lines = cv2ext.convertion_en_couleur(image) diff --git a/script.py b/script.py index 723e85a..fcba9ad 100644 --- a/script.py +++ b/script.py @@ -30,6 +30,12 @@ def split_two_waves( parameters: SplitTwoWavesParameters, enable_debug: Optional[str], ) -> Tuple[Any, Any]: + self.__images_found = page.find_images.find_images( + image, + parameters.find_images, + None, + compute.optional_concat(enable_debug, "_0"), + ) param1 = FoundSplitLineWithLineParameters( parameters.blur_size, parameters.erode, @@ -39,7 +45,10 @@ def split_two_waves( parameters.delta_tetha, ) first = page.split.found_split_line_with_line( - image, param1, compute.optional_concat(enable_debug, "_1") + image, + self.__images_found, + param1, + compute.optional_concat(enable_debug, "_1"), ) param2 = FoundSplitLineWithWave( @@ -286,6 +295,7 @@ def treat_file( __output: PrintInterface __angle_split: float + __images_found: Any def get_absolute_from_current_path(root: str, filename: str) -> str: diff --git a/tests b/tests index 297281c..61d6ab7 160000 --- a/tests +++ b/tests @@ -1 +1 @@ -Subproject commit 297281c1f39cd208a542b974bf33bb8b8767ae74 +Subproject commit 61d6ab74707a3d3c74a63535f6ea301f39931046