Skip to content

Commit

Permalink
feat(*): env var TABLE_TO_IMAGE
Browse files Browse the repository at this point in the history
to determine whether HTML tables should be converted to images or just dropped

Signed-off-by: Rongrong <15956627+Rongronggg9@users.noreply.github.com>
  • Loading branch information
Rongronggg9 committed Mar 21, 2022
1 parent e838987 commit 1498a74
Show file tree
Hide file tree
Showing 6 changed files with 43 additions and 29 deletions.
1 change: 1 addition & 0 deletions .env.sample
Original file line number Diff line number Diff line change
Expand Up @@ -30,5 +30,6 @@ TELEGRAPH_TOKEN="
#R_PROXY=socks5://172.17.0.1:1080 # Proxy used to fetch feeds
#PROXY_BYPASS_PRIVATE=1 # default: 0
#PROXY_BYPASS_DOMAINS=example.com;example.net
#TABLE_TO_IMAGE=1 # default: 0
#DEBUG=1 # debug logging, default: 0
# ↑------ Advanced settings ------↑ #
1 change: 1 addition & 0 deletions docker-compose.yml.sample
Original file line number Diff line number Diff line change
Expand Up @@ -40,5 +40,6 @@ services:
#- R_PROXY=socks5://172.17.0.1:1080 # Proxy used to fetch feeds
#- PROXY_BYPASS_PRIVATE=1 # default: 0
#- PROXY_BYPASS_DOMAINS=example.com;example.net
#- TABLE_TO_IMAGE=1 # default: 0
#- DEBUG=1 # debug logging, default: 0
# ↑------ Advanced settings ------↑ #
1 change: 1 addition & 0 deletions docs/advanced-settings.md
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@
| `IMG_RELAY_SERVER` | Media relay server URL | `https://images.weserv.nl/?url=` | `https://rsstt-img-relay.rongrong.workers.dev/` |
| `IMAGES_WESERV_NL` | images.weserv.nl URL | `https://t0.nl/` | `https://images.weserv.nl/` |
| `DATABASE_URL` | Database URL [^5] | `postgres://user:pass@example.com:5432/table` | `sqlite://config/db.sqlite3?journal_mode=OFF` |
| `TABLE_TO_IMAGE` | Convert tables to images (causes high CPU usage) or just drop them? | `1` | `0` |
| `DEBUG` | Enable debug logging or not? | `1` | `0` |

## Manager options
Expand Down
3 changes: 2 additions & 1 deletion src/env.py
Original file line number Diff line number Diff line change
Expand Up @@ -175,8 +175,9 @@ def __list_parser(var: Optional[str]) -> list[str]:
else _database_url)
del _database_url

# ----- debug config -----
# ----- misc config -----
DEBUG: Final = __bool_parser(os.environ.get('DEBUG'))
TABLE_TO_IMAGE: Final = __bool_parser(os.environ.get('TABLE_TO_IMAGE'))

# ----- environment config -----
RAILWAY_STATIC_URL: Final = os.environ.get('RAILWAY_STATIC_URL')
Expand Down
5 changes: 3 additions & 2 deletions src/parsing/html_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from urllib.parse import urlparse
from attr import define

from src import web
from src import web, env
from .medium import Video, Image, Media, Animation, Audio, UploadedImage
from .html_node import *
from .utils import stripNewline, stripLineEnd, isAbsoluteHttpLink, resolve_relative_link, emojify, is_emoticon
Expand Down Expand Up @@ -88,7 +88,8 @@ async def _parse_item(self, soup: Union[PageElement, BeautifulSoup, Tag, Navigab
for row in rows:
columns = row.findAll(('td', 'th'))
if len(columns) != 1:
self.media.add(UploadedImage(convert_table_to_png(str(soup))))
if env.TABLE_TO_IMAGE:
self.media.add(UploadedImage(convert_table_to_png(str(soup))))
return None
row_content = await self._parse_item(columns[0])
if row_content:
Expand Down
61 changes: 35 additions & 26 deletions src/parsing/table_drawer.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@
plt.rcParams['axes.unicode_minus'] = False

filterwarnings('error', 'constrained_layout not applied', UserWarning)

filterwarnings('ignore', "coroutine 'convert_table_to_png' was never awaited", RuntimeWarning)

def _convert_table_to_png(table_html: str) -> Optional[BytesIO]:
soup = BeautifulSoup(table_html, 'lxml')
Expand Down Expand Up @@ -87,40 +87,49 @@ def _convert_table_to_png(table_html: str) -> Optional[BytesIO]:

auto_set_column_width_flag = True
for tries in range(2):
# draw table
fig, ax = plt.subplots(figsize=(8, 8))
table = ax.table(cellText=cell_texts,
rowLabels=row_labels or None,
colLabels=column_labels or None,
loc='center',
cellLoc='center',
rowLoc='center')
row_heights = defaultdict(lambda: 0)
if auto_set_column_width_flag:
table.auto_set_column_width(tuple(range(max_columns)))
# set row height
for xy, cell in table.get_celld().items():
text = cell.get_text().get_text()
text = fill(text.strip(), wrap_length)
cell.get_text().set_text(text)
row_heights[xy[0]] = max(
cell.get_height() * (text.count('\n') + 1) * 0.75 + cell.get_height() * 0.25,
row_heights[xy[0]]
)
for xy, cell in table.get_celld().items():
cell.set_height(row_heights[xy[0]])
fig.set_constrained_layout(True)
ax.axis('off')
buffer = BytesIO()
try:
# draw table
fig, ax = plt.subplots(figsize=(8, 8))
table = ax.table(cellText=cell_texts,
rowLabels=row_labels or None,
colLabels=column_labels or None,
loc='center',
cellLoc='center',
rowLoc='center')
row_heights = defaultdict(lambda: 0)
if auto_set_column_width_flag:
table.auto_set_column_width(tuple(range(max_columns)))
# set row height
for xy, cell in table.get_celld().items():
text = cell.get_text().get_text()
text = fill(text.strip(), wrap_length)
cell.get_text().set_text(text)
row_heights[xy[0]] = max(
cell.get_height() * (text.count('\n') + 1) * 0.75 + cell.get_height() * 0.25,
row_heights[xy[0]]
)
for xy, cell in table.get_celld().items():
cell.set_height(row_heights[xy[0]])
fig.set_constrained_layout(True)
ax.axis('off')
buffer = BytesIO()
fig.savefig(buffer, format='png', dpi=200)
except UserWarning:
# if auto_set_column_width_flag:
# auto_set_column_width_flag = False # oops, overflowed!
# continue # once a figure is exported, some stuff may be frozen, so we need to re-create the table
return None
except Exception as e:
raise e
finally:
# noinspection PyBroadException
try:
plt.close()
except Exception:
pass

# crop
# noinspection PyUnboundLocalVariable
image = Image.open(buffer)
ori_width, ori_height = image.size
upper = left = float('inf')
Expand Down

0 comments on commit 1498a74

Please sign in to comment.