Skip to content

Commit

Permalink
Add --since flag to only get recent messages (#202)
Browse files Browse the repository at this point in the history
* Add --since flag to only get recent messages

* Support --since flag for the CLI
  • Loading branch information
volker-fr authored Nov 4, 2024
1 parent 5902931 commit 9db95ae
Show file tree
Hide file tree
Showing 5 changed files with 125 additions and 30 deletions.
9 changes: 6 additions & 3 deletions slackviewer/archive.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,9 +124,12 @@ def get_export_info(archive_name):
extracted_path = extract_archive(archive_name)
base_filename = basename(archive_name)
(noext_filename, _) = splitext(base_filename)
# Typical extract name: "My Friends and Family Slack export Jul 21 2018 - Sep 06 2018"
# If that's not the format, we will just fall back to the extension-free filename.
(workspace_name, _) = noext_filename.split(" Slack export ", 1)
workspace_name = base_filename
# In case the archive is a zip file
if not os.path.isdir(extracted_path):
# Typical extract name: "My Friends and Family Slack export Jul 21 2018 - Sep 06 2018"
# If that's not the format, we will just fall back to the extension-free filename.
(workspace_name, _) = noext_filename.split(" Slack export ", 1)
return {
"readable_path": extracted_path,
"basename": base_filename,
Expand Down
12 changes: 9 additions & 3 deletions slackviewer/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,12 +33,16 @@ def clean(wet):


@cli.command(help="Generates a single-file printable export for an archive file or directory")
@click.option('--debug', is_flag=True, default=flag_ennvar("FLASK_DEBUG"))
@click.option("--since", default=None, type=click.DateTime(formats=["%Y-%m-%d"]),
help="Only show messages since this date.")
@click.argument('archive_dir')
def export(archive_dir):

def export(archive_dir, debug, since):
css = pkgutil.get_data('slackviewer', 'static/viewer.css').decode('utf-8')
tmpl = Environment(loader=PackageLoader('slackviewer')).get_template("export_single.html")
export_file_info = get_export_info(archive_dir)
r = Reader(export_file_info["readable_path"])
r = Reader(export_file_info["readable_path"], debug, since)
channel_list = sorted(
[{"channel_name": k, "messages": v} for (k, v) in r.compile_channels().items()],
key=lambda d: d["channel_name"]
Expand All @@ -51,5 +55,7 @@ def export(archive_dir):
source_file=export_file_info["basename"],
channels=channel_list
)
with open(export_file_info['stripped_name'] + '.html', 'w') as outfile:
with open(export_file_info['stripped_name'] + '.html', 'wb') as outfile:
outfile.write(html.encode('utf-8'))

print("Exported to {}.html".format(export_file_info['stripped_name']))
47 changes: 28 additions & 19 deletions slackviewer/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,9 @@
from slackviewer.reader import Reader
from slackviewer.freezer import CustomFreezer
from slackviewer.utils.click import envvar, flag_ennvar

def configure_app(app, archive, channels, no_sidebar, no_external_references, debug):


def configure_app(app, archive, channels, no_sidebar, no_external_references, debug, since):
app.debug = debug
app.no_sidebar = no_sidebar
app.no_external_references = no_external_references
Expand All @@ -19,7 +20,7 @@ def configure_app(app, archive, channels, no_sidebar, no_external_references, de
app.config["PROPAGATE_EXCEPTIONS"] = True

path = extract_archive(archive)
reader = Reader(path)
reader = Reader(path, debug, since)

top = flask._app_ctx_stack
top.path = path
Expand All @@ -30,6 +31,12 @@ def configure_app(app, archive, channels, no_sidebar, no_external_references, de
top.mpims = reader.compile_mpim_messages()
top.mpim_users = reader.compile_mpim_users()

# remove any empty channels & groups. DM's are needed for now
# since the application loads the first
top.channels = {k: v for k, v in top.channels.items() if v}
top.groups = {k: v for k, v in top.groups.items() if v}


@click.command()
@click.option('-p', '--port', default=envvar('SEV_PORT', '5000'),
type=click.INT, help="Host port to serve your content on")
Expand Down Expand Up @@ -57,32 +64,34 @@ def configure_app(app, archive, channels, no_sidebar, no_external_references, de
@click.option('--debug', is_flag=True, default=flag_ennvar("FLASK_DEBUG"))
@click.option("-o", "--output-dir", default="html_output", type=click.Path(),
help="Output directory for static HTML files.")
@click.option("--html-only", is_flag=True, default=False,
@click.option("--html-only", is_flag=True, default=False,
help="If you want static HTML only, set this.")
@click.option("--since", default=None, type=click.DateTime(formats=["%Y-%m-%d"]),
help="Only show messages since this date.")

def main(
port,
archive,
ip,
no_browser,
channels,
no_sidebar,
no_external_references,
test,
debug,
output_dir,
html_only
):
port,
archive,
ip,
no_browser,
channels,
no_sidebar,
no_external_references,
test,
debug,
output_dir,
html_only,
since,
):
if not archive:
raise ValueError("Empty path provided for archive")

configure_app(app, archive, channels, no_sidebar, no_external_references, debug)
configure_app(app, archive, channels, no_sidebar, no_external_references, debug, since)

if html_only:

# We need relative URLs, otherwise channel refs do not work
app.config["FREEZER_RELATIVE_URLS"] = True

# Custom subclass of Freezer allows overwriting the output directory
freezer = CustomFreezer(app)
freezer.cf_output_dir = output_dir
Expand Down
10 changes: 9 additions & 1 deletion slackviewer/message.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import logging
import emoji


class Message(object):

_DEFAULT_USER_ICON_SIZE = 72
Expand All @@ -12,6 +13,13 @@ def __init__(self, formatter, message):
self._formatter = formatter
self._message = message

def __repr__(self):
    """
    Debug representation: ``<Message(username@time: text preview)>``.

    The preview is the raw "text" entry of the message; anything longer
    than 20 characters is cut to its first 20 characters plus "...".
    """
    preview = self._message.get("text")
    needs_cut = bool(preview) and len(preview) > 20
    if needs_cut:
        preview = "{}...".format(preview[:20])

    return "<Message({}@{}: {})>".format(self.username, self.time, preview)

##############
# Properties #
##############
Expand Down Expand Up @@ -116,7 +124,7 @@ def subtype(self):
return self._message.get("subtype")


class LinkAttachment(object):
class LinkAttachment():
"""
Wrapper class for entries in either the "files" or "attachments" arrays.
"""
Expand Down
77 changes: 73 additions & 4 deletions slackviewer/reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
import io
import json
import os
import datetime
import sys

from slackviewer.formatter import SlackFormatter
from slackviewer.message import Message
Expand All @@ -15,8 +17,10 @@ class Reader(object):
Reader object will read all of the archives' data from the json files
"""

def __init__(self, PATH):
def __init__(self, PATH, debug, since):
self._PATH = PATH
self._debug = debug
self._since = since
# TODO: Make sure this works
with io.open(os.path.join(self._PATH, "users.json"), encoding="utf8") as f:
self.__USER_DATA = {u["id"]: User(u) for u in json.load(f)}
Expand Down Expand Up @@ -178,11 +182,11 @@ def _create_messages(self, names, data, isDms=False):
for day in sorted(day_files):
with io.open(os.path.join(self._PATH, day), encoding="utf8") as f:
# loads all messages
day_messages = json.load(f)
day_messages = json.load(f)

# sorts the messages in the json file
day_messages.sort(key=Reader._extract_time)
day_messages.sort(key=Reader._extract_time)

messages.extend([Message(formatter, d) for d in day_messages])

chats[name] = messages
Expand Down Expand Up @@ -261,6 +265,10 @@ def _build_threads(self, channel_data):
if isinstance(item, Message):
data_with_sorted_threads.append(item)
channel_data[channel_name] = data_with_sorted_threads.copy()

if self._since:
channel_data = self._message_filter_timeframe(channel_data.copy())

return channel_data

def _read_from_json(self, file):
Expand All @@ -279,3 +287,64 @@ def _read_from_json(self, file):
return {u["id"]: u for u in json.load(f)}
except IOError:
return {}

def _message_filter_timeframe(self, channel_data):
    """
    Drop, in place, every message group whose latest entry is outside
    the timeframe, and return the same ``channel_data`` mapping.

    A group is one main message followed by its thread replies (entries
    whose text starts with "**Thread Reply:**"). The group is kept or
    removed as a whole, based on the timeframe check of its last entry.
    The input must already be sorted with replies directly after their
    main message.
    """
    for conversation in channel_data.values():
        doomed = []            # indices scheduled for removal, ascending
        group = []             # indices of the group currently being read
        group_is_recent = False

        for idx, entry in enumerate(conversation):
            recent = self._message_in_timeframe(entry)
            text = entry._message.get('text')
            # Messages without text can never be thread replies
            is_reply = bool(text) and text.startswith("**Thread Reply:**")

            if is_reply:
                # A reply falling out of the timeframe after an entry
                # that was inside it means the input is not sorted
                if group_is_recent and not recent:
                    print("ERROR: This should never happen. sorting is broken...")
                    sys.exit(1)
                group.append(idx)
            else:
                # A main message closes the previous group
                if not group_is_recent:
                    doomed.extend(group)
                group = [idx]

            group_is_recent = recent

        # Close the final group of the conversation
        if not group_is_recent:
            doomed.extend(group)

        # Indices were collected in ascending order; delete back to front
        # so the remaining indices stay valid
        for idx in reversed(doomed):
            del conversation[idx]

    return channel_data


def _message_in_timeframe(self, msg):
    """
    Return True if the message was sent at or after the ``since`` cutoff.

    When no cutoff is configured (``self._since`` is falsy) every message
    counts as being inside the timeframe and its timestamp is not read.

    :param msg: object exposing the raw message payload as ``msg._message``;
        its "ts" entry is converted with ``float()`` and read as epoch
        seconds.
    :return: True when the message is not older than the cutoff.
    """
    if not self._since:
        return True

    ts = msg._message.get('ts')
    # fromtimestamp() without a tz returns a naive local-time datetime.
    # NOTE(review): assumes self._since is a naive datetime as well - confirm
    ts_obj = datetime.datetime.fromtimestamp(float(ts))

    # Inclusive bound: a message stamped exactly at the cutoff is still
    # "since" that moment and must not be filtered out.
    return self._since <= ts_obj

0 comments on commit 9db95ae

Please sign in to comment.