Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Offline pages parser #322

Merged
merged 1 commit into from
Jan 26, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Offline pages parser
Find .mht and .mhtml files, extract their MIME metadata, and link to each file in the report.
  • Loading branch information
abrignoni committed Jan 26, 2023
commit a48ee7f2c8577d647e0a73ac091b9d22d693e36a
45 changes: 45 additions & 0 deletions scripts/artifacts/offlinePages.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
import datetime
import email
import os

from scripts.artifact_report import ArtifactHtmlReport
from scripts.ilapfuncs import logfunc, tsv, is_platform_windows, media_to_html

def get_offlinePages(files_found, report_folder, seeker, wrap_text):
    """Report saved offline web pages (.mht / .mhtml) found in the extraction.

    For every path in ``files_found`` the MIME header block is parsed and one
    row is built from the file's modification time, a media link, the
    ``Snapshot-Content-Location``, ``Subject`` and ``Date`` headers, and the
    path itself. When at least one row was produced, an HTML artifact report
    and a TSV export named ``Offline Pages`` are written.

    Args:
        files_found: Iterable of paths (anything ``str()`` accepts) to the
            candidate files.
        report_folder: Destination passed through to the report writer,
            ``tsv`` and ``media_to_html``.
        seeker: Unused by this function.
        wrap_text: Unused by this function.

    Returns:
        None. Output is produced through the report/TSV helpers.
    """
    # Imported locally so the block does not depend on ``email.parser`` having
    # been loaded as a side effect of ``import email``.
    from email.parser import HeaderParser

    data_list = []

    for file_found in files_found:
        file_found = str(file_found)

        try:
            modified_time = os.path.getmtime(file_found)
            # Explicit encoding keeps decoding identical on every platform;
            # undecodable bytes are replaced rather than raising.
            with open(file_found, 'r', encoding='utf-8', errors='replace') as fp:
                # Only headers are reported, so skip parsing the MIME body.
                message = HeaderParser().parse(fp)
        except OSError as ex:
            # One unreadable path (e.g. a directory matching the pattern)
            # must not abort the whole artifact.
            logfunc(f'Offline Pages: unable to read {file_found}: {ex}')
            continue

        # Same naive-UTC value the deprecated utcfromtimestamp() produced.
        utc_modified_date = datetime.datetime.fromtimestamp(
            modified_time, tz=datetime.timezone.utc
        ).replace(tzinfo=None)

        # Missing headers come back as None and are reported as such.
        sourced = message['Snapshot-Content-Location']
        subjectd = message['Subject']
        dated = message['Date']

        # NOTE(review): media_to_html presumably returns HTML markup for the
        # file; the 'File' column is written unescaped below -- confirm.
        media = media_to_html(file_found, files_found, report_folder)

        data_list.append((utc_modified_date, media, sourced, subjectd, dated, file_found))

    if data_list:
        note = 'Source location in extraction found in the report for each item.'
        report = ArtifactHtmlReport('Offline Pages')
        report.start_artifact_report(report_folder, 'Offline Pages')
        report.add_script()
        data_headers = ('Timestamp Modified', 'File', 'Web Source', 'Subject', 'MIME Date', 'Source in Extraction')
        report.write_artifact_data_table(data_headers, data_list, note, html_no_escape=['File'])
        report.end_artifact_report()

        tsvname = 'Offline Pages'
        tsv(report_folder, data_headers, data_list, tsvname)

# Artifact registration table defined by this module.
# Each entry maps a key to a tuple of:
#   (report category name, file search patterns, handler function)
__artifacts__ = {
    "pages": (
        "Offline Pages",
        ('*/*.mhtml', '*/*.mht'),
        get_offlinePages,
    ),
}

1 change: 1 addition & 0 deletions scripts/report.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ def get_icon_name(category, artifact):
if artifact.find('CALENDAR - EVENTS') >=0: icon = 'calendar'
else: icon = 'calendar'
elif category == 'CALL LOGS': icon = 'phone'
elif category == 'OFFLINE PAGES': icon = 'cloud-off'
elif category == 'CASH APP': icon = 'credit-card'
elif category == 'CAST': icon = 'cast'
elif category == 'CHATS': icon = 'message-circle'
Expand Down