Skip to content

Instantly share code, notes, and snippets.

Last active November 6, 2024 06:20
Show Gist options
  • Save Rust1667/efc055debaf4876e4de39cc0d32f18c1 to your computer and use it in GitHub Desktop.
Save Rust1667/efc055debaf4876e4de39cc0d32f18c1 to your computer and use it in GitHub Desktop.
This script downloads all Rentry pages linked from the FreeMediaHeckYeah Wiki, including those on the base64 page. --- Requirements: the Python package "rentry" must be installed ("pip install rentry"), or downloaded into the same folder together with the example.env file from that same repo, renamed to ".env".
import os
import base64
import re
import requests
import rentry
def extract_string_from_url(url):
    """Return the page identifier at the end of a rentry URL.

    Args:
        url: A string such as ``https://rentry.co/example``.

    Returns:
        The single path segment that follows the host (``"example"`` in
        the case above), or ``None`` when ``url`` does not contain a
        rentry.co / rentry.org address that ends in exactly one path
        segment.
    """
    # Capture everything after the host's '/' up to the end of the string.
    # '[^/]+' rejects URLs with a trailing slash or further path components.
    pattern = r'https://rentry\.(?:co|org)/([^/]+)$'
    match = re.search(pattern, url)
    if match:
        # Group 1 of the match contains the extracted identifier.
        return match.group(1)
    return None
def extract_rentry_links(text):
    """Collect rentry links that appear as parenthesised targets in ``text``.

    Args:
        text: The text to scan (e.g. Markdown source, where link targets
            are written as ``[label](url)``).

    Returns:
        A list of ``https://rentry.co/...`` and ``https://rentry.org/...``
        URLs in order of appearance, duplicates included, with the
        surrounding parentheses removed.
    """
    # Matches rentry.co and rentry.org links only when they are wrapped in
    # '(' and ')'; bare URLs in running text are deliberately not matched.
    pattern_rentry = r'\(https:\/\/rentry\.(?:co|org)\/[a-zA-Z0-9\-_]+\)'
    matches = re.findall(pattern_rentry, text)
    # Each match still carries its enclosing parentheses; drop them.
    return [match[1:-1] for match in matches]
def download_raw_content(url):
    """Fetch ``url`` with an HTTP GET and return the response body as text.

    Progress is announced on stdout before the request is made.

    Args:
        url: The address to download.

    Returns:
        The decoded response body (``response.text``).

    Raises:
        requests.RequestException: If the request fails, exceeds the
            10-second timeout, or the server answers with a 4xx/5xx status.
    """
    print("\ndownloading from " + url + "...")
    response = requests.get(url, timeout=10)
    # Fail loudly on HTTP errors; otherwise an error page would be handed
    # to the callers and scanned for links as if it were real content.
    response.raise_for_status()
    return response.text
def get_main_wiki_rentry_links():
    """Return the rentry links found in the main wiki text.

    Returns:
        The list produced by ``extract_rentry_links`` for the downloaded
        text, in order of appearance and with duplicates included.
    """
    # NOTE(review): the URL literal is empty in this copy of the file and
    # looks like it was stripped; it must be set to the address of the
    # wiki's raw text before this function can work.
    url = ""
    text = download_raw_content(url)
    return extract_rentry_links(text)
def get_base64_rentry_links():
    """Return the rentry links hidden in the base64 page.

    The downloaded text is scanned for spans enclosed in backticks. Each
    span is base64-decoded; spans that cannot be decoded are kept verbatim
    (backticks included). Text outside backticks is not carried over. The
    decoded text is then broken up so that every ``http`` starts a new
    line, and the lines mentioning rentry.co or rentry.org are returned.

    Returns:
        A list of lines (whitespace-stripped) that mention a rentry host.
    """
    # NOTE(review): the URL literal is empty in this copy of the file and
    # looks like it was stripped; it must be set to the address of the
    # base64 page's raw text before this function can work.
    url = ""
    content = download_raw_content(url)

    # Decode every `...` span in turn.
    decoded_parts = []
    start = 0
    while True:
        start = content.find("`", start)
        if start == -1:
            break  # no further opening backtick
        end = content.find("`", start + 1)
        if end == -1:
            break  # unterminated span: nothing left to decode
        encoded_string = content[start + 1:end]
        try:
            decoded_bytes = base64.b64decode(encoded_string)
            decoded_parts.append(decoded_bytes.decode("utf-8"))
        except ValueError:
            # binascii.Error (malformed base64) and UnicodeDecodeError
            # (valid base64 whose bytes are not UTF-8) are both ValueError
            # subclasses; either way the span is ordinary inline code, so
            # keep it unchanged.
            decoded_parts.append(f"`{encoded_string}`")
        start = end + 1

    # Put each URL on its own line so links can be picked out line by line.
    decoded_content = "".join(decoded_parts).replace("http", "\nhttp")

    links = []
    for line in decoded_content.split("\n"):
        if re.search(r"(rentry\.co|rentry\.org)", line):
            # NOTE(review): the statement under this `if` is missing from
            # the visible source; collecting the line is the evident
            # intent. It is stripped so stray whitespace or '\r' does not
            # end up inside the link.
            links.append(line.strip())
    return links
def remove_duplicates(strings):
    """Return ``strings`` without case-insensitive duplicates.

    The first occurrence of each string (compared in lower case) is kept,
    in its original spelling and order. The number of dropped entries is
    printed to stdout.

    Args:
        strings: A list of strings.

    Returns:
        A new list holding the first occurrence of every distinct string.
    """
    seen = set()
    result = []
    for s in strings:
        s_lower = s.lower()
        if s_lower not in seen:
            seen.add(s_lower)
            result.append(s)
    print("\nremoved " + str(len(strings) - len(result)) + " duplicates.")
    return result
def get_rentry_content_and_title(url):
    """Fetch one rentry page and pair its content with its identifier.

    Args:
        url: A rentry URL of the form accepted by
            ``extract_string_from_url``.

    Returns:
        A dict with two keys: ``'title'``, the identifier extracted from
        ``url``, and ``'content'``, the ``'content'`` entry of whatever
        ``rentry.raw`` returns for that identifier.
    """
    urlstring = extract_string_from_url(url)
    # NOTE(review): urlstring is None when the URL does not match the
    # expected shape; how rentry.raw reacts to None is not visible here.
    rentry_content_and_title = {
        'title': urlstring,
        'content': rentry.raw(urlstring)['content'],
    }
    return rentry_content_and_title
def save_string_to_text_file(string, file_path):
    """Write ``string`` to ``file_path`` as UTF-8 and report it on stdout.

    An existing file at ``file_path`` is overwritten.

    Args:
        string: The text to store.
        file_path: Destination path of the text file.
    """
    with open(file_path, "w", encoding='utf-8') as f:
        f.write(string)
    print(f"Result saved to {file_path}")
def main():
    """Collect every rentry link, then download them after confirmation.

    Links are gathered from the main wiki and from the base64 page, merged,
    and de-duplicated; each list is echoed to stdout. After the user
    confirms, every page is fetched and saved as ``<title>.md`` in the
    current working directory.
    """
    # Links from the main wiki
    main_wiki_rentry_links = get_main_wiki_rentry_links()
    print(f"{len(main_wiki_rentry_links)} links")
    for link in main_wiki_rentry_links:
        print(link)

    # Links from base64
    base64_rentry_links = get_base64_rentry_links()
    print(f"{len(base64_rentry_links)} links")
    for link in base64_rentry_links:
        print(link)

    # Join lists, remove duplicates, count total
    links = remove_duplicates(main_wiki_rentry_links + base64_rentry_links)
    print(f"\n\n{len(links)} links total\n\n")

    # Confirm download in current directory
    print("Current directory: " + os.getcwd() + "\n")
    answer = input(f"Do you want to download {len(links)} markdown files in the current directory? (y/n): ")
    # Tolerate "Y" and surrounding whitespace; anything else declines.
    confirm = answer.strip().lower() == "y"

    # Download files
    if confirm:
        for link in links:
            r = get_rentry_content_and_title(link)
            save_string_to_text_file(r['content'], str(r['title']) + ".md")
# Run the downloader only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment