Commit

Applying nice styles.
sebastian-code committed Apr 11, 2020
1 parent 3af9540 commit a1de7a4
Showing 5 changed files with 73 additions and 42 deletions.
81 changes: 57 additions & 24 deletions bootcamp/news/metadatareader.py
@@ -4,18 +4,31 @@
from bs4 import BeautifulSoup, Comment
from urllib.parse import urljoin


class Metadata:
url = ""
type = "" # https://ogp.me/#types
type = "" # https://ogp.me/#types
title = ""
description = ""
image = ""

def __str__(self):
return "{url: " + self.url + ", type: " + self.type + ", title: " + self.title + ", description: " + self.description + ", image: " + self.image + "}"
return (
"{url: "
+ self.url
+ ", type: "
+ self.type
+ ", title: "
+ self.title
+ ", description: "
+ self.description
+ ", image: "
+ self.image
+ "}"
)

class Metadatareader:

class Metadatareader:
@staticmethod
def get_metadata_from_url_in_text(text):
# look for the first url in the text
@@ -48,13 +61,21 @@ def get_url_metadata(url):
for meta in soup.findAll("meta"):
# prioritize using Open Graph Protocol
# https://ogp.me/
metadata.type = Metadatareader.get_meta_property(meta, "og:type", metadata.type)
metadata.title = Metadatareader.get_meta_property(meta, "og:title", metadata.title)
metadata.description = Metadatareader.get_meta_property(meta, "og:description", metadata.description)
metadata.image = Metadatareader.get_meta_property(meta, "og:image", metadata.image)
metadata.type = Metadatareader.get_meta_property(
meta, "og:type", metadata.type
)
metadata.title = Metadatareader.get_meta_property(
meta, "og:title", metadata.title
)
metadata.description = Metadatareader.get_meta_property(
meta, "og:description", metadata.description
)
metadata.image = Metadatareader.get_meta_property(
meta, "og:image", metadata.image
)
if metadata.image:
metadata.image = urljoin(url, metadata.image)

if not metadata.title and soup.title:
# use page title
metadata.title = soup.title.text
@@ -68,28 +89,36 @@ def get_url_metadata(url):
if not metadata.description and soup.body:
# use text from body
for text in soup.body.find_all(string=True):
if text.parent.name != "script" and text.parent.name != "style" and not isinstance(text, Comment):
if (
text.parent.name != "script"
and text.parent.name != "style"
and not isinstance(text, Comment)
):
metadata.description += text

if metadata.description:
# remove white spaces and break lines
metadata.description = re.sub("\n|\r|\t", " ", metadata.description)
metadata.description = re.sub(" +", " ", metadata.description)
metadata.description = metadata.description.strip()

return metadata

@staticmethod
def get_final_url(url, timeout=5):
# get final url after all redirections
# get http response header
# look for the "Location: " header
proc = subprocess.Popen([
"curl",
"-Ls",#follow redirect 301 and silently
"-I",#don't download html body
url
], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
proc = subprocess.Popen(
[
"curl",
"-Ls", # follow redirect 301 and silently
"-I", # don't download html body
url,
],
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
)
try:
out, err = proc.communicate(timeout=timeout)
except TimeoutExpired:
@@ -104,13 +133,17 @@ def get_final_url(url, timeout=5):
@staticmethod
def get_url_content(url, timeout=5):
# get url html
proc = subprocess.Popen([
"curl",
"-i",
"-k",#ignore ssl certificate requisite
"-L",#follow redirect 301
url
], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
proc = subprocess.Popen(
[
"curl",
"-i",
"-k", # ignore ssl certificate requisite
"-L", # follow redirect 301
url,
],
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
)
try:
out, err = proc.communicate(timeout=timeout)
except TimeoutExpired:
@@ -122,4 +155,4 @@ def get_url_content(url, timeout=5):
def get_meta_property(meta, property_name, default_value=""):
if "property" in meta.attrs and meta.attrs["property"] == property_name:
return meta.attrs["content"]
return default_value
return default_value
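A minimal usage sketch of the reformatted reader, not part of this commit; the sample text and URL are illustrative assumptions:

# Hypothetical usage sketch (assumes the project environment is importable).
# get_metadata_from_url_in_text() pulls the first URL out of the text and then
# fetches the page through the curl-based helpers shown above, so this makes a
# live request.
from bootcamp.news.metadatareader import Metadatareader

metadata = Metadatareader.get_metadata_from_url_in_text(
    "Worth a read: https://ogp.me/"
)
print(metadata)  # rendered by Metadata.__str__: {url: ..., type: ..., title: ...}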
22 changes: 11 additions & 11 deletions bootcamp/news/migrations/0002_auto_20200405_1227.py
@@ -6,33 +6,33 @@
class Migration(migrations.Migration):

dependencies = [
('news', '0001_initial'),
("news", "0001_initial"),
]

operations = [
migrations.AddField(
model_name='news',
name='meta_description',
model_name="news",
name="meta_description",
field=models.TextField(max_length=255, null=True),
),
migrations.AddField(
model_name='news',
name='meta_image',
model_name="news",
name="meta_image",
field=models.CharField(max_length=255, null=True),
),
migrations.AddField(
model_name='news',
name='meta_title',
model_name="news",
name="meta_title",
field=models.CharField(max_length=255, null=True),
),
migrations.AddField(
model_name='news',
name='meta_type',
model_name="news",
name="meta_type",
field=models.CharField(max_length=255, null=True),
),
migrations.AddField(
model_name='news',
name='meta_url',
model_name="news",
name="meta_url",
field=models.CharField(max_length=2048, null=True),
),
]
4 changes: 2 additions & 2 deletions bootcamp/news/models.py
@@ -48,14 +48,14 @@ def __str__(self):
return str(self.content)

def save(self, *args, **kwargs):
#extract metada from content url
# extract metadata from content url
metadata = Metadatareader.get_metadata_from_url_in_text(self.content)
self.meta_url = metadata.url[0:2048]
self.meta_type = metadata.type[0:255]
self.meta_title = metadata.title[0:255]
self.meta_description = metadata.description[0:255]
self.meta_image = metadata.image[0:255]

super().save(*args, **kwargs)
if not self.reply:
channel_layer = get_channel_layer()
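An illustrative sketch, not part of the commit, of the slice-based truncation that save() applies before writing the metadata fields; the sample strings are assumptions:

# Slicing to the field's max_length is a no-op for short strings and truncates
# longer ones, so the CharField/TextField limits above are never exceeded.
description = "x" * 300
print(len(description[0:255]))  # 255
print("short"[0:255])           # short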
3 changes: 2 additions & 1 deletion bootcamp/news/templatetags/urlize_target_blank.py
@@ -5,7 +5,8 @@

register = template.Library()


@register.filter(is_safe=True, needs_autoescape=True)
@stringfilter
def urlize_target_blank(value, autoescape=None):
return value.replace('<a', '<a target="_blank"')
return value.replace("<a", '<a target="_blank"')
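A minimal sketch of the filter called directly, assuming it runs inside the configured project; the sample HTML is an assumption:

# Hypothetical check (not in the commit): the filter rewrites anchor tags, e.g.
# those produced by Django's urlize, so links open in a new tab.
from bootcamp.news.templatetags.urlize_target_blank import urlize_target_blank

html = '<a href="https://example.com">example</a>'
print(urlize_target_blank(html))
# <a target="_blank" href="https://example.com">example</a>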
5 changes: 1 addition & 4 deletions bootcamp/news/views.py
@@ -74,10 +74,7 @@ def get_thread(request):
news = News.objects.get(pk=news_id)
news_html = render_to_string("news/news_single.html", {"news": news})
thread_html = render_to_string(
"news/news_thread.html", {
"thread": news.get_thread(),
"request": request
}
"news/news_thread.html", {"thread": news.get_thread(), "request": request}
)
return JsonResponse({"uuid": news_id, "news": news_html, "thread": thread_html})

