Skip to content

Commit

Permalink
Change the User-Agent when it is the default (python-requests) for the IA & IS handlers
Browse files (browse the repository at this point in the history)
  • Loading branch information
Moh Tur committed Jul 18, 2020
1 parent 13ecf14 commit b4e88b2
Show file tree
Hide file tree
Showing 6 changed files with 24 additions and 11 deletions.
2 changes: 1 addition & 1 deletion archivenow/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = '2020.4.1.10.34.36'
__version__ = '2020.7.18.12.19.44'
2 changes: 1 addition & 1 deletion archivenow/archivenow.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@

#from __init__ import __version__ as archiveNowVersion

archiveNowVersion = '2020.4.1.10.34.36'
archiveNowVersion = '2020.7.18.12.19.44'

# archive handlers path
PATH = Path(os.path.dirname(os.path.abspath(__file__)))
Expand Down
6 changes: 4 additions & 2 deletions archivenow/handlers/ia_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,13 @@ def push(self, uri_org, p_args=[], session=requests.Session()):
msg = ''
try:
uri = 'https://web.archive.org/save/' + uri_org
archiveTodayUserAgent = {"User-Agent": "Mozilla/5.0 (X11; Linux x86_64)"}
archiveTodayUserAgent = {
"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.116 Safari/537.36"
}
# push into the archive
# r = session.get(uri, timeout=120, allow_redirects=True, headers=archiveTodayUserAgent)

if 'user-agent' in session.headers:
if ('user-agent' in session.headers) and (not session.headers['User-Agent'].lower().startswith('python-requests/')):
r = session.get(uri, timeout=120, allow_redirects=True)
else:
r = session.get(uri, timeout=120, allow_redirects=True, headers=archiveTodayUserAgent)
Expand Down
19 changes: 14 additions & 5 deletions archivenow/handlers/is_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,9 +39,11 @@ def push(self, uri_org, p_args=[], session=requests.Session()):

msg = ''

archiveTodayUserAgent = {"User-Agent": "Mozilla/5.0 (X11; Linux x86_64)" , "host": host}

if 'user-agent' in session.headers:
archiveTodayUserAgent = {
"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.116 Safari/537.36",
"host": host}

if ('user-agent' in session.headers) and (not session.headers['User-Agent'].lower().startswith('python-requests/')):
rid = session.get('https://'+host+'/',timeout=120, allow_redirects=True)
else:
rid = session.get('https://'+host+'/',timeout=120, allow_redirects=True, headers=archiveTodayUserAgent)
Expand All @@ -55,10 +57,17 @@ def push(self, uri_org, p_args=[], session=requests.Session()):
msg = "IndexError (" + self.name+ "): unable to extract 'submitid' "
raise

# push to the archive
r = session.post('https://'+host+'/submit/', timeout=120,
# push to the archive
if ('user-agent' in session.headers) and (not session.headers['User-Agent'].lower().startswith('python-requests/')):
r = session.post('https://'+host+'/submit/', timeout=120,
data={"anyway":"1" , "url":uri_org, "submitid":archiveTodaySubmitId},
allow_redirects=True)
else:
r = session.post('https://'+host+'/submit/', timeout=120,
data={"anyway":"1" , "url":uri_org, "submitid":archiveTodaySubmitId},
allow_redirects=True,
headers=archiveTodayUserAgent)


r.raise_for_status()
# extract the link to the archived copy
Expand Down
4 changes: 3 additions & 1 deletion archivenow/handlers/mg_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,9 @@



new_header = 'Mozilla/5.0 (X11; Linux x86_64; rv:62.0) Gecko/20100101 Firefox/62.0'
new_header = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.116 Safari/537.36'



class MG_handler(object):

Expand Down
2 changes: 1 addition & 1 deletion archivenow/handlers/st_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import requests
import re

new_header = 'Mozilla/5.0 (X11; Linux x86_64; rv:62.0) Gecko/20100101 Firefox/62.0'
new_header = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.116 Safari/537.36'

headers = {'User-Agent': new_header}

Expand Down

0 comments on commit b4e88b2

Please sign in to comment.