forked from certsocietegenerale/FIR
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathurl.py
executable file
·36 lines (30 loc) · 884 Bytes
/
url.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
import re
from fir_artifacts.artifacts import AbstractArtifact
class URL(AbstractArtifact):
    """Artifact type that extracts URL-like strings from free text.

    ``regex`` is compiled with ``re.VERBOSE`` by :meth:`find`, so the
    whitespace and line breaks inside the pattern are layout only; this is
    also why every literal ``#`` in the pattern is backslash-escaped.
    """

    # Identifier and display label of this artifact type.
    # NOTE(review): presumably consumed by AbstractArtifact or its registry;
    # that code is not visible from this file.
    key = 'url'
    display_name = 'URLs'

    # Named groups:
    #   search   - the whole candidate URL
    #   scheme   - optional 2-9 word characters followed by "://"
    #   hostname - dotted name ending in a 2-6 letter TLD, or a dotted quad
    #   path     - optional path, query string and fragment
    # Unnamed groups cover the optional "user:password@" prefix and ":port".
    #
    # IPv4 octets are matched with ``\d{1,3}``. The previous ``[\d+]{1,3}``
    # put "+" inside the character class, where it is a literal, so strings
    # such as "1+2.3.4.5" were accepted as a hostname.
    regex = r"""
        (?P<search>
            ((?P<scheme>[\w]{2,9}):\/\/)?
            ([\S]*\:[\S]*\@)?
            (?P<hostname>(
                        ((([\w\-]+\.)+)
                        ([a-zA-Z]{2,6}))
                        |(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})
                        )
            )
            (\:[\d]{1,5})?
            (?P<path>(\/[\/\~\w\-_%\.\*\#\$&%]*)?
                (\?[\~\w\-_%\.&=\*\#\$%]*)?
                (\#[\S]*)?)
        )
        """

    @classmethod
    def find(cls, data):
        """Return the URLs found in ``data`` that contain at least one ``/``.

        Args:
            data: Text to scan.

        Returns:
            list: The ``search`` group of every match of ``cls.regex``, in
            order of appearance, keeping only the matches that contain a
            ``/``. Slash-less matches (e.g. a bare hostname such as
            ``example.com``) are dropped.
        """
        # Compiled here rather than at class creation so that a subclass
        # overriding ``regex`` is honoured; the ``re`` module caches compiled
        # patterns, so repeated calls stay cheap.
        pattern = re.compile(cls.regex, re.VERBOSE)
        candidates = (match.group('search') for match in pattern.finditer(data))
        return [candidate for candidate in candidates if '/' in candidate]