Added cookie support
Charlie committed Apr 17, 2016
1 parent fb628ef commit 4bb032d
Showing 3 changed files with 42 additions and 9 deletions.
README.md: 5 additions & 0 deletions
@@ -21,6 +21,11 @@ If you wish to login with HTTP Basic Auth then crawl:
 ./xsscrapy.py -u http://example.com/login_page -l loginname --basic
 ```
 
+If you wish to use cookies:
+```shell
+./xsscrapy.py -u http://example.com/login_page --cookie "SessionID=abcdef1234567890"
+```
+
 If you wish to limit simultaneous connections to 20:
 ```shell
 ./xsscrapy.py -u http://example.com -c 20
xsscrapy.py: 7 additions & 3 deletions
@@ -19,6 +19,7 @@ def get_args():
     parser.add_argument('-c', '--connections', default='30', help="Set the max number of simultaneous connections allowed, default=30")
     parser.add_argument('-r', '--ratelimit', default='0', help="Rate in requests per minute, default=0")
     parser.add_argument('--basic', help="Use HTTP Basic Auth to login", action="store_true")
+    parser.add_argument('-k', '--cookie', help="Cookie in key=value form, e.g. --cookie SessionID=afgh3193e9103bca9318031bcdf")
     args = parser.parse_args()
     return args
 
@@ -28,9 +29,12 @@ def main():
     if rate not in [None, '0']:
         rate = str(60 / float(rate))
     try:
-        execute(['scrapy', 'crawl', 'xsscrapy',
-                 '-a', 'url=%s' % args.url, '-a', 'user=%s' % args.login, '-a',
-                 'pw=%s' % args.password, '-a', 'basic=%s' % args.basic,
+        cookie_key = args.cookie.split('=', 1)[0] if args.cookie else None
+        cookie_value = ''.join(args.cookie.split('=', 1)[1:]) if args.cookie else None
+        execute(['scrapy', 'crawl', 'xsscrapy',
+                 '-a', 'url=%s' % args.url, '-a', 'user=%s' % args.login, '-a',
+                 'pw=%s' % args.password, '-a', 'basic=%s' % args.basic,
+                 '-a', 'cookie_key=%s' % cookie_key, '-a', 'cookie_value=%s' % cookie_value,
                  '-s', 'CONCURRENT_REQUESTS=%s' % args.connections,
                  '-s', 'DOWNLOAD_DELAY=%s' % rate])
     except KeyboardInterrupt:
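
The `--cookie` value is split on the first `=` only, so session tokens that themselves contain `=` (base64 padding, for example) survive intact. A minimal sketch of that parsing step, using a made-up cookie string:

```python
# Minimal sketch of the split above; the cookie string is hypothetical.
cookie = "SessionID=dG9rZW4=="          # note the '=' padding inside the value

parts = cookie.split('=', 1)            # split on the first '=' only
cookie_key = parts[0]                   # 'SessionID'
cookie_value = ''.join(parts[1:])       # 'dG9rZW4==' ('' if there is no '=')

print(cookie_key, cookie_value)
```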
xsscrapy/spiders/xss_spider.py: 30 additions & 6 deletions
@@ -45,13 +45,24 @@ def __init__(self, *args, **kwargs):
         # gruyere or the second cookie delim
         self.test_str = '\'"(){}<x>:/'
 
-        # Login details
+        # Login details. Either user or cookie
         self.login_user = kwargs.get('user')
+        self.login_cookie_key = kwargs.get('cookie_key')
+        self.login_cookie_value = kwargs.get('cookie_value')
 
+        # Turn 'None' strings into real Nones
         if self.login_user == 'None':
             self.login_user = None
-        else:
-            # Don't hit links with 'logout' in them since self.login_user exists
+        if self.login_cookie_key == 'None':
+            self.login_cookie_key = None
+        if self.login_cookie_value == 'None':
+            self.login_cookie_value = None
+
+        if self.login_user or (self.login_cookie_key and self.login_cookie_value):
+            # Don't hit links with 'logout' in them since self.login_user or cookies exist
             self.rules = (Rule(LinkExtractor(deny=('logout')), callback='parse_resp', follow=True), )
 
         # If password is not set and login user is then get password, otherwise set it
         if kwargs.get('pw') == 'None' and self.login_user is not None:
             self.login_pass = raw_input("Please enter the password: ")
         else:
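
The `deny=('logout')` rule matters most in cookie mode: following a logout link mid-crawl would invalidate the session cookie. A minimal sketch of how the extractor filters such links, over hypothetical HTML (assumes Scrapy is installed):

```python
# Minimal sketch; the page and URLs are hypothetical.
from scrapy.http import HtmlResponse
from scrapy.linkextractors import LinkExtractor

html = b'<a href="/profile">me</a> <a href="/logout">log out</a>'
resp = HtmlResponse('http://example.com/', body=html, encoding='utf-8')

links = LinkExtractor(deny=('logout')).extract_links(resp)
print([l.url for l in links])  # only http://example.com/profile survives
```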
@@ -83,11 +94,24 @@ def start_requests(self):
             otherwise pass it to the normal callback function '''
         if self.login_user and self.login_pass:
             if self.basic_auth == 'true':
-                yield Request(url=self.start_urls[0]) # Take out the callback arg so crawler falls back to the rules' callback
+                # Take out the callback arg so crawler falls back to the rules' callback
+                if self.login_cookie_key and self.login_cookie_value:
+                    yield Request(url=self.start_urls[0], cookies={self.login_cookie_key: self.login_cookie_value})
+                else:
+                    yield Request(url=self.start_urls[0])
             else:
-                yield Request(url=self.start_urls[0], callback=self.login)
+                if self.login_cookie_key and self.login_cookie_value:
+                    yield Request(url=self.start_urls[0],
+                                  cookies={self.login_cookie_key: self.login_cookie_value},
+                                  callback=self.login)
+                else:
+                    yield Request(url=self.start_urls[0], callback=self.login)
         else:
-            yield Request(url=self.start_urls[0]) # Take out the callback arg so crawler falls back to the rules' callback
+            # Take out the callback arg so crawler falls back to the rules' callback
+            if self.login_cookie_key and self.login_cookie_value:
+                yield Request(url=self.start_urls[0], cookies={self.login_cookie_key: self.login_cookie_value})
+            else:
+                yield Request(url=self.start_urls[0])
 
     def login(self, response):
         ''' Fill out the login form and return the request'''
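
Two spider-side details here are easy to miss: Scrapy passes `-a` arguments in as strings, so a missing value arrives as the literal string 'None' and must be converted back, and the `cookies` argument on `Request` seeds Scrapy's cookie middleware, which re-sends the cookie on later requests to the same domain. A minimal sketch of both, with hypothetical values (assumes Scrapy is installed):

```python
# Minimal sketch; URL and cookie values are hypothetical.
from scrapy import Request

# 1) -a spider arguments always arrive as strings, hence the 'None' checks.
cookie_key = 'None'                                    # what kwargs.get() returns
cookie_key = None if cookie_key == 'None' else cookie_key

# 2) The cookies kwarg is picked up by Scrapy's CookiesMiddleware, so
#    later requests to the same domain carry the cookie automatically.
if cookie_key:
    req = Request('http://example.com/', cookies={cookie_key: 'abcdef1234567890'})
else:
    req = Request('http://example.com/')
```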
