Skip to content

Commit

Permalink
Version 0.0.1dev4 (#20)
Browse files Browse the repository at this point in the history
* Bump version to 0.0.1dev4

* Add .travis.yml

* Add build status to README

* Remove nightly build from travis

* Loosen up scraped items in tests

* Scrape just 1 item for category scrapes in tests

* Split tests into meaningful modules

* Require attrs >= 19.2.0

* Use pytest

* Register marks, double quotes in travis env

* Different kind of quote

* Maybe these quotes
  • Loading branch information
jmyrberg authored May 9, 2020
1 parent e207f71 commit b120fb1
Show file tree
Hide file tree
Showing 10 changed files with 130 additions and 95 deletions.
19 changes: 19 additions & 0 deletions .travis.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
language: python

os:
- linux

python:
- "3.6"
- "3.7"
- "3.8"

env:
- SPIDER=ilarticle
- SPIDER=isarticle
- SPIDER="not spider"

install:
- pip install -r requirements.txt -r requirements-dev.txt

script: pytest -m "$SPIDER"
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
# finscraper

[![Build Status](https://travis-ci.com/jmyrberg/finscraper.svg?branch=master)](https://travis-ci.com/jmyrberg/finscraper) [![Documentation Status](https://readthedocs.org/projects/finscraper/badge/?version=latest)](https://finscraper.readthedocs.io/en/latest/?badge=latest)

![finscraper cover](https://github.com/jmyrberg/finscraper/blob/master/docs/cover.jpg?raw=true)

The library provides an easy-to-use API for fetching data from various Finnish websites:
Expand Down Expand Up @@ -37,6 +39,4 @@ repository up-to-date all by myself - pull requests are more than welcome!

---

[![Documentation Status](https://readthedocs.org/projects/finscraper/badge/?version=latest)](https://finscraper.readthedocs.io/en/latest/?badge=latest)

Jesse Myrberg ([email protected])
2 changes: 1 addition & 1 deletion docs/source/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ def add_source_parser(_old_add_source_parser, self, *args, **kwargs):
author = 'Jesse Myrberg'

# The full version, including alpha/beta/rc tags
release = '0.0.1dev3'
release = '0.0.1dev4'


# -- General configuration ---------------------------------------------------
Expand Down
5 changes: 5 additions & 0 deletions pytest.ini
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
[pytest]
markers =
spider: Mark test as a spider test.
isarticle: Mark test as a isarticle test.
ilarticle: Mark test as a ilarticle test.
2 changes: 1 addition & 1 deletion requirements-dev.txt
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
nose==1.3.7
pytest==5.4.2
twine
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
attrs==19.2.0
pandas==1.0.3
selenium==3.141.0
scrapy==2.1.0
Expand Down
5 changes: 3 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

setuptools.setup(
name='finscraper',
version='0.0.1dev3',
version='0.0.1dev4',
license='MIT',
description='Web scraping API for Finnish websites',
long_description=long_description,
Expand All @@ -21,7 +21,8 @@
'selenium>=3.141.0',
'scrapy>=2.1.0',
'tqdm>=4.46.0',
'webdriver-manager>=2.4.0'
'webdriver-manager>=2.4.0',
'attrs>=19.2.0'
],
packages=setuptools.find_packages(),
classifiers=[
Expand Down
45 changes: 45 additions & 0 deletions tests/test_ilarticle.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
"""Module for testing ILArticle."""


import pytest
pytestmark = [pytest.mark.spider, pytest.mark.ilarticle]

from finscraper.spiders import ILArticle


def test_ILArticle_with_category():
    """Scrape one IL article from a category, continue, then save/load."""
    # Initial scrape: request a single item from the 'ulkomaat' category.
    spider = ILArticle('ulkomaat').scrape(1)
    first = spider.get()
    assert len(first) >= 1
    assert len(first.columns) == 8

    # Continuing the same spider should accumulate at least one more item.
    second = spider.scrape(1).get()
    assert len(second) >= len(first) + 1

    # Persist the spider state and reload it from its job directory.
    jobdir = spider.save()
    reloaded = ILArticle.load(jobdir)

    # The reloaded spider keeps previous results and keeps scraping.
    third = reloaded.scrape(1).get()
    assert len(third) >= len(second) + 1


def test_ILArticle_no_params():
    """Scrape IL articles with default parameters, continue, then save/load."""
    # Default construction: no category, request ten items.
    spider = ILArticle().scrape(10)
    first = spider.get()
    assert len(first) >= 10
    assert len(first.columns) == 8

    # A second scrape on the same spider should add at least ten more.
    second = spider.scrape(10).get()
    assert len(second) >= len(first) + 10

    # Round-trip the spider through save/load.
    jobdir = spider.save()
    reloaded = ILArticle.load(jobdir)

    # Reloaded spider continues from where it left off.
    third = reloaded.scrape(10).get()
    assert len(third) >= len(second) + 10
51 changes: 51 additions & 0 deletions tests/test_isarticle.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
"""Module for testing ISArticle."""


import pytest
pytestmark = [pytest.mark.spider, pytest.mark.isarticle]

from finscraper.spiders import ISArticle


def test_ISArticle_with_category():
    """Scrape one IS article from a category with and without chromedriver."""
    # First pass: plain scrape without a chromedriver.
    spider = ISArticle('ulkomaat').scrape(1)
    first = spider.get()
    assert len(first) >= 1
    assert len(first.columns) == 8

    # Second pass: same category, but chromedriver explicitly allowed.
    spider = ISArticle('ulkomaat', allow_chromedriver=True).scrape(1)
    first = spider.get()
    assert len(first) >= 1
    assert len(first.columns) == 8

    # Continuing the chromedriver-enabled spider accumulates results.
    second = spider.scrape(1).get()
    assert len(second) >= len(first) + 1

    # Persist and reload the spider from its job directory.
    jobdir = spider.save()
    reloaded = ISArticle.load(jobdir)

    # The reloaded spider keeps scraping on top of earlier results.
    third = reloaded.scrape(1).get()
    assert len(third) >= len(second) + 1


def test_ISArticle_no_params():
    """Scrape IS articles with default parameters, continue, then save/load."""
    # Test scraping with default construction (no category).
    spider = ISArticle().scrape(10)
    df = spider.get()
    # Fixed: was `== 10`, a brittle exact-count assertion on a live scrape.
    # Every sibling test (and the ilarticle counterpart) uses `>=`, matching
    # the intent of loosening scraped-item counts in these tests.
    assert len(df) >= 10
    assert len(df.columns) == 8

    # Test continuing scraping (poor results, no driver)
    df2 = spider.scrape(10).get()
    assert len(df2) >= len(df) + 10

    # Save and load spider, then verify it continues accumulating items.
    jobdir = spider.save()
    spider = ISArticle.load(jobdir)

    df3 = spider.scrape(10).get()
    assert len(df3) >= len(df2) + 10
91 changes: 2 additions & 89 deletions tests/test_spiders.py → tests/test_wrappers.py
Original file line number Diff line number Diff line change
@@ -1,100 +1,13 @@
"""Module for testing spiders."""
"""Module for testing spider wrapper functionalities."""


import json
import logging
import tempfile

from pathlib import Path

from finscraper.spiders import ISArticle, ILArticle


# TODO: Implement utility test function that performs common Spider checks


def test_ISArticle_with_category():
    """Scrape IS category articles with and without chromedriver, then save/load."""
    # Plain scrape without a chromedriver: expect at least twenty items.
    spider = ISArticle('ulkomaat').scrape(20)
    first = spider.get()
    assert len(first) >= 20
    assert len(first.columns) == 8

    # Same category with chromedriver explicitly allowed.
    spider = ISArticle('ulkomaat', allow_chromedriver=True).scrape(20)
    first = spider.get()
    assert len(first) >= 20
    assert len(first.columns) == 8

    # Continuing the same spider should accumulate at least ten more items.
    second = spider.scrape(10).get()
    assert len(second) >= len(first) + 10

    # Persist the spider and reload it from its job directory.
    jobdir = spider.save()
    reloaded = ISArticle.load(jobdir)

    # The reloaded spider continues on top of earlier results.
    third = reloaded.scrape(10).get()
    assert len(third) >= len(second) + 10


def test_ISArticle_no_params():
    """Scrape IS articles with default parameters, continue, then save/load."""
    # Test scraping with default construction (no category).
    spider = ISArticle().scrape(10)
    df = spider.get()
    # Fixed: was `== 10`, a brittle exact-count assertion on a live scrape.
    # All sibling tests use `>=` for scraped-item counts; an exact match can
    # fail spuriously when the scraper yields more items than requested.
    assert len(df) >= 10
    assert len(df.columns) == 8

    # Test continuing scraping (poor results, no driver)
    df2 = spider.scrape(10).get()
    assert len(df2) >= len(df) + 10

    # Save and load spider, then verify it continues accumulating items.
    jobdir = spider.save()
    spider = ISArticle.load(jobdir)

    df3 = spider.scrape(10).get()
    assert len(df3) >= len(df2) + 10


def test_ILArticle_with_category():
    """Scrape IL articles from one category, continue, then save/load."""
    # Initial scrape: request five items from the 'ulkomaat' category.
    spider = ILArticle('ulkomaat').scrape(5)
    first = spider.get()
    assert len(first) >= 5
    assert len(first.columns) == 8

    # Continuing the same spider should add at least ten more items.
    second = spider.scrape(10).get()
    assert len(second) >= len(first) + 10

    # Persist the spider and reload it from its job directory.
    jobdir = spider.save()
    reloaded = ILArticle.load(jobdir)

    # The reloaded spider keeps scraping on top of earlier results.
    third = reloaded.scrape(10).get()
    assert len(third) >= len(second) + 10


def test_ILArticle_no_params():
    """Scrape IL articles with default parameters, continue, then save/load."""
    # Default construction: no category, request ten items.
    spider = ILArticle().scrape(10)
    first = spider.get()
    assert len(first) >= 10
    assert len(first.columns) == 8

    # A second scrape on the same spider should add at least ten more.
    second = spider.scrape(10).get()
    assert len(second) >= len(first) + 10

    # Round-trip the spider through save/load.
    jobdir = spider.save()
    reloaded = ILArticle.load(jobdir)

    # Reloaded spider continues from where it left off.
    third = reloaded.scrape(10).get()
    assert len(third) >= len(second) + 10


def test_spider_save_load_with_jobdir():
jobdir = '../jobdir'
category = 'jaakiekko'
Expand Down Expand Up @@ -174,7 +87,7 @@ def test_spider_logging():


def test_spider_progress_bar():
# Progress bas true by default
# Progress bar true by default
spider = ILArticle()
spider.scrape(1)
assert spider.progress_bar == True
Expand Down

0 comments on commit b120fb1

Please sign in to comment.