From 3261adb3a53ff7421cced147a0e2a1a687484914 Mon Sep 17 00:00:00 2001
From: marina-kaz <isthismeowgoingfrommyheart@gmail.com>
Date: Sun, 28 Feb 2021 15:52:16 +0300
Subject: [PATCH 01/50] target score change

---
 target_score.txt | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/target_score.txt b/target_score.txt
index a404aa74..cf79a25c 100644
--- a/target_score.txt
+++ b/target_score.txt
@@ -1,8 +1,8 @@
 # Target score for scrapper.py:
-6
+10
 
 # Target score for pipeline.py:
-6
+10
 
 # Skip pipeline checks:
 1

From f471f4a267f948671f2b91ab737ed12d636b8384 Mon Sep 17 00:00:00 2001
From: marina-kaz <isthismeowgoingfrommyheart@gmail.com>
Date: Mon, 8 Mar 2021 22:10:34 +0300
Subject: [PATCH 02/50] completed stages 1 and 2

---
 crawler_config.json |  12 +++--
 scrapper.py         | 104 +++++++++++++++++++++++++++++++++++++++-----
 2 files changed, 102 insertions(+), 14 deletions(-)

diff --git a/crawler_config.json b/crawler_config.json
index e60ce0f7..80f0c5d8 100644
--- a/crawler_config.json
+++ b/crawler_config.json
@@ -1,5 +1,11 @@
 {
-    "base_urls": [],
-    "total_articles_to_find_and_parse": 0,
-    "max_number_articles_to_get_from_one_seed": 0
+    "base_urls": ["https://burunen.ru/news/society/",
+                  "https://burunen.ru/news/culture/",
+                  "https://burunen.ru/news/economy/",
+                  "https://burunen.ru/news/sports/",
+                  "https://burunen.ru/news/incidents/",
+                  "https://burunen.ru/news/politic/"
+    ],
+    "total_articles_to_find_and_parse": 20,
+    "max_number_articles_to_get_from_one_seed": 25
 }
\ No newline at end of file
diff --git a/scrapper.py b/scrapper.py
index 43aecef5..d4460dc1 100644
--- a/scrapper.py
+++ b/scrapper.py
@@ -2,6 +2,15 @@
 Crawler implementation
 """
 
+import json
+import requests
+from requests.exceptions import RequestException
+from bs4 import BeautifulSoup
+from time import sleep as wait
+from article import Article
+
+
+CRAWLER_CONFIG_PATH = 'crawler_config.json'
 
 class IncorrectURLError(Exception):
     """
@@ -31,18 +40,53 @@ class Crawler:
     """
     Crawler implementation
     """
-    def __init__(self, seed_urls: list, max_articles: int):
-        pass
+    def __init__(self, seed_urls: list, total_max_articles: int, max_articles_per_seed: int):
+        self.seed_urls = seed_urls
+        self.total_max_articles = total_max_articles
+        self.max_articles_per_seed = max_articles_per_seed
+        self.urls = []
+
+        self.URL_START = 'https://burunen.ru'
 
     @staticmethod
-    def _extract_url(article_bs):
-        pass
+    def _extract_url(article_bs, seen):
+        extracted = list(set([link['href'] for link in article_bs.find_all('a', href=True)]))
+        # print(extracted)
+        # print('          ',extracted)
+        return list(filter(lambda x: True if x.startswith('/news/')
+                           and x not in seen
+                           # and any(map(lambda y: y.isdigit(), x))
+                           else False, extracted))
 
     def find_articles(self):
         """
         Finds articles
         """
-        pass
+        for url in self.seed_urls:
+            article_bs = BeautifulSoup(requests.get(url, 'html.parser').text, 'html.parser')
+            newfound = self._extract_url(article_bs, self.urls)
+            self.urls.extend(newfound[:self.max_articles_per_seed])
+        self.urls = [i for i in self.urls if len(i) > 20][:self.total_max_articles]
+        print('Scraped seed urls, overall number of urls is', len(self.urls))
+
+        old = len(self.urls)
+        while len(self.urls) < self.total_max_articles:
+            print('Due to insufficient number started further iteration')
+            print('current number', len(self.urls), ', required', self.total_max_articles)
+            for url in self.urls:
+                article_bs = BeautifulSoup(requests.get(self.URL_START + url, 'html.parser').text, 'html.parser')
+                newfound = filter(lambda x: len(x) > 20, self._extract_url(article_bs, self.urls))
+                print('    checked new url, found', len(newfound), 'articles')
+                self.urls.extend(newfound[:self.max_articles_per_seed])
+                # wait(10)
+                if len(self.urls) > self.total_max_articles:
+                    break
+            if len(self.urls) == old:
+                print('Something is wrong with scraping parameters')
+                break
+
+            self.urls = self.urls[:self.total_max_articles]
+
 
     def get_search_urls(self):
         """
@@ -56,10 +100,14 @@ class ArticleParser:
     ArticleParser implementation
     """
     def __init__(self, full_url: str, article_id: int):
-        pass
+        self.full_url = full_url
+        self.article_id = article_id
+        self.article = Article(self.full_url, self.article_id)
 
     def _fill_article_with_text(self, article_soup):
-        pass
+        all_text = article_soup.find('div', {'class' : 'text letter', 'itemprop' : 'articleBody'}).text
+        text = (all_text..split('Автор:')[0].strip())
+        self.text = text
 
     def _fill_article_with_meta_information(self, article_soup):
         pass
@@ -75,7 +123,11 @@ def parse(self):
         """
         Parses each article
         """
-        pass
+        html = requests.get(self.full_url, 'html.parser').text
+        article_bs = BeautifulSoup(html, 'html.parser')
+        self._fill_article_with_text(article_bs)
+        # self._fill_article_with_text(article_bs)
+        # self._fill_article_with_meta_information(article_bs)
 
 
 def prepare_environment(base_path):
@@ -89,9 +141,39 @@ def validate_config(crawler_path):
     """
     Validates given config
     """
-    pass
+    with open(crawler_path) as crawler_config:
+        config = json.load(crawler_config)
+    try:
+        good_response = list(map(lambda link: True if requests.get(link).status_code == 200 else False,
+                                 config['base_urls']))
+    except RequestException:
+        raise IncorrectURLError
+    except Exception:
+        raise UnknownConfigError
+    if not all(good_response):
+        raise IncorrectURLError
+    if not all((isinstance(config['total_articles_to_find_and_parse'], int),
+                isinstance(config['max_number_articles_to_get_from_one_seed'], int))):
+        raise IncorrectNumberOfArticlesError
+    if not config['total_articles_to_find_and_parse'] < config['max_number_articles_to_get_from_one_seed']\
+       * len(good_response):
+        raise NumberOfArticlesOutOfRangeError
+    return config['base_urls'], config['total_articles_to_find_and_parse'], \
+        config['max_number_articles_to_get_from_one_seed']
 
 
 if __name__ == '__main__':
-    # YOUR CODE HERE
-    pass
+    seed_urls, max_articles, max_articles_per_seed = validate_config(CRAWLER_CONFIG_PATH)
+    crawler = Crawler(seed_urls=seed_urls,
+                      total_max_articles=max_articles,
+                      max_articles_per_seed=max_articles_per_seed)
+
+    crawler.find_articles()
+    print('Scraped', len(crawler.urls), 'articles')
+
+    print('onto parsing')
+
+    for n, url in enumerate(crawler.urls[:1]):
+        full_url = crawler.URL_START + url
+        parser = ArticleParser(full_url, n)
+        article = parser.parse()

From dbcaa4d6b532c6e6c05aea72a4b1e9f93c067993 Mon Sep 17 00:00:00 2001
From: marina-kaz <isthismeowgoingfrommyheart@gmail.com>
Date: Tue, 9 Mar 2021 15:10:15 +0300
Subject: [PATCH 03/50] build for four

---
 article.py          | 16 ++++++++--------
 config/constants.py |  9 +++++++++
 scrapper.py         | 42 ++++++++++++++++++++++++++----------------
 target_score.txt    |  4 ++--
 4 files changed, 45 insertions(+), 26 deletions(-)
 create mode 100644 config/constants.py

diff --git a/article.py b/article.py
index 1d759cd2..d7547150 100644
--- a/article.py
+++ b/article.py
@@ -38,14 +38,14 @@ def save_raw(self):
 
         with open(self._get_raw_text_path(), 'w', encoding='utf-8') as file:
             file.write(self.text)
-
-        with open(os.path.join(ASSETS_PATH, article_meta_name), "w", encoding='utf-8') as file:
-            json.dump(self._get_meta(),
-                      file,
-                      sort_keys=False,
-                      indent=4,
-                      ensure_ascii=False,
-                      separators=(',', ': '))
+        #
+        # with open(os.path.join(ASSETS_PATH, article_meta_name), "w", encoding='utf-8') as file:
+        #     json.dump(self._get_meta(),
+        #               file,
+        #               sort_keys=False,
+        #               indent=4,
+        #               ensure_ascii=False,
+        #               separators=(',', ': '))
     
     @staticmethod
     def from_meta_json(json_path: str):
diff --git a/config/constants.py b/config/constants.py
new file mode 100644
index 00000000..12d85256
--- /dev/null
+++ b/config/constants.py
@@ -0,0 +1,9 @@
+"""
+Useful constant variables
+"""
+
+import os
+
+PROJECT_ROOT = os.path.dirname(os.path.realpath(__file__))
+ASSETS_PATH = os.path.join(PROJECT_ROOT, 'tmp', 'articles')
+CRAWLER_CONFIG_PATH = os.path.join(PROJECT_ROOT, 'crawler_config.json')
diff --git a/scrapper.py b/scrapper.py
index d4460dc1..e217300e 100644
--- a/scrapper.py
+++ b/scrapper.py
@@ -6,11 +6,14 @@
 import requests
 from requests.exceptions import RequestException
 from bs4 import BeautifulSoup
-from time import sleep as wait
+# from time import sleep as wait
 from article import Article
+import re
 
 
 CRAWLER_CONFIG_PATH = 'crawler_config.json'
+NEWLINES_RE = re.compile(r"\n{2,}")  # two or more "\n" characters
+
 
 class IncorrectURLError(Exception):
     """
@@ -66,7 +69,8 @@ def find_articles(self):
             article_bs = BeautifulSoup(requests.get(url, 'html.parser').text, 'html.parser')
             newfound = self._extract_url(article_bs, self.urls)
             self.urls.extend(newfound[:self.max_articles_per_seed])
-        self.urls = [i for i in self.urls if len(i) > 20][:self.total_max_articles]
+        self.urls = [i for i in self.urls if len(i) > 20
+                     and not any(map(lambda y: y.isupper(), i))][:self.total_max_articles]
         print('Scraped seed urls, overall number of urls is', len(self.urls))
 
         old = len(self.urls)
@@ -82,12 +86,11 @@ def find_articles(self):
                 if len(self.urls) > self.total_max_articles:
                     break
             if len(self.urls) == old:
-                print('Something is wrong with scraping parameters')
+                print('     Something is wrong with scraping parameters')
                 break
 
             self.urls = self.urls[:self.total_max_articles]
 
-
     def get_search_urls(self):
         """
         Returns seed_urls param
@@ -105,9 +108,12 @@ def __init__(self, full_url: str, article_id: int):
         self.article = Article(self.full_url, self.article_id)
 
     def _fill_article_with_text(self, article_soup):
-        all_text = article_soup.find('div', {'class' : 'text letter', 'itemprop' : 'articleBody'}).text
-        text = (all_text..split('Автор:')[0].strip())
-        self.text = text
+        try:
+            text = article_soup.find('div', {'class': 'text letter', 'itemprop': 'articleBody'}).text.strip()
+            # text = NEWLINES_RE.split(all_text)  # regex splitting
+            self.article.text = text
+        except AttributeError:
+            print('    unable to parse', self.full_url)
 
     def _fill_article_with_meta_information(self, article_soup):
         pass
@@ -123,18 +129,21 @@ def parse(self):
         """
         Parses each article
         """
+        # print(self.full_url)
+        self.article.url = self.full_url
+        self.article.article_id = self.article_id
         html = requests.get(self.full_url, 'html.parser').text
         article_bs = BeautifulSoup(html, 'html.parser')
         self._fill_article_with_text(article_bs)
-        # self._fill_article_with_text(article_bs)
         # self._fill_article_with_meta_information(article_bs)
+        self.article.save_raw()
 
 
-def prepare_environment(base_path):
-    """
-    Creates ASSETS_PATH folder if not created and removes existing folder
-    """
-    pass
+# def prepare_environment(base_path):
+#     """
+#     Creates ASSETS_PATH folder if not created and removes existing folder
+#     """
+#     pass
 
 
 def validate_config(crawler_path):
@@ -169,11 +178,12 @@ def validate_config(crawler_path):
                       max_articles_per_seed=max_articles_per_seed)
 
     crawler.find_articles()
-    print('Scraped', len(crawler.urls), 'articles')
+    # print('Scraped', len(crawler.urls), 'articles')
 
     print('onto parsing')
 
-    for n, url in enumerate(crawler.urls[:1]):
+    for n, url in enumerate(crawler.urls):
         full_url = crawler.URL_START + url
         parser = ArticleParser(full_url, n)
-        article = parser.parse()
+        parser.parse()
+    print('parsing is finished')
diff --git a/target_score.txt b/target_score.txt
index 686883dc..ad4942b9 100644
--- a/target_score.txt
+++ b/target_score.txt
@@ -1,6 +1,6 @@
   
 # Target score for scrapper.py:
-6
+4
 
 # Target score for pipeline.py:
-0
\ No newline at end of file
+10
\ No newline at end of file

From 0c35e7729b07b73bc8b9f9991c291783867044d4 Mon Sep 17 00:00:00 2001
From: marina-kaz <isthismeowgoingfrommyheart@gmail.com>
Date: Tue, 9 Mar 2021 15:50:36 +0300
Subject: [PATCH 04/50] build for four, fixed target score

---
 config/raw_metadata_score_four_test.py |  1 +
 scrapper.py                            | 21 ++++++++++++++++++---
 target_score.txt                       |  1 -
 3 files changed, 19 insertions(+), 4 deletions(-)

diff --git a/config/raw_metadata_score_four_test.py b/config/raw_metadata_score_four_test.py
index d8691879..e6900c05 100644
--- a/config/raw_metadata_score_four_test.py
+++ b/config/raw_metadata_score_four_test.py
@@ -16,6 +16,7 @@ def setUp(self) -> None:
 
     def test_validate_sort(self):
         list_ids = [pair[0] for pair in self.texts]
+        print(list_ids)
         for i in range(1, len(list_ids)+1):
             self.assertTrue(i in list_ids,
                             msg="""Articles ids are not homogeneous. E.g. numbers are not from 1 to N""")
diff --git a/scrapper.py b/scrapper.py
index e217300e..b643e044 100644
--- a/scrapper.py
+++ b/scrapper.py
@@ -116,7 +116,22 @@ def _fill_article_with_text(self, article_soup):
             print('    unable to parse', self.full_url)
 
     def _fill_article_with_meta_information(self, article_soup):
-        pass
+        try:
+            title = article_soup.title.text
+            self.article.title = title
+
+            credits = article_soup.find('div', {'class': 'credits t-caption'}).text.strip().split('\n')[0]
+            if 'Автор:' in credits:
+                author = article_soup.find('div', {'class': 'credits t-caption'}).text.strip().split('\n')[0][7:]
+            elif 'Источник:' in credits:
+                author = article_soup.find('div', {'class': 'credits t-caption'}).text.strip().split('\n')[0][9:].strip()
+            else:
+                author = ''
+            self.article.author = author
+        except AttributeError:
+            print('    something is off with', self.full_url)
+        # print(title)
+        # self.article.title = title
 
     @staticmethod
     def unify_date_format(date_str):
@@ -135,7 +150,7 @@ def parse(self):
         html = requests.get(self.full_url, 'html.parser').text
         article_bs = BeautifulSoup(html, 'html.parser')
         self._fill_article_with_text(article_bs)
-        # self._fill_article_with_meta_information(article_bs)
+        self._fill_article_with_meta_information(article_bs)
         self.article.save_raw()
 
 
@@ -184,6 +199,6 @@ def validate_config(crawler_path):
 
     for n, url in enumerate(crawler.urls):
         full_url = crawler.URL_START + url
-        parser = ArticleParser(full_url, n)
+        parser = ArticleParser(full_url, n + 1)
         parser.parse()
     print('parsing is finished')
diff --git a/target_score.txt b/target_score.txt
index ad4942b9..221e98ce 100644
--- a/target_score.txt
+++ b/target_score.txt
@@ -1,4 +1,3 @@
-  
 # Target score for scrapper.py:
 4
 

From 8a09da7d8e34f01d2ec2632c3fa7a461ba40c9e4 Mon Sep 17 00:00:00 2001
From: marina-kaz <isthismeowgoingfrommyheart@gmail.com>
Date: Tue, 9 Mar 2021 21:09:46 +0300
Subject: [PATCH 05/50] build for idk what score

---
 article.py       | 18 +++++++++---------
 scrapper.py      | 37 ++++++++++++++++++++++++++++++-------
 target_score.txt |  4 ++--
 3 files changed, 41 insertions(+), 18 deletions(-)

diff --git a/article.py b/article.py
index d7547150..907f4f30 100644
--- a/article.py
+++ b/article.py
@@ -38,14 +38,14 @@ def save_raw(self):
 
         with open(self._get_raw_text_path(), 'w', encoding='utf-8') as file:
             file.write(self.text)
-        #
-        # with open(os.path.join(ASSETS_PATH, article_meta_name), "w", encoding='utf-8') as file:
-        #     json.dump(self._get_meta(),
-        #               file,
-        #               sort_keys=False,
-        #               indent=4,
-        #               ensure_ascii=False,
-        #               separators=(',', ': '))
+
+        with open(os.path.join(ASSETS_PATH, article_meta_name), "w", encoding='utf-8') as file:
+            json.dump(self._get_meta(),
+                      file,
+                      sort_keys=False,
+                      indent=4,
+                      ensure_ascii=False,
+                      separators=(',', ': '))
     
     @staticmethod
     def from_meta_json(json_path: str):
@@ -99,7 +99,7 @@ def _date_to_text(self):
         """
         Converts datetime object to text
         """
-        return self.date.strftime("%Y-%m-%d %H:%M:%S")
+        return self.date.strftime("%Y-%m-%d")
     
     def _get_raw_text_path(self):
         """
diff --git a/scrapper.py b/scrapper.py
index b643e044..f149dd2a 100644
--- a/scrapper.py
+++ b/scrapper.py
@@ -9,7 +9,8 @@
 # from time import sleep as wait
 from article import Article
 import re
-
+from datetime import date
+import os
 
 CRAWLER_CONFIG_PATH = 'crawler_config.json'
 NEWLINES_RE = re.compile(r"\n{2,}")  # two or more "\n" characters
@@ -128,6 +129,11 @@ def _fill_article_with_meta_information(self, article_soup):
             else:
                 author = ''
             self.article.author = author
+            date = article_soup.find('div', {'class': 'b-caption'}).text.strip().split('\n')[1]
+            self.article.date = self.unify_date_format(date)
+
+            topic = article_soup.find('div', {'class': 'b-caption'}).text.strip().split('\n')[0]
+            self.article.topics = topic
         except AttributeError:
             print('    something is off with', self.full_url)
         # print(title)
@@ -138,7 +144,22 @@ def unify_date_format(date_str):
         """
         Unifies date format
         """
-        pass
+        day, month, year = date_str.split()
+        if len(day) == 1:
+            day = '0' + day
+        match = {'янв': '01',
+                 'фев': '02',
+                 'мар': '03',
+                 'апр': '04',
+                 'май': '05',
+                 'июн': '06',
+                 'июл': '07',
+                 'авг': '08',
+                 'сен': '09',
+                 'окт': '10',
+                 'ноя': '11',
+                 'дек': '12'}
+        return date.fromisoformat(year + '-' + match[month] + '-' + day)
 
     def parse(self):
         """
@@ -154,11 +175,13 @@ def parse(self):
         self.article.save_raw()
 
 
-# def prepare_environment(base_path):
-#     """
-#     Creates ASSETS_PATH folder if not created and removes existing folder
-#     """
-#     pass
+def prepare_environment(base_path):
+    """
+    Creates ASSETS_PATH folder if not created and removes existing folder
+    """
+    newpath = r'{}/ASSETS_PATH'.format(base_path)
+    if not os.path.exists(newpath):
+        os.makedirs(newpath)
 
 
 def validate_config(crawler_path):
diff --git a/target_score.txt b/target_score.txt
index 221e98ce..dde8d696 100644
--- a/target_score.txt
+++ b/target_score.txt
@@ -1,5 +1,5 @@
 # Target score for scrapper.py:
-4
+8
 
 # Target score for pipeline.py:
-10
\ No newline at end of file
+8
\ No newline at end of file

From 6fee8b34923b49beafb2c94337ecdbed5c14df20 Mon Sep 17 00:00:00 2001
From: marina-kaz <isthismeowgoingfrommyheart@gmail.com>
Date: Tue, 9 Mar 2021 21:27:57 +0300
Subject: [PATCH 06/50] fixed linting a little

---
 article.py  |  6 +++---
 scrapper.py | 49 +++++++++++++++++++++++++------------------------
 2 files changed, 28 insertions(+), 27 deletions(-)

diff --git a/article.py b/article.py
index 907f4f30..f471cb6e 100644
--- a/article.py
+++ b/article.py
@@ -46,7 +46,7 @@ def save_raw(self):
                       indent=4,
                       ensure_ascii=False,
                       separators=(',', ': '))
-    
+
     @staticmethod
     def from_meta_json(json_path: str):
         """
@@ -94,13 +94,13 @@ def _get_meta(self):
             'author': self.author,
             'topics': self.topics
         }
-    
+
     def _date_to_text(self):
         """
         Converts datetime object to text
         """
         return self.date.strftime("%Y-%m-%d")
-    
+
     def _get_raw_text_path(self):
         """
         Returns path for requested raw article
diff --git a/scrapper.py b/scrapper.py
index f149dd2a..6efd8294 100644
--- a/scrapper.py
+++ b/scrapper.py
@@ -2,15 +2,15 @@
 Crawler implementation
 """
 
+import os
+import re
 import json
+from datetime import date
 import requests
 from requests.exceptions import RequestException
 from bs4 import BeautifulSoup
 # from time import sleep as wait
 from article import Article
-import re
-from datetime import date
-import os
 
 CRAWLER_CONFIG_PATH = 'crawler_config.json'
 NEWLINES_RE = re.compile(r"\n{2,}")  # two or more "\n" characters
@@ -50,7 +50,7 @@ def __init__(self, seed_urls: list, total_max_articles: int, max_articles_per_se
         self.max_articles_per_seed = max_articles_per_seed
         self.urls = []
 
-        self.URL_START = 'https://burunen.ru'
+        self.URLSTART = 'https://burunen.ru'
 
     @staticmethod
     def _extract_url(article_bs, seen):
@@ -66,8 +66,8 @@ def find_articles(self):
         """
         Finds articles
         """
-        for url in self.seed_urls:
-            article_bs = BeautifulSoup(requests.get(url, 'html.parser').text, 'html.parser')
+        for link in self.seed_urls:
+            article_bs = BeautifulSoup(requests.get(link, 'html.parser').text, 'html.parser')
             newfound = self._extract_url(article_bs, self.urls)
             self.urls.extend(newfound[:self.max_articles_per_seed])
         self.urls = [i for i in self.urls if len(i) > 20
@@ -78,8 +78,8 @@ def find_articles(self):
         while len(self.urls) < self.total_max_articles:
             print('Due to insufficient number started further iteration')
             print('current number', len(self.urls), ', required', self.total_max_articles)
-            for url in self.urls:
-                article_bs = BeautifulSoup(requests.get(self.URL_START + url, 'html.parser').text, 'html.parser')
+            for link in self.urls:
+                article_bs = BeautifulSoup(requests.get(self.URLSTART + link, 'html.parser').text, 'html.parser')
                 newfound = filter(lambda x: len(x) > 20, self._extract_url(article_bs, self.urls))
                 print('    checked new url, found', len(newfound), 'articles')
                 self.urls.extend(newfound[:self.max_articles_per_seed])
@@ -103,8 +103,8 @@ class ArticleParser:
     """
     ArticleParser implementation
     """
-    def __init__(self, full_url: str, article_id: int):
-        self.full_url = full_url
+    def __init__(self, full__url: str, article_id: int):
+        self.full_url = full__url
         self.article_id = article_id
         self.article = Article(self.full_url, self.article_id)
 
@@ -121,16 +121,17 @@ def _fill_article_with_meta_information(self, article_soup):
             title = article_soup.title.text
             self.article.title = title
 
-            credits = article_soup.find('div', {'class': 'credits t-caption'}).text.strip().split('\n')[0]
-            if 'Автор:' in credits:
+            credit = article_soup.find('div', {'class': 'credits t-caption'}).text.strip().split('\n')[0]
+            if 'Автор:' in credit:
                 author = article_soup.find('div', {'class': 'credits t-caption'}).text.strip().split('\n')[0][7:]
-            elif 'Источник:' in credits:
-                author = article_soup.find('div', {'class': 'credits t-caption'}).text.strip().split('\n')[0][9:].strip()
+            elif 'Источник:' in credit:
+                author = article_soup.find('div', {'class': 'credits t-caption'}).text.strip()
+                author = author.split('\n')[0][9:].strip()
             else:
                 author = ''
             self.article.author = author
-            date = article_soup.find('div', {'class': 'b-caption'}).text.strip().split('\n')[1]
-            self.article.date = self.unify_date_format(date)
+            when = article_soup.find('div', {'class': 'b-caption'}).text.strip().split('\n')[1]
+            self.article.date = self.unify_date_format(when)
 
             topic = article_soup.find('div', {'class': 'b-caption'}).text.strip().split('\n')[0]
             self.article.topics = topic
@@ -193,10 +194,10 @@ def validate_config(crawler_path):
     try:
         good_response = list(map(lambda link: True if requests.get(link).status_code == 200 else False,
                                  config['base_urls']))
-    except RequestException:
-        raise IncorrectURLError
-    except Exception:
-        raise UnknownConfigError
+    except RequestException as e:
+        raise IncorrectURLError from e
+    except Exception as e:
+        raise UnknownConfigError from e
     if not all(good_response):
         raise IncorrectURLError
     if not all((isinstance(config['total_articles_to_find_and_parse'], int),
@@ -210,10 +211,10 @@ def validate_config(crawler_path):
 
 
 if __name__ == '__main__':
-    seed_urls, max_articles, max_articles_per_seed = validate_config(CRAWLER_CONFIG_PATH)
-    crawler = Crawler(seed_urls=seed_urls,
+    seedurls, max_articles, max_arts_per_seed = validate_config(CRAWLER_CONFIG_PATH)
+    crawler = Crawler(seed_urls=seedurls,
                       total_max_articles=max_articles,
-                      max_articles_per_seed=max_articles_per_seed)
+                      max_articles_per_seed=max_arts_per_seed)
 
     crawler.find_articles()
     # print('Scraped', len(crawler.urls), 'articles')
@@ -221,7 +222,7 @@ def validate_config(crawler_path):
     print('onto parsing')
 
     for n, url in enumerate(crawler.urls):
-        full_url = crawler.URL_START + url
+        full_url = crawler.URLSTART + url
         parser = ArticleParser(full_url, n + 1)
         parser.parse()
     print('parsing is finished')

From aafdefd921956ac4c5a4294d6a2ac4444c4a4b72 Mon Sep 17 00:00:00 2001
From: marina-kaz <isthismeowgoingfrommyheart@gmail.com>
Date: Tue, 9 Mar 2021 21:37:37 +0300
Subject: [PATCH 07/50] major link work

---
 scrapper.py | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/scrapper.py b/scrapper.py
index 6efd8294..9f924bec 100644
--- a/scrapper.py
+++ b/scrapper.py
@@ -50,17 +50,17 @@ def __init__(self, seed_urls: list, total_max_articles: int, max_articles_per_se
         self.max_articles_per_seed = max_articles_per_seed
         self.urls = []
 
-        self.URLSTART = 'https://burunen.ru'
+        self.url_start = 'https://burunen.ru'
 
     @staticmethod
     def _extract_url(article_bs, seen):
-        extracted = list(set([link['href'] for link in article_bs.find_all('a', href=True)]))
+        extracted = list({link['href'] for link in article_bs.find_all('a', href=True)})
         # print(extracted)
         # print('          ',extracted)
-        return list(filter(lambda x: True if x.startswith('/news/')
+        return list(filter(lambda x: x.startswith('/news/')
                            and x not in seen
                            # and any(map(lambda y: y.isdigit(), x))
-                           else False, extracted))
+                           , extracted))
 
     def find_articles(self):
         """
@@ -79,8 +79,8 @@ def find_articles(self):
             print('Due to insufficient number started further iteration')
             print('current number', len(self.urls), ', required', self.total_max_articles)
             for link in self.urls:
-                article_bs = BeautifulSoup(requests.get(self.URLSTART + link, 'html.parser').text, 'html.parser')
-                newfound = filter(lambda x: len(x) > 20, self._extract_url(article_bs, self.urls))
+                article_bs = BeautifulSoup(requests.get(self.url_start + link, 'html.parser').text, 'html.parser')
+                newfound = list(filter(lambda x: len(x) > 20, self._extract_url(article_bs, self.urls)))
                 print('    checked new url, found', len(newfound), 'articles')
                 self.urls.extend(newfound[:self.max_articles_per_seed])
                 # wait(10)
@@ -192,12 +192,12 @@ def validate_config(crawler_path):
     with open(crawler_path) as crawler_config:
         config = json.load(crawler_config)
     try:
-        good_response = list(map(lambda link: True if requests.get(link).status_code == 200 else False,
+        good_response = list(map(lambda link: requests.get(link).status_code == 200,
                                  config['base_urls']))
-    except RequestException as e:
-        raise IncorrectURLError from e
-    except Exception as e:
-        raise UnknownConfigError from e
+    except RequestException as exception:
+        raise IncorrectURLError from exception
+    except Exception as exception:
+        raise UnknownConfigError from exception
     if not all(good_response):
         raise IncorrectURLError
     if not all((isinstance(config['total_articles_to_find_and_parse'], int),
@@ -222,7 +222,7 @@ def validate_config(crawler_path):
     print('onto parsing')
 
     for n, url in enumerate(crawler.urls):
-        full_url = crawler.URLSTART + url
+        full_url = crawler.url_start + url
         parser = ArticleParser(full_url, n + 1)
         parser.parse()
     print('parsing is finished')

From 0cc3a91d5a1bc1bd2f2423f2c80c1e232cfa4251 Mon Sep 17 00:00:00 2001
From: marina-kaz <isthismeowgoingfrommyheart@gmail.com>
Date: Tue, 9 Mar 2021 21:49:53 +0300
Subject: [PATCH 08/50] added requirements

---
 requirements.txt | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/requirements.txt b/requirements.txt
index e69de29b..327297ca 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -0,0 +1,2 @@
+beautifulsoup4==4.9.0
+requests==2.23.0

From e3e8eda67d91d7d6155b94493a7173737756e0d1 Mon Sep 17 00:00:00 2001
From: marina-kaz <isthismeowgoingfrommyheart@gmail.com>
Date: Tue, 9 Mar 2021 22:02:02 +0300
Subject: [PATCH 09/50] changed target score

---
 target_score.txt | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/target_score.txt b/target_score.txt
index dde8d696..e91213a7 100644
--- a/target_score.txt
+++ b/target_score.txt
@@ -1,5 +1,5 @@
 # Target score for scrapper.py:
-8
+10
 
 # Target score for pipeline.py:
-8
\ No newline at end of file
+10
\ No newline at end of file

From 1355f14609e295e0403bf234ab539ee99c59f3d2 Mon Sep 17 00:00:00 2001
From: marina-kaz <isthismeowgoingfrommyheart@gmail.com>
Date: Thu, 11 Mar 2021 17:49:13 +0300
Subject: [PATCH 10/50] i dont understand why test fails

---
 scrapper.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/scrapper.py b/scrapper.py
index 9f924bec..ac063fb4 100644
--- a/scrapper.py
+++ b/scrapper.py
@@ -200,8 +200,11 @@ def validate_config(crawler_path):
         raise UnknownConfigError from exception
     if not all(good_response):
         raise IncorrectURLError
-    if not all((isinstance(config['total_articles_to_find_and_parse'], int),
-                isinstance(config['max_number_articles_to_get_from_one_seed'], int))):
+    # if not all((isinstance(config['total_articles_to_find_and_parse'], int),
+    #             isinstance(config['max_number_articles_to_get_from_one_seed'], int))):
+    if not isinstance(config['total_articles_to_find_and_parse'], int):
+        raise IncorrectNumberOfArticlesError
+    if not isinstance(config['max_number_articles_to_get_from_one_seed'], int):
         raise IncorrectNumberOfArticlesError
     if not config['total_articles_to_find_and_parse'] < config['max_number_articles_to_get_from_one_seed']\
        * len(good_response):

From 56fbb389cf39f00cebdf32b8949f44c2e038f497 Mon Sep 17 00:00:00 2001
From: marina-kaz <isthismeowgoingfrommyheart@gmail.com>
Date: Thu, 11 Mar 2021 18:01:43 +0300
Subject: [PATCH 11/50] oooohh

---
 scrapper.py | 25 +++++++++++++------------
 1 file changed, 13 insertions(+), 12 deletions(-)

diff --git a/scrapper.py b/scrapper.py
index ac063fb4..19cbb595 100644
--- a/scrapper.py
+++ b/scrapper.py
@@ -58,9 +58,7 @@ def _extract_url(article_bs, seen):
         # print(extracted)
         # print('          ',extracted)
         return list(filter(lambda x: x.startswith('/news/')
-                           and x not in seen
-                           # and any(map(lambda y: y.isdigit(), x))
-                           , extracted))
+                           and x not in seen, extracted))
 
     def find_articles(self):
         """
@@ -200,15 +198,18 @@ def validate_config(crawler_path):
         raise UnknownConfigError from exception
     if not all(good_response):
         raise IncorrectURLError
-    # if not all((isinstance(config['total_articles_to_find_and_parse'], int),
-    #             isinstance(config['max_number_articles_to_get_from_one_seed'], int))):
-    if not isinstance(config['total_articles_to_find_and_parse'], int):
-        raise IncorrectNumberOfArticlesError
-    if not isinstance(config['max_number_articles_to_get_from_one_seed'], int):
-        raise IncorrectNumberOfArticlesError
-    if not config['total_articles_to_find_and_parse'] < config['max_number_articles_to_get_from_one_seed']\
-       * len(good_response):
-        raise NumberOfArticlesOutOfRangeError
+    try:
+        if not isinstance(config['total_articles_to_find_and_parse'], int):
+            raise IncorrectNumberOfArticlesError
+        if not config['total_articles_to_find_and_parse'] > 1000:
+            raise NumberOfArticlesOutOfRangeError
+        if not isinstance(config['max_number_articles_to_get_from_one_seed'], int):
+            raise IncorrectNumberOfArticlesError
+        if not config['total_articles_to_find_and_parse'] < config['max_number_articles_to_get_from_one_seed']\
+           * len(good_response):
+            raise NumberOfArticlesOutOfRangeError
+    except KeyError as exception:
+        raise IncorrectNumberOfArticlesError from exception
     return config['base_urls'], config['total_articles_to_find_and_parse'], \
         config['max_number_articles_to_get_from_one_seed']
 

From cfeae30b6034925b90e2dc53152b427fd1f40d47 Mon Sep 17 00:00:00 2001
From: marina-kaz <isthismeowgoingfrommyheart@gmail.com>
Date: Thu, 11 Mar 2021 18:14:53 +0300
Subject: [PATCH 12/50] i have questions

---
 scrapper.py | 26 +++++++++++++++++---------
 1 file changed, 17 insertions(+), 9 deletions(-)

diff --git a/scrapper.py b/scrapper.py
index 19cbb595..ae60601f 100644
--- a/scrapper.py
+++ b/scrapper.py
@@ -201,21 +201,29 @@ def validate_config(crawler_path):
     try:
         if not isinstance(config['total_articles_to_find_and_parse'], int):
             raise IncorrectNumberOfArticlesError
-        if not config['total_articles_to_find_and_parse'] > 1000:
-            raise NumberOfArticlesOutOfRangeError
-        if not isinstance(config['max_number_articles_to_get_from_one_seed'], int):
-            raise IncorrectNumberOfArticlesError
-        if not config['total_articles_to_find_and_parse'] < config['max_number_articles_to_get_from_one_seed']\
-           * len(good_response):
+        if config['total_articles_to_find_and_parse'] > 1000:
             raise NumberOfArticlesOutOfRangeError
+        # if not isinstance(config['max_number_articles_to_get_from_one_seed'], int):
+        #     raise IncorrectNumberOfArticlesError
+        # if not config['total_articles_to_find_and_parse'] < config['max_number_articles_to_get_from_one_seed']\
+        #    * len(good_response):
+        #     raise NumberOfArticlesOutOfRangeError
     except KeyError as exception:
         raise IncorrectNumberOfArticlesError from exception
-    return config['base_urls'], config['total_articles_to_find_and_parse'], \
-        config['max_number_articles_to_get_from_one_seed']
+    try:
+        return config['base_urls'], config['total_articles_to_find_and_parse'], \
+            config['max_number_articles_to_get_from_one_seed']
+    except KeyError:
+        return config['base_urls'], config['total_articles_to_find_and_parse']
 
 
 if __name__ == '__main__':
-    seedurls, max_articles, max_arts_per_seed = validate_config(CRAWLER_CONFIG_PATH)
+    params = validate_config(CRAWLER_CONFIG_PATH)
+    if len(params) == 3:
+        seedurls, max_articles, max_arts_per_seed = params
+    else:
+        seedurls, max_articles = params
+        max_arts_per_seed = max_articles
     crawler = Crawler(seed_urls=seedurls,
                       total_max_articles=max_articles,
                       max_articles_per_seed=max_arts_per_seed)

From 39e892355a93b4f8c02d33cd27df9ab137963d7c Mon Sep 17 00:00:00 2001
From: marina-kaz <isthismeowgoingfrommyheart@gmail.com>
Date: Thu, 11 Mar 2021 18:19:36 +0300
Subject: [PATCH 13/50] didn't work

---
 scrapper.py | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/scrapper.py b/scrapper.py
index ae60601f..c608d87a 100644
--- a/scrapper.py
+++ b/scrapper.py
@@ -214,15 +214,12 @@ def validate_config(crawler_path):
         return config['base_urls'], config['total_articles_to_find_and_parse'], \
             config['max_number_articles_to_get_from_one_seed']
     except KeyError:
-        return config['base_urls'], config['total_articles_to_find_and_parse']
+        return config['base_urls'], config['total_articles_to_find_and_parse'], None
 
 
 if __name__ == '__main__':
-    params = validate_config(CRAWLER_CONFIG_PATH)
-    if len(params) == 3:
-        seedurls, max_articles, max_arts_per_seed = params
-    else:
-        seedurls, max_articles = params
+    seedurls, max_articles, max_arts_per_seed = validate_config(CRAWLER_CONFIG_PATH)
+    if not max_arts_per_seed:
         max_arts_per_seed = max_articles
     crawler = Crawler(seed_urls=seedurls,
                       total_max_articles=max_articles,

From 6e5ec03b80db40932fbbc25ae8c952f30684837d Mon Sep 17 00:00:00 2001
From: marina-kaz <isthismeowgoingfrommyheart@gmail.com>
Date: Thu, 11 Mar 2021 18:27:23 +0300
Subject: [PATCH 14/50] working on

---
 scrapper.py | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/scrapper.py b/scrapper.py
index c608d87a..a9d49748 100644
--- a/scrapper.py
+++ b/scrapper.py
@@ -11,9 +11,7 @@
 from bs4 import BeautifulSoup
 # from time import sleep as wait
 from article import Article
-
-CRAWLER_CONFIG_PATH = 'crawler_config.json'
-NEWLINES_RE = re.compile(r"\n{2,}")  # two or more "\n" characters
+from constants import CRAWLER_CONFIG_PATH, ASSETS_PATH
 
 
 class IncorrectURLError(Exception):
@@ -109,7 +107,6 @@ def __init__(self, full__url: str, article_id: int):
     def _fill_article_with_text(self, article_soup):
         try:
             text = article_soup.find('div', {'class': 'text letter', 'itemprop': 'articleBody'}).text.strip()
-            # text = NEWLINES_RE.split(all_text)  # regex splitting
             self.article.text = text
         except AttributeError:
             print('    unable to parse', self.full_url)
@@ -218,6 +215,7 @@ def validate_config(crawler_path):
 
 
 if __name__ == '__main__':
+    prepare_environment(ASSETS_PATH)
     seedurls, max_articles, max_arts_per_seed = validate_config(CRAWLER_CONFIG_PATH)
     if not max_arts_per_seed:
         max_arts_per_seed = max_articles

From 4733964aa55e33dcd433258a314e87cd2c3a6b2a Mon Sep 17 00:00:00 2001
From: marina-kaz <isthismeowgoingfrommyheart@gmail.com>
Date: Thu, 11 Mar 2021 18:29:35 +0300
Subject: [PATCH 15/50] there is no pleasing you

---
 scrapper.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/scrapper.py b/scrapper.py
index a9d49748..7058e47d 100644
--- a/scrapper.py
+++ b/scrapper.py
@@ -3,7 +3,6 @@
 """
 
 import os
-import re
 import json
 from datetime import date
 import requests

From ead1f177aecf94154d2f4b9a533a0b7bd8bbfdd6 Mon Sep 17 00:00:00 2001
From: marina-kaz <isthismeowgoingfrommyheart@gmail.com>
Date: Thu, 11 Mar 2021 18:42:43 +0300
Subject: [PATCH 16/50] i'm experimenting

---
 target_score.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/target_score.txt b/target_score.txt
index e91213a7..3de837b8 100644
--- a/target_score.txt
+++ b/target_score.txt
@@ -2,4 +2,4 @@
 10
 
 # Target score for pipeline.py:
-10
\ No newline at end of file
+0
\ No newline at end of file

From cf7e97ba541a4c1e2ca92d5ff3e3e9dcb169e182 Mon Sep 17 00:00:00 2001
From: marina-kaz <isthismeowgoingfrommyheart@gmail.com>
Date: Thu, 11 Mar 2021 21:06:11 +0300
Subject: [PATCH 17/50] added stuff for 10, tests will fail?

---
 config/constants.py  |   1 +
 constants.py         |   2 +
 links/url_backup.txt | 320 +++++++++++++++++++++++++++++++++++++++++++
 scrapper.py          |  94 +++++++++++--
 4 files changed, 403 insertions(+), 14 deletions(-)
 create mode 100644 links/url_backup.txt

diff --git a/config/constants.py b/config/constants.py
index 12d85256..28a84b06 100644
--- a/config/constants.py
+++ b/config/constants.py
@@ -7,3 +7,4 @@
 PROJECT_ROOT = os.path.dirname(os.path.realpath(__file__))
 ASSETS_PATH = os.path.join(PROJECT_ROOT, 'tmp', 'articles')
 CRAWLER_CONFIG_PATH = os.path.join(PROJECT_ROOT, 'crawler_config.json')
+LINKS_STORAGE = os.path.join(PROJECT_ROOT, 'links')
diff --git a/constants.py b/constants.py
index 12d85256..3dc98002 100644
--- a/constants.py
+++ b/constants.py
@@ -7,3 +7,5 @@
 PROJECT_ROOT = os.path.dirname(os.path.realpath(__file__))
 ASSETS_PATH = os.path.join(PROJECT_ROOT, 'tmp', 'articles')
 CRAWLER_CONFIG_PATH = os.path.join(PROJECT_ROOT, 'crawler_config.json')
+LINKS_STORAGE = os.path.join(PROJECT_ROOT, 'links')
+URL_START = 'https://burunen.ru'
\ No newline at end of file
diff --git a/links/url_backup.txt b/links/url_backup.txt
new file mode 100644
index 00000000..d7bb92f5
--- /dev/null
+++ b/links/url_backup.txt
@@ -0,0 +1,320 @@
+/news/society/80553-pionery-buryatskogo-biznesa/
+/news/society/80527-zurkhay-na-10-marta-27-lunnyy-den/
+/news/society/80543-nakanune-10-marta-v-buryatii-ushel-iz-zhizni-eks-rektor-bgskha-aleksandr-popov/
+/news/society/80575-v-buryatii-startoval-federalnyy-proekt-chistaya-voda/
+/news/society/80548-v-stolitsu-buryatii-pribyl-arsen-fadzaev/
+/news/culture/80594-polozhenie-o-konkurse-rasskaza-2021/
+/news/society/80583-vystavka-okhotnichikh-laek-proshla-v-zakamenskom-rayone-buryatii/
+/news/society/80574-v-vuzakh-buryatii-poyavyatsya-prorektory-po-tsifrovizatsii/
+/news/society/80552-poyushchaya-garga-kak-zhivyet-poyushchee-selskoe-poselenie-v-kurumkane/
+/news/society/80572-zurkhay-na-11-marta-28-lunnyy-den/
+/news/society/80555-buryatiya-voshla-v-chislo-regionov-kotorye-podderzhali-pevitsu-manizhu/
+/news/society/80581-zhivaya-legenda-volnoy-borby-rossii-pribyl-v-buryatiyu-v-svoy-den-rozhdeniya/
+/news/society/80540-v-stolitsu-buryatii-nachali-pribyvat-uchastniki-i-gosti-chempionata-rossii-po-volnoy-borbe/
+/news/society/80542-stalo-izvestno-komu-iz-zhiteley-buryatii-udastsya-vyyti-na-pensiyu-dosrochno/
+/news/society/80602-v-buryatskom-sele-vydrino-otremontiruyut-detskuyu-shkolu-iskusstv/
+/news/society/80573-preimushchestvenno-bez-osadkov-dnyem-do-5-tepla-ozhidaetsya-v-buryatii-segodnya-11-marta-/
+/news/culture/79137-pamyati-geroev-rabotniki-sudebnoy-sistemy-zabaykalya-vypustili-knigu-vospominaniy-ob-uchastnikakh-vo/
+/news/society/80558-irkutskuyu-oblast-i-buryatiyu-vozmozhno-svyazhet-noveyshaya-elektrichka/
+/news/society/80535-v-buryatii-podveli-itogi-baykalskoy-mili/
+/news/society/80565-v-zaigraevskom-rayone-buryatii-otkrylas-novaya-shkola-na-450-mest/
+/news/society/80577-na-buryatiyu-obrushitsya-anomalnoe-poteplenie/
+/news/society/80584-v-buryatii-nachalis-meropriyatiya-po-profilaktike-ledyanykh-zatorov/
+/news/culture/80559-proverit-svoe-znanie-buryatskoy-grammatiki-smogut-zhiteli-buryatii/
+/news/culture/80475-dusha-v-tantse-studenty-kolledzha-iskusstv-dali-uroki-yekhora-dlya-zhurnalistov/
+/news/culture/80511-v-buryatii-otkrylas-vystavka-eksperimentalnogo-iskusstva-ii-sulde/
+/news/culture/80515-reper-iz-buryatii-zapisal-klip-s-pevitsey-kotoraya-vystupit-na-evrovidenii/
+/news/culture/80387-smysly-zurkhaya-geshe-tsyren-lama-o-tonkostyakh-buddiyskoy-astrologii-/
+/news/culture/80388-v-ulan-ude-sostoyalas-premera-opery-knyazya-igorya/
+/news/culture/80384-v-ulan-ude-vozvrashchaetsya-festival-uu-sound-/
+/news/culture/80521-v-ulan-ude-predstavili-dokumentalnyy-film-o-buryatskom-kostyume/
+/news/culture/80390-yunye-tsirkovye-artisty-buryatii-vpervye-vystupyat-v-tyumenskom-tsirke/
+/news/culture/80416-buryatskiy-ansambl-bulzhamuur-stal-laureatom-vserossiyskogo-festivalya/
+/news/culture/80592-v-ulan-ude-proshyel-kontsert-etnicheskoy-muzyki/
+/news/culture/80560-v-ulan-ude-nachinaet-svoyu-rabotu-etnokovorking/
+/news/culture/80536-knyaz-igor-v-buryatii-otkuda-v-stepi-drevnerusskaya-grust/
+/news/culture/80509-sezon-dozhdey-dadut-kontsert-v-ulan-ude/
+/news/culture/80557-koster-na-glavnoy-ploshchadi-buryatiya-gotovitsya-k-maslenitse/
+/news/culture/80522-v-ulan-ude-proshla-vstrecha-vesny-kak-zavershenie-vostochnogo-novogo-goda/
+/news/culture/80397-muzhchina-iz-severobaykalska-poluchil-realnyy-srok-za-povtornuyu-ezdu-v-pyanom-vide/
+/news/culture/80420-v-buryatii-izdali-knigu-minii-nyutag/
+/news/culture/80389-artisty-teatra-baykal-vyydut-v-efir-radio-mayak/
+/news/economy/80551-v-buryatii-rastet-spros-na-novye-avtomobili/
+/news/economy/80400-agrarii-buryatii-vernuli-chast-zemel-merii-ulan-ude/
+/news/economy/80462-v-buryatii-otremontiruyut-dorogu-v-posele/
+/news/economy/80447-biznes-buryatii-poluchit-vozmozhnosti-opravitsya-ot-posledstviy-pandemii/
+/news/economy/80549-prognoznyy-poleznyy-otpusk-elektroenergii-i-moshchnosti-po-tarifnym-gruppam/
+/news/economy/80427-v-buryatii-proydet-zasedanie-soveta-direktorov-kholdinga-vertolyety-rossii/
+/news/economy/80506-v-buryatii-otremontiruyut-odnu-iz-samykh-populyarnykh-avtodorog/
+/news/economy/80429-buryatiya-poluchit-okolo-3-milliardov-rubley-na-dorogi/
+/news/economy/80415-v-ulan-ude-startoval-akselerator-proektov-upravlentsev-buryatii/
+/news/economy/80432-buryatiya-poluchit-bolee-28-mln-rubley-na-podderzhku-proektov-sotsialno-orientirovannykh-organizatsi/
+/news/economy/80413-usloviya-dalnevostochnaya-ipoteka-namereny-uluchshit-v-buryatii-i-po-vsemu-dfo/
+/news/economy/80340-tunkinskiy-rayon-buryatii-poluchit-sredstva-na-razvitie-turizma/
+/news/economy/80435-ivan-alkheev-naznachen-zampredom-pravitelstva-buryatii/
+/news/economy/80433-na-ulan-udenskom-aviazavode-vpervye-proshlo-zasedanie-soveta-direktorov-vertolety-rossii/
+/news/economy/80317-bolee-2-tysyach-zhiteley-buryatii-prinyali-uchastie-v-biznes-vstreche-s-alekseem-tsydenovym/
+/news/economy/80478-produktsiya-iz-buryatii-vyshla-na-rynok-germanii/
+/news/economy/80412-buryatiya-priobretet-paket-aktsiy-avrora/
+/news/economy/80488-v-buryatii-razdeli-mestorozhdenie-urana/
+/news/economy/80344-pereezd-na-dalniy-vostok-pozvolit-buryatii-reanimirovat-dva-krupnykh-proekta/
+/news/economy/80491-buryatiya-voshla-v-zonu-modernizatsii-zheleznykh-dorog/
+/news/sports/80463-vdokhnovlyayushchie-rezultaty-novye-vzlyety-khudozhestvennoy-gimnastiki-buryatii/
+/news/sports/80600-v-kabanskom-rayone-buryatii-otkryli-sportivnuyu-ploshchadku/
+/news/sports/80546-studentki-iz-buryatii-na-pedestale-rossiyskogo-urovnya/
+/news/sports/80331-boksery-iz-buryatii-zavoevali-shest-medaley-na-vserossiyskom-turnire-klassa-a/
+/news/sports/80520-final-kubka-buryatii-ne-doigrali-no-pobeditelya-opredelili/
+/news/sports/80578-na-chempionate-rossii-v-ulan-ude-103-bortsa-opredelyat-pervykh-finalistov-obnovlyaetsya/
+/news/sports/80477-borets-iz-buryatii-rasskazal-kak-borolsya-za-ameriku/
+/news/sports/80351-boytsy-federatsii-pankrationa-buryatii-vystupili-na-urovne-dfo/
+/news/sports/80601-ministr-sporta-buryatii-chempionat-rossii-dast-novyy-impuls-razvitiyu-volnoy-borby-v-respublike/
+/news/sports/80507-buryatiya-utverdila-okonchatelnyy-sostav-sbornoy-dlya-uchastiya-v-chempionate-rossii-po-volnoy-borbe/
+/news/sports/80494-borits-iz-buryatii-zakryli-na-karantin-v-rime-/
+/news/sports/80472-na-lyzhakh-s-lukom-buryatskaya-sportsmenka-vyigrala-chetyre-medali-na-pervenstve-rossii/
+/news/sports/80547-beskompromissnye-igry-buryatskikh-shakhmatistov-na-dalnem-vostoke/
+/news/sports/80354-pervyy-bortsovskiy-internat-buryatii-vzyal-na-vooruzhenie-igru-go/
+/news/sports/80568-v-stolitse-buryatii-proshla-pervaya-zherebevka-chempionata-rossii-po-volnoy-borbe/
+/news/sports/80525-glavnyy-favorit-gryadushchego-chempionata-rossii-v-buryatii-ozvuchil-svoy-sostav/
+/news/sports/80470-v-buryatii-gryadet-bitva-titanov/
+/news/sports/80452-sportsmeny-iz-eravninskogo-rayona-buryatii-oderzhali-dve-pobedy/
+/news/sports/80345-znamenityy-rossiyskiy-futbolist-vyigral-baykalskiy-marafon-v-buryatii/
+/news/incidents/80181-v-buryatii-stali-bolshe-pit/
+/news/incidents/80467-v-buryatii-vyyavili-pyat-narusheniy-protivoepidemicheskikh-mer/
+/news/incidents/80408-byvshemu-rukovoditelyu-energeticheskoy-kompanii-iz-buryatii-inkriminiruyut-sozdanie-opg/
+/news/incidents/80517-kommunisty-buryatii-obvinili-organizatorov-baykalskoy-mili/
+/news/incidents/80453-v-kabanskom-rayone-buryatii-zhiteli-zamerzayut-v-svoikh-domakh-/
+/news/incidents/80461-v-bichure-podtopilo-pushkina-/
+/news/incidents/80226-v-buryatii-vynesli-verdikt-po-delu-o-napadenii-sobak-na-pervoklassnika/
+/news/incidents/80471-v-belorussii-izbili-futbolnogo-trenera-iz-buryatii/
+/news/incidents/80530-delo-o-zaderzhanii-brakonerov-v-buryatii-prokommentiroval-skr-/
+/news/incidents/80411-na-baykale-zamorozili-demontazh-zavoda-po-rozlivu-baykalskoy-vody/
+/news/incidents/80399-vrachi-rasskazali-o-sostoyanii-rebenka-ranennogo-v-buryatii/
+/news/incidents/80545-v-buryatii-pri-pozhare-v-zhilom-dome-spaslis-mat-i-dvoe-detey/
+/news/incidents/80582-v-kurumkanskom-rayone-buryatii-zaderzhan-pokhititel-myasa/
+/news/incidents/80349-musornaya-reforma-v-buryatii-stala-povodom-dlya-politicheskikh-sporov/
+/news/incidents/80380-v-buryatii-snova-otlichilsya-voditel-leksusa/
+/news/incidents/80422-sledstvennyy-komitet-po-buryatii-dal-kommentariy-po-povodu-zaderzhaniya-sergeya-ivanova/
+/news/incidents/80191-intsident-na-baykale-pod-kontrolem-pravitelstva-buryatii/
+/news/incidents/80326-v-buryatii-obyasnili-prichiny-tramvaynogo-kollapsa-v-ulan-ude/
+/news/incidents/80529-materialnyy-ushcherb-po-faktu-gibeli-podrostkov-v-buryatii-vzyshchut-s-vladeltsa-avto/
+/news/incidents/80492-v-buryatii-obvinyayut-inspektora-kotoryy-zaderzhal-vliyatelnykh-brakonerov-/
+/news/politics/80591-ministr-sporta-buryatii-otmetil-naplyv-imenitykh-sportsmenov-v-dni-chempionata-rossii-po-volnoy-borb/
+/news/politics/80588-glava-buryatii-prinyal-uchastie-v-soveshchanii-generalnogo-direktora-oao-rzhd//news/society/80553-pionery-buryatskogo-biznesa/
+/news/society/80527-zurkhay-na-10-marta-27-lunnyy-den/
+/news/society/80543-nakanune-10-marta-v-buryatii-ushel-iz-zhizni-eks-rektor-bgskha-aleksandr-popov/
+/news/society/80575-v-buryatii-startoval-federalnyy-proekt-chistaya-voda/
+/news/society/80548-v-stolitsu-buryatii-pribyl-arsen-fadzaev/
+/news/culture/80594-polozhenie-o-konkurse-rasskaza-2021/
+/news/society/80583-vystavka-okhotnichikh-laek-proshla-v-zakamenskom-rayone-buryatii/
+/news/society/80574-v-vuzakh-buryatii-poyavyatsya-prorektory-po-tsifrovizatsii/
+/news/society/80552-poyushchaya-garga-kak-zhivyet-poyushchee-selskoe-poselenie-v-kurumkane/
+/news/society/80572-zurkhay-na-11-marta-28-lunnyy-den/
+/news/society/80555-buryatiya-voshla-v-chislo-regionov-kotorye-podderzhali-pevitsu-manizhu/
+/news/society/80581-zhivaya-legenda-volnoy-borby-rossii-pribyl-v-buryatiyu-v-svoy-den-rozhdeniya/
+/news/society/80540-v-stolitsu-buryatii-nachali-pribyvat-uchastniki-i-gosti-chempionata-rossii-po-volnoy-borbe/
+/news/society/80542-stalo-izvestno-komu-iz-zhiteley-buryatii-udastsya-vyyti-na-pensiyu-dosrochno/
+/news/society/80602-v-buryatskom-sele-vydrino-otremontiruyut-detskuyu-shkolu-iskusstv/
+/news/society/80573-preimushchestvenno-bez-osadkov-dnyem-do-5-tepla-ozhidaetsya-v-buryatii-segodnya-11-marta-/
+/news/culture/79137-pamyati-geroev-rabotniki-sudebnoy-sistemy-zabaykalya-vypustili-knigu-vospominaniy-ob-uchastnikakh-vo/
+/news/society/80558-irkutskuyu-oblast-i-buryatiyu-vozmozhno-svyazhet-noveyshaya-elektrichka/
+/news/society/80535-v-buryatii-podveli-itogi-baykalskoy-mili/
+/news/society/80565-v-zaigraevskom-rayone-buryatii-otkrylas-novaya-shkola-na-450-mest/
+/news/society/80577-na-buryatiyu-obrushitsya-anomalnoe-poteplenie/
+/news/society/80584-v-buryatii-nachalis-meropriyatiya-po-profilaktike-ledyanykh-zatorov/
+/news/culture/80559-proverit-svoe-znanie-buryatskoy-grammatiki-smogut-zhiteli-buryatii/
+/news/culture/80475-dusha-v-tantse-studenty-kolledzha-iskusstv-dali-uroki-yekhora-dlya-zhurnalistov/
+/news/culture/80511-v-buryatii-otkrylas-vystavka-eksperimentalnogo-iskusstva-ii-sulde/
+/news/culture/80515-reper-iz-buryatii-zapisal-klip-s-pevitsey-kotoraya-vystupit-na-evrovidenii/
+/news/culture/80387-smysly-zurkhaya-geshe-tsyren-lama-o-tonkostyakh-buddiyskoy-astrologii-/
+/news/culture/80388-v-ulan-ude-sostoyalas-premera-opery-knyazya-igorya/
+/news/culture/80384-v-ulan-ude-vozvrashchaetsya-festival-uu-sound-/
+/news/culture/80521-v-ulan-ude-predstavili-dokumentalnyy-film-o-buryatskom-kostyume/
+/news/culture/80390-yunye-tsirkovye-artisty-buryatii-vpervye-vystupyat-v-tyumenskom-tsirke/
+/news/culture/80416-buryatskiy-ansambl-bulzhamuur-stal-laureatom-vserossiyskogo-festivalya/
+/news/culture/80592-v-ulan-ude-proshyel-kontsert-etnicheskoy-muzyki/
+/news/culture/80560-v-ulan-ude-nachinaet-svoyu-rabotu-etnokovorking/
+/news/culture/80536-knyaz-igor-v-buryatii-otkuda-v-stepi-drevnerusskaya-grust/
+/news/culture/80509-sezon-dozhdey-dadut-kontsert-v-ulan-ude/
+/news/culture/80557-koster-na-glavnoy-ploshchadi-buryatiya-gotovitsya-k-maslenitse/
+/news/culture/80522-v-ulan-ude-proshla-vstrecha-vesny-kak-zavershenie-vostochnogo-novogo-goda/
+/news/culture/80397-muzhchina-iz-severobaykalska-poluchil-realnyy-srok-za-povtornuyu-ezdu-v-pyanom-vide/
+/news/culture/80420-v-buryatii-izdali-knigu-minii-nyutag/
+/news/culture/80389-artisty-teatra-baykal-vyydut-v-efir-radio-mayak/
+/news/economy/80551-v-buryatii-rastet-spros-na-novye-avtomobili/
+/news/economy/80400-agrarii-buryatii-vernuli-chast-zemel-merii-ulan-ude/
+/news/economy/80462-v-buryatii-otremontiruyut-dorogu-v-posele/
+/news/economy/80447-biznes-buryatii-poluchit-vozmozhnosti-opravitsya-ot-posledstviy-pandemii/
+/news/economy/80549-prognoznyy-poleznyy-otpusk-elektroenergii-i-moshchnosti-po-tarifnym-gruppam/
+/news/economy/80427-v-buryatii-proydet-zasedanie-soveta-direktorov-kholdinga-vertolyety-rossii/
+/news/economy/80506-v-buryatii-otremontiruyut-odnu-iz-samykh-populyarnykh-avtodorog/
+/news/economy/80429-buryatiya-poluchit-okolo-3-milliardov-rubley-na-dorogi/
+/news/economy/80415-v-ulan-ude-startoval-akselerator-proektov-upravlentsev-buryatii/
+/news/economy/80432-buryatiya-poluchit-bolee-28-mln-rubley-na-podderzhku-proektov-sotsialno-orientirovannykh-organizatsi/
+/news/economy/80413-usloviya-dalnevostochnaya-ipoteka-namereny-uluchshit-v-buryatii-i-po-vsemu-dfo/
+/news/economy/80340-tunkinskiy-rayon-buryatii-poluchit-sredstva-na-razvitie-turizma/
+/news/economy/80435-ivan-alkheev-naznachen-zampredom-pravitelstva-buryatii/
+/news/economy/80433-na-ulan-udenskom-aviazavode-vpervye-proshlo-zasedanie-soveta-direktorov-vertolety-rossii/
+/news/economy/80317-bolee-2-tysyach-zhiteley-buryatii-prinyali-uchastie-v-biznes-vstreche-s-alekseem-tsydenovym/
+/news/economy/80478-produktsiya-iz-buryatii-vyshla-na-rynok-germanii/
+/news/economy/80412-buryatiya-priobretet-paket-aktsiy-avrora/
+/news/economy/80488-v-buryatii-razdeli-mestorozhdenie-urana/
+/news/economy/80344-pereezd-na-dalniy-vostok-pozvolit-buryatii-reanimirovat-dva-krupnykh-proekta/
+/news/economy/80491-buryatiya-voshla-v-zonu-modernizatsii-zheleznykh-dorog/
+/news/sports/80463-vdokhnovlyayushchie-rezultaty-novye-vzlyety-khudozhestvennoy-gimnastiki-buryatii/
+/news/sports/80600-v-kabanskom-rayone-buryatii-otkryli-sportivnuyu-ploshchadku/
+/news/sports/80546-studentki-iz-buryatii-na-pedestale-rossiyskogo-urovnya/
+/news/sports/80331-boksery-iz-buryatii-zavoevali-shest-medaley-na-vserossiyskom-turnire-klassa-a/
+/news/sports/80520-final-kubka-buryatii-ne-doigrali-no-pobeditelya-opredelili/
+/news/sports/80578-na-chempionate-rossii-v-ulan-ude-103-bortsa-opredelyat-pervykh-finalistov-obnovlyaetsya/
+/news/sports/80477-borets-iz-buryatii-rasskazal-kak-borolsya-za-ameriku/
+/news/sports/80351-boytsy-federatsii-pankrationa-buryatii-vystupili-na-urovne-dfo/
+/news/sports/80601-ministr-sporta-buryatii-chempionat-rossii-dast-novyy-impuls-razvitiyu-volnoy-borby-v-respublike/
+/news/sports/80507-buryatiya-utverdila-okonchatelnyy-sostav-sbornoy-dlya-uchastiya-v-chempionate-rossii-po-volnoy-borbe/
+/news/sports/80494-borits-iz-buryatii-zakryli-na-karantin-v-rime-/
+/news/sports/80472-na-lyzhakh-s-lukom-buryatskaya-sportsmenka-vyigrala-chetyre-medali-na-pervenstve-rossii/
+/news/sports/80547-beskompromissnye-igry-buryatskikh-shakhmatistov-na-dalnem-vostoke/
+/news/sports/80354-pervyy-bortsovskiy-internat-buryatii-vzyal-na-vooruzhenie-igru-go/
+/news/sports/80568-v-stolitse-buryatii-proshla-pervaya-zherebevka-chempionata-rossii-po-volnoy-borbe/
+/news/sports/80525-glavnyy-favorit-gryadushchego-chempionata-rossii-v-buryatii-ozvuchil-svoy-sostav/
+/news/sports/80470-v-buryatii-gryadet-bitva-titanov/
+/news/sports/80452-sportsmeny-iz-eravninskogo-rayona-buryatii-oderzhali-dve-pobedy/
+/news/sports/80345-znamenityy-rossiyskiy-futbolist-vyigral-baykalskiy-marafon-v-buryatii/
+/news/incidents/80181-v-buryatii-stali-bolshe-pit/
+/news/incidents/80467-v-buryatii-vyyavili-pyat-narusheniy-protivoepidemicheskikh-mer/
+/news/incidents/80408-byvshemu-rukovoditelyu-energeticheskoy-kompanii-iz-buryatii-inkriminiruyut-sozdanie-opg/
+/news/incidents/80517-kommunisty-buryatii-obvinili-organizatorov-baykalskoy-mili/
+/news/incidents/80453-v-kabanskom-rayone-buryatii-zhiteli-zamerzayut-v-svoikh-domakh-/
+/news/incidents/80461-v-bichure-podtopilo-pushkina-/
+/news/incidents/80226-v-buryatii-vynesli-verdikt-po-delu-o-napadenii-sobak-na-pervoklassnika/
+/news/incidents/80471-v-belorussii-izbili-futbolnogo-trenera-iz-buryatii/
+/news/incidents/80530-delo-o-zaderzhanii-brakonerov-v-buryatii-prokommentiroval-skr-/
+/news/incidents/80411-na-baykale-zamorozili-demontazh-zavoda-po-rozlivu-baykalskoy-vody/
+/news/incidents/80399-vrachi-rasskazali-o-sostoyanii-rebenka-ranennogo-v-buryatii/
+/news/incidents/80545-v-buryatii-pri-pozhare-v-zhilom-dome-spaslis-mat-i-dvoe-detey/
+/news/incidents/80582-v-kurumkanskom-rayone-buryatii-zaderzhan-pokhititel-myasa/
+/news/incidents/80349-musornaya-reforma-v-buryatii-stala-povodom-dlya-politicheskikh-sporov/
+/news/incidents/80380-v-buryatii-snova-otlichilsya-voditel-leksusa/
+/news/incidents/80422-sledstvennyy-komitet-po-buryatii-dal-kommentariy-po-povodu-zaderzhaniya-sergeya-ivanova/
+/news/incidents/80191-intsident-na-baykale-pod-kontrolem-pravitelstva-buryatii/
+/news/incidents/80326-v-buryatii-obyasnili-prichiny-tramvaynogo-kollapsa-v-ulan-ude/
+/news/incidents/80529-materialnyy-ushcherb-po-faktu-gibeli-podrostkov-v-buryatii-vzyshchut-s-vladeltsa-avto/
+/news/incidents/80492-v-buryatii-obvinyayut-inspektora-kotoryy-zaderzhal-vliyatelnykh-brakonerov-/
+/news/politics/80591-ministr-sporta-buryatii-otmetil-naplyv-imenitykh-sportsmenov-v-dni-chempionata-rossii-po-volnoy-borb/
+/news/politics/80588-glava-buryatii-prinyal-uchastie-v-soveshchanii-generalnogo-direktora-oao-rzhd/
+/news/politics/80590-glava-buryatii-vyekhal-s-rabochey-poezdkoy-v-mukhorshibirskiy-rayon/
+/news/politics/80498-aleksey-tsydenov-pozdravil-vsekh-zhenshchin-buryatii-s-8-marta-/
+/news/politics/80538-delo-o-zaderzhanii-brakonerov-v-buryatii-poruchil-vzyat-pod-kontrol-yuriy-trutnev/
+/news/politics/80480-glava-buryatii-vmeste-s-poslom-izrailya-v-rossii-pochtili-pamyat-geroev-vov/
+/news/politics/80482-glava-buryatii-predlozhil-poslu-izrailya-sotrudnichestvo-v-sfere-turizma-i-meditsiny/
+/news/politics/80436-glava-buryatii-i-prezident-tatarstana-obsudili-sotrudnichestvo-mezhdu-regionami/
+/news/politics/80456-glavnyy-kommunist-buryatii-ne-voshyel-v-short-list-ot-kprf/
+/news/politics/80508-glava-buryatii-nagradil-laureatov-gosudarstvennykh-premiy-respubliki-v-sfere-kultury-i-iskusstva//news/society/80553-pionery-buryatskogo-biznesa/
+/news/society/80527-zurkhay-na-10-marta-27-lunnyy-den/
+/news/society/80543-nakanune-10-marta-v-buryatii-ushel-iz-zhizni-eks-rektor-bgskha-aleksandr-popov/
+/news/society/80575-v-buryatii-startoval-federalnyy-proekt-chistaya-voda/
+/news/society/80548-v-stolitsu-buryatii-pribyl-arsen-fadzaev/
+/news/culture/80594-polozhenie-o-konkurse-rasskaza-2021/
+/news/society/80583-vystavka-okhotnichikh-laek-proshla-v-zakamenskom-rayone-buryatii/
+/news/society/80574-v-vuzakh-buryatii-poyavyatsya-prorektory-po-tsifrovizatsii/
+/news/society/80552-poyushchaya-garga-kak-zhivyet-poyushchee-selskoe-poselenie-v-kurumkane/
+/news/society/80572-zurkhay-na-11-marta-28-lunnyy-den/
+/news/society/80555-buryatiya-voshla-v-chislo-regionov-kotorye-podderzhali-pevitsu-manizhu/
+/news/society/80581-zhivaya-legenda-volnoy-borby-rossii-pribyl-v-buryatiyu-v-svoy-den-rozhdeniya/
+/news/society/80540-v-stolitsu-buryatii-nachali-pribyvat-uchastniki-i-gosti-chempionata-rossii-po-volnoy-borbe/
+/news/society/80542-stalo-izvestno-komu-iz-zhiteley-buryatii-udastsya-vyyti-na-pensiyu-dosrochno/
+/news/society/80602-v-buryatskom-sele-vydrino-otremontiruyut-detskuyu-shkolu-iskusstv/
+/news/society/80573-preimushchestvenno-bez-osadkov-dnyem-do-5-tepla-ozhidaetsya-v-buryatii-segodnya-11-marta-/
+/news/culture/79137-pamyati-geroev-rabotniki-sudebnoy-sistemy-zabaykalya-vypustili-knigu-vospominaniy-ob-uchastnikakh-vo/
+/news/society/80558-irkutskuyu-oblast-i-buryatiyu-vozmozhno-svyazhet-noveyshaya-elektrichka/
+/news/society/80535-v-buryatii-podveli-itogi-baykalskoy-mili/
+/news/society/80565-v-zaigraevskom-rayone-buryatii-otkrylas-novaya-shkola-na-450-mest/
+/news/society/80577-na-buryatiyu-obrushitsya-anomalnoe-poteplenie/
+/news/society/80584-v-buryatii-nachalis-meropriyatiya-po-profilaktike-ledyanykh-zatorov/
+/news/culture/80559-proverit-svoe-znanie-buryatskoy-grammatiki-smogut-zhiteli-buryatii/
+/news/culture/80475-dusha-v-tantse-studenty-kolledzha-iskusstv-dali-uroki-yekhora-dlya-zhurnalistov/
+/news/culture/80511-v-buryatii-otkrylas-vystavka-eksperimentalnogo-iskusstva-ii-sulde/
+/news/culture/80515-reper-iz-buryatii-zapisal-klip-s-pevitsey-kotoraya-vystupit-na-evrovidenii/
+/news/culture/80387-smysly-zurkhaya-geshe-tsyren-lama-o-tonkostyakh-buddiyskoy-astrologii-/
+/news/culture/80388-v-ulan-ude-sostoyalas-premera-opery-knyazya-igorya/
+/news/culture/80384-v-ulan-ude-vozvrashchaetsya-festival-uu-sound-/
+/news/culture/80521-v-ulan-ude-predstavili-dokumentalnyy-film-o-buryatskom-kostyume/
+/news/culture/80390-yunye-tsirkovye-artisty-buryatii-vpervye-vystupyat-v-tyumenskom-tsirke/
+/news/culture/80416-buryatskiy-ansambl-bulzhamuur-stal-laureatom-vserossiyskogo-festivalya/
+/news/culture/80592-v-ulan-ude-proshyel-kontsert-etnicheskoy-muzyki/
+/news/culture/80560-v-ulan-ude-nachinaet-svoyu-rabotu-etnokovorking/
+/news/culture/80536-knyaz-igor-v-buryatii-otkuda-v-stepi-drevnerusskaya-grust/
+/news/culture/80509-sezon-dozhdey-dadut-kontsert-v-ulan-ude/
+/news/culture/80557-koster-na-glavnoy-ploshchadi-buryatiya-gotovitsya-k-maslenitse/
+/news/culture/80522-v-ulan-ude-proshla-vstrecha-vesny-kak-zavershenie-vostochnogo-novogo-goda/
+/news/culture/80397-muzhchina-iz-severobaykalska-poluchil-realnyy-srok-za-povtornuyu-ezdu-v-pyanom-vide/
+/news/culture/80420-v-buryatii-izdali-knigu-minii-nyutag/
+/news/culture/80389-artisty-teatra-baykal-vyydut-v-efir-radio-mayak/
+/news/economy/80551-v-buryatii-rastet-spros-na-novye-avtomobili/
+/news/economy/80400-agrarii-buryatii-vernuli-chast-zemel-merii-ulan-ude/
+/news/economy/80462-v-buryatii-otremontiruyut-dorogu-v-posele/
+/news/economy/80447-biznes-buryatii-poluchit-vozmozhnosti-opravitsya-ot-posledstviy-pandemii/
+/news/economy/80549-prognoznyy-poleznyy-otpusk-elektroenergii-i-moshchnosti-po-tarifnym-gruppam/
+/news/economy/80427-v-buryatii-proydet-zasedanie-soveta-direktorov-kholdinga-vertolyety-rossii/
+/news/economy/80506-v-buryatii-otremontiruyut-odnu-iz-samykh-populyarnykh-avtodorog/
+/news/economy/80429-buryatiya-poluchit-okolo-3-milliardov-rubley-na-dorogi/
+/news/economy/80415-v-ulan-ude-startoval-akselerator-proektov-upravlentsev-buryatii/
+/news/economy/80432-buryatiya-poluchit-bolee-28-mln-rubley-na-podderzhku-proektov-sotsialno-orientirovannykh-organizatsi/
+/news/economy/80413-usloviya-dalnevostochnaya-ipoteka-namereny-uluchshit-v-buryatii-i-po-vsemu-dfo/
+/news/economy/80340-tunkinskiy-rayon-buryatii-poluchit-sredstva-na-razvitie-turizma/
+/news/economy/80435-ivan-alkheev-naznachen-zampredom-pravitelstva-buryatii/
+/news/economy/80433-na-ulan-udenskom-aviazavode-vpervye-proshlo-zasedanie-soveta-direktorov-vertolety-rossii/
+/news/economy/80317-bolee-2-tysyach-zhiteley-buryatii-prinyali-uchastie-v-biznes-vstreche-s-alekseem-tsydenovym/
+/news/economy/80478-produktsiya-iz-buryatii-vyshla-na-rynok-germanii/
+/news/economy/80412-buryatiya-priobretet-paket-aktsiy-avrora/
+/news/economy/80488-v-buryatii-razdeli-mestorozhdenie-urana/
+/news/economy/80344-pereezd-na-dalniy-vostok-pozvolit-buryatii-reanimirovat-dva-krupnykh-proekta/
+/news/economy/80491-buryatiya-voshla-v-zonu-modernizatsii-zheleznykh-dorog/
+/news/sports/80463-vdokhnovlyayushchie-rezultaty-novye-vzlyety-khudozhestvennoy-gimnastiki-buryatii/
+/news/sports/80600-v-kabanskom-rayone-buryatii-otkryli-sportivnuyu-ploshchadku/
+/news/sports/80546-studentki-iz-buryatii-na-pedestale-rossiyskogo-urovnya/
+/news/sports/80331-boksery-iz-buryatii-zavoevali-shest-medaley-na-vserossiyskom-turnire-klassa-a/
+/news/sports/80520-final-kubka-buryatii-ne-doigrali-no-pobeditelya-opredelili/
+/news/sports/80578-na-chempionate-rossii-v-ulan-ude-103-bortsa-opredelyat-pervykh-finalistov-obnovlyaetsya/
+/news/sports/80477-borets-iz-buryatii-rasskazal-kak-borolsya-za-ameriku/
+/news/sports/80351-boytsy-federatsii-pankrationa-buryatii-vystupili-na-urovne-dfo/
+/news/sports/80601-ministr-sporta-buryatii-chempionat-rossii-dast-novyy-impuls-razvitiyu-volnoy-borby-v-respublike/
+/news/sports/80507-buryatiya-utverdila-okonchatelnyy-sostav-sbornoy-dlya-uchastiya-v-chempionate-rossii-po-volnoy-borbe/
+/news/sports/80494-borits-iz-buryatii-zakryli-na-karantin-v-rime-/
+/news/sports/80472-na-lyzhakh-s-lukom-buryatskaya-sportsmenka-vyigrala-chetyre-medali-na-pervenstve-rossii/
+/news/sports/80547-beskompromissnye-igry-buryatskikh-shakhmatistov-na-dalnem-vostoke/
+/news/sports/80354-pervyy-bortsovskiy-internat-buryatii-vzyal-na-vooruzhenie-igru-go/
+/news/sports/80568-v-stolitse-buryatii-proshla-pervaya-zherebevka-chempionata-rossii-po-volnoy-borbe/
+/news/sports/80525-glavnyy-favorit-gryadushchego-chempionata-rossii-v-buryatii-ozvuchil-svoy-sostav/
+/news/sports/80470-v-buryatii-gryadet-bitva-titanov/
+/news/sports/80452-sportsmeny-iz-eravninskogo-rayona-buryatii-oderzhali-dve-pobedy/
+/news/sports/80345-znamenityy-rossiyskiy-futbolist-vyigral-baykalskiy-marafon-v-buryatii/
+/news/incidents/80181-v-buryatii-stali-bolshe-pit/
+/news/incidents/80467-v-buryatii-vyyavili-pyat-narusheniy-protivoepidemicheskikh-mer/
+/news/incidents/80408-byvshemu-rukovoditelyu-energeticheskoy-kompanii-iz-buryatii-inkriminiruyut-sozdanie-opg/
+/news/incidents/80517-kommunisty-buryatii-obvinili-organizatorov-baykalskoy-mili/
+/news/incidents/80453-v-kabanskom-rayone-buryatii-zhiteli-zamerzayut-v-svoikh-domakh-/
+/news/incidents/80461-v-bichure-podtopilo-pushkina-/
+/news/incidents/80226-v-buryatii-vynesli-verdikt-po-delu-o-napadenii-sobak-na-pervoklassnika/
+/news/incidents/80471-v-belorussii-izbili-futbolnogo-trenera-iz-buryatii/
+/news/incidents/80530-delo-o-zaderzhanii-brakonerov-v-buryatii-prokommentiroval-skr-/
+/news/incidents/80411-na-baykale-zamorozili-demontazh-zavoda-po-rozlivu-baykalskoy-vody/
+/news/incidents/80399-vrachi-rasskazali-o-sostoyanii-rebenka-ranennogo-v-buryatii/
+/news/incidents/80545-v-buryatii-pri-pozhare-v-zhilom-dome-spaslis-mat-i-dvoe-detey/
+/news/incidents/80582-v-kurumkanskom-rayone-buryatii-zaderzhan-pokhititel-myasa/
+/news/incidents/80349-musornaya-reforma-v-buryatii-stala-povodom-dlya-politicheskikh-sporov/
+/news/incidents/80380-v-buryatii-snova-otlichilsya-voditel-leksusa/
+/news/incidents/80422-sledstvennyy-komitet-po-buryatii-dal-kommentariy-po-povodu-zaderzhaniya-sergeya-ivanova/
+/news/incidents/80191-intsident-na-baykale-pod-kontrolem-pravitelstva-buryatii/
+/news/incidents/80326-v-buryatii-obyasnili-prichiny-tramvaynogo-kollapsa-v-ulan-ude/
+/news/incidents/80529-materialnyy-ushcherb-po-faktu-gibeli-podrostkov-v-buryatii-vzyshchut-s-vladeltsa-avto/
+/news/incidents/80492-v-buryatii-obvinyayut-inspektora-kotoryy-zaderzhal-vliyatelnykh-brakonerov-/
+/news/politics/80591-ministr-sporta-buryatii-otmetil-naplyv-imenitykh-sportsmenov-v-dni-chempionata-rossii-po-volnoy-borb/
+/news/politics/80588-glava-buryatii-prinyal-uchastie-v-soveshchanii-generalnogo-direktora-oao-rzhd/
+/news/politics/80590-glava-buryatii-vyekhal-s-rabochey-poezdkoy-v-mukhorshibirskiy-rayon/
+/news/politics/80498-aleksey-tsydenov-pozdravil-vsekh-zhenshchin-buryatii-s-8-marta-/
+/news/politics/80538-delo-o-zaderzhanii-brakonerov-v-buryatii-poruchil-vzyat-pod-kontrol-yuriy-trutnev/
+/news/politics/80480-glava-buryatii-vmeste-s-poslom-izrailya-v-rossii-pochtili-pamyat-geroev-vov/
+/news/politics/80482-glava-buryatii-predlozhil-poslu-izrailya-sotrudnichestvo-v-sfere-turizma-i-meditsiny/
+/news/politics/80436-glava-buryatii-i-prezident-tatarstana-obsudili-sotrudnichestvo-mezhdu-regionami/
+/news/politics/80456-glavnyy-kommunist-buryatii-ne-voshyel-v-short-list-ot-kprf/
+/news/politics/80508-glava-buryatii-nagradil-laureatov-gosudarstvennykh-premiy-respubliki-v-sfere-kultury-i-iskusstva/
\ No newline at end of file
diff --git a/scrapper.py b/scrapper.py
index 7058e47d..a2d86e41 100644
--- a/scrapper.py
+++ b/scrapper.py
@@ -8,9 +8,10 @@
 import requests
 from requests.exceptions import RequestException
 from bs4 import BeautifulSoup
-# from time import sleep as wait
+from time import sleep as wait
+from random import randint
 from article import Article
-from constants import CRAWLER_CONFIG_PATH, ASSETS_PATH
+from constants import CRAWLER_CONFIG_PATH, ASSETS_PATH, LINKS_STORAGE, URL_START
 
 
 class IncorrectURLError(Exception):
@@ -47,8 +48,6 @@ def __init__(self, seed_urls: list, total_max_articles: int, max_articles_per_se
         self.max_articles_per_seed = max_articles_per_seed
         self.urls = []
 
-        self.url_start = 'https://burunen.ru'
-
     @staticmethod
     def _extract_url(article_bs, seen):
         extracted = list({link['href'] for link in article_bs.find_all('a', href=True)})
@@ -68,13 +67,12 @@ def find_articles(self):
         self.urls = [i for i in self.urls if len(i) > 20
                      and not any(map(lambda y: y.isupper(), i))][:self.total_max_articles]
         print('Scraped seed urls, overall number of urls is', len(self.urls))
-
         old = len(self.urls)
         while len(self.urls) < self.total_max_articles:
             print('Due to insufficient number started further iteration')
             print('current number', len(self.urls), ', required', self.total_max_articles)
             for link in self.urls:
-                article_bs = BeautifulSoup(requests.get(self.url_start + link, 'html.parser').text, 'html.parser')
+                article_bs = BeautifulSoup(requests.get(URL_START + link, 'html.parser').text, 'html.parser')
                 newfound = list(filter(lambda x: len(x) > 20, self._extract_url(article_bs, self.urls)))
                 print('    checked new url, found', len(newfound), 'articles')
                 self.urls.extend(newfound[:self.max_articles_per_seed])
@@ -87,11 +85,71 @@ def find_articles(self):
 
             self.urls = self.urls[:self.total_max_articles]
 
+
+class CrawlerRecursive(Crawler):
+
+    def __init__(self, seed_urls: list, total_max_articles: int, max_articles_per_seed: int):
+        super().__init__(seed_urls, total_max_articles, max_articles_per_seed)
+
+    def find_articles(self):
+        if self.get_backedup():
+            print('backed up urls found, starting iteration')
+        if not self.urls:
+            for link in self.seed_urls:
+                # wait(randint(0, 10))
+                article_bs = BeautifulSoup(requests.get(link, 'html.parser').text, 'html.parser')
+                newfound = self._extract_url(article_bs, self.urls)
+                self.urls.extend(newfound)
+                self.urls = [i for i in self.urls if len(i) > 20
+                             and not any(map(lambda y: y.isupper(), i))]
+                with open('links/url_backup.txt', 'w', encoding='utf-8') as file:
+                    file.write('\n'.join(self.urls))
+            print(f'Scraped {len(self.urls)} from seed')
+            if self.verify_proceed():
+                print('starting recursive scraping')
+                self.find_articles()
+            else:
+                print(f'recursive crawling finished with {len(self.urls)} urls.')
+        else:
+            old = len(self.urls)
+            for link in self.urls:
+                # wait(randint(0, 10))
+                article_bs = BeautifulSoup(requests.get(URL_START + link, 'html.parser').text, 'html.parser')
+                newfound = self._extract_url(article_bs, self.urls)
+                newfound = [i for i in newfound if len(i) > 20
+                            and not any(map(lambda y: y.isupper(), i))]
+                self.urls.extend(newfound)
+            with open('links/url_backup.txt', 'a', encoding='utf-8') as file:
+                file.write('\n'.join(self.urls))
+            if len(self.urls) == old:
+                print(f'there are no unseen links found\nrecursive crawling finished with {len(self.urls)} urls.')
+            else:
+                print(f'found {len(self.urls) - old} new urls')
+                if self.verify_proceed():
+                    print('starting new iteration')
+                    self.find_articles()
+                else:
+                    print(f'recursive crawling finished with {len(self.urls)} urls.')
+
+    @staticmethod
+    def verify_proceed():
+        answer = input('Would you like to proceed? yes or no: ').strip()
+        return True if answer == 'yes' else False
+
+    def get_backedup(self):
+        try:
+            with open('links/url_backup.txt', 'r', encoding='utf-8') as file:
+                sources = file.read().split('\n')
+                self.urls = sources
+                return True
+        except FileNotFoundError:
+            return False
+
     def get_search_urls(self):
         """
         Returns seed_urls param
         """
-        pass
+        return self.urls
 
 
 class ArticleParser:
@@ -174,9 +232,16 @@ def prepare_environment(base_path):
     """
     Creates ASSETS_PATH folder if not created and removes existing folder
     """
-    newpath = r'{}/ASSETS_PATH'.format(base_path)
-    if not os.path.exists(newpath):
-        os.makedirs(newpath)
+    if not os.path.exists(base_path):
+        os.makedirs(base_path)
+
+
+def enable_backup(base_path):
+    """
+    Creates folder for backup links if not created
+    """
+    if not os.path.exists(base_path):
+        os.makedirs(base_path)
 
 
 def validate_config(crawler_path):
@@ -215,12 +280,13 @@ def validate_config(crawler_path):
 
 if __name__ == '__main__':
     prepare_environment(ASSETS_PATH)
+    enable_backup(LINKS_STORAGE)
     seedurls, max_articles, max_arts_per_seed = validate_config(CRAWLER_CONFIG_PATH)
     if not max_arts_per_seed:
         max_arts_per_seed = max_articles
-    crawler = Crawler(seed_urls=seedurls,
-                      total_max_articles=max_articles,
-                      max_articles_per_seed=max_arts_per_seed)
+    crawler = CrawlerRecursive(seed_urls=seedurls,
+                               total_max_articles=max_articles,
+                               max_articles_per_seed=max_arts_per_seed)
 
     crawler.find_articles()
     # print('Scraped', len(crawler.urls), 'articles')
@@ -228,7 +294,7 @@ def validate_config(crawler_path):
     print('onto parsing')
 
     for n, url in enumerate(crawler.urls):
-        full_url = crawler.url_start + url
+        full_url = URL_START + url
         parser = ArticleParser(full_url, n + 1)
         parser.parse()
     print('parsing is finished')

From 54a49ed564588d6714170139ca0e994a6e7e70b9 Mon Sep 17 00:00:00 2001
From: marina-kaz <isthismeowgoingfrommyheart@gmail.com>
Date: Thu, 11 Mar 2021 21:13:53 +0300
Subject: [PATCH 18/50] fixed my favorite lint

---
 constants.py |  2 +-
 scrapper.py  | 28 +++++++++++++++-------------
 2 files changed, 16 insertions(+), 14 deletions(-)

diff --git a/constants.py b/constants.py
index 3dc98002..3a7976d0 100644
--- a/constants.py
+++ b/constants.py
@@ -8,4 +8,4 @@
 ASSETS_PATH = os.path.join(PROJECT_ROOT, 'tmp', 'articles')
 CRAWLER_CONFIG_PATH = os.path.join(PROJECT_ROOT, 'crawler_config.json')
 LINKS_STORAGE = os.path.join(PROJECT_ROOT, 'links')
-URL_START = 'https://burunen.ru'
\ No newline at end of file
+URL_START = 'https://burunen.ru'
diff --git a/scrapper.py b/scrapper.py
index a2d86e41..65676a05 100644
--- a/scrapper.py
+++ b/scrapper.py
@@ -6,10 +6,10 @@
 import json
 from datetime import date
 import requests
+from random import randint
+from time import sleep as wait
 from requests.exceptions import RequestException
 from bs4 import BeautifulSoup
-from time import sleep as wait
-from random import randint
 from article import Article
 from constants import CRAWLER_CONFIG_PATH, ASSETS_PATH, LINKS_STORAGE, URL_START
 
@@ -76,7 +76,6 @@ def find_articles(self):
                 newfound = list(filter(lambda x: len(x) > 20, self._extract_url(article_bs, self.urls)))
                 print('    checked new url, found', len(newfound), 'articles')
                 self.urls.extend(newfound[:self.max_articles_per_seed])
-                # wait(10)
                 if len(self.urls) > self.total_max_articles:
                     break
             if len(self.urls) == old:
@@ -85,18 +84,26 @@ def find_articles(self):
 
             self.urls = self.urls[:self.total_max_articles]
 
+    def get_search_urls(self):
+        """
+        Returns seed_urls param
+        """
+        return self.urls
+
 
 class CrawlerRecursive(Crawler):
 
-    def __init__(self, seed_urls: list, total_max_articles: int, max_articles_per_seed: int):
+    def __init__(self, seed_urls: list, total_max_articles: int, max_articles_per_seed: int, to_wait=False):
         super().__init__(seed_urls, total_max_articles, max_articles_per_seed)
+        self.is_waiting = to_wait
 
     def find_articles(self):
         if self.get_backedup():
             print('backed up urls found, starting iteration')
         if not self.urls:
             for link in self.seed_urls:
-                # wait(randint(0, 10))
+                if self.is_waiting:
+                    wait(randint(0, 10))
                 article_bs = BeautifulSoup(requests.get(link, 'html.parser').text, 'html.parser')
                 newfound = self._extract_url(article_bs, self.urls)
                 self.urls.extend(newfound)
@@ -113,7 +120,8 @@ def find_articles(self):
         else:
             old = len(self.urls)
             for link in self.urls:
-                # wait(randint(0, 10))
+                if self.is_waiting:
+                    wait(randint(0, 10))
                 article_bs = BeautifulSoup(requests.get(URL_START + link, 'html.parser').text, 'html.parser')
                 newfound = self._extract_url(article_bs, self.urls)
                 newfound = [i for i in newfound if len(i) > 20
@@ -134,7 +142,7 @@ def find_articles(self):
     @staticmethod
     def verify_proceed():
         answer = input('Would you like to proceed? yes or no: ').strip()
-        return True if answer == 'yes' else False
+        return answer == 'yes'
 
     def get_backedup(self):
         try:
@@ -145,12 +153,6 @@ def get_backedup(self):
         except FileNotFoundError:
             return False
 
-    def get_search_urls(self):
-        """
-        Returns seed_urls param
-        """
-        return self.urls
-
 
 class ArticleParser:
     """

From 2854e58003ec91eabff24dc44f0db41b42b9a627 Mon Sep 17 00:00:00 2001
From: marina-kaz <isthismeowgoingfrommyheart@gmail.com>
Date: Thu, 11 Mar 2021 21:21:49 +0300
Subject: [PATCH 19/50] optimized a few things

---
 scrapper.py | 44 ++++++++++++++++++++------------------------
 1 file changed, 20 insertions(+), 24 deletions(-)

diff --git a/scrapper.py b/scrapper.py
index 65676a05..eb1295f2 100644
--- a/scrapper.py
+++ b/scrapper.py
@@ -5,9 +5,9 @@
 import os
 import json
 from datetime import date
-import requests
 from random import randint
 from time import sleep as wait
+import requests
 from requests.exceptions import RequestException
 from bs4 import BeautifulSoup
 from article import Article
@@ -97,20 +97,23 @@ def __init__(self, seed_urls: list, total_max_articles: int, max_articles_per_se
         super().__init__(seed_urls, total_max_articles, max_articles_per_seed)
         self.is_waiting = to_wait
 
+    def _crawl(self, pool: list):
+        for link in pool:
+            if self.is_waiting:
+                wait(randint(0, 10))
+            article_bs = BeautifulSoup(requests.get(URL_START + link, 'html.parser').text, 'html.parser')
+            newfound = self._extract_url(article_bs, self.urls)
+            newfound = [i for i in newfound if len(i) > 20
+                        and not any(map(lambda y: y.isupper(), i))]
+            return newfound
+
     def find_articles(self):
         if self.get_backedup():
             print('backed up urls found, starting iteration')
         if not self.urls:
-            for link in self.seed_urls:
-                if self.is_waiting:
-                    wait(randint(0, 10))
-                article_bs = BeautifulSoup(requests.get(link, 'html.parser').text, 'html.parser')
-                newfound = self._extract_url(article_bs, self.urls)
-                self.urls.extend(newfound)
-                self.urls = [i for i in self.urls if len(i) > 20
-                             and not any(map(lambda y: y.isupper(), i))]
-                with open('links/url_backup.txt', 'w', encoding='utf-8') as file:
-                    file.write('\n'.join(self.urls))
+            self.urls = self._crawl(self.seed_urls)
+            with open('links/url_backup.txt', 'w', encoding='utf-8') as file:
+                file.write('\n'.join(self.urls))
             print(f'Scraped {len(self.urls)} from seed')
             if self.verify_proceed():
                 print('starting recursive scraping')
@@ -118,21 +121,14 @@ def find_articles(self):
             else:
                 print(f'recursive crawling finished with {len(self.urls)} urls.')
         else:
-            old = len(self.urls)
-            for link in self.urls:
-                if self.is_waiting:
-                    wait(randint(0, 10))
-                article_bs = BeautifulSoup(requests.get(URL_START + link, 'html.parser').text, 'html.parser')
-                newfound = self._extract_url(article_bs, self.urls)
-                newfound = [i for i in newfound if len(i) > 20
-                            and not any(map(lambda y: y.isupper(), i))]
-                self.urls.extend(newfound)
-            with open('links/url_backup.txt', 'a', encoding='utf-8') as file:
-                file.write('\n'.join(self.urls))
-            if len(self.urls) == old:
+            newfound = self._crawl(self.urls)
+            if not newfound:
                 print(f'there are no unseen links found\nrecursive crawling finished with {len(self.urls)} urls.')
             else:
-                print(f'found {len(self.urls) - old} new urls')
+                self.urls.extend(newfound)
+                with open('links/url_backup.txt', 'a', encoding='utf-8') as file:
+                    file.write('\n'.join(newfound))
+                print(f'found {len(newfound)} new urls')
                 if self.verify_proceed():
                     print('starting new iteration')
                     self.find_articles()

From 358f74ba8f1128f862e904ddf1eff20a284a256d Mon Sep 17 00:00:00 2001
From: marina-kaz <isthismeowgoingfrommyheart@gmail.com>
Date: Thu, 11 Mar 2021 21:55:52 +0300
Subject: [PATCH 20/50] improved text formation

---
 links/url_backup.txt | 320 -------------------------------------------
 scrapper.py          |  13 +-
 2 files changed, 8 insertions(+), 325 deletions(-)
 delete mode 100644 links/url_backup.txt

diff --git a/links/url_backup.txt b/links/url_backup.txt
deleted file mode 100644
index d7bb92f5..00000000
--- a/links/url_backup.txt
+++ /dev/null
@@ -1,320 +0,0 @@
-/news/society/80553-pionery-buryatskogo-biznesa/
-/news/society/80527-zurkhay-na-10-marta-27-lunnyy-den/
-/news/society/80543-nakanune-10-marta-v-buryatii-ushel-iz-zhizni-eks-rektor-bgskha-aleksandr-popov/
-/news/society/80575-v-buryatii-startoval-federalnyy-proekt-chistaya-voda/
-/news/society/80548-v-stolitsu-buryatii-pribyl-arsen-fadzaev/
-/news/culture/80594-polozhenie-o-konkurse-rasskaza-2021/
-/news/society/80583-vystavka-okhotnichikh-laek-proshla-v-zakamenskom-rayone-buryatii/
-/news/society/80574-v-vuzakh-buryatii-poyavyatsya-prorektory-po-tsifrovizatsii/
-/news/society/80552-poyushchaya-garga-kak-zhivyet-poyushchee-selskoe-poselenie-v-kurumkane/
-/news/society/80572-zurkhay-na-11-marta-28-lunnyy-den/
-/news/society/80555-buryatiya-voshla-v-chislo-regionov-kotorye-podderzhali-pevitsu-manizhu/
-/news/society/80581-zhivaya-legenda-volnoy-borby-rossii-pribyl-v-buryatiyu-v-svoy-den-rozhdeniya/
-/news/society/80540-v-stolitsu-buryatii-nachali-pribyvat-uchastniki-i-gosti-chempionata-rossii-po-volnoy-borbe/
-/news/society/80542-stalo-izvestno-komu-iz-zhiteley-buryatii-udastsya-vyyti-na-pensiyu-dosrochno/
-/news/society/80602-v-buryatskom-sele-vydrino-otremontiruyut-detskuyu-shkolu-iskusstv/
-/news/society/80573-preimushchestvenno-bez-osadkov-dnyem-do-5-tepla-ozhidaetsya-v-buryatii-segodnya-11-marta-/
-/news/culture/79137-pamyati-geroev-rabotniki-sudebnoy-sistemy-zabaykalya-vypustili-knigu-vospominaniy-ob-uchastnikakh-vo/
-/news/society/80558-irkutskuyu-oblast-i-buryatiyu-vozmozhno-svyazhet-noveyshaya-elektrichka/
-/news/society/80535-v-buryatii-podveli-itogi-baykalskoy-mili/
-/news/society/80565-v-zaigraevskom-rayone-buryatii-otkrylas-novaya-shkola-na-450-mest/
-/news/society/80577-na-buryatiyu-obrushitsya-anomalnoe-poteplenie/
-/news/society/80584-v-buryatii-nachalis-meropriyatiya-po-profilaktike-ledyanykh-zatorov/
-/news/culture/80559-proverit-svoe-znanie-buryatskoy-grammatiki-smogut-zhiteli-buryatii/
-/news/culture/80475-dusha-v-tantse-studenty-kolledzha-iskusstv-dali-uroki-yekhora-dlya-zhurnalistov/
-/news/culture/80511-v-buryatii-otkrylas-vystavka-eksperimentalnogo-iskusstva-ii-sulde/
-/news/culture/80515-reper-iz-buryatii-zapisal-klip-s-pevitsey-kotoraya-vystupit-na-evrovidenii/
-/news/culture/80387-smysly-zurkhaya-geshe-tsyren-lama-o-tonkostyakh-buddiyskoy-astrologii-/
-/news/culture/80388-v-ulan-ude-sostoyalas-premera-opery-knyazya-igorya/
-/news/culture/80384-v-ulan-ude-vozvrashchaetsya-festival-uu-sound-/
-/news/culture/80521-v-ulan-ude-predstavili-dokumentalnyy-film-o-buryatskom-kostyume/
-/news/culture/80390-yunye-tsirkovye-artisty-buryatii-vpervye-vystupyat-v-tyumenskom-tsirke/
-/news/culture/80416-buryatskiy-ansambl-bulzhamuur-stal-laureatom-vserossiyskogo-festivalya/
-/news/culture/80592-v-ulan-ude-proshyel-kontsert-etnicheskoy-muzyki/
-/news/culture/80560-v-ulan-ude-nachinaet-svoyu-rabotu-etnokovorking/
-/news/culture/80536-knyaz-igor-v-buryatii-otkuda-v-stepi-drevnerusskaya-grust/
-/news/culture/80509-sezon-dozhdey-dadut-kontsert-v-ulan-ude/
-/news/culture/80557-koster-na-glavnoy-ploshchadi-buryatiya-gotovitsya-k-maslenitse/
-/news/culture/80522-v-ulan-ude-proshla-vstrecha-vesny-kak-zavershenie-vostochnogo-novogo-goda/
-/news/culture/80397-muzhchina-iz-severobaykalska-poluchil-realnyy-srok-za-povtornuyu-ezdu-v-pyanom-vide/
-/news/culture/80420-v-buryatii-izdali-knigu-minii-nyutag/
-/news/culture/80389-artisty-teatra-baykal-vyydut-v-efir-radio-mayak/
-/news/economy/80551-v-buryatii-rastet-spros-na-novye-avtomobili/
-/news/economy/80400-agrarii-buryatii-vernuli-chast-zemel-merii-ulan-ude/
-/news/economy/80462-v-buryatii-otremontiruyut-dorogu-v-posele/
-/news/economy/80447-biznes-buryatii-poluchit-vozmozhnosti-opravitsya-ot-posledstviy-pandemii/
-/news/economy/80549-prognoznyy-poleznyy-otpusk-elektroenergii-i-moshchnosti-po-tarifnym-gruppam/
-/news/economy/80427-v-buryatii-proydet-zasedanie-soveta-direktorov-kholdinga-vertolyety-rossii/
-/news/economy/80506-v-buryatii-otremontiruyut-odnu-iz-samykh-populyarnykh-avtodorog/
-/news/economy/80429-buryatiya-poluchit-okolo-3-milliardov-rubley-na-dorogi/
-/news/economy/80415-v-ulan-ude-startoval-akselerator-proektov-upravlentsev-buryatii/
-/news/economy/80432-buryatiya-poluchit-bolee-28-mln-rubley-na-podderzhku-proektov-sotsialno-orientirovannykh-organizatsi/
-/news/economy/80413-usloviya-dalnevostochnaya-ipoteka-namereny-uluchshit-v-buryatii-i-po-vsemu-dfo/
-/news/economy/80340-tunkinskiy-rayon-buryatii-poluchit-sredstva-na-razvitie-turizma/
-/news/economy/80435-ivan-alkheev-naznachen-zampredom-pravitelstva-buryatii/
-/news/economy/80433-na-ulan-udenskom-aviazavode-vpervye-proshlo-zasedanie-soveta-direktorov-vertolety-rossii/
-/news/economy/80317-bolee-2-tysyach-zhiteley-buryatii-prinyali-uchastie-v-biznes-vstreche-s-alekseem-tsydenovym/
-/news/economy/80478-produktsiya-iz-buryatii-vyshla-na-rynok-germanii/
-/news/economy/80412-buryatiya-priobretet-paket-aktsiy-avrora/
-/news/economy/80488-v-buryatii-razdeli-mestorozhdenie-urana/
-/news/economy/80344-pereezd-na-dalniy-vostok-pozvolit-buryatii-reanimirovat-dva-krupnykh-proekta/
-/news/economy/80491-buryatiya-voshla-v-zonu-modernizatsii-zheleznykh-dorog/
-/news/sports/80463-vdokhnovlyayushchie-rezultaty-novye-vzlyety-khudozhestvennoy-gimnastiki-buryatii/
-/news/sports/80600-v-kabanskom-rayone-buryatii-otkryli-sportivnuyu-ploshchadku/
-/news/sports/80546-studentki-iz-buryatii-na-pedestale-rossiyskogo-urovnya/
-/news/sports/80331-boksery-iz-buryatii-zavoevali-shest-medaley-na-vserossiyskom-turnire-klassa-a/
-/news/sports/80520-final-kubka-buryatii-ne-doigrali-no-pobeditelya-opredelili/
-/news/sports/80578-na-chempionate-rossii-v-ulan-ude-103-bortsa-opredelyat-pervykh-finalistov-obnovlyaetsya/
-/news/sports/80477-borets-iz-buryatii-rasskazal-kak-borolsya-za-ameriku/
-/news/sports/80351-boytsy-federatsii-pankrationa-buryatii-vystupili-na-urovne-dfo/
-/news/sports/80601-ministr-sporta-buryatii-chempionat-rossii-dast-novyy-impuls-razvitiyu-volnoy-borby-v-respublike/
-/news/sports/80507-buryatiya-utverdila-okonchatelnyy-sostav-sbornoy-dlya-uchastiya-v-chempionate-rossii-po-volnoy-borbe/
-/news/sports/80494-borits-iz-buryatii-zakryli-na-karantin-v-rime-/
-/news/sports/80472-na-lyzhakh-s-lukom-buryatskaya-sportsmenka-vyigrala-chetyre-medali-na-pervenstve-rossii/
-/news/sports/80547-beskompromissnye-igry-buryatskikh-shakhmatistov-na-dalnem-vostoke/
-/news/sports/80354-pervyy-bortsovskiy-internat-buryatii-vzyal-na-vooruzhenie-igru-go/
-/news/sports/80568-v-stolitse-buryatii-proshla-pervaya-zherebevka-chempionata-rossii-po-volnoy-borbe/
-/news/sports/80525-glavnyy-favorit-gryadushchego-chempionata-rossii-v-buryatii-ozvuchil-svoy-sostav/
-/news/sports/80470-v-buryatii-gryadet-bitva-titanov/
-/news/sports/80452-sportsmeny-iz-eravninskogo-rayona-buryatii-oderzhali-dve-pobedy/
-/news/sports/80345-znamenityy-rossiyskiy-futbolist-vyigral-baykalskiy-marafon-v-buryatii/
-/news/incidents/80181-v-buryatii-stali-bolshe-pit/
-/news/incidents/80467-v-buryatii-vyyavili-pyat-narusheniy-protivoepidemicheskikh-mer/
-/news/incidents/80408-byvshemu-rukovoditelyu-energeticheskoy-kompanii-iz-buryatii-inkriminiruyut-sozdanie-opg/
-/news/incidents/80517-kommunisty-buryatii-obvinili-organizatorov-baykalskoy-mili/
-/news/incidents/80453-v-kabanskom-rayone-buryatii-zhiteli-zamerzayut-v-svoikh-domakh-/
-/news/incidents/80461-v-bichure-podtopilo-pushkina-/
-/news/incidents/80226-v-buryatii-vynesli-verdikt-po-delu-o-napadenii-sobak-na-pervoklassnika/
-/news/incidents/80471-v-belorussii-izbili-futbolnogo-trenera-iz-buryatii/
-/news/incidents/80530-delo-o-zaderzhanii-brakonerov-v-buryatii-prokommentiroval-skr-/
-/news/incidents/80411-na-baykale-zamorozili-demontazh-zavoda-po-rozlivu-baykalskoy-vody/
-/news/incidents/80399-vrachi-rasskazali-o-sostoyanii-rebenka-ranennogo-v-buryatii/
-/news/incidents/80545-v-buryatii-pri-pozhare-v-zhilom-dome-spaslis-mat-i-dvoe-detey/
-/news/incidents/80582-v-kurumkanskom-rayone-buryatii-zaderzhan-pokhititel-myasa/
-/news/incidents/80349-musornaya-reforma-v-buryatii-stala-povodom-dlya-politicheskikh-sporov/
-/news/incidents/80380-v-buryatii-snova-otlichilsya-voditel-leksusa/
-/news/incidents/80422-sledstvennyy-komitet-po-buryatii-dal-kommentariy-po-povodu-zaderzhaniya-sergeya-ivanova/
-/news/incidents/80191-intsident-na-baykale-pod-kontrolem-pravitelstva-buryatii/
-/news/incidents/80326-v-buryatii-obyasnili-prichiny-tramvaynogo-kollapsa-v-ulan-ude/
-/news/incidents/80529-materialnyy-ushcherb-po-faktu-gibeli-podrostkov-v-buryatii-vzyshchut-s-vladeltsa-avto/
-/news/incidents/80492-v-buryatii-obvinyayut-inspektora-kotoryy-zaderzhal-vliyatelnykh-brakonerov-/
-/news/politics/80591-ministr-sporta-buryatii-otmetil-naplyv-imenitykh-sportsmenov-v-dni-chempionata-rossii-po-volnoy-borb/
-/news/politics/80588-glava-buryatii-prinyal-uchastie-v-soveshchanii-generalnogo-direktora-oao-rzhd//news/society/80553-pionery-buryatskogo-biznesa/
-/news/society/80527-zurkhay-na-10-marta-27-lunnyy-den/
-/news/society/80543-nakanune-10-marta-v-buryatii-ushel-iz-zhizni-eks-rektor-bgskha-aleksandr-popov/
-/news/society/80575-v-buryatii-startoval-federalnyy-proekt-chistaya-voda/
-/news/society/80548-v-stolitsu-buryatii-pribyl-arsen-fadzaev/
-/news/culture/80594-polozhenie-o-konkurse-rasskaza-2021/
-/news/society/80583-vystavka-okhotnichikh-laek-proshla-v-zakamenskom-rayone-buryatii/
-/news/society/80574-v-vuzakh-buryatii-poyavyatsya-prorektory-po-tsifrovizatsii/
-/news/society/80552-poyushchaya-garga-kak-zhivyet-poyushchee-selskoe-poselenie-v-kurumkane/
-/news/society/80572-zurkhay-na-11-marta-28-lunnyy-den/
-/news/society/80555-buryatiya-voshla-v-chislo-regionov-kotorye-podderzhali-pevitsu-manizhu/
-/news/society/80581-zhivaya-legenda-volnoy-borby-rossii-pribyl-v-buryatiyu-v-svoy-den-rozhdeniya/
-/news/society/80540-v-stolitsu-buryatii-nachali-pribyvat-uchastniki-i-gosti-chempionata-rossii-po-volnoy-borbe/
-/news/society/80542-stalo-izvestno-komu-iz-zhiteley-buryatii-udastsya-vyyti-na-pensiyu-dosrochno/
-/news/society/80602-v-buryatskom-sele-vydrino-otremontiruyut-detskuyu-shkolu-iskusstv/
-/news/society/80573-preimushchestvenno-bez-osadkov-dnyem-do-5-tepla-ozhidaetsya-v-buryatii-segodnya-11-marta-/
-/news/culture/79137-pamyati-geroev-rabotniki-sudebnoy-sistemy-zabaykalya-vypustili-knigu-vospominaniy-ob-uchastnikakh-vo/
-/news/society/80558-irkutskuyu-oblast-i-buryatiyu-vozmozhno-svyazhet-noveyshaya-elektrichka/
-/news/society/80535-v-buryatii-podveli-itogi-baykalskoy-mili/
-/news/society/80565-v-zaigraevskom-rayone-buryatii-otkrylas-novaya-shkola-na-450-mest/
-/news/society/80577-na-buryatiyu-obrushitsya-anomalnoe-poteplenie/
-/news/society/80584-v-buryatii-nachalis-meropriyatiya-po-profilaktike-ledyanykh-zatorov/
-/news/culture/80559-proverit-svoe-znanie-buryatskoy-grammatiki-smogut-zhiteli-buryatii/
-/news/culture/80475-dusha-v-tantse-studenty-kolledzha-iskusstv-dali-uroki-yekhora-dlya-zhurnalistov/
-/news/culture/80511-v-buryatii-otkrylas-vystavka-eksperimentalnogo-iskusstva-ii-sulde/
-/news/culture/80515-reper-iz-buryatii-zapisal-klip-s-pevitsey-kotoraya-vystupit-na-evrovidenii/
-/news/culture/80387-smysly-zurkhaya-geshe-tsyren-lama-o-tonkostyakh-buddiyskoy-astrologii-/
-/news/culture/80388-v-ulan-ude-sostoyalas-premera-opery-knyazya-igorya/
-/news/culture/80384-v-ulan-ude-vozvrashchaetsya-festival-uu-sound-/
-/news/culture/80521-v-ulan-ude-predstavili-dokumentalnyy-film-o-buryatskom-kostyume/
-/news/culture/80390-yunye-tsirkovye-artisty-buryatii-vpervye-vystupyat-v-tyumenskom-tsirke/
-/news/culture/80416-buryatskiy-ansambl-bulzhamuur-stal-laureatom-vserossiyskogo-festivalya/
-/news/culture/80592-v-ulan-ude-proshyel-kontsert-etnicheskoy-muzyki/
-/news/culture/80560-v-ulan-ude-nachinaet-svoyu-rabotu-etnokovorking/
-/news/culture/80536-knyaz-igor-v-buryatii-otkuda-v-stepi-drevnerusskaya-grust/
-/news/culture/80509-sezon-dozhdey-dadut-kontsert-v-ulan-ude/
-/news/culture/80557-koster-na-glavnoy-ploshchadi-buryatiya-gotovitsya-k-maslenitse/
-/news/culture/80522-v-ulan-ude-proshla-vstrecha-vesny-kak-zavershenie-vostochnogo-novogo-goda/
-/news/culture/80397-muzhchina-iz-severobaykalska-poluchil-realnyy-srok-za-povtornuyu-ezdu-v-pyanom-vide/
-/news/culture/80420-v-buryatii-izdali-knigu-minii-nyutag/
-/news/culture/80389-artisty-teatra-baykal-vyydut-v-efir-radio-mayak/
-/news/economy/80551-v-buryatii-rastet-spros-na-novye-avtomobili/
-/news/economy/80400-agrarii-buryatii-vernuli-chast-zemel-merii-ulan-ude/
-/news/economy/80462-v-buryatii-otremontiruyut-dorogu-v-posele/
-/news/economy/80447-biznes-buryatii-poluchit-vozmozhnosti-opravitsya-ot-posledstviy-pandemii/
-/news/economy/80549-prognoznyy-poleznyy-otpusk-elektroenergii-i-moshchnosti-po-tarifnym-gruppam/
-/news/economy/80427-v-buryatii-proydet-zasedanie-soveta-direktorov-kholdinga-vertolyety-rossii/
-/news/economy/80506-v-buryatii-otremontiruyut-odnu-iz-samykh-populyarnykh-avtodorog/
-/news/economy/80429-buryatiya-poluchit-okolo-3-milliardov-rubley-na-dorogi/
-/news/economy/80415-v-ulan-ude-startoval-akselerator-proektov-upravlentsev-buryatii/
-/news/economy/80432-buryatiya-poluchit-bolee-28-mln-rubley-na-podderzhku-proektov-sotsialno-orientirovannykh-organizatsi/
-/news/economy/80413-usloviya-dalnevostochnaya-ipoteka-namereny-uluchshit-v-buryatii-i-po-vsemu-dfo/
-/news/economy/80340-tunkinskiy-rayon-buryatii-poluchit-sredstva-na-razvitie-turizma/
-/news/economy/80435-ivan-alkheev-naznachen-zampredom-pravitelstva-buryatii/
-/news/economy/80433-na-ulan-udenskom-aviazavode-vpervye-proshlo-zasedanie-soveta-direktorov-vertolety-rossii/
-/news/economy/80317-bolee-2-tysyach-zhiteley-buryatii-prinyali-uchastie-v-biznes-vstreche-s-alekseem-tsydenovym/
-/news/economy/80478-produktsiya-iz-buryatii-vyshla-na-rynok-germanii/
-/news/economy/80412-buryatiya-priobretet-paket-aktsiy-avrora/
-/news/economy/80488-v-buryatii-razdeli-mestorozhdenie-urana/
-/news/economy/80344-pereezd-na-dalniy-vostok-pozvolit-buryatii-reanimirovat-dva-krupnykh-proekta/
-/news/economy/80491-buryatiya-voshla-v-zonu-modernizatsii-zheleznykh-dorog/
-/news/sports/80463-vdokhnovlyayushchie-rezultaty-novye-vzlyety-khudozhestvennoy-gimnastiki-buryatii/
-/news/sports/80600-v-kabanskom-rayone-buryatii-otkryli-sportivnuyu-ploshchadku/
-/news/sports/80546-studentki-iz-buryatii-na-pedestale-rossiyskogo-urovnya/
-/news/sports/80331-boksery-iz-buryatii-zavoevali-shest-medaley-na-vserossiyskom-turnire-klassa-a/
-/news/sports/80520-final-kubka-buryatii-ne-doigrali-no-pobeditelya-opredelili/
-/news/sports/80578-na-chempionate-rossii-v-ulan-ude-103-bortsa-opredelyat-pervykh-finalistov-obnovlyaetsya/
-/news/sports/80477-borets-iz-buryatii-rasskazal-kak-borolsya-za-ameriku/
-/news/sports/80351-boytsy-federatsii-pankrationa-buryatii-vystupili-na-urovne-dfo/
-/news/sports/80601-ministr-sporta-buryatii-chempionat-rossii-dast-novyy-impuls-razvitiyu-volnoy-borby-v-respublike/
-/news/sports/80507-buryatiya-utverdila-okonchatelnyy-sostav-sbornoy-dlya-uchastiya-v-chempionate-rossii-po-volnoy-borbe/
-/news/sports/80494-borits-iz-buryatii-zakryli-na-karantin-v-rime-/
-/news/sports/80472-na-lyzhakh-s-lukom-buryatskaya-sportsmenka-vyigrala-chetyre-medali-na-pervenstve-rossii/
-/news/sports/80547-beskompromissnye-igry-buryatskikh-shakhmatistov-na-dalnem-vostoke/
-/news/sports/80354-pervyy-bortsovskiy-internat-buryatii-vzyal-na-vooruzhenie-igru-go/
-/news/sports/80568-v-stolitse-buryatii-proshla-pervaya-zherebevka-chempionata-rossii-po-volnoy-borbe/
-/news/sports/80525-glavnyy-favorit-gryadushchego-chempionata-rossii-v-buryatii-ozvuchil-svoy-sostav/
-/news/sports/80470-v-buryatii-gryadet-bitva-titanov/
-/news/sports/80452-sportsmeny-iz-eravninskogo-rayona-buryatii-oderzhali-dve-pobedy/
-/news/sports/80345-znamenityy-rossiyskiy-futbolist-vyigral-baykalskiy-marafon-v-buryatii/
-/news/incidents/80181-v-buryatii-stali-bolshe-pit/
-/news/incidents/80467-v-buryatii-vyyavili-pyat-narusheniy-protivoepidemicheskikh-mer/
-/news/incidents/80408-byvshemu-rukovoditelyu-energeticheskoy-kompanii-iz-buryatii-inkriminiruyut-sozdanie-opg/
-/news/incidents/80517-kommunisty-buryatii-obvinili-organizatorov-baykalskoy-mili/
-/news/incidents/80453-v-kabanskom-rayone-buryatii-zhiteli-zamerzayut-v-svoikh-domakh-/
-/news/incidents/80461-v-bichure-podtopilo-pushkina-/
-/news/incidents/80226-v-buryatii-vynesli-verdikt-po-delu-o-napadenii-sobak-na-pervoklassnika/
-/news/incidents/80471-v-belorussii-izbili-futbolnogo-trenera-iz-buryatii/
-/news/incidents/80530-delo-o-zaderzhanii-brakonerov-v-buryatii-prokommentiroval-skr-/
-/news/incidents/80411-na-baykale-zamorozili-demontazh-zavoda-po-rozlivu-baykalskoy-vody/
-/news/incidents/80399-vrachi-rasskazali-o-sostoyanii-rebenka-ranennogo-v-buryatii/
-/news/incidents/80545-v-buryatii-pri-pozhare-v-zhilom-dome-spaslis-mat-i-dvoe-detey/
-/news/incidents/80582-v-kurumkanskom-rayone-buryatii-zaderzhan-pokhititel-myasa/
-/news/incidents/80349-musornaya-reforma-v-buryatii-stala-povodom-dlya-politicheskikh-sporov/
-/news/incidents/80380-v-buryatii-snova-otlichilsya-voditel-leksusa/
-/news/incidents/80422-sledstvennyy-komitet-po-buryatii-dal-kommentariy-po-povodu-zaderzhaniya-sergeya-ivanova/
-/news/incidents/80191-intsident-na-baykale-pod-kontrolem-pravitelstva-buryatii/
-/news/incidents/80326-v-buryatii-obyasnili-prichiny-tramvaynogo-kollapsa-v-ulan-ude/
-/news/incidents/80529-materialnyy-ushcherb-po-faktu-gibeli-podrostkov-v-buryatii-vzyshchut-s-vladeltsa-avto/
-/news/incidents/80492-v-buryatii-obvinyayut-inspektora-kotoryy-zaderzhal-vliyatelnykh-brakonerov-/
-/news/politics/80591-ministr-sporta-buryatii-otmetil-naplyv-imenitykh-sportsmenov-v-dni-chempionata-rossii-po-volnoy-borb/
-/news/politics/80588-glava-buryatii-prinyal-uchastie-v-soveshchanii-generalnogo-direktora-oao-rzhd/
-/news/politics/80590-glava-buryatii-vyekhal-s-rabochey-poezdkoy-v-mukhorshibirskiy-rayon/
-/news/politics/80498-aleksey-tsydenov-pozdravil-vsekh-zhenshchin-buryatii-s-8-marta-/
-/news/politics/80538-delo-o-zaderzhanii-brakonerov-v-buryatii-poruchil-vzyat-pod-kontrol-yuriy-trutnev/
-/news/politics/80480-glava-buryatii-vmeste-s-poslom-izrailya-v-rossii-pochtili-pamyat-geroev-vov/
-/news/politics/80482-glava-buryatii-predlozhil-poslu-izrailya-sotrudnichestvo-v-sfere-turizma-i-meditsiny/
-/news/politics/80436-glava-buryatii-i-prezident-tatarstana-obsudili-sotrudnichestvo-mezhdu-regionami/
-/news/politics/80456-glavnyy-kommunist-buryatii-ne-voshyel-v-short-list-ot-kprf/
-/news/politics/80508-glava-buryatii-nagradil-laureatov-gosudarstvennykh-premiy-respubliki-v-sfere-kultury-i-iskusstva//news/society/80553-pionery-buryatskogo-biznesa/
-/news/society/80527-zurkhay-na-10-marta-27-lunnyy-den/
-/news/society/80543-nakanune-10-marta-v-buryatii-ushel-iz-zhizni-eks-rektor-bgskha-aleksandr-popov/
-/news/society/80575-v-buryatii-startoval-federalnyy-proekt-chistaya-voda/
-/news/society/80548-v-stolitsu-buryatii-pribyl-arsen-fadzaev/
-/news/culture/80594-polozhenie-o-konkurse-rasskaza-2021/
-/news/society/80583-vystavka-okhotnichikh-laek-proshla-v-zakamenskom-rayone-buryatii/
-/news/society/80574-v-vuzakh-buryatii-poyavyatsya-prorektory-po-tsifrovizatsii/
-/news/society/80552-poyushchaya-garga-kak-zhivyet-poyushchee-selskoe-poselenie-v-kurumkane/
-/news/society/80572-zurkhay-na-11-marta-28-lunnyy-den/
-/news/society/80555-buryatiya-voshla-v-chislo-regionov-kotorye-podderzhali-pevitsu-manizhu/
-/news/society/80581-zhivaya-legenda-volnoy-borby-rossii-pribyl-v-buryatiyu-v-svoy-den-rozhdeniya/
-/news/society/80540-v-stolitsu-buryatii-nachali-pribyvat-uchastniki-i-gosti-chempionata-rossii-po-volnoy-borbe/
-/news/society/80542-stalo-izvestno-komu-iz-zhiteley-buryatii-udastsya-vyyti-na-pensiyu-dosrochno/
-/news/society/80602-v-buryatskom-sele-vydrino-otremontiruyut-detskuyu-shkolu-iskusstv/
-/news/society/80573-preimushchestvenno-bez-osadkov-dnyem-do-5-tepla-ozhidaetsya-v-buryatii-segodnya-11-marta-/
-/news/culture/79137-pamyati-geroev-rabotniki-sudebnoy-sistemy-zabaykalya-vypustili-knigu-vospominaniy-ob-uchastnikakh-vo/
-/news/society/80558-irkutskuyu-oblast-i-buryatiyu-vozmozhno-svyazhet-noveyshaya-elektrichka/
-/news/society/80535-v-buryatii-podveli-itogi-baykalskoy-mili/
-/news/society/80565-v-zaigraevskom-rayone-buryatii-otkrylas-novaya-shkola-na-450-mest/
-/news/society/80577-na-buryatiyu-obrushitsya-anomalnoe-poteplenie/
-/news/society/80584-v-buryatii-nachalis-meropriyatiya-po-profilaktike-ledyanykh-zatorov/
-/news/culture/80559-proverit-svoe-znanie-buryatskoy-grammatiki-smogut-zhiteli-buryatii/
-/news/culture/80475-dusha-v-tantse-studenty-kolledzha-iskusstv-dali-uroki-yekhora-dlya-zhurnalistov/
-/news/culture/80511-v-buryatii-otkrylas-vystavka-eksperimentalnogo-iskusstva-ii-sulde/
-/news/culture/80515-reper-iz-buryatii-zapisal-klip-s-pevitsey-kotoraya-vystupit-na-evrovidenii/
-/news/culture/80387-smysly-zurkhaya-geshe-tsyren-lama-o-tonkostyakh-buddiyskoy-astrologii-/
-/news/culture/80388-v-ulan-ude-sostoyalas-premera-opery-knyazya-igorya/
-/news/culture/80384-v-ulan-ude-vozvrashchaetsya-festival-uu-sound-/
-/news/culture/80521-v-ulan-ude-predstavili-dokumentalnyy-film-o-buryatskom-kostyume/
-/news/culture/80390-yunye-tsirkovye-artisty-buryatii-vpervye-vystupyat-v-tyumenskom-tsirke/
-/news/culture/80416-buryatskiy-ansambl-bulzhamuur-stal-laureatom-vserossiyskogo-festivalya/
-/news/culture/80592-v-ulan-ude-proshyel-kontsert-etnicheskoy-muzyki/
-/news/culture/80560-v-ulan-ude-nachinaet-svoyu-rabotu-etnokovorking/
-/news/culture/80536-knyaz-igor-v-buryatii-otkuda-v-stepi-drevnerusskaya-grust/
-/news/culture/80509-sezon-dozhdey-dadut-kontsert-v-ulan-ude/
-/news/culture/80557-koster-na-glavnoy-ploshchadi-buryatiya-gotovitsya-k-maslenitse/
-/news/culture/80522-v-ulan-ude-proshla-vstrecha-vesny-kak-zavershenie-vostochnogo-novogo-goda/
-/news/culture/80397-muzhchina-iz-severobaykalska-poluchil-realnyy-srok-za-povtornuyu-ezdu-v-pyanom-vide/
-/news/culture/80420-v-buryatii-izdali-knigu-minii-nyutag/
-/news/culture/80389-artisty-teatra-baykal-vyydut-v-efir-radio-mayak/
-/news/economy/80551-v-buryatii-rastet-spros-na-novye-avtomobili/
-/news/economy/80400-agrarii-buryatii-vernuli-chast-zemel-merii-ulan-ude/
-/news/economy/80462-v-buryatii-otremontiruyut-dorogu-v-posele/
-/news/economy/80447-biznes-buryatii-poluchit-vozmozhnosti-opravitsya-ot-posledstviy-pandemii/
-/news/economy/80549-prognoznyy-poleznyy-otpusk-elektroenergii-i-moshchnosti-po-tarifnym-gruppam/
-/news/economy/80427-v-buryatii-proydet-zasedanie-soveta-direktorov-kholdinga-vertolyety-rossii/
-/news/economy/80506-v-buryatii-otremontiruyut-odnu-iz-samykh-populyarnykh-avtodorog/
-/news/economy/80429-buryatiya-poluchit-okolo-3-milliardov-rubley-na-dorogi/
-/news/economy/80415-v-ulan-ude-startoval-akselerator-proektov-upravlentsev-buryatii/
-/news/economy/80432-buryatiya-poluchit-bolee-28-mln-rubley-na-podderzhku-proektov-sotsialno-orientirovannykh-organizatsi/
-/news/economy/80413-usloviya-dalnevostochnaya-ipoteka-namereny-uluchshit-v-buryatii-i-po-vsemu-dfo/
-/news/economy/80340-tunkinskiy-rayon-buryatii-poluchit-sredstva-na-razvitie-turizma/
-/news/economy/80435-ivan-alkheev-naznachen-zampredom-pravitelstva-buryatii/
-/news/economy/80433-na-ulan-udenskom-aviazavode-vpervye-proshlo-zasedanie-soveta-direktorov-vertolety-rossii/
-/news/economy/80317-bolee-2-tysyach-zhiteley-buryatii-prinyali-uchastie-v-biznes-vstreche-s-alekseem-tsydenovym/
-/news/economy/80478-produktsiya-iz-buryatii-vyshla-na-rynok-germanii/
-/news/economy/80412-buryatiya-priobretet-paket-aktsiy-avrora/
-/news/economy/80488-v-buryatii-razdeli-mestorozhdenie-urana/
-/news/economy/80344-pereezd-na-dalniy-vostok-pozvolit-buryatii-reanimirovat-dva-krupnykh-proekta/
-/news/economy/80491-buryatiya-voshla-v-zonu-modernizatsii-zheleznykh-dorog/
-/news/sports/80463-vdokhnovlyayushchie-rezultaty-novye-vzlyety-khudozhestvennoy-gimnastiki-buryatii/
-/news/sports/80600-v-kabanskom-rayone-buryatii-otkryli-sportivnuyu-ploshchadku/
-/news/sports/80546-studentki-iz-buryatii-na-pedestale-rossiyskogo-urovnya/
-/news/sports/80331-boksery-iz-buryatii-zavoevali-shest-medaley-na-vserossiyskom-turnire-klassa-a/
-/news/sports/80520-final-kubka-buryatii-ne-doigrali-no-pobeditelya-opredelili/
-/news/sports/80578-na-chempionate-rossii-v-ulan-ude-103-bortsa-opredelyat-pervykh-finalistov-obnovlyaetsya/
-/news/sports/80477-borets-iz-buryatii-rasskazal-kak-borolsya-za-ameriku/
-/news/sports/80351-boytsy-federatsii-pankrationa-buryatii-vystupili-na-urovne-dfo/
-/news/sports/80601-ministr-sporta-buryatii-chempionat-rossii-dast-novyy-impuls-razvitiyu-volnoy-borby-v-respublike/
-/news/sports/80507-buryatiya-utverdila-okonchatelnyy-sostav-sbornoy-dlya-uchastiya-v-chempionate-rossii-po-volnoy-borbe/
-/news/sports/80494-borits-iz-buryatii-zakryli-na-karantin-v-rime-/
-/news/sports/80472-na-lyzhakh-s-lukom-buryatskaya-sportsmenka-vyigrala-chetyre-medali-na-pervenstve-rossii/
-/news/sports/80547-beskompromissnye-igry-buryatskikh-shakhmatistov-na-dalnem-vostoke/
-/news/sports/80354-pervyy-bortsovskiy-internat-buryatii-vzyal-na-vooruzhenie-igru-go/
-/news/sports/80568-v-stolitse-buryatii-proshla-pervaya-zherebevka-chempionata-rossii-po-volnoy-borbe/
-/news/sports/80525-glavnyy-favorit-gryadushchego-chempionata-rossii-v-buryatii-ozvuchil-svoy-sostav/
-/news/sports/80470-v-buryatii-gryadet-bitva-titanov/
-/news/sports/80452-sportsmeny-iz-eravninskogo-rayona-buryatii-oderzhali-dve-pobedy/
-/news/sports/80345-znamenityy-rossiyskiy-futbolist-vyigral-baykalskiy-marafon-v-buryatii/
-/news/incidents/80181-v-buryatii-stali-bolshe-pit/
-/news/incidents/80467-v-buryatii-vyyavili-pyat-narusheniy-protivoepidemicheskikh-mer/
-/news/incidents/80408-byvshemu-rukovoditelyu-energeticheskoy-kompanii-iz-buryatii-inkriminiruyut-sozdanie-opg/
-/news/incidents/80517-kommunisty-buryatii-obvinili-organizatorov-baykalskoy-mili/
-/news/incidents/80453-v-kabanskom-rayone-buryatii-zhiteli-zamerzayut-v-svoikh-domakh-/
-/news/incidents/80461-v-bichure-podtopilo-pushkina-/
-/news/incidents/80226-v-buryatii-vynesli-verdikt-po-delu-o-napadenii-sobak-na-pervoklassnika/
-/news/incidents/80471-v-belorussii-izbili-futbolnogo-trenera-iz-buryatii/
-/news/incidents/80530-delo-o-zaderzhanii-brakonerov-v-buryatii-prokommentiroval-skr-/
-/news/incidents/80411-na-baykale-zamorozili-demontazh-zavoda-po-rozlivu-baykalskoy-vody/
-/news/incidents/80399-vrachi-rasskazali-o-sostoyanii-rebenka-ranennogo-v-buryatii/
-/news/incidents/80545-v-buryatii-pri-pozhare-v-zhilom-dome-spaslis-mat-i-dvoe-detey/
-/news/incidents/80582-v-kurumkanskom-rayone-buryatii-zaderzhan-pokhititel-myasa/
-/news/incidents/80349-musornaya-reforma-v-buryatii-stala-povodom-dlya-politicheskikh-sporov/
-/news/incidents/80380-v-buryatii-snova-otlichilsya-voditel-leksusa/
-/news/incidents/80422-sledstvennyy-komitet-po-buryatii-dal-kommentariy-po-povodu-zaderzhaniya-sergeya-ivanova/
-/news/incidents/80191-intsident-na-baykale-pod-kontrolem-pravitelstva-buryatii/
-/news/incidents/80326-v-buryatii-obyasnili-prichiny-tramvaynogo-kollapsa-v-ulan-ude/
-/news/incidents/80529-materialnyy-ushcherb-po-faktu-gibeli-podrostkov-v-buryatii-vzyshchut-s-vladeltsa-avto/
-/news/incidents/80492-v-buryatii-obvinyayut-inspektora-kotoryy-zaderzhal-vliyatelnykh-brakonerov-/
-/news/politics/80591-ministr-sporta-buryatii-otmetil-naplyv-imenitykh-sportsmenov-v-dni-chempionata-rossii-po-volnoy-borb/
-/news/politics/80588-glava-buryatii-prinyal-uchastie-v-soveshchanii-generalnogo-direktora-oao-rzhd/
-/news/politics/80590-glava-buryatii-vyekhal-s-rabochey-poezdkoy-v-mukhorshibirskiy-rayon/
-/news/politics/80498-aleksey-tsydenov-pozdravil-vsekh-zhenshchin-buryatii-s-8-marta-/
-/news/politics/80538-delo-o-zaderzhanii-brakonerov-v-buryatii-poruchil-vzyat-pod-kontrol-yuriy-trutnev/
-/news/politics/80480-glava-buryatii-vmeste-s-poslom-izrailya-v-rossii-pochtili-pamyat-geroev-vov/
-/news/politics/80482-glava-buryatii-predlozhil-poslu-izrailya-sotrudnichestvo-v-sfere-turizma-i-meditsiny/
-/news/politics/80436-glava-buryatii-i-prezident-tatarstana-obsudili-sotrudnichestvo-mezhdu-regionami/
-/news/politics/80456-glavnyy-kommunist-buryatii-ne-voshyel-v-short-list-ot-kprf/
-/news/politics/80508-glava-buryatii-nagradil-laureatov-gosudarstvennykh-premiy-respubliki-v-sfere-kultury-i-iskusstva/
\ No newline at end of file
diff --git a/scrapper.py b/scrapper.py
index eb1295f2..bf64ac64 100644
--- a/scrapper.py
+++ b/scrapper.py
@@ -51,8 +51,6 @@ def __init__(self, seed_urls: list, total_max_articles: int, max_articles_per_se
     @staticmethod
     def _extract_url(article_bs, seen):
         extracted = list({link['href'] for link in article_bs.find_all('a', href=True)})
-        # print(extracted)
-        # print('          ',extracted)
         return list(filter(lambda x: x.startswith('/news/')
                            and x not in seen, extracted))
 
@@ -101,7 +99,11 @@ def _crawl(self, pool: list):
         for link in pool:
             if self.is_waiting:
                 wait(randint(0, 10))
-            article_bs = BeautifulSoup(requests.get(URL_START + link, 'html.parser').text, 'html.parser')
+            try:
+                article_bs = BeautifulSoup(requests.get(URL_START + link, 'html.parser').text, 'html.parser')
+            except requests.exceptions.ConnectionError:
+                wait(10)
+                article_bs = BeautifulSoup(requests.get(URL_START + link, 'html.parser').text, 'html.parser')
             newfound = self._extract_url(article_bs, self.urls)
             newfound = [i for i in newfound if len(i) > 20
                         and not any(map(lambda y: y.isupper(), i))]
@@ -162,7 +164,9 @@ def __init__(self, full__url: str, article_id: int):
     def _fill_article_with_text(self, article_soup):
         try:
             text = article_soup.find('div', {'class': 'text letter', 'itemprop': 'articleBody'}).text.strip()
-            self.article.text = text
+            text = [i for i in text.split('\n') if 'Фото:' not in i and 'Автор:' not in i
+                    and '© фото:' not in i and 'Источник:' not in i]
+            self.article.text = '\n'.join(text).strip()
         except AttributeError:
             print('    unable to parse', self.full_url)
 
@@ -216,7 +220,6 @@ def parse(self):
         """
         Parses each article
         """
-        # print(self.full_url)
         self.article.url = self.full_url
         self.article.article_id = self.article_id
         html = requests.get(self.full_url, 'html.parser').text

From f7aa7f6e18c57acf17958b1e04091618c8f67aa8 Mon Sep 17 00:00:00 2001
From: marina-kaz <isthismeowgoingfrommyheart@gmail.com>
Date: Thu, 11 Mar 2021 22:05:57 +0300
Subject: [PATCH 21/50] uhm

---
 scrapper.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scrapper.py b/scrapper.py
index bf64ac64..062b493e 100644
--- a/scrapper.py
+++ b/scrapper.py
@@ -182,7 +182,7 @@ def _fill_article_with_meta_information(self, article_soup):
                 author = article_soup.find('div', {'class': 'credits t-caption'}).text.strip()
                 author = author.split('\n')[0][9:].strip()
             else:
-                author = ''
+                author = 'Not found'
             self.article.author = author
             when = article_soup.find('div', {'class': 'b-caption'}).text.strip().split('\n')[1]
             self.article.date = self.unify_date_format(when)

From e75381c3c0e1d8aab8dc5b6fc3eb5c7d06f50b5d Mon Sep 17 00:00:00 2001
From: marina-kaz <isthismeowgoingfrommyheart@gmail.com>
Date: Sat, 13 Mar 2021 11:51:05 +0300
Subject: [PATCH 22/50] fixed some problems from review

---
 config/constants.py                    |  10 --
 config/raw_metadata_score_four_test.py |   1 -
 constants.py                           |   6 +-
 links/links.txt                        |   0
 scrapper.py                            | 162 +++++++++++--------------
 5 files changed, 75 insertions(+), 104 deletions(-)
 delete mode 100644 config/constants.py
 create mode 100644 links/links.txt

diff --git a/config/constants.py b/config/constants.py
deleted file mode 100644
index 28a84b06..00000000
--- a/config/constants.py
+++ /dev/null
@@ -1,10 +0,0 @@
-"""
-Useful constant variables
-"""
-
-import os
-
-PROJECT_ROOT = os.path.dirname(os.path.realpath(__file__))
-ASSETS_PATH = os.path.join(PROJECT_ROOT, 'tmp', 'articles')
-CRAWLER_CONFIG_PATH = os.path.join(PROJECT_ROOT, 'crawler_config.json')
-LINKS_STORAGE = os.path.join(PROJECT_ROOT, 'links')
diff --git a/config/raw_metadata_score_four_test.py b/config/raw_metadata_score_four_test.py
index e6900c05..d8691879 100644
--- a/config/raw_metadata_score_four_test.py
+++ b/config/raw_metadata_score_four_test.py
@@ -16,7 +16,6 @@ def setUp(self) -> None:
 
     def test_validate_sort(self):
         list_ids = [pair[0] for pair in self.texts]
-        print(list_ids)
         for i in range(1, len(list_ids)+1):
             self.assertTrue(i in list_ids,
                             msg="""Articles ids are not homogeneous. E.g. numbers are not from 1 to N""")
diff --git a/constants.py b/constants.py
index 3a7976d0..938510b9 100644
--- a/constants.py
+++ b/constants.py
@@ -7,5 +7,9 @@
 PROJECT_ROOT = os.path.dirname(os.path.realpath(__file__))
 ASSETS_PATH = os.path.join(PROJECT_ROOT, 'tmp', 'articles')
 CRAWLER_CONFIG_PATH = os.path.join(PROJECT_ROOT, 'crawler_config.json')
-LINKS_STORAGE = os.path.join(PROJECT_ROOT, 'links')
+LINKS_STORAGE_DIR = os.path.join(PROJECT_ROOT, 'links')
+LINKS_STORAGE_FILE = os.path.join(LINKS_STORAGE_DIR, 'links.txt')
+
 URL_START = 'https://burunen.ru'
+HEADERS = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1)'
+                         ' AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36'}
\ No newline at end of file
diff --git a/links/links.txt b/links/links.txt
new file mode 100644
index 00000000..e69de29b
diff --git a/scrapper.py b/scrapper.py
index 062b493e..372973e7 100644
--- a/scrapper.py
+++ b/scrapper.py
@@ -2,16 +2,18 @@
 Crawler implementation
 """
 
-import os
-import json
 from datetime import date
+import json
+import os
 from random import randint
 from time import sleep as wait
+
+from bs4 import BeautifulSoup
 import requests
 from requests.exceptions import RequestException
-from bs4 import BeautifulSoup
+
 from article import Article
-from constants import CRAWLER_CONFIG_PATH, ASSETS_PATH, LINKS_STORAGE, URL_START
+from constants import CRAWLER_CONFIG_PATH, ASSETS_PATH, LINKS_STORAGE_DIR, URL_START, HEADERS, LINKS_STORAGE_FILE
 
 
 class IncorrectURLError(Exception):
@@ -38,6 +40,12 @@ class UnknownConfigError(Exception):
     """
 
 
+class NoBackUpEnabled(Exception):
+    """
+    Custom Error
+    """
+
+
 class Crawler:
     """
     Crawler implementation
@@ -59,27 +67,28 @@ def find_articles(self):
         Finds articles
         """
         for link in self.seed_urls:
-            article_bs = BeautifulSoup(requests.get(link, 'html.parser').text, 'html.parser')
+            article_bs = BeautifulSoup(requests.get(link, 'html.parser', headers=HEADERS).text, 'html.parser')
             newfound = self._extract_url(article_bs, self.urls)
             self.urls.extend(newfound[:self.max_articles_per_seed])
         self.urls = [i for i in self.urls if len(i) > 20
                      and not any(map(lambda y: y.isupper(), i))][:self.total_max_articles]
         print('Scraped seed urls, overall number of urls is', len(self.urls))
-        old = len(self.urls)
         while len(self.urls) < self.total_max_articles:
             print('Due to insufficient number started further iteration')
             print('current number', len(self.urls), ', required', self.total_max_articles)
+            old = len(self.urls)
             for link in self.urls:
-                article_bs = BeautifulSoup(requests.get(URL_START + link, 'html.parser').text, 'html.parser')
+                article_bs = BeautifulSoup(requests.get(URL_START + link,
+                                                        'html.parser', headers=HEADERS).text, 'html.parser')
                 newfound = list(filter(lambda x: len(x) > 20, self._extract_url(article_bs, self.urls)))
-                print('    checked new url, found', len(newfound), 'articles')
+                print('checked new url, found', len(newfound), 'articles')
                 self.urls.extend(newfound[:self.max_articles_per_seed])
                 if len(self.urls) > self.total_max_articles:
                     break
             if len(self.urls) == old:
-                print('     Something is wrong with scraping parameters')
+                print('There are no unseen urls found in all of the available addresses')
+                print(f'crawling finished with {len(self.urls)}')
                 break
-
             self.urls = self.urls[:self.total_max_articles]
 
     def get_search_urls(self):
@@ -96,60 +105,54 @@ def __init__(self, seed_urls: list, total_max_articles: int, max_articles_per_se
         self.is_waiting = to_wait
 
     def _crawl(self, pool: list):
+        found = []
         for link in pool:
             if self.is_waiting:
                 wait(randint(0, 10))
-            try:
-                article_bs = BeautifulSoup(requests.get(URL_START + link, 'html.parser').text, 'html.parser')
-            except requests.exceptions.ConnectionError:
-                wait(10)
-                article_bs = BeautifulSoup(requests.get(URL_START + link, 'html.parser').text, 'html.parser')
+            article_bs = BeautifulSoup(requests.get(URL_START + link, headers=HEADERS).text, 'html.parser')
             newfound = self._extract_url(article_bs, self.urls)
             newfound = [i for i in newfound if len(i) > 20
                         and not any(map(lambda y: y.isupper(), i))]
-            return newfound
+            found.extend(newfound)
+        return list(set(found))
 
     def find_articles(self):
-        if self.get_backedup():
+        if self.read_backedup():
             print('backed up urls found, starting iteration')
         if not self.urls:
-            self.urls = self._crawl(self.seed_urls)
-            with open('links/url_backup.txt', 'w', encoding='utf-8') as file:
-                file.write('\n'.join(self.urls))
-            print(f'Scraped {len(self.urls)} from seed')
+            pool = self.seed_urls
+        else:
+            pool = self.urls
+        newfound = self._crawl(pool)
+        if not newfound:
+            print(f'there are no unseen links found\nrecursive crawling finished with {len(self.urls)} urls.')
+        else:
+            self.urls.extend(newfound)
+            with open(LINKS_STORAGE_FILE, 'a', encoding='utf-8') as file:
+                file.write('\n'.join(newfound))
+            print(f'found {len(newfound)} new urls')
             if self.verify_proceed():
-                print('starting recursive scraping')
+                print('starting new iteration')
                 self.find_articles()
             else:
                 print(f'recursive crawling finished with {len(self.urls)} urls.')
-        else:
-            newfound = self._crawl(self.urls)
-            if not newfound:
-                print(f'there are no unseen links found\nrecursive crawling finished with {len(self.urls)} urls.')
-            else:
-                self.urls.extend(newfound)
-                with open('links/url_backup.txt', 'a', encoding='utf-8') as file:
-                    file.write('\n'.join(newfound))
-                print(f'found {len(newfound)} new urls')
-                if self.verify_proceed():
-                    print('starting new iteration')
-                    self.find_articles()
-                else:
-                    print(f'recursive crawling finished with {len(self.urls)} urls.')
 
     @staticmethod
     def verify_proceed():
         answer = input('Would you like to proceed? yes or no: ').strip()
         return answer == 'yes'
 
-    def get_backedup(self):
+    def read_backedup(self):
         try:
-            with open('links/url_backup.txt', 'r', encoding='utf-8') as file:
+            with open(LINKS_STORAGE_FILE, 'r', encoding='utf-8') as file:
                 sources = file.read().split('\n')
                 self.urls = sources
-                return True
+                if self.urls:
+                    print('backed up urls found')
         except FileNotFoundError:
-            return False
+            print('no backed up files found')
+            with open(LINKS_STORAGE_FILE, 'w', encoding='utf-8'):
+                pass
 
 
 class ArticleParser:
@@ -163,36 +166,28 @@ def __init__(self, full__url: str, article_id: int):
 
     def _fill_article_with_text(self, article_soup):
         try:
-            text = article_soup.find('div', {'class': 'text letter', 'itemprop': 'articleBody'}).text.strip()
-            text = [i for i in text.split('\n') if 'Фото:' not in i and 'Автор:' not in i
-                    and '© фото:' not in i and 'Источник:' not in i]
-            self.article.text = '\n'.join(text).strip()
+            text = article_soup.find('div', {'class': 'text letter',
+                                             'itemprop': 'articleBody'}).text.strip().split('\n')
+            stopws = ['Фото:', 'Автор:', 'Источник:',  '© фото:']
+            self.article.text = ' '.join(filter(lambda line:
+                                         all(map(lambda stopw: stopw not in line, stopws)), text)).strip()
         except AttributeError:
-            print('    unable to parse', self.full_url)
+            print('unable to parse', self.full_url)
+            self.article.text = 'ERROR'
 
     def _fill_article_with_meta_information(self, article_soup):
         try:
             title = article_soup.title.text
             self.article.title = title
-
-            credit = article_soup.find('div', {'class': 'credits t-caption'}).text.strip().split('\n')[0]
-            if 'Автор:' in credit:
-                author = article_soup.find('div', {'class': 'credits t-caption'}).text.strip().split('\n')[0][7:]
-            elif 'Источник:' in credit:
-                author = article_soup.find('div', {'class': 'credits t-caption'}).text.strip()
-                author = author.split('\n')[0][9:].strip()
-            else:
-                author = 'Not found'
-            self.article.author = author
+            author = article_soup.find('div',
+                                       {'class': 'credits t-caption'}).text.strip().split('\n')[0].split(': ')[-1]
+            self.article.author = author.strip()
             when = article_soup.find('div', {'class': 'b-caption'}).text.strip().split('\n')[1]
             self.article.date = self.unify_date_format(when)
-
             topic = article_soup.find('div', {'class': 'b-caption'}).text.strip().split('\n')[0]
             self.article.topics = topic
         except AttributeError:
-            print('    something is off with', self.full_url)
-        # print(title)
-        # self.article.title = title
+            print('something is off with', self.full_url)
 
     @staticmethod
     def unify_date_format(date_str):
@@ -220,29 +215,22 @@ def parse(self):
         """
         Parses each article
         """
-        self.article.url = self.full_url
-        self.article.article_id = self.article_id
-        html = requests.get(self.full_url, 'html.parser').text
+        html = requests.get(self.full_url, 'html.parser', headers=HEADERS).text
         article_bs = BeautifulSoup(html, 'html.parser')
         self._fill_article_with_text(article_bs)
         self._fill_article_with_meta_information(article_bs)
-        self.article.save_raw()
+        return self.article
 
 
-def prepare_environment(base_path):
+def prepare_environment(base_path, backup_path_dir):
     """
     Creates ASSETS_PATH folder if not created and removes existing folder
     """
     if not os.path.exists(base_path):
         os.makedirs(base_path)
-
-
-def enable_backup(base_path):
-    """
-    Creates folder for backup links if not created
-    """
-    if not os.path.exists(base_path):
-        os.makedirs(base_path)
+    if not os.path.exists(backup_path_dir):
+        print('GOT HERE WITH PATH', backup_path_dir)
+        os.makedirs(backup_path_dir)
 
 
 def validate_config(crawler_path):
@@ -251,13 +239,8 @@ def validate_config(crawler_path):
     """
     with open(crawler_path) as crawler_config:
         config = json.load(crawler_config)
-    try:
-        good_response = list(map(lambda link: requests.get(link).status_code == 200,
-                                 config['base_urls']))
-    except RequestException as exception:
-        raise IncorrectURLError from exception
-    except Exception as exception:
-        raise UnknownConfigError from exception
+    good_response = list(map(lambda link: link.startswith('https://'),
+                             config['base_urls']))
     if not all(good_response):
         raise IncorrectURLError
     try:
@@ -265,11 +248,6 @@ def validate_config(crawler_path):
             raise IncorrectNumberOfArticlesError
         if config['total_articles_to_find_and_parse'] > 1000:
             raise NumberOfArticlesOutOfRangeError
-        # if not isinstance(config['max_number_articles_to_get_from_one_seed'], int):
-        #     raise IncorrectNumberOfArticlesError
-        # if not config['total_articles_to_find_and_parse'] < config['max_number_articles_to_get_from_one_seed']\
-        #    * len(good_response):
-        #     raise NumberOfArticlesOutOfRangeError
     except KeyError as exception:
         raise IncorrectNumberOfArticlesError from exception
     try:
@@ -280,22 +258,22 @@ def validate_config(crawler_path):
 
 
 if __name__ == '__main__':
-    prepare_environment(ASSETS_PATH)
-    enable_backup(LINKS_STORAGE)
+    prepare_environment(ASSETS_PATH, LINKS_STORAGE_DIR)
     seedurls, max_articles, max_arts_per_seed = validate_config(CRAWLER_CONFIG_PATH)
     if not max_arts_per_seed:
         max_arts_per_seed = max_articles
-    crawler = CrawlerRecursive(seed_urls=seedurls,
-                               total_max_articles=max_articles,
-                               max_articles_per_seed=max_arts_per_seed)
+    crawler = Crawler(seed_urls=seedurls,
+                      total_max_articles=max_articles,
+                      max_articles_per_seed=max_arts_per_seed)
 
     crawler.find_articles()
-    # print('Scraped', len(crawler.urls), 'articles')
 
     print('onto parsing')
 
-    for n, url in enumerate(crawler.urls):
+    for n, url in enumerate(crawler.urls[:4]):
         full_url = URL_START + url
         parser = ArticleParser(full_url, n + 1)
-        parser.parse()
+        article = parser.parse()
+        article.save_raw()
     print('parsing is finished')
+#

From 225e72a1fcbbcce7b082863ed3c161f6c5fcd5d4 Mon Sep 17 00:00:00 2001
From: marina-kaz <isthismeowgoingfrommyheart@gmail.com>
Date: Sat, 13 Mar 2021 11:58:30 +0300
Subject: [PATCH 23/50] fixed lint

---
 constants.py | 2 +-
 scrapper.py  | 1 -
 2 files changed, 1 insertion(+), 2 deletions(-)

diff --git a/constants.py b/constants.py
index 938510b9..21dcf665 100644
--- a/constants.py
+++ b/constants.py
@@ -12,4 +12,4 @@
 
 URL_START = 'https://burunen.ru'
 HEADERS = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1)'
-                         ' AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36'}
\ No newline at end of file
+                         ' AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36'}
diff --git a/scrapper.py b/scrapper.py
index 372973e7..165bea21 100644
--- a/scrapper.py
+++ b/scrapper.py
@@ -10,7 +10,6 @@
 
 from bs4 import BeautifulSoup
 import requests
-from requests.exceptions import RequestException
 
 from article import Article
 from constants import CRAWLER_CONFIG_PATH, ASSETS_PATH, LINKS_STORAGE_DIR, URL_START, HEADERS, LINKS_STORAGE_FILE

From dc7c08a46f00fa07b34a1c810ee6dd8bbe968663 Mon Sep 17 00:00:00 2001
From: marina-kaz <isthismeowgoingfrommyheart@gmail.com>
Date: Sat, 13 Mar 2021 12:01:47 +0300
Subject: [PATCH 24/50] fixed config valid

---
 scrapper.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/scrapper.py b/scrapper.py
index 165bea21..6cff2894 100644
--- a/scrapper.py
+++ b/scrapper.py
@@ -238,8 +238,11 @@ def validate_config(crawler_path):
     """
     with open(crawler_path) as crawler_config:
         config = json.load(crawler_config)
-    good_response = list(map(lambda link: link.startswith('https://'),
+    try:
+        good_response = list(map(lambda link: link.startswith('https://'),
                              config['base_urls']))
+    except AttributeError as exception:
+        raise IncorrectURLError from exception
     if not all(good_response):
         raise IncorrectURLError
     try:

From b147b47deb712b91e221b1224862358f318e08b2 Mon Sep 17 00:00:00 2001
From: marina-kaz <isthismeowgoingfrommyheart@gmail.com>
Date: Sat, 13 Mar 2021 14:21:57 +0300
Subject: [PATCH 25/50] optimized

---
 scrapper.py | 29 ++++++++++++++++-------------
 1 file changed, 16 insertions(+), 13 deletions(-)

diff --git a/scrapper.py b/scrapper.py
index 6cff2894..212b6988 100644
--- a/scrapper.py
+++ b/scrapper.py
@@ -61,14 +61,22 @@ def _extract_url(article_bs, seen):
         return list(filter(lambda x: x.startswith('/news/')
                            and x not in seen, extracted))
 
+    def _crawl(self, pool: iter):
+        found = []
+        for link in pool:
+            article_bs = BeautifulSoup(requests.get(link, headers=HEADERS).text, 'html.parser')
+            newfound = self._extract_url(article_bs, self.urls)
+            newfound = [i for i in newfound if len(i) > 20
+                        and not any(map(lambda y: y.isupper(), i))]
+            found.extend(newfound)
+        return list(set(found))[:self.max_articles_per_seed]
+
     def find_articles(self):
         """
         Finds articles
         """
-        for link in self.seed_urls:
-            article_bs = BeautifulSoup(requests.get(link, 'html.parser', headers=HEADERS).text, 'html.parser')
-            newfound = self._extract_url(article_bs, self.urls)
-            self.urls.extend(newfound[:self.max_articles_per_seed])
+        found = self._crawl(self.seed_urls)
+        self.urls.extend(found)
         self.urls = [i for i in self.urls if len(i) > 20
                      and not any(map(lambda y: y.isupper(), i))][:self.total_max_articles]
         print('Scraped seed urls, overall number of urls is', len(self.urls))
@@ -76,14 +84,9 @@ def find_articles(self):
             print('Due to insufficient number started further iteration')
             print('current number', len(self.urls), ', required', self.total_max_articles)
             old = len(self.urls)
-            for link in self.urls:
-                article_bs = BeautifulSoup(requests.get(URL_START + link,
-                                                        'html.parser', headers=HEADERS).text, 'html.parser')
-                newfound = list(filter(lambda x: len(x) > 20, self._extract_url(article_bs, self.urls)))
-                print('checked new url, found', len(newfound), 'articles')
-                self.urls.extend(newfound[:self.max_articles_per_seed])
-                if len(self.urls) > self.total_max_articles:
-                    break
+            pool = tuple(map(lambda x: URL_START + x, self.urls))
+            found = self._crawl(pool)
+            self.urls.extend(found)
             if len(self.urls) == old:
                 print('There are no unseen urls found in all of the available addresses')
                 print(f'crawling finished with {len(self.urls)}')
@@ -272,7 +275,7 @@ def validate_config(crawler_path):
 
     print('onto parsing')
 
-    for n, url in enumerate(crawler.urls[:4]):
+    for n, url in enumerate(crawler.urls):
         full_url = URL_START + url
         parser = ArticleParser(full_url, n + 1)
         article = parser.parse()

From d09c9f3b2ccdba61f7dced1578efa425c212d9c4 Mon Sep 17 00:00:00 2001
From: marina-kaz <isthismeowgoingfrommyheart@gmail.com>
Date: Sat, 13 Mar 2021 14:35:44 +0300
Subject: [PATCH 26/50] fixed date format

---
 article.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/article.py b/article.py
index f471cb6e..718a3b3b 100644
--- a/article.py
+++ b/article.py
@@ -99,7 +99,7 @@ def _date_to_text(self):
         """
         Converts datetime object to text
         """
-        return self.date.strftime("%Y-%m-%d")
+        return self.date.strftime("%Y-%m-%d %H:%M:%S")
 
     def _get_raw_text_path(self):
         """

From 731448ce9b0b4f332a4b1791e434208f315849a9 Mon Sep 17 00:00:00 2001
From: marina-kaz <isthismeowgoingfrommyheart@gmail.com>
Date: Mon, 15 Mar 2021 16:04:23 +0300
Subject: [PATCH 27/50] removed user interaction and links folder

---
 crawler_config.json |  2 +-
 links/links.txt     |  0
 scrapper.py         | 54 ++++++++++++++++++++++-----------------------
 3 files changed, 28 insertions(+), 28 deletions(-)
 delete mode 100644 links/links.txt

diff --git a/crawler_config.json b/crawler_config.json
index 80f0c5d8..01928e35 100644
--- a/crawler_config.json
+++ b/crawler_config.json
@@ -6,6 +6,6 @@
                   "https://burunen.ru/news/incidents/",
                   "https://burunen.ru/news/politic/"
     ],
-    "total_articles_to_find_and_parse": 20,
+    "total_articles_to_find_and_parse": 100,
     "max_number_articles_to_get_from_one_seed": 25
 }
\ No newline at end of file
diff --git a/links/links.txt b/links/links.txt
deleted file mode 100644
index e69de29b..00000000
diff --git a/scrapper.py b/scrapper.py
index 212b6988..932588dd 100644
--- a/scrapper.py
+++ b/scrapper.py
@@ -12,7 +12,7 @@
 import requests
 
 from article import Article
-from constants import CRAWLER_CONFIG_PATH, ASSETS_PATH, LINKS_STORAGE_DIR, URL_START, HEADERS, LINKS_STORAGE_FILE
+from constants import CRAWLER_CONFIG_PATH, ASSETS_PATH, LINKS_STORAGE_DIR, HEADERS, LINKS_STORAGE_FILE, URL_START
 
 
 class IncorrectURLError(Exception):
@@ -64,6 +64,8 @@ def _extract_url(article_bs, seen):
     def _crawl(self, pool: iter):
         found = []
         for link in pool:
+            if not link.startswith('https'):
+                link = URL_START + link
             article_bs = BeautifulSoup(requests.get(link, headers=HEADERS).text, 'html.parser')
             newfound = self._extract_url(article_bs, self.urls)
             newfound = [i for i in newfound if len(i) > 20
@@ -81,15 +83,15 @@ def find_articles(self):
                      and not any(map(lambda y: y.isupper(), i))][:self.total_max_articles]
         print('Scraped seed urls, overall number of urls is', len(self.urls))
         while len(self.urls) < self.total_max_articles:
-            print('Due to insufficient number started further iteration')
-            print('current number', len(self.urls), ', required', self.total_max_articles)
+            print('Due to insufficient number of urls started further iteration')
+            print('current number is', len(self.urls), ', required', self.total_max_articles)
             old = len(self.urls)
             pool = tuple(map(lambda x: URL_START + x, self.urls))
             found = self._crawl(pool)
             self.urls.extend(found)
             if len(self.urls) == old:
                 print('There are no unseen urls found in all of the available addresses')
-                print(f'crawling finished with {len(self.urls)}')
+                print(f'crawling finished with {len(self.urls)} urls')
                 break
             self.urls = self.urls[:self.total_max_articles]
 
@@ -101,17 +103,24 @@ def get_search_urls(self):
 
 
 class CrawlerRecursive(Crawler):
-
-    def __init__(self, seed_urls: list, total_max_articles: int, max_articles_per_seed: int, to_wait=False):
+    """
+    Crawler implementation
+    Scrapes all the articles from the source
+    Uses advanced user imitation with fake headers and random waiting time
+    """
+    def __init__(self, seed_urls: list, total_max_articles: int,
+                 max_articles_per_seed: int, to_wait=False):
         super().__init__(seed_urls, total_max_articles, max_articles_per_seed)
         self.is_waiting = to_wait
 
-    def _crawl(self, pool: list):
+    def _crawl(self, pool: iter):
         found = []
         for link in pool:
             if self.is_waiting:
-                wait(randint(0, 10))
-            article_bs = BeautifulSoup(requests.get(URL_START + link, headers=HEADERS).text, 'html.parser')
+                wait(randint(1, 10))
+            if not link.startswith('https'):
+                link = URL_START + link
+            article_bs = BeautifulSoup(requests.get(link, headers=HEADERS).text, 'html.parser')
             newfound = self._extract_url(article_bs, self.urls)
             newfound = [i for i in newfound if len(i) > 20
                         and not any(map(lambda y: y.isupper(), i))]
@@ -119,8 +128,9 @@ def _crawl(self, pool: list):
         return list(set(found))
 
     def find_articles(self):
-        if self.read_backedup():
-            print('backed up urls found, starting iteration')
+        if not self.urls:
+            if self.read_backedup():
+                print('backed up urls found, starting iteration')
         if not self.urls:
             pool = self.seed_urls
         else:
@@ -132,17 +142,9 @@ def find_articles(self):
             self.urls.extend(newfound)
             with open(LINKS_STORAGE_FILE, 'a', encoding='utf-8') as file:
                 file.write('\n'.join(newfound))
-            print(f'found {len(newfound)} new urls')
-            if self.verify_proceed():
-                print('starting new iteration')
-                self.find_articles()
-            else:
-                print(f'recursive crawling finished with {len(self.urls)} urls.')
-
-    @staticmethod
-    def verify_proceed():
-        answer = input('Would you like to proceed? yes or no: ').strip()
-        return answer == 'yes'
+            print(f'found {len(newfound)} new urls, overall number is {len(self.urls)}')
+            print('starting new iteration')
+            self.find_articles()
 
     def read_backedup(self):
         try:
@@ -231,7 +233,6 @@ def prepare_environment(base_path, backup_path_dir):
     if not os.path.exists(base_path):
         os.makedirs(base_path)
     if not os.path.exists(backup_path_dir):
-        print('GOT HERE WITH PATH', backup_path_dir)
         os.makedirs(backup_path_dir)
 
 
@@ -267,9 +268,9 @@ def validate_config(crawler_path):
     seedurls, max_articles, max_arts_per_seed = validate_config(CRAWLER_CONFIG_PATH)
     if not max_arts_per_seed:
         max_arts_per_seed = max_articles
-    crawler = Crawler(seed_urls=seedurls,
-                      total_max_articles=max_articles,
-                      max_articles_per_seed=max_arts_per_seed)
+    crawler = CrawlerRecursive(seed_urls=seedurls,
+                               total_max_articles=max_articles,
+                               max_articles_per_seed=max_arts_per_seed)
 
     crawler.find_articles()
 
@@ -281,4 +282,3 @@ def validate_config(crawler_path):
         article = parser.parse()
         article.save_raw()
     print('parsing is finished')
-#

From fd304d74ca27f7e7d914bcbd0fa807e8affd6ede Mon Sep 17 00:00:00 2001
From: marina-kaz <isthismeowgoingfrommyheart@gmail.com>
Date: Mon, 15 Mar 2021 16:11:02 +0300
Subject: [PATCH 28/50] removed user interaction and links folder[2]

---
 crawler_config.json | 4 ++--
 scrapper.py         | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/crawler_config.json b/crawler_config.json
index 01928e35..c765cac3 100644
--- a/crawler_config.json
+++ b/crawler_config.json
@@ -6,6 +6,6 @@
                   "https://burunen.ru/news/incidents/",
                   "https://burunen.ru/news/politic/"
     ],
-    "total_articles_to_find_and_parse": 100,
-    "max_number_articles_to_get_from_one_seed": 25
+    "total_articles_to_find_and_parse": 20,
+    "max_number_articles_to_get_from_one_seed": 10
 }
\ No newline at end of file
diff --git a/scrapper.py b/scrapper.py
index 932588dd..8ac37d99 100644
--- a/scrapper.py
+++ b/scrapper.py
@@ -268,7 +268,7 @@ def validate_config(crawler_path):
     seedurls, max_articles, max_arts_per_seed = validate_config(CRAWLER_CONFIG_PATH)
     if not max_arts_per_seed:
         max_arts_per_seed = max_articles
-    crawler = CrawlerRecursive(seed_urls=seedurls,
+    crawler = Crawler(seed_urls=seedurls,
                                total_max_articles=max_articles,
                                max_articles_per_seed=max_arts_per_seed)
 

From 23182bb1d0499ed48cf8a4867c421716fa6309af Mon Sep 17 00:00:00 2001
From: marina-kaz <isthismeowgoingfrommyheart@gmail.com>
Date: Fri, 26 Mar 2021 16:21:11 +0300
Subject: [PATCH 29/50] initial commit with the build for 8 and bad lintering

---
 crawler_config.json |  2 +-
 pipeline.py         | 78 +++++++++++++++++++++++++++++++++++++--------
 requirements.txt    |  2 ++
 target_score.txt    |  2 +-
 4 files changed, 69 insertions(+), 15 deletions(-)

diff --git a/crawler_config.json b/crawler_config.json
index c765cac3..d65a60dd 100644
--- a/crawler_config.json
+++ b/crawler_config.json
@@ -6,6 +6,6 @@
                   "https://burunen.ru/news/incidents/",
                   "https://burunen.ru/news/politic/"
     ],
-    "total_articles_to_find_and_parse": 20,
+    "total_articles_to_find_and_parse": 10,
     "max_number_articles_to_get_from_one_seed": 10
 }
\ No newline at end of file
diff --git a/pipeline.py b/pipeline.py
index d603e2e1..11a4b911 100644
--- a/pipeline.py
+++ b/pipeline.py
@@ -1,9 +1,15 @@
 """
 Pipeline for text processing implementation
 """
-
+import os
 from typing import List
 
+from pymorphy2 import MorphAnalyzer
+from pymystem3 import Mystem
+
+from article import Article
+from constants import ASSETS_PATH
+
 
 class EmptyDirectoryError(Exception):
     """
@@ -34,10 +40,13 @@ class MorphologicalToken:
     Stores language params for each processed token
     """
     def __init__(self, original_word, normalized_form):
-        pass
+        self.original = original_word
+        self.normalized = normalized_form
+        self.mystem_tags = ''
+        self.pymorphy_tags = ''
 
     def __str__(self):
-        return "MorphologicalToken instance here"
+        return self.normalized + '<' + self.mystem_tags + '>' + '(' + str(self.pymorphy_tags) + ')'
 
 
 class CorpusManager:
@@ -45,19 +54,24 @@ class CorpusManager:
     Works with articles and stores them
     """
     def __init__(self, path_to_raw_txt_data: str):
-        pass
+        self.path_to_raw = path_to_raw_txt_data
+        self._storage = {}
 
     def _scan_dataset(self):
         """
         Register each dataset entry
         """
-        pass
+        for file in os.listdir(ASSETS_PATH):
+            if file.endswith('_raw.txt'):
+                index = file.split('_raw.txt')[0]
+                self._storage[index] = Article(url=None, article_id=index)
 
     def get_articles(self):
         """
         Returns storage params
         """
-        pass
+        self._scan_dataset()
+        return self._storage
 
 
 class TextProcessingPipeline:
@@ -65,30 +79,68 @@ class TextProcessingPipeline:
     Process articles from corpus manager
     """
     def __init__(self, corpus_manager: CorpusManager):
-        pass
+        self.corpus = corpus_manager
 
     def run(self):
         """
         Runs pipeline process scenario
         """
-        pass
-
-    def _process(self) -> List[type(MorphologicalToken)]:
+        print(f'there are {self.corpus.get_articles()} articles to process')
+        for index, article in self.corpus.get_articles().items():
+            raw_text = article.get_raw_text()
+            tokens = self._process(raw_text)
+            processed = ' '.join(map(lambda token: str(token), tokens))
+            article.save_processed(processed)
+
+    @staticmethod
+    def _process(text) -> List[type(MorphologicalToken)]:
         """
         Performs processing of each text
         """
-        pass
+        mystem = Mystem()
+        pymorphy = MorphAnalyzer()
+        words = mystem.analyze(text)
+        tokens = []
+        for word in words:
+            orig = word['text'].strip()
+            if orig.isalpha():
+                try:
+                    token = MorphologicalToken(original_word=orig, normalized_form=word['analysis'][0]['lex'])
+                    token.mystem_tags = word['analysis'][0]['gr']
+                    token.pymorphy_tags = pymorphy.parse(orig)[0].tag
+                    tokens.append(token)
+                except IndexError:
+                    token = MorphologicalToken(original_word=orig, normalized_form=orig)
+                    tokens.append(token)
+        return tokens
 
 
 def validate_dataset(path_to_validate):
     """
     Validates folder with assets
     """
-    pass
+    if not os.path.exists(path_to_validate):
+        raise UnknownDatasetError
+    if not os.path.isdir(path_to_validate):
+        raise NotADirectoryError
+    if not os.listdir(path_to_validate):
+        raise EmptyDirectoryError
+    metas, raws = 0, 0
+    for file in os.listdir(ASSETS_PATH):
+        if file.endswith("_raw.txt"):
+            raws += 1
+        if file.endswith("_meta.json"):
+            metas += 1
+    if not metas == raws:
+        raise InconsistentDatasetError
 
 
 def main():
-    print('Your code goes here')
+    validate_dataset(ASSETS_PATH)
+    print('validated dataset')
+    corpus_manager = CorpusManager(ASSETS_PATH)
+    pipeline = TextProcessingPipeline(corpus_manager=corpus_manager)
+    pipeline.run()
 
 
 if __name__ == "__main__":
diff --git a/requirements.txt b/requirements.txt
index 327297ca..cb49a8af 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,2 +1,4 @@
 beautifulsoup4==4.9.0
+pymorphy2==0.9.1
+pymystem3==0.2.0
 requests==2.23.0
diff --git a/target_score.txt b/target_score.txt
index 3de837b8..4bb41dfa 100644
--- a/target_score.txt
+++ b/target_score.txt
@@ -2,4 +2,4 @@
 10
 
 # Target score for pipeline.py:
-0
\ No newline at end of file
+8
\ No newline at end of file

From 0ee4d5e9eef2dda4142f78029a4bf4f5d18ac911 Mon Sep 17 00:00:00 2001
From: marina-kaz <isthismeowgoingfrommyheart@gmail.com>
Date: Fri, 26 Mar 2021 16:34:57 +0300
Subject: [PATCH 30/50] build for 8 with a lot of lintering interface kostyly

---
 pipeline.py | 23 ++++++++++++++++-------
 1 file changed, 16 insertions(+), 7 deletions(-)

diff --git a/pipeline.py b/pipeline.py
index 11a4b911..24a13638 100644
--- a/pipeline.py
+++ b/pipeline.py
@@ -17,10 +17,10 @@ class EmptyDirectoryError(Exception):
     """
 
 
-class NotADirectoryError(Exception):
-    """
-    Custom error
-    """
+# class NotADirectoryError(Exception):
+#     """
+#     Custom error
+#     """
 
 
 class InconsistentDatasetError(Exception):
@@ -48,6 +48,9 @@ def __init__(self, original_word, normalized_form):
     def __str__(self):
         return self.normalized + '<' + self.mystem_tags + '>' + '(' + str(self.pymorphy_tags) + ')'
 
+    def placeholder_public_method(self):
+        pass
+
 
 class CorpusManager:
     """
@@ -73,6 +76,9 @@ def get_articles(self):
         self._scan_dataset()
         return self._storage
 
+    def placeholder_public_method(self):
+        pass
+
 
 class TextProcessingPipeline:
     """
@@ -85,13 +91,16 @@ def run(self):
         """
         Runs pipeline process scenario
         """
-        print(f'there are {self.corpus.get_articles()} articles to process')
-        for index, article in self.corpus.get_articles().items():
+        print(f'there are {len(self.corpus.get_articles())} articles to process')
+        for article in self.corpus.get_articles().values():
             raw_text = article.get_raw_text()
             tokens = self._process(raw_text)
-            processed = ' '.join(map(lambda token: str(token), tokens))
+            processed = ' '.join(map(str, tokens))
             article.save_processed(processed)
 
+    def placeholder_public_method(self):
+        pass
+
     @staticmethod
     def _process(text) -> List[type(MorphologicalToken)]:
         """

From f1a59fa88bf8f29861167cac0a97e073249ed83a Mon Sep 17 00:00:00 2001
From: marina-kaz <isthismeowgoingfrommyheart@gmail.com>
Date: Fri, 26 Mar 2021 21:31:14 +0300
Subject: [PATCH 31/50] added features for 10, did not upd tests

---
 pipeline.py               | 24 ++++++++++++++++++++++--
 pos_frequency_pipeline.py | 32 ++++++++++++++++++++++++++++++--
 2 files changed, 52 insertions(+), 4 deletions(-)

diff --git a/pipeline.py b/pipeline.py
index 24a13638..ea97d22b 100644
--- a/pipeline.py
+++ b/pipeline.py
@@ -9,6 +9,7 @@
 
 from article import Article
 from constants import ASSETS_PATH
+from pos_frequency_pipeline import POSFrequencyPipeline
 
 
 class EmptyDirectoryError(Exception):
@@ -49,6 +50,11 @@ def __str__(self):
         return self.normalized + '<' + self.mystem_tags + '>' + '(' + str(self.pymorphy_tags) + ')'
 
     def placeholder_public_method(self):
+        """
+        In order to pass lint check,
+        class must contain at least
+        two public methods
+        """
         pass
 
 
@@ -77,6 +83,11 @@ def get_articles(self):
         return self._storage
 
     def placeholder_public_method(self):
+        """
+        In order to pass lint check,
+        class must contain at least
+        two public methods
+        """
         pass
 
 
@@ -99,6 +110,11 @@ def run(self):
             article.save_processed(processed)
 
     def placeholder_public_method(self):
+        """
+        In order to pass lint check,
+        class must contain at least
+        two public methods
+        """
         pass
 
     @staticmethod
@@ -115,7 +131,7 @@ def _process(text) -> List[type(MorphologicalToken)]:
             if orig.isalpha():
                 try:
                     token = MorphologicalToken(original_word=orig, normalized_form=word['analysis'][0]['lex'])
-                    token.mystem_tags = word['analysis'][0]['gr']
+                    token.mystem_tags = word['analysis'][0]['gr'].strip()
                     token.pymorphy_tags = pymorphy.parse(orig)[0].tag
                     tokens.append(token)
                 except IndexError:
@@ -129,7 +145,7 @@ def validate_dataset(path_to_validate):
     Validates folder with assets
     """
     if not os.path.exists(path_to_validate):
-        raise UnknownDatasetError
+        raise FileNotFoundError
     if not os.path.isdir(path_to_validate):
         raise NotADirectoryError
     if not os.listdir(path_to_validate):
@@ -148,8 +164,12 @@ def main():
     validate_dataset(ASSETS_PATH)
     print('validated dataset')
     corpus_manager = CorpusManager(ASSETS_PATH)
+    print('onto processing')
     pipeline = TextProcessingPipeline(corpus_manager=corpus_manager)
     pipeline.run()
+    print('onto analytics')
+    visualizer = POSFrequencyPipeline(corpus_manager)
+    visualizer.run()
 
 
 if __name__ == "__main__":
diff --git a/pos_frequency_pipeline.py b/pos_frequency_pipeline.py
index 232c4d07..f8e887d1 100644
--- a/pos_frequency_pipeline.py
+++ b/pos_frequency_pipeline.py
@@ -1,8 +1,36 @@
 """
 Implementation of POSFrequencyPipeline for score ten only.
 """
+import os
+import re
+
+# from pipeline import CorpusManager
+from visualizer import visualize
+
+from constants import ASSETS_PATH
 
 
 class POSFrequencyPipeline:
-    def __init__(self, assets, destination):
-        pass
+    def __init__(self, corpus_manager):
+        self.corpus = corpus_manager
+
+    def run(self):
+        frequencies = self._count_frequencies()
+        visualize(frequencies, os.path.join(ASSETS_PATH, 'pos_frequencies.png'))
+
+    def _count_frequencies(self):
+        articles = self.corpus.get_articles()
+        tags_found = []
+        for index, article in articles.items():
+            article_path = os.path.join(ASSETS_PATH, f'{index}_processed.txt')
+            with open(article_path, encoding='utf-8') as file:
+                contents = file.read()
+                tags_found.extend(re.findall(r"<([A-Z]*)[,=]?", contents))
+        frequencies = {}
+        for tag in tags_found:
+            frequencies[tag] = tags_found.count(tag)
+        return frequencies
+
+
+if __name__ == "__main__":
+    pass

From 21e4fd8009804f16d94f3a4a0e696167c0d1844b Mon Sep 17 00:00:00 2001
From: marina-kaz <isthismeowgoingfrommyheart@gmail.com>
Date: Fri, 26 Mar 2021 21:52:48 +0300
Subject: [PATCH 32/50] build for 10 except i did not refactor

---
 requirements.txt | 2 ++
 target_score.txt | 2 +-
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/requirements.txt b/requirements.txt
index cb49a8af..46e89c07 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -2,3 +2,5 @@ beautifulsoup4==4.9.0
 pymorphy2==0.9.1
 pymystem3==0.2.0
 requests==2.23.0
+numpy==1.20.1
+matplotlib==3.4.0
\ No newline at end of file
diff --git a/target_score.txt b/target_score.txt
index 4bb41dfa..e91213a7 100644
--- a/target_score.txt
+++ b/target_score.txt
@@ -2,4 +2,4 @@
 10
 
 # Target score for pipeline.py:
-8
\ No newline at end of file
+10
\ No newline at end of file

From 1b73cb306b5146b80cb55ef94db7da9450a4ad7c Mon Sep 17 00:00:00 2001
From: marina-kaz <isthismeowgoingfrommyheart@gmail.com>
Date: Fri, 26 Mar 2021 22:11:37 +0300
Subject: [PATCH 33/50] refactor from os to pathlib

---
 pipeline.py               | 22 ++++++++++++----------
 pos_frequency_pipeline.py | 12 ++++++------
 2 files changed, 18 insertions(+), 16 deletions(-)

diff --git a/pipeline.py b/pipeline.py
index 276a85c2..dc82fe42 100644
--- a/pipeline.py
+++ b/pipeline.py
@@ -1,9 +1,9 @@
 """
 Pipeline for text processing implementation
 """
-import os
 from typing import List
 
+from pathlib import Path
 from pymorphy2 import MorphAnalyzer
 from pymystem3 import Mystem
 
@@ -64,9 +64,10 @@ def _scan_dataset(self):
         """
         Register each dataset entry
         """
-        for file in os.listdir(ASSETS_PATH):
-            if file.endswith('_raw.txt'):
-                index = file.split('_raw.txt')[0]
+        path = Path(ASSETS_PATH)
+        for file in path.iterdir():
+            if str(file).endswith('_raw.txt'):
+                index = str(file).split('_raw.txt')[0]
                 self._storage[index] = Article(url=None, article_id=index)
 
     def get_articles(self):
@@ -138,17 +139,18 @@ def validate_dataset(path_to_validate):
     """
     Validates folder with assets
     """
-    if not os.path.exists(path_to_validate):
+    path = Path(path_to_validate)
+    if not path.exists():
         raise FileNotFoundError
-    if not os.path.isdir(path_to_validate):
+    if not path.is_dir():
         raise NotADirectoryError
-    if not os.listdir(path_to_validate):
+    if not path.iterdir():
         raise EmptyDirectoryError
     metas, raws = 0, 0
-    for file in os.listdir(ASSETS_PATH):
-        if file.endswith("_raw.txt"):
+    for file in path.iterdir():
+        if str(file).endswith("_raw.txt"):
             raws += 1
-        if file.endswith("_meta.json"):
+        if str(file).endswith("_meta.json"):
             metas += 1
     if not metas == raws:
         raise InconsistentDatasetError
diff --git a/pos_frequency_pipeline.py b/pos_frequency_pipeline.py
index 71e0d498..48ca938d 100644
--- a/pos_frequency_pipeline.py
+++ b/pos_frequency_pipeline.py
@@ -1,10 +1,10 @@
 """
 Implementation of POSFrequencyPipeline for score ten only.
 """
-import os
 import re
 
-# from pipeline import CorpusManager
+from pathlib import Path
+
 from visualizer import visualize
 
 from constants import ASSETS_PATH
@@ -16,14 +16,15 @@ def __init__(self, corpus_manager):
 
     def run(self):
         frequencies = self._count_frequencies()
-        visualize(frequencies, os.path.join(ASSETS_PATH, 'pos_frequencies.png'))
+        path = Path(ASSETS_PATH) / 'pos_frequencies.png'
+        visualize(frequencies, path)
 
     def _count_frequencies(self):
         articles = self.corpus.get_articles()
         tags_found = []
         for index, article in articles.items():
-            article_path = os.path.join(ASSETS_PATH, f'{index}_processed.txt')
-            with open(article_path, encoding='utf-8') as file:
+            path = Path(ASSETS_PATH) / f'{index}_processed.txt'
+            with open(path, encoding='utf-8') as file:
                 contents = file.read()
                 tags_found.extend(re.findall(r"<([A-Z]*)[,=]?", contents))
         frequencies = {}
@@ -34,4 +35,3 @@ def _count_frequencies(self):
 
 if __name__ == "__main__":
     pass
-

From 19ff11d234cc6ae5be032ece10c629c85f99b2be Mon Sep 17 00:00:00 2001
From: marina-kaz <isthismeowgoingfrommyheart@gmail.com>
Date: Tue, 30 Mar 2021 15:22:07 +0300
Subject: [PATCH 34/50] fixed everything

---
 pipeline.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/pipeline.py b/pipeline.py
index dc82fe42..754df485 100644
--- a/pipeline.py
+++ b/pipeline.py
@@ -144,7 +144,8 @@ def validate_dataset(path_to_validate):
         raise FileNotFoundError
     if not path.is_dir():
         raise NotADirectoryError
-    if not path.iterdir():
+    files = [i for i in path.iterdir()]
+    if not files:
         raise EmptyDirectoryError
     metas, raws = 0, 0
     for file in path.iterdir():

From c30d85a1cc989c379cdce2436059be9ce328c285 Mon Sep 17 00:00:00 2001
From: marina-kaz <isthismeowgoingfrommyheart@gmail.com>
Date: Tue, 30 Mar 2021 15:25:05 +0300
Subject: [PATCH 35/50] now everything

---
 pipeline.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pipeline.py b/pipeline.py
index 754df485..8772fecb 100644
--- a/pipeline.py
+++ b/pipeline.py
@@ -144,7 +144,7 @@ def validate_dataset(path_to_validate):
         raise FileNotFoundError
     if not path.is_dir():
         raise NotADirectoryError
-    files = [i for i in path.iterdir()]
+    files = list(path.iterdir())
     if not files:
         raise EmptyDirectoryError
     metas, raws = 0, 0

From c58526b8ea8e4150ec026737f4789eef9287bb44 Mon Sep 17 00:00:00 2001
From: marina-kaz <isthismeowgoingfrommyheart@gmail.com>
Date: Tue, 30 Mar 2021 15:40:36 +0300
Subject: [PATCH 36/50] i don't understand

---
 pipeline.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/pipeline.py b/pipeline.py
index 8772fecb..28cd745a 100644
--- a/pipeline.py
+++ b/pipeline.py
@@ -144,8 +144,7 @@ def validate_dataset(path_to_validate):
         raise FileNotFoundError
     if not path.is_dir():
         raise NotADirectoryError
-    files = list(path.iterdir())
-    if not files:
+    if not list(path.iterdir()):
         raise EmptyDirectoryError
     metas, raws = 0, 0
     for file in path.iterdir():

From 0a9c9fe65080a7232c894b0311d8e6a24b8da0de Mon Sep 17 00:00:00 2001
From: marina-kaz <isthismeowgoingfrommyheart@gmail.com>
Date: Tue, 30 Mar 2021 15:45:37 +0300
Subject: [PATCH 37/50] fixed regular expression

---
 pos_frequency_pipeline.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pos_frequency_pipeline.py b/pos_frequency_pipeline.py
index 48ca938d..d292c605 100644
--- a/pos_frequency_pipeline.py
+++ b/pos_frequency_pipeline.py
@@ -26,7 +26,7 @@ def _count_frequencies(self):
             path = Path(ASSETS_PATH) / f'{index}_processed.txt'
             with open(path, encoding='utf-8') as file:
                 contents = file.read()
-                tags_found.extend(re.findall(r"<([A-Z]*)[,=]?", contents))
+                tags_found.extend(re.findall(r"<([A-Z]+)[,=]?", contents))
         frequencies = {}
         for tag in tags_found:
             frequencies[tag] = tags_found.count(tag)

From db17bf6b8d9e873e0c8551a40ab29b61db458ccb Mon Sep 17 00:00:00 2001
From: marina-kaz <isthismeowgoingfrommyheart@gmail.com>
Date: Tue, 30 Mar 2021 16:19:01 +0300
Subject: [PATCH 38/50] added COM to tags

---
 config/student_text_preprocess_score_eight_test.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/config/student_text_preprocess_score_eight_test.py b/config/student_text_preprocess_score_eight_test.py
index 0926e57a..2352fbd7 100644
--- a/config/student_text_preprocess_score_eight_test.py
+++ b/config/student_text_preprocess_score_eight_test.py
@@ -4,7 +4,7 @@
 from constants import ASSETS_PATH
 
 
-TAGS = ["A", "ADV", "S", "V", "PR", "ANUM", "CONJ", "SPRO", "APRO", "PART", "NUM", "ADVPRO"]
+TAGS = ["A", "ADV", "S", "V", "PR", "ANUM", "CONJ", "SPRO", "APRO", "PART", "NUM", "ADVPRO", "COM"]
 
 
 class StudentTextPreprocessTest(unittest.TestCase):

From 7ea4755aee6231831f4e045d8fa13413f8bdad16 Mon Sep 17 00:00:00 2001
From: marina-kaz <isthismeowgoingfrommyheart@gmail.com>
Date: Sun, 4 Apr 2021 14:40:54 +0300
Subject: [PATCH 39/50] added requested changes, ready to fight linter

---
 article.py                |  5 ++-
 pipeline.py               | 70 ++++++++++++---------------------------
 pos_frequency_pipeline.py | 42 +++++++++++++++--------
 3 files changed, 55 insertions(+), 62 deletions(-)

diff --git a/article.py b/article.py
index 718a3b3b..e59d6d63 100644
--- a/article.py
+++ b/article.py
@@ -29,6 +29,7 @@ def __init__(self, url, article_id):
         self.author = ''
         self.topics = []
         self.text = ''
+        self.pos_frequencies = {}
 
     def save_raw(self):
         """
@@ -62,6 +63,7 @@ def from_meta_json(json_path: str):
         article.date = date_from_meta(meta.get('date', None))
         article.author = meta.get('author', None)
         article.topics = meta.get('topics', None)
+        article.pos_frequencies = meta.get('pos_frequencies', None)
 
         # intentionally leave it empty
         article.text = None
@@ -92,7 +94,8 @@ def _get_meta(self):
             'title': self.title,
             'date': self._date_to_text(),
             'author': self.author,
-            'topics': self.topics
+            'topics': self.topics,
+            'pos_frequencies': self.pos_frequencies
         }
 
     def _date_to_text(self):
diff --git a/pipeline.py b/pipeline.py
index 28cd745a..576bdcdb 100644
--- a/pipeline.py
+++ b/pipeline.py
@@ -1,3 +1,5 @@
+# pylint: disable=R0903
+
 """
 Pipeline for text processing implementation
 """
@@ -9,7 +11,6 @@
 
 from article import Article
 from constants import ASSETS_PATH
-from pos_frequency_pipeline import POSFrequencyPipeline
 
 
 class EmptyDirectoryError(Exception):
@@ -41,15 +42,7 @@ def __init__(self, original_word, normalized_form):
         self.pymorphy_tags = ''
 
     def __str__(self):
-        return self.normalized + '<' + self.mystem_tags + '>' + '(' + str(self.pymorphy_tags) + ')'
-
-    def placeholder_public_method(self):
-        """
-        In order to pass lint check,
-        class must contain at least
-        two public methods
-        """
-        pass
+        return f'{self.normalized}<{self.mystem_tags}>({str(self.pymorphy_tags)})'
 
 
 class CorpusManager:
@@ -66,9 +59,10 @@ def _scan_dataset(self):
         """
         path = Path(ASSETS_PATH)
         for file in path.iterdir():
-            if str(file).endswith('_raw.txt'):
-                index = str(file).split('_raw.txt')[0]
-                self._storage[index] = Article(url=None, article_id=index)
+            file_name = file.relative_to(path)
+            if str(file_name).endswith('_raw.txt'):
+                index = str(file_name).split('_raw.txt')[0]
+                self._storage[index] = Article(url=None, article_id=int(index))
 
     def get_articles(self):
         """
@@ -77,14 +71,6 @@ def get_articles(self):
         self._scan_dataset()
         return self._storage
 
-    def placeholder_public_method(self):
-        """
-        In order to pass lint check,
-        class must contain at least
-        two public methods
-        """
-        pass
-
 
 class TextProcessingPipeline:
     """
@@ -92,34 +78,22 @@ class TextProcessingPipeline:
     """
     def __init__(self, corpus_manager: CorpusManager):
         self.corpus = corpus_manager
+        self.current_raw_text = ''
 
     def run(self):
         """
         Runs pipeline process scenario
         """
-        print(f'there are {len(self.corpus.get_articles())} articles to process')
         for article in self.corpus.get_articles().values():
-            raw_text = article.get_raw_text()
-            tokens = self._process(raw_text)
+            self.current_raw_text = article.get_raw_text()
+            tokens = self._process()
             processed = ' '.join(map(str, tokens))
             article.save_processed(processed)
 
-    def placeholder_public_method(self):
-        """
-        In order to pass lint check,
-        class must contain at least
-        two public methods
-        """
-        pass
-
-    @staticmethod
-    def _process(text) -> List[type(MorphologicalToken)]:
-        """
-        Performs processing of each text
-        """
+    def _process(self) -> List[type(MorphologicalToken)]:
         mystem = Mystem()
         pymorphy = MorphAnalyzer()
-        words = mystem.analyze(text)
+        words = mystem.analyze(self.current_raw_text)
         tokens = []
         for word in words:
             orig = word['text'].strip()
@@ -131,6 +105,8 @@ def _process(text) -> List[type(MorphologicalToken)]:
                     tokens.append(token)
                 except IndexError:
                     token = MorphologicalToken(original_word=orig, normalized_form=orig)
+                    if not str(pymorphy.parse(orig)[0].tag) == 'UNKN':
+                        token.pymorphy_tags = pymorphy.parse(orig)[0].tag
                     tokens.append(token)
         return tokens
 
@@ -146,13 +122,14 @@ def validate_dataset(path_to_validate):
         raise NotADirectoryError
     if not list(path.iterdir()):
         raise EmptyDirectoryError
-    metas, raws = 0, 0
-    for file in path.iterdir():
-        if str(file).endswith("_raw.txt"):
-            raws += 1
-        if str(file).endswith("_meta.json"):
-            metas += 1
-    if not metas == raws:
+    files = [str(file.relative_to(path)) for file in path.iterdir()]
+    metas = list(filter(lambda x: x.endswith('_raw.txt'), files))
+    raws = list(filter(lambda x: x.endswith('_meta.json'), files))
+    if not len(metas) == len(raws):
+        raise InconsistentDatasetError
+    meta_indices = sorted(list(map(lambda x: int(x.split('_')[0]), metas)))
+    raw_indices = sorted(list(map(lambda x: int(x.split('_')[0]), raws)))
+    if not meta_indices == raw_indices or not meta_indices == [i + 1 for i in range(len(meta_indices))]:
         raise InconsistentDatasetError
 
 
@@ -163,9 +140,6 @@ def main():
     print('onto processing')
     pipeline = TextProcessingPipeline(corpus_manager=corpus_manager)
     pipeline.run()
-    print('onto analytics')
-    visualizer = POSFrequencyPipeline(corpus_manager)
-    visualizer.run()
 
 
 if __name__ == "__main__":
diff --git a/pos_frequency_pipeline.py b/pos_frequency_pipeline.py
index d292c605..3c226622 100644
--- a/pos_frequency_pipeline.py
+++ b/pos_frequency_pipeline.py
@@ -6,32 +6,48 @@
 from pathlib import Path
 
 from visualizer import visualize
+from pipeline import CorpusManager
 
 from constants import ASSETS_PATH
 
 
 class POSFrequencyPipeline:
-    def __init__(self, corpus_manager):
-        self.corpus = corpus_manager
+    def __init__(self, corpus: CorpusManager):
+        self.corpus = corpus
+        self.current_article = None
 
     def run(self):
-        frequencies = self._count_frequencies()
-        path = Path(ASSETS_PATH) / 'pos_frequencies.png'
-        visualize(frequencies, path)
+        articles = self.corpus.get_articles()
+        for article in articles.values():
+            self.current_article = article
+            frequencies = self._count_frequencies()
+            self._update_meta(frequencies)
+            path = Path(ASSETS_PATH) / f'{article.article_id}_image.png'
+            visualize(frequencies, path)
 
     def _count_frequencies(self):
-        articles = self.corpus.get_articles()
-        tags_found = []
-        for index, article in articles.items():
-            path = Path(ASSETS_PATH) / f'{index}_processed.txt'
-            with open(path, encoding='utf-8') as file:
-                contents = file.read()
-                tags_found.extend(re.findall(r"<([A-Z]+)[,=]?", contents))
+        path = Path(ASSETS_PATH) / f'{self.current_article.article_id}_processed.txt'
+        with open(path, encoding='utf-8') as file:
+            contents = file.read()
+        tags_found = re.findall(r"<([A-Z]+)[,=]?", contents)
         frequencies = {}
         for tag in tags_found:
             frequencies[tag] = tags_found.count(tag)
         return frequencies
 
+    def _update_meta(self, frequencies):
+        meta_path = Path(ASSETS_PATH) / f'{self.current_article.article_id}_meta.json'
+        article = self.current_article.from_meta_json(meta_path)
+        article.pos_frequencies = frequencies
+        article.text = article.get_raw_text()
+        article.save_raw()
+
+
+def main():
+    corpus_manager = CorpusManager(ASSETS_PATH)
+    visualizer = POSFrequencyPipeline(corpus_manager)
+    visualizer.run()
+
 
 if __name__ == "__main__":
-    pass
+    main()

From 7e859825febd7698dbcae3802103cdbefedef9fa Mon Sep 17 00:00:00 2001
From: marina-kaz <isthismeowgoingfrommyheart@gmail.com>
Date: Sun, 4 Apr 2021 14:44:28 +0300
Subject: [PATCH 40/50] fought linter

---
 article.py  | 1 +
 pipeline.py | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/article.py b/article.py
index e59d6d63..b62ad9dd 100644
--- a/article.py
+++ b/article.py
@@ -1,3 +1,4 @@
+# pylint: disable=R0902
 """
 Article implementation
 """
diff --git a/pipeline.py b/pipeline.py
index 576bdcdb..ef6ae50a 100644
--- a/pipeline.py
+++ b/pipeline.py
@@ -105,7 +105,7 @@ def _process(self) -> List[type(MorphologicalToken)]:
                     tokens.append(token)
                 except IndexError:
                     token = MorphologicalToken(original_word=orig, normalized_form=orig)
-                    if not str(pymorphy.parse(orig)[0].tag) == 'UNKN':
+                    if str(pymorphy.parse(orig)[0].tag) != 'UNKN':
                         token.pymorphy_tags = pymorphy.parse(orig)[0].tag
                     tokens.append(token)
         return tokens

From a83469eab66961f10ee134b9619f49e730bc1f27 Mon Sep 17 00:00:00 2001
From: marina-kaz <isthismeowgoingfrommyheart@gmail.com>
Date: Sun, 4 Apr 2021 14:53:06 +0300
Subject: [PATCH 41/50] fixed dataset validation

---
 pipeline.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pipeline.py b/pipeline.py
index ef6ae50a..466a33de 100644
--- a/pipeline.py
+++ b/pipeline.py
@@ -129,7 +129,7 @@ def validate_dataset(path_to_validate):
         raise InconsistentDatasetError
     meta_indices = sorted(list(map(lambda x: int(x.split('_')[0]), metas)))
     raw_indices = sorted(list(map(lambda x: int(x.split('_')[0]), raws)))
-    if not meta_indices == raw_indices or not meta_indices == [i + 1 for i in range(len(meta_indices))]:
+    if not meta_indices == raw_indices or not meta_indices == list(range(len(metas))):
         raise InconsistentDatasetError
 
 

From 27280a055861240812e1db565a922ee444881ecf Mon Sep 17 00:00:00 2001
From: marina-kaz <isthismeowgoingfrommyheart@gmail.com>
Date: Sun, 4 Apr 2021 14:58:18 +0300
Subject: [PATCH 42/50] adjusted ds validator

---
 pipeline.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pipeline.py b/pipeline.py
index 466a33de..3ae0f01d 100644
--- a/pipeline.py
+++ b/pipeline.py
@@ -129,7 +129,7 @@ def validate_dataset(path_to_validate):
         raise InconsistentDatasetError
     meta_indices = sorted(list(map(lambda x: int(x.split('_')[0]), metas)))
     raw_indices = sorted(list(map(lambda x: int(x.split('_')[0]), raws)))
-    if not meta_indices == raw_indices or not meta_indices == list(range(len(metas))):
+    if not meta_indices == raw_indices:
         raise InconsistentDatasetError
 
 

From 4efc05d3e9608bcf9da39da34f661b52c8219b7f Mon Sep 17 00:00:00 2001
From: marina-kaz <isthismeowgoingfrommyheart@gmail.com>
Date: Sun, 4 Apr 2021 15:01:42 +0300
Subject: [PATCH 43/50] i am sorry for my terrible commit history

---
 pipeline.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pipeline.py b/pipeline.py
index 3ae0f01d..50463986 100644
--- a/pipeline.py
+++ b/pipeline.py
@@ -129,7 +129,7 @@ def validate_dataset(path_to_validate):
         raise InconsistentDatasetError
     meta_indices = sorted(list(map(lambda x: int(x.split('_')[0]), metas)))
     raw_indices = sorted(list(map(lambda x: int(x.split('_')[0]), raws)))
-    if not meta_indices == raw_indices:
+    if not raw_indices == meta_indices:
         raise InconsistentDatasetError
 
 

From b45ee4022a531a2266295e5156113d3dd55acf08 Mon Sep 17 00:00:00 2001
From: marina-kaz <isthismeowgoingfrommyheart@gmail.com>
Date: Sun, 4 Apr 2021 15:05:34 +0300
Subject: [PATCH 44/50] adjusted ds validator

---
 crawler_config.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/crawler_config.json b/crawler_config.json
index d65a60dd..542d6847 100644
--- a/crawler_config.json
+++ b/crawler_config.json
@@ -7,5 +7,5 @@
                   "https://burunen.ru/news/politic/"
     ],
     "total_articles_to_find_and_parse": 10,
-    "max_number_articles_to_get_from_one_seed": 10
+    "max_number_articles_to_get_from_one_seed": 5
 }
\ No newline at end of file

From 5c3a245601797f21e805518abd7ffa65fc7b74b2 Mon Sep 17 00:00:00 2001
From: marina-kaz <isthismeowgoingfrommyheart@gmail.com>
Date: Mon, 5 Apr 2021 15:59:14 +0300
Subject: [PATCH 45/50] fixed several drawbacks

---
 crawler_config.json       |  4 ++--
 pipeline.py               | 24 +++++++++++-------------
 pos_frequency_pipeline.py | 18 +++++++++++++++---
 scrapper.py               |  4 ++++
 4 files changed, 32 insertions(+), 18 deletions(-)

diff --git a/crawler_config.json b/crawler_config.json
index 542d6847..36f2c523 100644
--- a/crawler_config.json
+++ b/crawler_config.json
@@ -6,6 +6,6 @@
                   "https://burunen.ru/news/incidents/",
                   "https://burunen.ru/news/politic/"
     ],
-    "total_articles_to_find_and_parse": 10,
-    "max_number_articles_to_get_from_one_seed": 5
+    "total_articles_to_find_and_parse": 2,
+    "max_number_articles_to_get_from_one_seed": 2
 }
\ No newline at end of file
diff --git a/pipeline.py b/pipeline.py
index 50463986..239ad3c8 100644
--- a/pipeline.py
+++ b/pipeline.py
@@ -96,17 +96,16 @@ def _process(self) -> List[type(MorphologicalToken)]:
         words = mystem.analyze(self.current_raw_text)
         tokens = []
         for word in words:
-            orig = word['text'].strip()
-            if orig.isalpha():
+            if word.get('analysis') and word.get('text'):
                 try:
-                    token = MorphologicalToken(original_word=orig, normalized_form=word['analysis'][0]['lex'])
+                    token = MorphologicalToken(original_word=word['text'], normalized_form=word['analysis'][0]['lex'])
                     token.mystem_tags = word['analysis'][0]['gr'].strip()
-                    token.pymorphy_tags = pymorphy.parse(orig)[0].tag
+                    token.pymorphy_tags = pymorphy.parse(word['text'])[0].tag
                     tokens.append(token)
-                except IndexError:
-                    token = MorphologicalToken(original_word=orig, normalized_form=orig)
-                    if str(pymorphy.parse(orig)[0].tag) != 'UNKN':
-                        token.pymorphy_tags = pymorphy.parse(orig)[0].tag
+                except (IndexError, KeyError):
+                    token = MorphologicalToken(original_word=word['text'], normalized_form=word['text'])
+                    if str(pymorphy.parse(word['text'])[0].tag) != 'UNKN':
+                        token.pymorphy_tags = pymorphy.parse(word['text'])[0].tag
                     tokens.append(token)
         return tokens
 
@@ -122,13 +121,12 @@ def validate_dataset(path_to_validate):
         raise NotADirectoryError
     if not list(path.iterdir()):
         raise EmptyDirectoryError
-    files = [str(file.relative_to(path)) for file in path.iterdir()]
-    metas = list(filter(lambda x: x.endswith('_raw.txt'), files))
-    raws = list(filter(lambda x: x.endswith('_meta.json'), files))
+    raws = list(path.glob('*_raw.txt'))
+    metas = list(path.glob('*_meta.json'))
     if not len(metas) == len(raws):
         raise InconsistentDatasetError
-    meta_indices = sorted(list(map(lambda x: int(x.split('_')[0]), metas)))
-    raw_indices = sorted(list(map(lambda x: int(x.split('_')[0]), raws)))
+    meta_indices = sorted(list(map(lambda x: int(x.name.split('_')[0]), metas)))
+    raw_indices = sorted(list(map(lambda x: int(x.name.split('_')[0]), raws)))
     if not raw_indices == meta_indices:
         raise InconsistentDatasetError
 
diff --git a/pos_frequency_pipeline.py b/pos_frequency_pipeline.py
index 3c226622..f1c2b6cf 100644
--- a/pos_frequency_pipeline.py
+++ b/pos_frequency_pipeline.py
@@ -11,6 +11,11 @@
 from constants import ASSETS_PATH
 
 
+class EmptyFileError(Exception):
+    """
+    Custom error
+    """
+
 class POSFrequencyPipeline:
     def __init__(self, corpus: CorpusManager):
         self.corpus = corpus
@@ -21,9 +26,12 @@ def run(self):
         for article in articles.values():
             self.current_article = article
             frequencies = self._count_frequencies()
-            self._update_meta(frequencies)
-            path = Path(ASSETS_PATH) / f'{article.article_id}_image.png'
-            visualize(frequencies, path)
+            if frequencies:
+                self._update_meta(frequencies)
+                path = Path(ASSETS_PATH) / f'{article.article_id}_image.png'
+                visualize(frequencies, path)
+            else:
+                raise EmptyFileError
 
     def _count_frequencies(self):
         path = Path(ASSETS_PATH) / f'{self.current_article.article_id}_processed.txt'
@@ -33,6 +41,10 @@ def _count_frequencies(self):
         frequencies = {}
         for tag in tags_found:
             frequencies[tag] = tags_found.count(tag)
+        if not frequencies:
+            print('THERE IS AM EMPTY FILE, CHECK ')
+            print(self.current_article.article_id)
+
         return frequencies
 
     def _update_meta(self, frequencies):
diff --git a/scrapper.py b/scrapper.py
index 8ac37d99..d9a3d5cb 100644
--- a/scrapper.py
+++ b/scrapper.py
@@ -6,6 +6,7 @@
 import json
 import os
 from random import randint
+import shutil
 from time import sleep as wait
 
 from bs4 import BeautifulSoup
@@ -232,6 +233,9 @@ def prepare_environment(base_path, backup_path_dir):
     """
     if not os.path.exists(base_path):
         os.makedirs(base_path)
+    else:
+        shutil.rmtree(os.path.dirname(base_path))
+        os.makedirs(base_path)
     if not os.path.exists(backup_path_dir):
         os.makedirs(backup_path_dir)
 

From f6c703445842a566ae49ff78f3541cd0e2be983d Mon Sep 17 00:00:00 2001
From: marina-kaz <isthismeowgoingfrommyheart@gmail.com>
Date: Mon, 5 Apr 2021 16:01:47 +0300
Subject: [PATCH 46/50] fixed several drawbacks

---
 crawler_config.json | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/crawler_config.json b/crawler_config.json
index 36f2c523..0dd527c8 100644
--- a/crawler_config.json
+++ b/crawler_config.json
@@ -6,6 +6,6 @@
                   "https://burunen.ru/news/incidents/",
                   "https://burunen.ru/news/politic/"
     ],
-    "total_articles_to_find_and_parse": 2,
-    "max_number_articles_to_get_from_one_seed": 2
+    "total_articles_to_find_and_parse": 5,
+    "max_number_articles_to_get_from_one_seed": 5
 }
\ No newline at end of file

From b8399cc31effe4692c766f5e1a5e401227648e6e Mon Sep 17 00:00:00 2001
From: marina-kaz <isthismeowgoingfrommyheart@gmail.com>
Date: Mon, 5 Apr 2021 16:05:18 +0300
Subject: [PATCH 47/50] fixed several drawbacks

---
 crawler_config.json | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/crawler_config.json b/crawler_config.json
index 0dd527c8..36f2c523 100644
--- a/crawler_config.json
+++ b/crawler_config.json
@@ -6,6 +6,6 @@
                   "https://burunen.ru/news/incidents/",
                   "https://burunen.ru/news/politic/"
     ],
-    "total_articles_to_find_and_parse": 5,
-    "max_number_articles_to_get_from_one_seed": 5
+    "total_articles_to_find_and_parse": 2,
+    "max_number_articles_to_get_from_one_seed": 2
 }
\ No newline at end of file

From b8dc298233335c3c29f28b550950bdd3fd478337 Mon Sep 17 00:00:00 2001
From: marina-kaz <isthismeowgoingfrommyheart@gmail.com>
Date: Mon, 5 Apr 2021 16:17:23 +0300
Subject: [PATCH 48/50] oh well I noticed smt else

---
 pos_frequency_pipeline.py | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/pos_frequency_pipeline.py b/pos_frequency_pipeline.py
index f1c2b6cf..6752454e 100644
--- a/pos_frequency_pipeline.py
+++ b/pos_frequency_pipeline.py
@@ -16,6 +16,7 @@ class EmptyFileError(Exception):
     Custom error
     """
 
+
 class POSFrequencyPipeline:
     def __init__(self, corpus: CorpusManager):
         self.corpus = corpus
@@ -41,10 +42,6 @@ def _count_frequencies(self):
         frequencies = {}
         for tag in tags_found:
             frequencies[tag] = tags_found.count(tag)
-        if not frequencies:
-            print('THERE IS AM EMPTY FILE, CHECK ')
-            print(self.current_article.article_id)
-
         return frequencies
 
     def _update_meta(self, frequencies):

From 96f7e8e3f707c06be35e16d0d46d1854aae755d2 Mon Sep 17 00:00:00 2001
From: marina-kaz <isthismeowgoingfrommyheart@gmail.com>
Date: Mon, 5 Apr 2021 16:58:33 +0300
Subject: [PATCH 49/50] turned get articles into authentic getter

---
 pipeline.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/pipeline.py b/pipeline.py
index 239ad3c8..e3cf4755 100644
--- a/pipeline.py
+++ b/pipeline.py
@@ -51,24 +51,25 @@ class CorpusManager:
     """
     def __init__(self, path_to_raw_txt_data: str):
         self.path_to_raw = path_to_raw_txt_data
-        self._storage = {}
+        self._storage = self._scan_dataset()
 
     def _scan_dataset(self):
         """
         Register each dataset entry
         """
         path = Path(ASSETS_PATH)
+        arts = {}
         for file in path.iterdir():
             file_name = file.relative_to(path)
             if str(file_name).endswith('_raw.txt'):
                 index = str(file_name).split('_raw.txt')[0]
-                self._storage[index] = Article(url=None, article_id=int(index))
+                arts[index] = Article(url=None, article_id=int(index))
+        return arts
 
     def get_articles(self):
         """
         Returns storage params
         """
-        self._scan_dataset()
         return self._storage
 
 

From ff570e9d3b75d0d68f4c743751344ba8a3337e1f Mon Sep 17 00:00:00 2001
From: marina-kaz <isthismeowgoingfrommyheart@gmail.com>
Date: Mon, 5 Apr 2021 17:03:12 +0300
Subject: [PATCH 50/50] fixed lintering

---
 pipeline.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/pipeline.py b/pipeline.py
index e3cf4755..ccf60eb0 100644
--- a/pipeline.py
+++ b/pipeline.py
@@ -53,7 +53,8 @@ def __init__(self, path_to_raw_txt_data: str):
         self.path_to_raw = path_to_raw_txt_data
         self._storage = self._scan_dataset()
 
-    def _scan_dataset(self):
+    @staticmethod
+    def _scan_dataset():
         """
         Register each dataset entry
         """