From c3f0eaf509b98871ae3289f9ef765a4a5c94e9f2 Mon Sep 17 00:00:00 2001 From: Alex Date: Wed, 6 May 2026 15:58:53 +0300 Subject: [PATCH] =?UTF-8?q?=D0=9A=D0=BE=D0=BC=D0=BC=D0=B8=D1=82.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .gitignore | 3 +- README.md | 97 +++++++++++++ load_data.py | 359 +++++++++++++++++++++++++++++++++++++++++++++++ requirements.txt | 35 +++++ 4 files changed, 493 insertions(+), 1 deletion(-) create mode 100644 README.md create mode 100644 load_data.py create mode 100644 requirements.txt diff --git a/.gitignore b/.gitignore index 723ef36..3d031da 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,2 @@ -.idea \ No newline at end of file +.idea +data \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..d41ff21 --- /dev/null +++ b/README.md @@ -0,0 +1,97 @@ +# Sber History To Excel + +Скрипт выгружает историю операций СберБанк Онлайн за выбранный месяц и сохраняет ее в один Excel-файл `.xlsx`. + +## Что делает + +- Получает операции через API истории операций Сбера. +- Забирает данные постранично. +- Оставляет только операции за указанный месяц. +- Убирает дубли по идентификатору операции. +- Сохраняет результат в один `.xlsx` файл. + +## Требования + +- Python 3.10+ +- Библиотека `requests` +- Актуальный Cookie авторизованной сессии СберБанк Онлайн + +Установка зависимости: + +```bash +python3 -m pip install requests +``` + +## Настройка Cookie + +Скрипт берет Cookie из переменной окружения `Cookie`. + +```bash +export Cookie='твой-cookie-из-браузера' +``` + +Не сохраняй Cookie в коде или в репозитории: это чувствительные данные доступа к аккаунту. 
+ +## Запуск + +Пример выгрузки операций за май 2026 года: + +```bash +python3 load_data.py --year 2026 --month 5 --output sber_may_2026.xlsx +``` + +Если `--output` не указать, файл будет создан с именем: + +```text +sber_operations_2026_05.xlsx +``` + +## Использование из кода + +```python +from load_data import SberMonthHistoryExporter + +exporter = SberMonthHistoryExporter() +output_path = exporter.save_month(2026, 5, "sber_may_2026.xlsx") + +print(output_path) +``` + +Можно отдельно получить операции без сохранения: + +```python +from load_data import SberMonthHistoryExporter + +exporter = SberMonthHistoryExporter() +operations = exporter.fetch_month(2026, 5) + +print(len(operations)) +``` + +## Формат результата + +В Excel попадают основные поля операции: + +- дата +- получатель +- описание +- сумма операции +- валюта +- сумма в RUB +- бонусы Спасибо +- счет +- остаток после операции +- статус +- тип операции +- код категории +- ID операции + +## Возможные ошибки + +Если Cookie не задан: + +```text +ValueError: Set Sber auth cookie in the Cookie environment variable. +``` + +Если Cookie устарел, Сбер может вернуть ошибку авторизации. В этом случае нужно снова войти в СберБанк Онлайн в браузере и обновить значение переменной `Cookie`. 
"""Export one month of Sber Online operation history to a single XLSX file.

The module pages through the operations-history endpoint, keeps the operations
whose date falls inside the requested calendar month, removes duplicates and
writes the result as a minimal, dependency-free XLSX workbook.
"""

from __future__ import annotations

import argparse
import math
import os
import re
import zipfile
from dataclasses import dataclass, field
from datetime import datetime
from pathlib import Path
from typing import Any, Iterable
from xml.sax.saxutils import escape

try:
    import requests
except ImportError:  # pragma: no cover - only hit when the dependency is absent
    # Keep the pure helpers (date parsing, XLSX writing) importable; the
    # exporter reports the missing dependency when it needs a session.
    requests = None


SBER_OPERATIONS_URL = "https://web-node1.online.sberbank.ru/uoh-bh/v1/operations/list"
DATE_FORMAT = "%d.%m.%YT%H:%M:%S"

# Column order of the exported sheet; also used when there are no rows.
COLUMN_HEADERS = (
    "Дата",
    "Получатель",
    "Описание",
    "Сумма операции",
    "Валюта операции",
    "Сумма в RUB",
    "Валюта",
    "Бонусы Спасибо",
    "Счет",
    "Остаток после операции",
    "Статус",
    "Тип",
    "Код категории",
    "ID",
)

_XML_DECLARATION = '<?xml version="1.0" encoding="UTF-8" standalone="yes"?>'
_SPREADSHEET_NS = "http://schemas.openxmlformats.org/spreadsheetml/2006/main"
_OFFICE_RELS_NS = "http://schemas.openxmlformats.org/officeDocument/2006/relationships"
_PACKAGE_RELS_NS = "http://schemas.openxmlformats.org/package/2006/relationships"

# Control characters that XML 1.0 forbids even when escaped.
_ILLEGAL_XML_CHARS = re.compile(r"[\x00-\x08\x0b\x0c\x0e-\x1f]")


@dataclass
class SberMonthHistoryExporter:
    """Download Sber operations for one month and write them into one XLSX file.

    Attributes:
        cookie: Cookie header of an authorised session. Defaults to the
            ``Cookie`` environment variable. Excluded from ``repr`` because it
            grants access to the account.
        url: Operations-list endpoint.
        page_size: Number of operations requested per page; must be positive.
        timeout: Per-request timeout in seconds.
        max_pages: Hard limit on requested pages, to stop a runaway loop.
        session: Optional pre-built HTTP session exposing ``post`` and
            ``close``. When omitted a ``requests.Session`` is created and
            owned (closed) by the exporter.

    Raises:
        ValueError: If the cookie is empty or ``page_size`` is not positive.
        RuntimeError: If no session is supplied and ``requests`` is missing.
    """

    cookie: str = field(default_factory=lambda: os.getenv("Cookie", ""), repr=False)
    url: str = SBER_OPERATIONS_URL
    page_size: int = 50
    timeout: int = 30
    max_pages: int = 500
    session: Any = field(default=None, repr=False, compare=False)

    def __post_init__(self) -> None:
        if not self.cookie:
            raise ValueError("Set Sber auth cookie in the Cookie environment variable.")
        if self.page_size <= 0:
            raise ValueError("page_size must be positive.")

        # Only close sessions this object created; an injected one belongs
        # to the caller.
        self._owns_session = self.session is None
        if self._owns_session:
            if requests is None:
                raise RuntimeError(
                    "The 'requests' package is required: python3 -m pip install requests"
                )
            self.session = requests.Session()

    def __enter__(self) -> "SberMonthHistoryExporter":
        return self

    def __exit__(self, *exc_info: object) -> None:
        self.close()

    def close(self) -> None:
        """Close the HTTP session if it was created by this exporter."""
        if self._owns_session and self.session is not None:
            self.session.close()

    def fetch_month(self, year: int, month: int) -> list[dict[str, Any]]:
        """Return all operations whose date belongs to the requested month.

        Operations are de-duplicated by identity and returned sorted from
        oldest to newest. Entries whose date cannot be parsed are skipped.

        Raises:
            ValueError: If ``month`` is outside 1..12.
            RuntimeError: If ``max_pages`` pages were read without reaching
                the end of the requested month.
        """
        start, end = self._month_bounds(year, month)
        dated_operations: list[tuple[datetime, dict[str, Any]]] = []
        seen_ids: set[str] = set()

        for page_number in range(self.max_pages):
            page = self._fetch_page(offset=page_number * self.page_size, month_start=start)
            if not page:
                break

            oldest_on_page: datetime | None = None
            for operation in page:
                operation_date = self._parse_operation_date(operation)
                if operation_date is None:
                    continue

                if oldest_on_page is None or operation_date < oldest_on_page:
                    oldest_on_page = operation_date
                if not start <= operation_date < end:
                    continue

                operation_id = self._operation_identity(operation)
                if operation_id not in seen_ids:
                    seen_ids.add(operation_id)
                    dated_operations.append((operation_date, operation))

            if len(page) < self.page_size:
                break

            # Sber returns operations from newest to oldest, so going before
            # month start means that the following pages cannot contain the
            # requested month.
            if oldest_on_page is not None and oldest_on_page < start:
                break
        else:
            raise RuntimeError(f"Reached max_pages={self.max_pages}; export stopped to avoid an endless loop.")

        # Sort on the already parsed date; list.sort is stable, so operations
        # with equal timestamps keep the order in which they were received.
        dated_operations.sort(key=lambda pair: pair[0])
        return [operation for _, operation in dated_operations]

    def save_month(self, year: int, month: int, output_path: str | Path | None = None) -> Path:
        """Fetch a month and save it to one XLSX file.

        Returns the path that was written. When ``output_path`` is omitted
        the file is named ``sber_operations_<year>_<month>.xlsx``.
        """
        operations = self.fetch_month(year, month)
        path = Path(output_path or f"sber_operations_{year}_{month:02d}.xlsx")
        rows = [self._operation_to_row(operation) for operation in operations]
        self._write_xlsx(path, rows)
        return path

    def _fetch_page(self, offset: int, month_start: datetime) -> list[dict[str, Any]]:
        """Request one page of operations starting at ``offset``.

        Raises:
            RuntimeError: If the response is not marked successful or does
                not carry a list of operations.
        """
        payload = {
            "paginationOffset": offset,
            "paginationSize": self.page_size,
            "showHidden": False,
            "showNotTransactionBonuses": True,
            "showOpenBanking": True,
            "from": month_start.strftime(DATE_FORMAT),
        }

        response = self.session.post(
            self.url,
            headers=self._headers(),
            json=payload,
            timeout=self.timeout,
        )
        response.raise_for_status()

        data = response.json()
        if not isinstance(data, dict) or not data.get("success", False):
            raise RuntimeError(f"Sber API returned unsuccessful response: {data}")

        # "body" may be present but null; treat that like a missing body.
        body = data.get("body") or {}
        operations = body.get("operations", []) if isinstance(body, dict) else body
        if not isinstance(operations, list):
            raise RuntimeError(f"Unexpected operations payload: {operations!r}")

        return operations

    def _headers(self) -> dict[str, str]:
        """Return the browser-like request headers, including the cookie."""
        return {
            "accept": "application/json, text/plain, */*",
            "accept-language": "ru-RU,ru;q=0.9,en-US;q=0.8,en;q=0.7",
            "cache-control": "no-cache",
            "content-type": "application/json;charset=UTF-8",
            "origin": "https://online.sberbank.ru",
            "pragma": "no-cache",
            "priority": "u=1, i",
            "referer": "https://online.sberbank.ru/",
            "sec-ch-ua": '"Google Chrome";v="147", "Not.A/Brand";v="8", "Chromium";v="147"',
            "sec-ch-ua-mobile": "?0",
            "sec-ch-ua-platform": '"macOS"',
            "sec-fetch-dest": "empty",
            "sec-fetch-mode": "cors",
            "sec-fetch-site": "same-site",
            "user-agent": (
                "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "
                "AppleWebKit/537.36 (KHTML, like Gecko) "
                "Chrome/147.0.0.0 Safari/537.36"
            ),
            "x-requested-with": "XMLHttpRequest",
            "Cookie": self.cookie,
        }

    @staticmethod
    def _month_bounds(year: int, month: int) -> tuple[datetime, datetime]:
        """Return ``(first instant of the month, first instant of the next month)``."""
        if month < 1 or month > 12:
            raise ValueError("month must be between 1 and 12.")

        start = datetime(year=year, month=month, day=1)
        if month == 12:
            end = datetime(year=year + 1, month=1, day=1)
        else:
            end = datetime(year=year, month=month + 1, day=1)

        return start, end

    @staticmethod
    def _parse_operation_date(operation: dict[str, Any]) -> datetime | None:
        """Parse the operation's ``date`` field, or return ``None`` if unusable.

        Fractional seconds of any value are dropped before parsing, so a
        timestamp ending in ``.123`` is accepted just like one ending in
        ``.000``.
        """
        if not isinstance(operation, dict):
            return None
        raw_date = operation.get("date")
        if not isinstance(raw_date, str):
            return None

        # The date part itself contains dots (dd.mm.yyyy), so only the part
        # after "T" may be cut at the first dot.
        date_part, separator, time_part = raw_date.partition("T")
        if not separator:
            return None
        whole_seconds = time_part.split(".", 1)[0]

        try:
            return datetime.strptime(f"{date_part}T{whole_seconds}", DATE_FORMAT)
        except ValueError:
            return None

    @staticmethod
    def _operation_identity(operation: dict[str, Any]) -> str:
        """Return a de-duplication key: the first non-empty id, else the repr."""
        for key in ("uohId", "externalId", "authorizationDocId"):
            value = operation.get(key)
            if value:
                return str(value)
        return repr(operation)

    @staticmethod
    def _operation_to_row(operation: dict[str, Any]) -> dict[str, Any]:
        """Flatten one API operation into a row keyed by ``COLUMN_HEADERS``."""

        def nested(key: str) -> dict[str, Any]:
            # Nested objects may be missing, null or of an unexpected type.
            value = operation.get(key)
            return value if isinstance(value, dict) else {}

        operation_amount = nested("operationAmount")
        national_amount = nested("nationalAmount")
        billing_amount = nested("billingAmount")
        state = nested("state")
        from_resource = nested("fromResource")
        bonuses = operation.get("bonuses") or []

        bonus_income = 0
        if isinstance(bonuses, list):
            incomes = (bonus.get("income", 0) for bonus in bonuses if isinstance(bonus, dict))
            bonus_income = sum(
                income
                for income in incomes
                if isinstance(income, (int, float)) and not isinstance(income, bool)
            )

        return {
            "Дата": operation.get("date", ""),
            "Получатель": operation.get("correspondent", ""),
            "Описание": operation.get("description", ""),
            "Сумма операции": operation_amount.get("amount", ""),
            "Валюта операции": operation_amount.get("currencyCode", ""),
            "Сумма в RUB": national_amount.get("amount", ""),
            "Валюта": national_amount.get("currencyCode", ""),
            "Бонусы Спасибо": bonus_income,
            "Счет": from_resource.get("displayedValue", ""),
            "Остаток после операции": billing_amount.get("amount", ""),
            "Статус": state.get("category", ""),
            "Тип": operation.get("type", ""),
            "Код категории": operation.get("classificationCode", ""),
            "ID": operation.get("uohId") or operation.get("externalId", ""),
        }

    @staticmethod
    def _write_xlsx(path: Path, rows: Iterable[dict[str, Any]]) -> None:
        """Write ``rows`` as a single-sheet XLSX workbook at ``path``.

        The header row comes from the keys of the first row, or from
        ``COLUMN_HEADERS`` when there are no rows. Parent directories are
        created as needed.
        """
        rows = list(rows)
        headers = list(rows[0].keys()) if rows else list(COLUMN_HEADERS)

        path = Path(path)
        path.parent.mkdir(parents=True, exist_ok=True)
        worksheet_xml = _build_worksheet_xml(headers, rows)

        with zipfile.ZipFile(path, "w", compression=zipfile.ZIP_DEFLATED) as archive:
            archive.writestr("[Content_Types].xml", _content_types_xml())
            archive.writestr("_rels/.rels", _root_rels_xml())
            archive.writestr("xl/workbook.xml", _workbook_xml())
            archive.writestr("xl/_rels/workbook.xml.rels", _workbook_rels_xml())
            archive.writestr("xl/styles.xml", _styles_xml())
            archive.writestr("xl/worksheets/sheet1.xml", worksheet_xml)


def _build_worksheet_xml(headers: list[str], rows: list[dict[str, Any]]) -> str:
    """Return the sheet XML: one header row followed by one row per dict."""
    sheet_rows = [_build_row_xml(1, headers)]
    for index, row in enumerate(rows, start=2):
        sheet_rows.append(_build_row_xml(index, [row.get(header, "") for header in headers]))

    # Keep the dimension reference well-formed even for an empty header list.
    last_column = _column_name(max(len(headers), 1))
    last_row = max(len(rows) + 1, 1)

    return (
        f"{_XML_DECLARATION}"
        f'<worksheet xmlns="{_SPREADSHEET_NS}">'
        f'<dimension ref="A1:{last_column}{last_row}"/>'
        '<sheetViews><sheetView workbookViewId="0"/></sheetViews>'
        '<sheetFormatPr defaultRowHeight="15"/>'
        "<sheetData>"
        f'{"".join(sheet_rows)}'
        "</sheetData>"
        "</worksheet>"
    )


def _build_row_xml(row_index: int, values: list[Any]) -> str:
    """Return one ``<row>`` element holding a cell per value."""
    cells = []
    for column_index, value in enumerate(values, start=1):
        cell_reference = f"{_column_name(column_index)}{row_index}"
        cells.append(_build_cell_xml(cell_reference, value))
    return f'<row r="{row_index}">{"".join(cells)}</row>'


def _build_cell_xml(cell_reference: str, value: Any) -> str:
    """Return one ``<c>`` element: numeric for finite numbers, else inline text."""
    if value is None:
        value = ""

    is_number = isinstance(value, (int, float)) and not isinstance(value, bool)
    # NaN/inf have no numeric cell representation; store them as text.
    if is_number and not (isinstance(value, float) and not math.isfinite(value)):
        return f'<c r="{cell_reference}"><v>{value}</v></c>'

    text = _ILLEGAL_XML_CHARS.sub("", str(value))
    return (
        f'<c r="{cell_reference}" t="inlineStr">'
        f'<is><t xml:space="preserve">{escape(text)}</t></is>'
        "</c>"
    )


def _column_name(index: int) -> str:
    """Convert a 1-based column index to its letter name (1 -> A, 27 -> AA)."""
    name = ""
    while index:
        index, remainder = divmod(index - 1, 26)
        name = chr(65 + remainder) + name
    return name


def _content_types_xml() -> str:
    """Return ``[Content_Types].xml`` declaring the parts of the package."""
    return (
        f"{_XML_DECLARATION}"
        '<Types xmlns="http://schemas.openxmlformats.org/package/2006/content-types">'
        '<Default Extension="rels" ContentType="application/vnd.openxmlformats-package.relationships+xml"/>'
        '<Default Extension="xml" ContentType="application/xml"/>'
        '<Override PartName="/xl/workbook.xml" '
        'ContentType="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet.main+xml"/>'
        '<Override PartName="/xl/worksheets/sheet1.xml" '
        'ContentType="application/vnd.openxmlformats-officedocument.spreadsheetml.worksheet+xml"/>'
        '<Override PartName="/xl/styles.xml" '
        'ContentType="application/vnd.openxmlformats-officedocument.spreadsheetml.styles+xml"/>'
        "</Types>"
    )


def _root_rels_xml() -> str:
    """Return ``_rels/.rels`` pointing the package at the workbook part."""
    return (
        f"{_XML_DECLARATION}"
        f'<Relationships xmlns="{_PACKAGE_RELS_NS}">'
        f'<Relationship Id="rId1" Type="{_OFFICE_RELS_NS}/officeDocument" Target="xl/workbook.xml"/>'
        "</Relationships>"
    )


def _workbook_xml() -> str:
    """Return ``xl/workbook.xml`` with a single sheet."""
    return (
        f"{_XML_DECLARATION}"
        f'<workbook xmlns="{_SPREADSHEET_NS}" xmlns:r="{_OFFICE_RELS_NS}">'
        '<sheets><sheet name="Operations" sheetId="1" r:id="rId1"/></sheets>'
        "</workbook>"
    )


def _workbook_rels_xml() -> str:
    """Return ``xl/_rels/workbook.xml.rels`` linking the sheet and the styles."""
    return (
        f"{_XML_DECLARATION}"
        f'<Relationships xmlns="{_PACKAGE_RELS_NS}">'
        f'<Relationship Id="rId1" Type="{_OFFICE_RELS_NS}/worksheet" Target="worksheets/sheet1.xml"/>'
        f'<Relationship Id="rId2" Type="{_OFFICE_RELS_NS}/styles" Target="styles.xml"/>'
        "</Relationships>"
    )


def _styles_xml() -> str:
    """Return a minimal ``xl/styles.xml`` with one default cell format."""
    return (
        f"{_XML_DECLARATION}"
        f'<styleSheet xmlns="{_SPREADSHEET_NS}">'
        '<fonts count="1"><font><sz val="11"/><name val="Calibri"/></font></fonts>'
        '<fills count="2"><fill><patternFill patternType="none"/></fill>'
        '<fill><patternFill patternType="gray125"/></fill></fills>'
        '<borders count="1"><border><left/><right/><top/><bottom/><diagonal/></border></borders>'
        '<cellStyleXfs count="1"><xf numFmtId="0" fontId="0" fillId="0" borderId="0"/></cellStyleXfs>'
        '<cellXfs count="1"><xf numFmtId="0" fontId="0" fillId="0" borderId="0" xfId="0"/></cellXfs>'
        "</styleSheet>"
    )


def _parse_args(argv: list[str] | None = None) -> argparse.Namespace:
    """Parse CLI arguments; year and month default to the current date."""
    now = datetime.now()
    parser = argparse.ArgumentParser(description="Export Sber operations for one month to a single XLSX file.")
    parser.add_argument("--year", type=int, default=now.year, help="Year to export, for example 2026.")
    parser.add_argument("--month", type=int, default=now.month, help="Month to export, from 1 to 12.")
    parser.add_argument("--output", type=Path, default=None, help="Output XLSX path.")
    return parser.parse_args(argv)


def main(argv: list[str] | None = None) -> None:
    """Run the export from the command line and print the output path."""
    args = _parse_args(argv)
    with SberMonthHistoryExporter() as exporter:
        output = exporter.save_month(args.year, args.month, args.output)
    print(f"Saved {args.year}-{args.month:02d} operations to {output}")


if __name__ == "__main__":
    main()
+debugpy==1.8.20 +decorator==5.2.1 +executing==2.2.1 +idna==3.13 +ipykernel==7.2.0 +ipython==9.13.0 +ipython_pygments_lexers==1.1.1 +jedi==0.20.0 +jupyter_client==8.8.0 +jupyter_core==5.9.1 +matplotlib-inline==0.2.1 +nest-asyncio==1.6.0 +packaging==26.2 +parso==0.8.7 +pexpect==4.9.0 +platformdirs==4.9.6 +prompt_toolkit==3.0.52 +psutil==7.2.2 +ptyprocess==0.7.0 +pure_eval==0.2.3 +Pygments==2.20.0 +python-dateutil==2.9.0.post0 +pyzmq==27.1.0 +requests==2.33.1 +six==1.17.0 +stack-data==0.6.3 +tornado==6.5.5 +traitlets==5.15.0 +urllib3==2.6.3 +wcwidth==0.7.0