SberHistoryToExcel/load_data.py
2026-05-06 15:58:53 +03:00

360 lines
14 KiB
Python

from __future__ import annotations

import argparse
import math
import os
import zipfile
from dataclasses import dataclass, field
from datetime import datetime
from pathlib import Path
from typing import Any, Iterable
from xml.sax.saxutils import escape

import requests

# Default endpoint that operation-history pages are POSTed to.
SBER_OPERATIONS_URL = "https://web-node1.online.sberbank.ru/uoh-bh/v1/operations/list"
# strftime/strptime pattern used both for the request's "from" field and for
# parsing the "date" value of each returned operation.
DATE_FORMAT = "%d.%m.%YT%H:%M:%S"
@dataclass
class SberMonthHistoryExporter:
    """Download Sber operations for one month and write them into one XLSX file.

    Attributes:
        cookie: Value sent as the ``Cookie`` request header. Defaults to the
            ``Cookie`` environment variable. Excluded from ``repr()`` so the
            credential does not leak into logs or tracebacks.
        url: Endpoint that operation pages are POSTed to.
        page_size: Number of operations requested per page; must be positive.
        timeout: Timeout passed to ``requests`` for every page request.
        max_pages: Upper bound on the number of pages fetched for one month.
    """

    cookie: str = field(default_factory=lambda: os.getenv("Cookie", ""), repr=False)
    url: str = SBER_OPERATIONS_URL
    page_size: int = 50
    timeout: int = 30
    max_pages: int = 500

    def __post_init__(self) -> None:
        """Validate the configuration and open the HTTP session.

        Raises:
            ValueError: If the cookie is empty or ``page_size`` is not positive.
        """
        if not self.cookie:
            raise ValueError("Set Sber auth cookie in the Cookie environment variable.")
        if self.page_size <= 0:
            raise ValueError("page_size must be positive.")
        # One session is reused for every page request.
        self.session = requests.Session()

    def fetch_month(self, year: int, month: int) -> list[dict[str, Any]]:
        """Return all operations whose date belongs to the requested month.

        Pages are requested one after another until a page is empty, is
        shorter than ``page_size``, or contains an operation dated before the
        start of the month. Operations whose date cannot be parsed are
        skipped, and an operation seen twice (same identity) is kept once.

        Args:
            year: Calendar year of the month to export.
            month: Month number, 1 to 12.

        Returns:
            The month's operations sorted by date, oldest first.

        Raises:
            ValueError: If ``month`` is outside 1..12.
            RuntimeError: If ``max_pages`` pages were fetched without hitting
                a stop condition, or the API response was unsuccessful or
                malformed.
        """
        start, end = self._month_bounds(year, month)
        operations: list[dict[str, Any]] = []
        seen_ids: set[str] = set()

        for page_number in range(self.max_pages):
            offset = page_number * self.page_size
            page = self._fetch_page(offset=offset, month_start=start)
            if not page:
                break

            parsed_dates = []
            for operation in page:
                operation_date = self._parse_operation_date(operation)
                if operation_date is None:
                    continue
                parsed_dates.append(operation_date)
                if start <= operation_date < end:
                    operation_id = self._operation_identity(operation)
                    if operation_id not in seen_ids:
                        operations.append(operation)
                        seen_ids.add(operation_id)

            # A short page is the last one.
            if len(page) < self.page_size:
                break
            # Sber returns operations from newest to oldest, so going before month start
            # means that the following pages cannot contain the requested month.
            if parsed_dates and min(parsed_dates) < start:
                break
        else:
            # The loop ran out of pages without ever hitting a stop condition.
            raise RuntimeError(f"Reached max_pages={self.max_pages}; export stopped to avoid an endless loop.")

        operations.sort(key=lambda item: self._parse_operation_date(item) or datetime.min)
        return operations

    def save_month(self, year: int, month: int, output_path: str | Path | None = None) -> Path:
        """Fetch a month and save it to one XLSX file.

        Args:
            year: Calendar year of the month to export.
            month: Month number, 1 to 12.
            output_path: Destination file. When omitted, the file is named
                ``sber_operations_<year>_<month>.xlsx`` in the working directory.

        Returns:
            The path the workbook was written to.
        """
        operations = self.fetch_month(year, month)
        path = Path(output_path or f"sber_operations_{year}_{month:02d}.xlsx")
        rows = [self._operation_to_row(operation) for operation in operations]
        self._write_xlsx(path, rows)
        return path

    def _fetch_page(self, offset: int, month_start: datetime) -> list[dict[str, Any]]:
        """Request one page of operations starting at ``offset``.

        Args:
            offset: Pagination offset sent to the API.
            month_start: Lower date bound sent in the ``from`` field.

        Returns:
            The list found under ``body.operations`` (empty when absent).

        Raises:
            requests.HTTPError: If the HTTP status signals an error.
            RuntimeError: If the response is not a successful JSON object or
                its ``body`` / ``operations`` entries have an unexpected type.
        """
        payload = {
            "paginationOffset": offset,
            "paginationSize": self.page_size,
            "showHidden": False,
            "showNotTransactionBonuses": True,
            "showOpenBanking": True,
            "from": month_start.strftime(DATE_FORMAT),
        }
        response = self.session.post(
            self.url,
            headers=self._headers(),
            json=payload,
            timeout=self.timeout,
        )
        response.raise_for_status()

        data = response.json()
        # Valid JSON is not necessarily an object; check before using dict access.
        if not isinstance(data, dict) or not data.get("success", False):
            raise RuntimeError(f"Sber API returned unsuccessful response: {data}")

        body = data.get("body", {})
        # An explicit null (or any non-object) body must fail with a clear message
        # instead of an AttributeError on the lookup below.
        if not isinstance(body, dict):
            raise RuntimeError(f"Unexpected response body: {body!r}")

        operations = body.get("operations", [])
        if not isinstance(operations, list):
            raise RuntimeError(f"Unexpected operations payload: {operations!r}")
        return operations

    def _headers(self) -> dict[str, str]:
        """Return the request headers, including the configured cookie."""
        return {
            "accept": "application/json, text/plain, */*",
            "accept-language": "ru-RU,ru;q=0.9,en-US;q=0.8,en;q=0.7",
            "cache-control": "no-cache",
            "content-type": "application/json;charset=UTF-8",
            "origin": "https://online.sberbank.ru",
            "pragma": "no-cache",
            "priority": "u=1, i",
            "referer": "https://online.sberbank.ru/",
            "sec-ch-ua": '"Google Chrome";v="147", "Not.A/Brand";v="8", "Chromium";v="147"',
            "sec-ch-ua-mobile": "?0",
            "sec-ch-ua-platform": '"macOS"',
            "sec-fetch-dest": "empty",
            "sec-fetch-mode": "cors",
            "sec-fetch-site": "same-site",
            "user-agent": (
                "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "
                "AppleWebKit/537.36 (KHTML, like Gecko) "
                "Chrome/147.0.0.0 Safari/537.36"
            ),
            "x-requested-with": "XMLHttpRequest",
            "Cookie": self.cookie,
        }

    @staticmethod
    def _month_bounds(year: int, month: int) -> tuple[datetime, datetime]:
        """Return ``(start, end)`` of a month as a half-open interval.

        ``start`` is midnight on the first day of the month and ``end`` is
        midnight on the first day of the following month.

        Raises:
            ValueError: If ``month`` is outside 1..12.
        """
        if month < 1 or month > 12:
            raise ValueError("month must be between 1 and 12.")
        start = datetime(year=year, month=month, day=1)
        if month == 12:
            end = datetime(year=year + 1, month=1, day=1)
        else:
            end = datetime(year=year, month=month + 1, day=1)
        return start, end

    @staticmethod
    def _parse_operation_date(operation: dict[str, Any]) -> datetime | None:
        """Parse the operation's ``date`` field, ignoring fractional seconds.

        Returns:
            The parsed datetime, or ``None`` when the field is missing, is
            not a string, or does not match ``DATE_FORMAT``.
        """
        raw_date = operation.get("date")
        if not isinstance(raw_date, str):
            return None
        # The date part itself contains dots ("dd.mm.yyyy"), so the fractional
        # seconds separator is searched for only in the time part after "T".
        # Any fraction is dropped, not just ".000".
        date_part, separator, time_part = raw_date.partition("T")
        timestamp = f"{date_part}{separator}{time_part.split('.', 1)[0]}"
        try:
            return datetime.strptime(timestamp, DATE_FORMAT)
        except ValueError:
            return None

    @staticmethod
    def _operation_identity(operation: dict[str, Any]) -> str:
        """Return a key used to de-duplicate operations across pages.

        The first non-empty value among ``uohId``, ``externalId`` and
        ``authorizationDocId`` is used; when none is present the ``repr`` of
        the whole operation serves as the key.
        """
        for key in ("uohId", "externalId", "authorizationDocId"):
            value = operation.get(key)
            if value:
                return str(value)
        return repr(operation)

    @staticmethod
    def _operation_to_row(operation: dict[str, Any]) -> dict[str, Any]:
        """Flatten one operation into a mapping of column title to cell value."""
        # Missing or null nested objects are treated as empty.
        operation_amount = operation.get("operationAmount") or {}
        national_amount = operation.get("nationalAmount") or {}
        billing_amount = operation.get("billingAmount") or {}
        state = operation.get("state") or {}
        from_resource = operation.get("fromResource") or {}
        bonuses = operation.get("bonuses") or []

        bonus_income = 0
        if isinstance(bonuses, list):
            # Only numeric "income" values of dict entries are summed.
            bonus_income = sum(
                bonus.get("income", 0)
                for bonus in bonuses
                if isinstance(bonus, dict) and isinstance(bonus.get("income", 0), (int, float))
            )

        return {
            "Дата": operation.get("date", ""),
            "Получатель": operation.get("correspondent", ""),
            "Описание": operation.get("description", ""),
            "Сумма операции": operation_amount.get("amount", ""),
            "Валюта операции": operation_amount.get("currencyCode", ""),
            "Сумма в RUB": national_amount.get("amount", ""),
            "Валюта": national_amount.get("currencyCode", ""),
            "Бонусы Спасибо": bonus_income,
            "Счет": from_resource.get("displayedValue", ""),
            "Остаток после операции": billing_amount.get("amount", ""),
            "Статус": state.get("category", ""),
            "Тип": operation.get("type", ""),
            "Код категории": operation.get("classificationCode", ""),
            "ID": operation.get("uohId") or operation.get("externalId", ""),
        }

    @staticmethod
    def _write_xlsx(path: Path, rows: Iterable[dict[str, Any]]) -> None:
        """Write ``rows`` to ``path`` as a single-sheet XLSX workbook.

        The header row is taken from the keys of the first row; when there
        are no rows, the default column titles are written so the file still
        has a header. Missing parent directories of ``path`` are created.
        """
        rows = list(rows)
        headers = list(rows[0].keys()) if rows else [
            "Дата",
            "Получатель",
            "Описание",
            "Сумма операции",
            "Валюта операции",
            "Сумма в RUB",
            "Валюта",
            "Бонусы Спасибо",
            "Счет",
            "Остаток после операции",
            "Статус",
            "Тип",
            "Код категории",
            "ID",
        ]
        path.parent.mkdir(parents=True, exist_ok=True)
        worksheet_xml = _build_worksheet_xml(headers, rows)
        # An XLSX file is a ZIP archive holding a fixed set of XML parts.
        with zipfile.ZipFile(path, "w", compression=zipfile.ZIP_DEFLATED) as archive:
            archive.writestr("[Content_Types].xml", _content_types_xml())
            archive.writestr("_rels/.rels", _root_rels_xml())
            archive.writestr("xl/workbook.xml", _workbook_xml())
            archive.writestr("xl/_rels/workbook.xml.rels", _workbook_rels_xml())
            archive.writestr("xl/styles.xml", _styles_xml())
            archive.writestr("xl/worksheets/sheet1.xml", worksheet_xml)
def _build_worksheet_xml(headers: list[str], rows: list[dict[str, Any]]) -> str:
    """Render the worksheet part: a header row followed by one row per record.

    Values are looked up by header name, so a record lacking a column yields
    an empty cell. The ``<dimension>`` element spans from ``A1`` to the last
    column of the last written row.
    """
    header_xml = _build_row_xml(1, headers)
    # Data rows start at 2 because row 1 holds the header.
    body_xml = "".join(
        _build_row_xml(row_number, [record.get(column, "") for column in headers])
        for row_number, record in enumerate(rows, start=2)
    )
    bottom_right = f"{_column_name(len(headers))}{len(rows) + 1}"
    return (
        '<?xml version="1.0" encoding="UTF-8" standalone="yes"?>'
        '<worksheet xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main" '
        'xmlns:r="http://schemas.openxmlformats.org/officeDocument/2006/relationships">'
        f'<dimension ref="A1:{bottom_right}"/>'
        '<sheetViews><sheetView workbookViewId="0"/></sheetViews>'
        '<sheetFormatPr defaultRowHeight="15"/>'
        f'<sheetData>{header_xml}{body_xml}</sheetData>'
        '</worksheet>'
    )
def _build_row_xml(row_index: int, values: list[Any]) -> str:
    """Render one ``<row>`` element, placing ``values`` in columns A, B, C, ..."""
    cells_xml = "".join(
        _build_cell_xml(f"{_column_name(position)}{row_index}", cell_value)
        for position, cell_value in enumerate(values, start=1)
    )
    return f'<row r="{row_index}">{cells_xml}</row>'
def _build_cell_xml(cell_reference: str, value: Any) -> str:
if value is None:
value = ""
if isinstance(value, (int, float)) and not isinstance(value, bool):
return f'<c r="{cell_reference}"><v>{value}</v></c>'
return f'<c r="{cell_reference}" t="inlineStr"><is><t>{escape(str(value))}</t></is></c>'
def _column_name(index: int) -> str:
name = ""
while index:
index, remainder = divmod(index - 1, 26)
name = chr(65 + remainder) + name
return name
def _content_types_xml() -> str:
return (
'<?xml version="1.0" encoding="UTF-8" standalone="yes"?>'
'<Types xmlns="http://schemas.openxmlformats.org/package/2006/content-types">'
'<Default Extension="rels" ContentType="application/vnd.openxmlformats-package.relationships+xml"/>'
'<Default Extension="xml" ContentType="application/xml"/>'
'<Override PartName="/xl/workbook.xml" '
'ContentType="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet.main+xml"/>'
'<Override PartName="/xl/worksheets/sheet1.xml" '
'ContentType="application/vnd.openxmlformats-officedocument.spreadsheetml.worksheet+xml"/>'
'<Override PartName="/xl/styles.xml" '
'ContentType="application/vnd.openxmlformats-officedocument.spreadsheetml.styles+xml"/>'
'</Types>'
)
def _root_rels_xml() -> str:
return (
'<?xml version="1.0" encoding="UTF-8" standalone="yes"?>'
'<Relationships xmlns="http://schemas.openxmlformats.org/package/2006/relationships">'
'<Relationship Id="rId1" '
'Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument" '
'Target="xl/workbook.xml"/>'
'</Relationships>'
)
def _workbook_xml() -> str:
return (
'<?xml version="1.0" encoding="UTF-8" standalone="yes"?>'
'<workbook xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main" '
'xmlns:r="http://schemas.openxmlformats.org/officeDocument/2006/relationships">'
'<sheets><sheet name="Operations" sheetId="1" r:id="rId1"/></sheets>'
'</workbook>'
)
def _workbook_rels_xml() -> str:
return (
'<?xml version="1.0" encoding="UTF-8" standalone="yes"?>'
'<Relationships xmlns="http://schemas.openxmlformats.org/package/2006/relationships">'
'<Relationship Id="rId1" '
'Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/worksheet" '
'Target="worksheets/sheet1.xml"/>'
'<Relationship Id="rId2" '
'Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/styles" '
'Target="styles.xml"/>'
'</Relationships>'
)
def _styles_xml() -> str:
return (
'<?xml version="1.0" encoding="UTF-8" standalone="yes"?>'
'<styleSheet xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main">'
'<fonts count="1"><font><sz val="11"/><name val="Calibri"/></font></fonts>'
'<fills count="1"><fill><patternFill patternType="none"/></fill></fills>'
'<borders count="1"><border><left/><right/><top/><bottom/><diagonal/></border></borders>'
'<cellStyleXfs count="1"><xf numFmtId="0" fontId="0" fillId="0" borderId="0"/></cellStyleXfs>'
'<cellXfs count="1"><xf numFmtId="0" fontId="0" fillId="0" borderId="0" xfId="0"/></cellXfs>'
'</styleSheet>'
)
def _parse_args() -> argparse.Namespace:
now = datetime.now()
parser = argparse.ArgumentParser(description="Export Sber operations for one month to a single XLSX file.")
parser.add_argument("--year", type=int, default=now.year, help="Year to export, for example 2026.")
parser.add_argument("--month", type=int, default=now.month, help="Month to export, from 1 to 12.")
parser.add_argument("--output", type=Path, default=None, help="Output XLSX path.")
return parser.parse_args()
if __name__ == "__main__":
    # Script entry point: export the requested month and report where it went.
    cli_args = _parse_args()
    saved_to = SberMonthHistoryExporter().save_month(
        cli_args.year,
        cli_args.month,
        cli_args.output,
    )
    print(f"Saved {cli_args.year}-{cli_args.month:02d} operations to {saved_to}")