#!/usr/bin/env python3
"""
Конвертер Excel в JSON с агрегацией дубликатов по id_accrual
"""
import json
import sys
from pathlib import Path
from datetime import datetime
from openpyxl import load_workbook
def convert_excel_to_json(excel_path, json_path=None):
"""
Конвертирует Excel в JSON, агрегируя дубликаты по id_accrual
"""
if json_path is None:
json_path = Path(excel_path).stem + ".json"
print(f"📂 Парсинг Excel: {Path(excel_path).name}")
wb = load_workbook(excel_path)
ws = wb.active
# Заголовки из 2й строки
headers = [cell.value for cell in ws[2]]
print(f"📋 Найдено заголовков: {len(headers)}")
# Маппинг колонн
column_mapping = {
"ID начисления": "id_accrual",
"Дата начисления": "accrual_date",
"Группа сервиса": "service_group",
"Тип начисления": "accrual_type",
"Артикул": "article",
"SKU": "sku",
"Название товара": "product_name",
"Количество": "quantity",
"Цена продавца": "seller_price",
"Дата принятия заказа в обработку или оказания услуги": "order_received_date",
"Платформа продажи": "sales_platform",
"Схема работы": "work_scheme",
"Вознаграждение Ozon, %": "ozon_fee_pct",
"Индекс локализации, %": "localization_index_pct",
"Среднее время доставки, часы": "avg_delivery_hours",
"Сумма итого, руб.": "total_amount_rub",
}
# Индекс столбцов
column_indices = {}
for idx, header in enumerate(headers, start=1):
if header in column_mapping:
column_indices[column_mapping[header]] = idx
elif header:
for col_name, field_name in column_mapping.items():
if col_name.lower() in str(header).lower():
column_indices[field_name] = idx
break
print(f"✓ Найдено {len(column_indices)} из {len(column_mapping)} полей")
# Парсим с агрегацией
accruals_dict = {}
total_rows = 0
for row_num, row in enumerate(ws.iter_rows(min_row=3, values_only=False), start=3):
accrual = {}
for field_name, col_idx in column_indices.items():
cell = row[col_idx - 1]
value = cell.value
# Конвертируем типы
if field_name.endswith("_date") and value:
if isinstance(value, datetime):
accrual[field_name] = value.isoformat()
else:
try:
parsed_date = datetime.fromisoformat(str(value))
accrual[field_name] = parsed_date.isoformat()
except:
accrual[field_name] = None
elif field_name in ["quantity", "ozon_fee_pct", "localization_index_pct", "avg_delivery_hours", "seller_price"]:
if value is not None:
try:
accrual[field_name] = float(value)
except:
accrual[field_name] = None
else:
accrual[field_name] = None
elif field_name == "total_amount_rub":
if value is not None:
try:
accrual[field_name] = float(value)
except:
accrual[field_name] = 0.0
else:
accrual[field_name] = 0.0
else:
accrual[field_name] = str(value) if value else None
# Группируем по id_accrual
if accrual.get("id_accrual") and accrual.get("accrual_date"):
total_rows += 1
id_accrual = accrual["id_accrual"]
if id_accrual not in accruals_dict:
accruals_dict[id_accrual] = accrual
else:
# Суммируем total_amount_rub
accruals_dict[id_accrual]["total_amount_rub"] += accrual.get("total_amount_rub", 0.0)
accruals = list(accruals_dict.values())
print(f"✓ Обработано {total_rows} строк")
print(f"✓ Агрегировано до {len(accruals)} уникальных id_accrual")
# Сохраняем JSON
with open(json_path, 'w', encoding='utf-8') as f:
json.dump({
"metadata": {
"total_rows": total_rows,
"unique_records": len(accruals),
"generated_at": datetime.now().isoformat(),
},
"data": accruals
}, f, ensure_ascii=False, indent=2)
print(f"\n✅ JSON сохранён: {json_path}")
print(f" Размер файла: {Path(json_path).stat().st_size / 1024 / 1024:.2f} MB")
return json_path
if __name__ == "__main__":
if len(sys.argv) < 2:
print("Использование: python3 excel_to_json.py <excel_file> [json_file]")
sys.exit(1)
excel_file = sys.argv[1]
json_file = sys.argv[2] if len(sys.argv) > 2 else None
if not Path(excel_file).exists():
print(f"❌ Файл не найден: {excel_file}")
sys.exit(1)
convert_excel_to_json(excel_file, json_file)