Add new CLI command for file name extraction and hash generation

- Implemented the `get_filenames` command in `batch_extractor.py`, which collects the names of the files in a specified directory, generates a date-prefixed hash for each one, and saves the result to an Excel file.
- Updated README.md with instructions for using the new command.
- Enhanced PDF generation in `build_pdf.py` by modifying file naming conventions to include article and size.
- Added example PowerShell script for batch processing tasks.
- Introduced new example Excel file for user reference.
This commit is contained in:
2026-02-28 12:59:56 +03:00
parent 6850f3672e
commit 6a2c0d0d35
6 changed files with 104 additions and 15 deletions

View File

@@ -6,6 +6,23 @@ from read_image import read_datamatrix_zxing, extract_barcodes_from_pdf
from pathlib import Path
import click
import render_eps
import hashlib
import random
from datetime import datetime
def get_dated_hash(text: str) -> str:
    """Return a short salted hash of *text* prefixed with today's date.

    The result has the form ``YY_MM_DD_<10 hex chars>``,
    e.g. ``26_02_21_d6f7a6b2c1``.

    A random integer is appended to *text* before hashing, so repeated
    calls with the same *text* generally produce different hash parts.

    Args:
        text: Source string the hash part is derived from.

    Returns:
        The current local date (``%y_%m_%d``) joined by an underscore with
        the first 10 hex characters of the SHA-256 digest of the salted text.
    """
    # Salt the input with a random number in the range 0..100000000.
    salt = random.randint(0, 100000000)
    digest = hashlib.sha256(f"{text}_{salt}".encode()).hexdigest()

    # Date prefix in two-digit year_month_day form (for example 26_02_21).
    today = datetime.now().strftime("%y_%m_%d")

    # Only the first 10 characters of the hex digest are kept.
    return f"{today}_{digest[:10]}"
def extract_eps_from_zip(zip_path: str) -> list:
"""
@@ -124,5 +141,15 @@ def from_pdf(input_pdf: Path, output_xlsx: Path):
except Exception as e:
click.secho(f"Ошибка при обработке PDF: {e}", fg="red")
@cli.command(help="Подготавливает excel с именами файлов в директории")
@click.argument('input_dir', type=click.Path(exists=True, file_okay=False, dir_okay=True, path_type=Path))
@click.argument('output_xlsx', type=click.Path(dir_okay=False, writable=True, path_type=Path))
def get_filenames(input_dir: Path, output_xlsx: Path):
    """Save the names of the files in a directory, each with a dated hash, to Excel.

    Only entries directly inside ``input_dir`` for which ``is_file()`` is true
    are listed; subdirectories are skipped and not descended into.

    Args:
        input_dir: Existing directory whose files are listed.
        output_xlsx: Output path, passed to ``save_to_excel`` as a string.
    """
    # Path.iterdir() yields entries in arbitrary order, so the names are
    # sorted to keep the row order stable between runs and platforms.
    filenames = sorted(entry.name for entry in input_dir.iterdir() if entry.is_file())

    # Each row is a (dated hash, file name) pair.
    filenames_with_hashes = [(get_dated_hash(name), name) for name in filenames]
    save_to_excel(filenames_with_hashes, str(output_xlsx))
# Invoke the `cli` command object only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    cli()