# UpdateDB/sync.py

import logging
import time
from datetime import datetime, timezone

from db import Database
from wechat import WeChatClient, WeChatError

logger = logging.getLogger(__name__)


class SyncService:
    """Copy WeChat drafts and video materials into the local database.

    ``wechat`` supplies the listing and detail calls; ``db`` receives the
    upserts, the deletions and the per-type sync state. Both collaborators
    are used only through the methods invoked in this class.
    """

    # Number of items requested per listing call.
    PAGE_SIZE = 20
    # Seconds to wait between two consecutive listing calls
    # (presumably to stay under the API rate limit -- confirm).
    PAGE_DELAY_SECONDS = 0.5

    def __init__(self, wechat: WeChatClient, db: Database):
        self.wechat = wechat
        self.db = db

    def run_sync(self):
        """Run one sync pass for "news" and then for "video".

        For each type the sync state stored under ``wechat_<type>_sync`` is
        set to "syncing", then to "idle" (with the processed count and a UTC
        ISO timestamp) on success, or to "error" (with the exception text)
        on failure. An exception raised while syncing one type is logged and
        does not prevent the next type from being attempted.
        """
        logger.info("=== Sync started ===")
        for material_type in ("news", "video"):
            sync_key = f"wechat_{material_type}_sync"
            try:
                self.db.update_sync_state(sync_key, {"status": "syncing", "count": 0})
                if material_type == "news":
                    count = self._sync_published_articles()
                else:
                    count = self._sync_materials(material_type)
                self.db.update_sync_state(sync_key, {
                    "status": "idle",
                    "count": count,
                    "last_sync": datetime.now(tz=timezone.utc).isoformat(),
                })
                logger.info("Sync %s completed, %d items processed", material_type, count)
            except Exception as e:
                logger.error("Sync %s failed: %s", material_type, e, exc_info=True)
                self.db.update_sync_state(sync_key, {
                    "status": "error",
                    "error": str(e),
                    "last_sync": datetime.now(tz=timezone.utc).isoformat(),
                })
        logger.info("=== Sync finished ===")

    # --- Articles (draft API) ---

    def _sync_published_articles(self) -> int:
        """Sync articles from the draft listing into the database.

        Pages through ``batch_get_drafts`` until a page is empty or the
        offset reaches the reported ``total_count``, upserting every draft
        via ``_sync_draft_item``. A failure on a single draft is logged and
        skipped. Afterwards ``delete_missing_articles`` is called with the
        media ids that were seen.

        Returns:
            The number of drafts processed without error.
        """
        offset = 0
        processed = 0
        all_media_ids = set()

        while True:
            batch = self.wechat.batch_get_drafts(
                offset=offset, count=self.PAGE_SIZE, no_content=0)
            items = batch.get("item", [])
            if not items:
                break
            total = batch.get("total_count", 0)

            for item in items:
                try:
                    # Record the id before syncing so a draft whose upsert
                    # fails is still treated as present when pruning.
                    all_media_ids.add(item["media_id"])
                    self._sync_draft_item(item)
                    processed += 1
                except Exception as e:
                    logger.error("Error processing draft %s: %s",
                                 item.get("media_id", "?"), e)

            # Fall back to the number of items actually received: a missing
            # or zero ``item_count`` would otherwise leave the offset stuck
            # and refetch the same page forever.
            offset += batch.get("item_count") or len(items)
            if offset >= total:
                break
            time.sleep(self.PAGE_DELAY_SECONDS)

        # Delete articles no longer in drafts
        self.db.delete_missing_articles(all_media_ids)
        return processed

    @staticmethod
    def _truncate_or_none(value, limit: int):
        """Return ``value`` cut to ``limit`` characters, or None if it is empty or missing."""
        return value[:limit] if value else None

    def _sync_draft_item(self, item: dict):
        """Upsert one article row per entry in the draft's ``content.news_item`` list.

        Each article id is ``<media_id>_<index>``. Text fields are truncated
        to fixed lengths (title 200, cover_url 500, author 100, source_url
        1000); the optional ones become None when empty.

        Raises:
            KeyError: if ``item`` has no ``media_id``.
        """
        media_id = item["media_id"]
        update_time = item.get("update_time", 0)
        news_items = item.get("content", {}).get("news_item", [])
        publish_date = (
            datetime.fromtimestamp(update_time, tz=timezone.utc).date()
            if update_time else None
        )

        for idx, news in enumerate(news_items):
            article = {
                "wechat_article_id": f"{media_id}_{idx}",
                # ``or ""`` also covers a key that is present with a None
                # value, which would otherwise fail on slicing.
                "title": (news.get("title") or "")[:200],
                "content": news.get("content", ""),
                "cover_url": self._truncate_or_none(news.get("thumb_url"), 500),
                "author": self._truncate_or_none(news.get("author"), 100),
                "publish_date": publish_date,
                "source_url": self._truncate_or_none(news.get("url"), 1000),
            }
            self.db.upsert_article(article)

    # --- Materials (video) ---

    def _sync_materials(self, material_type: str) -> int:
        """Sync all materials of ``material_type`` into the database.

        The expected total is read from ``get_material_count`` under the key
        ``<material_type>_count``; pages are then fetched with
        ``batch_get_materials`` until that total is reached. Only "video"
        items are written (via ``_sync_video_item``), and only for "video"
        are missing rows deleted afterwards. A failure on a single item is
        logged and skipped.

        If a page comes back empty before the total is reached, paging stops
        and the deletion step is skipped, because the set of ids seen cannot
        be trusted to be complete.

        Returns:
            The number of items processed without error (0 when the reported
            total is 0 or missing).
        """
        counts = self.wechat.get_material_count()
        total = counts.get(f"{material_type}_count", 0) or 0
        logger.info("Total %s materials: %d", material_type, total)

        if total == 0:
            return 0

        offset = 0
        processed = 0
        all_media_ids = set()
        listing_complete = True
        while offset < total:
            batch = self.wechat.batch_get_materials(
                material_type, offset, count=self.PAGE_SIZE)
            items = batch.get("item", [])
            if not items:
                # Without this exit the offset never advances and the loop
                # would spin forever on an empty page.
                logger.warning(
                    "Listing of %s materials ended at offset %d of %d; "
                    "skipping deletion of missing items",
                    material_type, offset, total)
                listing_complete = False
                break

            for item in items:
                try:
                    all_media_ids.add(item["media_id"])
                    if material_type == "video":
                        self._sync_video_item(item)
                    processed += 1
                except Exception as e:
                    logger.error("Error processing %s item %s: %s",
                                 material_type, item.get("media_id", "?"), e)

            # Same fallback as for drafts: never advance by zero.
            offset += batch.get("item_count") or len(items)
            if offset < total:
                time.sleep(self.PAGE_DELAY_SECONDS)

        # Delete items that exist in DB but no longer on WeChat
        if material_type == "video" and listing_complete:
            self.db.delete_missing_videos(all_media_ids)

        return processed

    def _sync_video_item(self, item: dict):
        """Upsert one video row, fetching its details when the database asks for them.

        ``title``, ``description`` and ``down_url`` are filled from
        ``get_material`` only if ``should_fetch_video_detail`` returns true;
        a ``WeChatError`` during that fetch is logged and the row is still
        upserted with those fields set to None.

        Raises:
            KeyError: if ``item`` has no ``media_id``.
        """
        media_id = item["media_id"]
        update_time = item.get("update_time", 0)
        wechat_time = datetime.fromtimestamp(update_time, tz=timezone.utc) if update_time else None

        # NOTE(review): when details are not re-fetched the three detail
        # fields are passed as None; presumably upsert_video keeps the
        # stored values in that case -- confirm against the db module.
        video = {
            "media_id": media_id,
            "name": item.get("name", ""),
            "url": item.get("url", ""),
            "title": None,
            "description": None,
            "down_url": None,
            "wechat_update_time": wechat_time,
        }

        if self.db.should_fetch_video_detail(media_id, update_time):
            try:
                detail = self.wechat.get_material(media_id)
                video["title"] = detail.get("title")
                video["description"] = detail.get("description")
                video["down_url"] = detail.get("down_url")
            except WeChatError as e:
                logger.warning("Failed to fetch video detail %s: %s", media_id, e)

        self.db.upsert_video(video)