#!/usr/bin/env python3
"""Full keyword search with pagination.

Sends ``/search <keyword>`` to the bot, reads the reply, and keeps clicking
the reply's "next page" inline button until no such button remains or a page
limit is reached. Results from several keywords are de-duplicated and written
to a text report and a JSON file.
"""

import sys
sys.path.insert(0, '.')  # must run before the project-local import below

from server import FunstatMCPServer
import asyncio
import re
import json
from datetime import datetime

# Substring that identifies the "next page" inline button (e.g. "➡️ 2").
NEXT_PAGE_MARKER = '➡️'

# Seconds to wait for the bot to answer a command or a button click.
REPLY_WAIT_SECONDS = 2
# Extra pause between pages so requests are not sent too quickly.
PAGE_PAUSE_SECONDS = 0.5

# NOTE(review): output locations are machine-specific absolute paths.
TXT_FILE = '/Users/lucas/chat--1003255561049/translation_users_paginated.txt'
JSON_FILE = '/Users/lucas/chat--1003255561049/translation_users_paginated.json'


def _extract_records(text, keyword, page):
    """Build result records for every ID and username found in *text*.

    IDs are digit runs wrapped in backticks; usernames are taken from
    ``@name`` mentions and ``t.me/name`` links. IDs come first, then
    usernames, each in order of appearance.
    """
    ids = re.findall(r'`(\d+)`', text)
    usernames = re.findall(r'@(\w+)', text) + re.findall(r't\.me/(\w+)', text)

    records = [
        {'type': 'id', 'value': uid, 'keyword': keyword, 'page': page}
        for uid in ids
    ]
    records.extend(
        {'type': 'username', 'value': name, 'keyword': keyword, 'page': page}
        for name in usernames
        if name
    )
    return records


def _find_next_page_button(reply_markup):
    """Return ``(flat_index, text)`` of the first next-page button, or None.

    The index counts buttons row by row, left to right, across the whole
    markup, which is the index later handed to ``msg.click``.
    """
    rows = getattr(reply_markup, 'rows', None)
    if not rows:
        return None

    flat_index = 0
    for row in rows:
        for button in row.buttons:
            if NEXT_PAGE_MARKER in button.text:
                return flat_index, button.text
            flat_index += 1
    return None


async def search_all_pages(server, keyword, max_pages=20):
    """Search for *keyword* and collect the results of every page.

    Args:
        server: FunstatMCPServer instance; its ``client`` and ``bot_entity``
            attributes are used to talk to the bot.
        keyword: Search keyword sent as ``/search <keyword>``.
        max_pages: Maximum number of pages to read (guards against an
            endless loop).

    Returns:
        list: One dict per hit with the keys ``type`` (``'id'`` or
        ``'username'``), ``value``, ``keyword`` and ``page``.
    """
    all_results = []
    pages_read = 0  # pages actually processed; used for the final summary

    print(f"\n🔍 搜索关键词: {keyword}")

    # Send the search command and give the bot time to answer.
    await server.client.send_message(server.bot_entity, f'/search {keyword}')
    await asyncio.sleep(REPLY_WAIT_SECONDS)

    while pages_read < max_pages:
        # Always re-fetch the newest message: it carries the current page.
        messages = await server.client.get_messages(server.bot_entity, limit=1)
        if not messages:
            print(" → 未收到任何消息")
            break
        msg = messages[0]

        current_page = pages_read + 1
        # A message without text (e.g. media only) yields no records.
        page_records = _extract_records(msg.text or '', keyword, current_page)
        all_results.extend(page_records)
        pages_read = current_page
        print(f" 第 {current_page} 页: +{len(page_records)} 条结果", end='')

        next_button = _find_next_page_button(msg.reply_markup)
        if next_button is None:
            print(" → 没有更多页面")
            break

        button_index, button_text = next_button
        print(f" → 发现翻页按钮: {button_text}")

        # Do not request a page that would never be read.
        if pages_read >= max_pages:
            print(f" → 已达到最大翻页数: {max_pages}")
            break

        # NOTE(review): assumes msg.click(i) addresses buttons by flat
        # row-major index — confirm against the client library.
        try:
            await msg.click(button_index)
        except Exception as e:
            # Best effort: keep what was collected so far.
            print(f" → 点击失败: {e}")
            break

        await asyncio.sleep(REPLY_WAIT_SECONDS)  # wait for the page to load
        await asyncio.sleep(PAGE_PAUSE_SECONDS)  # avoid hammering the bot

    print(f" ✅ 完成! 共翻了 {pages_read} 页")
    return all_results


def _write_reports(results, txt_file, json_file):
    """Write *results* to a human-readable text file and a JSON file."""
    # One timestamp so both files agree.
    timestamp = datetime.now()
    separator = "=" * 80

    with open(txt_file, 'w', encoding='utf-8') as f:
        f.write(separator + "\n")
        f.write("翻译相关用户/群组完整列表 (支持翻页)\n")
        f.write(separator + "\n")
        f.write(f"总数: {len(results)} 条\n")
        f.write(f"搜索时间: {timestamp}\n")
        f.write("数据来源: funstat BOT (@openaiw_bot)\n")
        f.write("搜索方式: 多关键词 + 自动翻页\n")
        f.write(separator + "\n\n")

        for i, item in enumerate(results, 1):
            if item['type'] == 'id':
                f.write(f"{i:4d}. ID: {item['value']:15s} (来源: {item['keyword']}, 第{item['page']}页)\n")
            else:
                f.write(f"{i:4d}. @{item['value']:30s} (来源: {item['keyword']}, 第{item['page']}页)\n")

    with open(json_file, 'w', encoding='utf-8') as f:
        json.dump({
            'total': len(results),
            'timestamp': str(timestamp),
            'method': 'multi-keyword + pagination',
            'results': results
        }, f, ensure_ascii=False, indent=2)


async def main():
    """Run the paginated search for every keyword and save the unique hits."""
    server = FunstatMCPServer()
    await server.initialize()

    try:
        results = []
        seen = set()
        # Pages that produced at least one hit, per keyword. Collected before
        # de-duplication so pages consisting only of duplicates still count.
        keyword_pages = {}

        keywords = [
            '翻译', 'translation', 'translate', 'translator',
            '字幕组', 'subtitle', 'fansub'
        ]

        print("🚀 开始带翻页的完整搜索")
        print(f"📋 关键词数量: {len(keywords)}")
        print("📄 每个关键词自动翻页至所有结果")
        print("=" * 80)

        for i, kw in enumerate(keywords, 1):
            print(f"\n[{i:2d}/{len(keywords)}] 关键词: {kw:20s}")

            try:
                # Fetch every page for this keyword.
                page_results = await search_all_pages(server, kw, max_pages=10)

                pages = {item['page'] for item in page_results}
                if pages:
                    keyword_pages[kw] = pages

                # De-duplicate across keywords and pages.
                new_count = 0
                for item in page_results:
                    if item['type'] == 'id':
                        key = f"ID:{item['value']}"
                    else:
                        key = f"@{item['value']}"

                    if key not in seen:
                        seen.add(key)
                        results.append(item)
                        new_count += 1

                print(f" 📊 新增独特记录: {new_count} 条 (总计: {len(results)})")

            except Exception as e:
                # Best effort: one failing keyword must not abort the run.
                print(f" ❌ 错误: {e}")

            # Short pause between keywords.
            await asyncio.sleep(1)

        # Save the output files.
        txt_file = TXT_FILE
        json_file = JSON_FILE
        _write_reports(results, txt_file, json_file)

        print("\n")
        print("=" * 80)
        print(f"✅ 搜索完成!共找到 {len(results)} 条独特记录")
        print("=" * 80)
        print(f"📄 文本文件: {txt_file}")
        print(f"📄 JSON文件: {json_file}")
        print("")

        # Summary statistics.
        print("🎯 最终统计:")
        print(f" 总记录数: {len(results)}")
        print(f" ID数量: {sum(1 for r in results if r['type'] == 'id')}")
        print(f" 用户名数量: {sum(1 for r in results if r['type'] == 'username')}")

        # Number of result-bearing pages per keyword.
        print("\n📊 每个关键词的翻页统计:")
        for kw, pages in keyword_pages.items():
            print(f" {kw:20s}: {len(pages)} 页")
    finally:
        # Always release the connection, even if searching or saving failed.
        await server.client.disconnect()


if __name__ == '__main__':
    asyncio.run(main())