chore: initial commit
This commit is contained in:
192
core/search_with_pagination.py
Normal file
192
core/search_with_pagination.py
Normal file
@@ -0,0 +1,192 @@
|
||||
#!/usr/bin/env python3
|
||||
"""带翻页功能的完整搜索 - 支持自动点击翻页按钮"""
|
||||
|
||||
import sys
|
||||
sys.path.insert(0, '.')
|
||||
from server import FunstatMCPServer
|
||||
import asyncio
|
||||
import re
|
||||
import json
|
||||
from datetime import datetime
|
||||
|
||||
def _extract_page_results(text, keyword, page):
    """Pull backtick-quoted numeric IDs and @/t.me usernames out of one page of text."""
    ids = re.findall(r'`(\d+)`', text)
    usernames = re.findall(r'@(\w+)', text) + re.findall(r't\.me/(\w+)', text)

    found = [
        {'type': 'id', 'value': uid, 'keyword': keyword, 'page': page}
        for uid in ids
    ]
    found.extend(
        {'type': 'username', 'value': username, 'keyword': keyword, 'page': page}
        for username in usernames
    )
    return found


def _find_next_page_button(msg):
    """Return ``(flat_index, label)`` of the first button containing "➡️", or None."""
    markup = msg.reply_markup
    if not markup or not hasattr(markup, 'rows'):
        return None

    # The index counts buttons across all rows, in row order, because the
    # caller passes it to ``msg.click(index)`` as a single flat position.
    flat_buttons = (button for row in markup.rows for button in row.buttons)
    for index, button in enumerate(flat_buttons):
        if '➡️' in button.text:
            return index, button.text
    return None


async def search_all_pages(server, keyword, max_pages=20, *, page_delay=2.0, throttle_delay=0.5):
    """Send a ``/search`` command and collect the results of every page.

    The command is sent to ``server.bot_entity``; afterwards the newest message
    of that chat is read, its text is scanned for IDs and usernames, and the
    button whose label contains "➡️" (if any) is clicked to load the next page.

    Args:
        server: Object exposing ``client`` (with awaitable ``send_message`` and
            ``get_messages``) and ``bot_entity``.
        keyword: Search keyword appended to the ``/search`` command.
        max_pages: Maximum number of pages to read (guards against endless
            pagination).
        page_delay: Seconds to wait after sending the command and after each
            button click, giving the bot time to respond.
        throttle_delay: Additional seconds to wait between pages so requests
            are not issued too quickly.

    Returns:
        list: One dict per match with the keys ``type`` (``'id'`` or
        ``'username'``), ``value``, ``keyword`` and ``page``.
    """
    all_results = []
    pages_read = 0

    print(f"\n🔍 搜索关键词: {keyword}")

    # Send the search command and give the bot time to answer.
    await server.client.send_message(server.bot_entity, f'/search {keyword}')
    await asyncio.sleep(page_delay)

    for current_page in range(1, max_pages + 1):
        # NOTE(review): the newest message is assumed to be the bot's reply;
        # if the bot has not answered yet this may be something else — confirm.
        messages = await server.client.get_messages(server.bot_entity, limit=1)
        if not messages:
            # Nothing to parse; indexing an empty result would raise IndexError.
            print(f" 第 {current_page} 页: 未收到消息")
            break

        msg = messages[0]
        pages_read = current_page

        # ``text`` may be None (e.g. a message without text), which
        # re.findall cannot handle.
        page_results = _extract_page_results(msg.text or '', keyword, current_page)
        print(f" 第 {current_page} 页: +{len(page_results)} 条结果", end='')
        all_results.extend(page_results)

        next_button = _find_next_page_button(msg)
        if next_button is None:
            print(" → 没有更多页面")
            break

        if current_page >= max_pages:
            # Do not click through to a page that would never be read.
            print(f" → 已达到最大页数 {max_pages}")
            break

        button_index, button_label = next_button
        print(f" → 发现翻页按钮: {button_label}")
        try:
            await msg.click(button_index)
            await asyncio.sleep(page_delay)  # wait for the next page to load
        except Exception as e:
            # Best effort: keep what has been collected so far.
            print(f" → 点击失败: {e}")
            break

        # Avoid sending requests too quickly.
        await asyncio.sleep(throttle_delay)

    print(f" ✅ 完成! 共翻了 {pages_read} 页")
    return all_results
|
||||
|
||||
def _dedupe_key(item):
    """Return the key used to detect duplicates; IDs and usernames never collide."""
    if item['type'] == 'id':
        return f"ID:{item['value']}"
    return f"@{item['value']}"


def _write_text_report(path, results):
    """Write the human-readable numbered list of results to *path*."""
    with open(path, 'w', encoding='utf-8') as f:
        f.write("=" * 80 + "\n")
        f.write("翻译相关用户/群组完整列表 (支持翻页)\n")
        f.write("=" * 80 + "\n")
        f.write(f"总数: {len(results)} 条\n")
        f.write(f"搜索时间: {datetime.now()}\n")
        f.write("数据来源: funstat BOT (@openaiw_bot)\n")
        f.write("搜索方式: 多关键词 + 自动翻页\n")
        f.write("=" * 80 + "\n\n")

        for i, item in enumerate(results, 1):
            if item['type'] == 'id':
                f.write(f"{i:4d}. ID: {item['value']:15s} (来源: {item['keyword']}, 第{item['page']}页)\n")
            else:
                f.write(f"{i:4d}. @{item['value']:30s} (来源: {item['keyword']}, 第{item['page']}页)\n")


def _write_json_report(path, results):
    """Dump the results plus a small metadata header to *path* as JSON."""
    with open(path, 'w', encoding='utf-8') as f:
        json.dump({
            'total': len(results),
            'timestamp': str(datetime.now()),
            'method': 'multi-keyword + pagination',
            'results': results
        }, f, ensure_ascii=False, indent=2)


def _print_summary(results, txt_file, json_file):
    """Print output locations, totals per result type and pages per keyword."""
    print("\n")
    print("=" * 80)
    print(f"✅ 搜索完成!共找到 {len(results)} 条独特记录")
    print("=" * 80)
    print(f"📄 文本文件: {txt_file}")
    print(f"📄 JSON文件: {json_file}")
    print("")

    print("🎯 最终统计:")
    print(f" 总记录数: {len(results)}")
    print(f" ID数量: {sum(1 for r in results if r['type'] == 'id')}")
    print(f" 用户名数量: {sum(1 for r in results if r['type'] == 'username')}")

    # Pages are counted from the de-duplicated results, so a page whose
    # entries were all seen before does not contribute to its keyword.
    print("\n📊 每个关键词的翻页统计:")
    keyword_pages = {}
    for item in results:
        keyword_pages.setdefault(item['keyword'], set()).add(item['page'])

    for kw, pages in keyword_pages.items():
        print(f" {kw:20s}: {len(pages)} 页")


async def main():
    """Search every keyword with pagination, de-duplicate, and write reports.

    For each keyword the paginated search is run, results not seen before are
    kept (first occurrence wins), and the combined list is written to a text
    file and a JSON file before a summary is printed. The client is
    disconnected even when a later step fails.
    """
    server = FunstatMCPServer()
    await server.initialize()

    try:
        results = []
        seen = set()

        keywords = [
            '翻译', 'translation', 'translate', 'translator',
            '字幕组', 'subtitle', 'fansub'
        ]

        print("🚀 开始带翻页的完整搜索")
        print(f"📋 关键词数量: {len(keywords)}")
        print("📄 每个关键词自动翻页至所有结果")
        print("=" * 80)

        for i, kw in enumerate(keywords, 1):
            print(f"\n[{i:2d}/{len(keywords)}] 关键词: {kw:20s}")

            try:
                page_results = await search_all_pages(server, kw, max_pages=10)

                # Keep only records whose key has not been seen for any keyword.
                new_count = 0
                for item in page_results:
                    key = _dedupe_key(item)
                    if key not in seen:
                        seen.add(key)
                        results.append(item)
                        new_count += 1

                print(f" 📊 新增独特记录: {new_count} 条 (总计: {len(results)})")

            except Exception as e:
                # Best effort: a failing keyword must not abort the whole run.
                print(f" ❌ 错误: {e}")

            # Short pause between keywords.
            await asyncio.sleep(1)

        txt_file = '/Users/lucas/chat--1003255561049/translation_users_paginated.txt'
        json_file = '/Users/lucas/chat--1003255561049/translation_users_paginated.json'

        _write_text_report(txt_file, results)
        _write_json_report(json_file, results)
        _print_summary(results, txt_file, json_file)
    finally:
        # Always release the connection, including when writing the reports fails.
        await server.client.disconnect()
|
||||
|
||||
# Script entry point: run the async workflow only when executed directly.
if __name__ == "__main__":
    asyncio.run(main())
|
||||
Reference in New Issue
Block a user