1.购买服务器阿里云:服务器购买地址https://t.aliyun.com/U/55RK8C若失效,可用地址
阿里云:
服务器购买地址
https://t.aliyun.com/U/55RK8C若失效,可用地址
https://www.aliyun.com/daily-act/ecs/activity_selection?source=5176.29345612&userCode=49hts92d
腾讯云:
https://curl.qcloud.com/wJpWmSfU若失效,可用地址
https://cloud.tencent.com/act/cps/redirect?redirect=2446&cps_key=ad201ee2ef3b771157f72ee5464b1fea&from=console
华为云
https://activity.huaweicloud.com/cps.html?fromacct=64b5cf7cc11b4840bb4ed2ea0b2f4468&utm_source=V1g3MDY4NTY=&utm_medium=cps&utm_campaign=201905
2.部署教程
3.代码如下
# -*- coding: utf-8 -*-
"""Probe china.com for likely check-in ("签到") entry points.

Workflow:
1. Fetch a few base pages and collect in-domain links whose anchor text
   or URL matches check-in related keywords.
2. Probe the most promising candidates, printing JSON payloads, keyword
   hits in HTML, and any API-looking URLs found in the page source.
"""
import os
import re
import time  # noqa: F401 — kept from the original script
import json
from urllib.parse import urljoin, urlparse

import requests
from bs4 import BeautifulSoup

# Pages scraped for candidate links.
BASE_URLS = [
    "https://www.china.com/",
    "https://passport.china.com/",
]

# Keywords (Chinese + English) that mark a link as check-in related.
KEYWORDS = [
    "签到", "打卡", "任务", "积分", "红包", "领取", "福利", "每日",
    "checkin", "sign", "reward", "mission",
]

USER_AGENT = (
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
    "(KHTML, like Gecko) Chrome/143.0.0.0 Safari/537.36"
)


class ChinaComCheckinProber:
    """Discovers and probes likely check-in endpoints on *.china.com."""

    def __init__(self):
        self.session = requests.Session()
        self.session.headers.update({
            "User-Agent": USER_AGENT,
            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
        })
        cookie = os.getenv("CHINA_COOKIE", "").strip()
        if cookie:
            # Send the whole cookie string verbatim in the request header
            # (the most portable way to reuse a browser session).
            self.session.headers["Cookie"] = cookie
        self.candidates = []  # list of (title, url)
        self.checked = set()  # URLs already probed

    def _safe_get(self, url: str, timeout: int = 15) -> requests.Response | None:
        """GET *url* with a timeout; return None (and log) on any error."""
        try:
            return self.session.get(url, timeout=timeout, allow_redirects=True)
        except Exception as e:
            print(f"❌ GET 失败: {url}\n 错误: {e}")
            return None

    def _extract_links(self, html: str, base_url: str) -> None:
        """Collect keyword-matching, in-domain links from *html* into self.candidates."""
        soup = BeautifulSoup(html, "lxml")
        for a in soup.find_all("a", href=True):
            href = a.get("href", "").strip()
            text = (a.get_text() or "").strip()
            # Normalize to an absolute URL.
            full_url = urljoin(base_url, href)
            parsed = urlparse(full_url)
            if parsed.scheme not in ("http", "https"):
                continue
            # Keep only china.com / passport.china.com style domains.
            if not parsed.netloc.endswith("china.com"):
                continue
            # Keyword hit against either the anchor text or the URL.
            hay = f"{text} {full_url}".lower()
            if any(k.lower() in hay for k in KEYWORDS):
                self.candidates.append((text or "(no-title)", full_url))

    def discover_candidates(self) -> None:
        """Fetch base pages, extract candidate links, dedupe, and rank them."""
        print(" 第一步:抓取基础页面")
        for url in BASE_URLS:
            resp = self._safe_get(url)
            if not resp:
                continue
            print(f" 已抓取: {resp.url} (status={resp.status_code})")
            self._extract_links(resp.text, resp.url)

        # Dedupe by URL (the last title seen for a URL wins).
        uniq = {}
        for title, link in self.candidates:
            uniq[link] = title
        self.candidates = [(t, u) for u, t in uniq.items()]

        # Rank: entries that look most like a check-in come first.
        def score(item):
            title, link = item
            hay = (title + " " + link).lower()  # hoisted out of both loops
            s = 0
            for kw in ["签到", "打卡", "checkin", "sign"]:
                if kw.lower() in hay:
                    s += 10
            for kw in ["积分", "红包", "任务", "福利", "领取", "reward", "mission"]:
                if kw.lower() in hay:
                    s += 3
            return -s  # list.sort is ascending, so negate for "best first"

        self.candidates.sort(key=score)
        print(f"📌 共发现入口: {len(self.candidates)} 个")
        for i, (title, link) in enumerate(self.candidates[:30], 1):
            print(f" {i:02d}. {title} -> {link}")

    def try_checkin_like_endpoints(self, max_try=10) -> None:
        """Probe up to *max_try* top-ranked candidates and report findings."""
        if not self.candidates:
            # Fixed message: the original printed "发现入口。" here even though
            # this branch means NO entry points were discovered.
            print(" 未发现入口。")
            return
        print("\n 第二步:尝试对最像签到/打卡的入口进行试探 ...")
        tries = 0
        for title, link in self.candidates:
            if tries >= max_try:
                break
            if link in self.checked:
                continue
            self.checked.add(link)
            tries += 1
            print(f"\n[{tries}/{max_try}] 试探: {title} -> {link}")
            resp = self._safe_get(link)
            if not resp:
                continue
            ct = resp.headers.get("Content-Type", "")
            print(f" status={resp.status_code}, content-type={ct}, final_url={resp.url}")

            # JSON responses: print a truncated preview and move on.
            if "application/json" in ct.lower():
                try:
                    data = resp.json()
                    print(" 返回JSON(截断预览):")
                    print(json.dumps(data, ensure_ascii=False, indent=2)[:1200])
                    continue
                except Exception:
                    print(" JSON 解析失败,打印文本预览:")
                    print(resp.text[:800])
                    continue

            # HTML pages: check for check-in related keywords.
            hit = [kw for kw in ["签到", "打卡", "已签到", "领取", "积分", "红包"] if kw in resp.text]
            if hit:
                print(f" 页面命中关键词: {hit}")
            else:
                print(" 页面未签到(可能需要登录/JS渲染/APP内页)")

            # Roughly scan for API-looking URLs (hint only).  The haystack is
            # lowercased, so every pattern must be lowercase too — the original
            # list contained a dead "signIn" entry that could never match.
            api_like = set()
            for m in re.findall(r"https?://[^\s\"']+", resp.text):
                lower = m.lower()
                if any(x in lower for x in ["checkin", "signin", "mission", "task", "reward", "point", "score"]):
                    api_like.add(m)
            if api_like:
                print("发现接口/活动链接(节选):")
                for x in list(api_like)[:10]:
                    print(f" - {x}")
        print("\n 结束。")

    def run(self) -> None:
        """Entry point: discover candidates, then probe the best ones."""
        print("=== china.com 签到入口脚本 ===")
        self.discover_candidates()
        self.try_checkin_like_endpoints(max_try=10)
        if not self.candidates:
            print("\n结论:公开页面未签到")
        else:
            print("\n结论:已签到")


if __name__ == "__main__":
    ChinaComCheckinProber().run()
自动探测入口:从
china.com首页 +passport.china.com入口页抓取 HTML,提取站内链接,并按"签到/打卡/积分/任务/红包"等关键词筛出候选链接。自动试探签到可能性:对最像签到/打卡的前 N 个候选链接逐个访问:
若返回 JSON:直接解析并打印关键内容(通常接口类会返回 code/msg/data)
若返回 HTML:判断是否包含"签到/已签到/领取/积分"等关键字,并扫描页面中出现的疑似接口 URL
输出可落地信息:把"候选入口 + 试探结果 + 疑似接口链接"打印出来,用于你进一步确认最终真实签到接口。
主要方法
discover_candidates()
负责抓取基础页面并调用_extract_links()提取候选入口,最后做去重与排序,把最像"签到"的入口排在前面。
_extract_links(html, base_url)
负责解析 HTML,提取<a href=...>链接,统一转成绝对 URL,并根据关键词过滤,只保留*.china.com域名下、且疑似与签到/积分相关的链接。
try_checkin_like_endpoints(max_try=10)
负责对候选链接做自动试探:请求候选页面,判断是 JSON 还是 HTML,并尝试从 HTML 中扫描可能的checkin/sign/task/reward/score等接口或活动链接,帮助你锁定签到路径。
_safe_get(url)
统一封装 GET 请求,带超时、异常捕获,避免脚本中途崩掉。
注意:
本文部分变量已做脱敏处理,仅用于测试和学习研究,禁止用于商业用途,不能保证其合法性,准确性,完整性和有效性,请根据情况自行判断。技术层面需要提供帮助,可以通过打赏的方式进行探讨。
没有评论:
发表评论