From b261576a74bd7149071f0d76bf71390090fc4455 Mon Sep 17 00:00:00 2001 From: John Xina Date: Sat, 15 Jul 2023 20:01:35 +0800 Subject: [PATCH] Optimize builtin proxy, Remove extra not native API, Fix the port issue --- README.md | 3 +- app.py | 47 +++++++++----- extra.py | 36 +---------- main.py | 161 ----------------------------------------------- proxify.py | 90 ++++++++++++++++++++++++++ requirements.txt | 6 +- shared.py | 10 ++- 7 files changed, 133 insertions(+), 220 deletions(-) delete mode 100644 main.py create mode 100644 proxify.py diff --git a/README.md b/README.md index 762e693..b6f69c3 100644 --- a/README.md +++ b/README.md @@ -25,8 +25,7 @@ $ . venv/bin/activate ## 配置方法 -打开 `shared.py` 修改 `app.config['SERVER_NAME']` 中冒号后的数字来改变端口号。 -因为 twisted 的原因,你需要将代理指向那个端口号 -1 的端口上。 +打开 `shared.py` 修改 `port` 后的数字来改变端口号。 其中的 `should_fetch_comments` 用于控制是否获取楼中楼。如果浏览性能不佳可以考虑 关闭楼中楼获取。 diff --git a/app.py b/app.py index 728899a..840cfc4 100644 --- a/app.py +++ b/app.py @@ -1,5 +1,6 @@ import asyncio import aiotieba +import uvicorn from aioflask import render_template, request, escape from urllib.parse import quote_plus @@ -8,6 +9,8 @@ from datetime import datetime from aiotieba.api.get_posts._classdef import * from aiotieba.api._classdef.contents import * +from proxify import AsgiproxifyHandler + from shared import * from extra import * @@ -26,6 +29,10 @@ async def cache_name_from_id(c, i): r = await c.get_user_info(i, require=aiotieba.enums.ReqUInfo.USER_NAME) cache.set(i, r) +# Normalize unicode characters to ASCII form. +def normalize_utf8(s): + return s.encode('unicode_escape').decode('ascii').replace('\\', '') + ###################################################################### # Convert a timestamp to its simpliest readable date format. @@ -78,7 +85,7 @@ async def _jinja2_filter_translate(frags, reply_id=0): elif isinstance(frag, FragEmoji_p): htmlfmt = append_with_leading_clean(htmlfmt, f'[{ frag.desc }]') + f'src="/static/emoticons/{ normalize_utf8(frag.desc) }.png">') if i+1 < len(frags) and isinstance(frags[i+1], FragImage_p): htmlfmt += '
' elif isinstance(frag, FragLink): @@ -136,21 +143,17 @@ async def forum_view(): sort = int(request.args.get('sort') or 0) async with aiotieba.Client() as tieba: - if only_use_native_api: - forum_info, threads = await asyncio.gather(tieba.get_forum_detail(fname), - tieba.get_threads(fname, pn=pn, sort=sort)) - if hasattr(forum_info, 'slogan'): - forum_info = { 'avatar': extract_image_name(forum_info.origin_avatar), - 'topic': forum_info.post_num, 'thread': forum_info.post_num, - 'member': forum_info.member_num, 'desc': forum_info.slogan, - 'name': forum_info.fname } - else: - forum_info = { 'avatar': 'a6efce1b9d16fdfa6291460ab98f8c5495ee7b51.jpg', - 'topic': forum_info.post_num, 'thread': forum_info.post_num, - 'member': forum_info.member_num, 'desc': '贴吧描述暂不可用', 'name': forum_info.fname } + forum_info, threads = await asyncio.gather(tieba.get_forum_detail(fname), + tieba.get_threads(fname, pn=pn, sort=sort)) + if hasattr(forum_info, 'slogan'): + forum_info = { 'avatar': extract_image_name(forum_info.origin_avatar), + 'topic': forum_info.post_num, 'thread': forum_info.post_num, + 'member': forum_info.member_num, 'desc': forum_info.slogan, + 'name': forum_info.fname } else: - forum_info, threads = await asyncio.gather(awaitify(find_tieba_info)(fname), - tieba.get_threads(fname, pn=pn, sort=sort)) + forum_info = { 'avatar': 'a6efce1b9d16fdfa6291460ab98f8c5495ee7b51.jpg', + 'topic': forum_info.post_num, 'thread': forum_info.post_num, + 'member': forum_info.member_num, 'desc': '贴吧描述暂不可用', 'name': forum_info.fname } if threads.page.current_page > threads.page.total_page or pn < 1: return await render_template('error.html', msg = \ @@ -195,5 +198,17 @@ async def runtime_error_view(e): async def general_error_view(e): return await render_template('error.html', msg=e) +###################################################################### + +@proxified.register('/proxy/avatar/') +class AvatarProxyHandler(AsgiproxifyHandler): + def make_request_url(self): + return 'http://tb.himg.baidu.com/sys/portraith/item/' + self.scope['path'][14:] + +@proxified.register('/proxy/pic/') +class PictureProxyHandler(AsgiproxifyHandler): + def make_request_url(self): + return 'http://imgsa.baidu.com/forum/pic/item/' + self.scope['path'][11:] + if __name__ == '__main__': - app.run(debug=True) + uvicorn.run(proxified, host=host, port=port) diff --git a/extra.py b/extra.py index 2565ada..ef5fe68 100644 --- a/extra.py +++ b/extra.py @@ -12,38 +12,4 @@ def extract_image_name(url): try: return match.group(1) + '.jpg' except: - return '404.jpg' - -@cache.memoize(timeout=60) -def find_tieba_info(tname): - """Get the tiebat avatar for the forum name. - - :param tname: the name of the target forum. - :returns: the internal ID of the corresponding avatar. - - """ - info = { 'name': tname } - - res = requests.get('https://tieba.baidu.com/f', - params={'kw': tname}, - allow_redirects=False) - - # Baidu will bring us to the search page, so we ignore it. - if res.status_code == 302: - raise ValueError('您搜索的贴吧不存在') - - soup = bs4.BeautifulSoup(res.text, 'html.parser') - - elems = soup.select('#forum-card-head') - info['avatar'] = extract_image_name(elems[0]['src']) - - footer = soup.select('.th_footer_l')[0] - stat_elems = footer.findAll('span', {'class': 'red_text'}, recursive=False) - stats = list(map(lambda x: int(x.text), stat_elems)) - - info |= { 'topic': stats[0], 'thread': stats[1], 'member': stats[2] } - - slogan = soup.select('.card_slogan')[0] - info['desc'] = slogan.text - - return info + return 'a6efce1b9d16fdfa6291460ab98f8c5495ee7b51.jpg' diff --git a/main.py b/main.py deleted file mode 100644 index aaa0056..0000000 --- a/main.py +++ /dev/null @@ -1,161 +0,0 @@ -import multiprocessing - -from app import app - -from urllib.parse import quote as urlquote, urlparse, urlunparse -from twisted.web.http import _QUEUED_SENTINEL, HTTPChannel, HTTPClient, Request -from twisted.web.resource import Resource -from twisted.web import proxy, server -from twisted.web.static import File -from twisted.internet.protocol import ClientFactory -from twisted.internet import reactor, utils - -plain_cookies = {} - -################################################################################ -# Modified Dynamic Proxy (from twisted) -################################################################################ - -class ProxyClient(HTTPClient): - _finished = False - - def __init__(self, command, rest, version, headers, data, father): - self.father = father - self.command = command - self.rest = rest - if b"proxy-connection" in headers: - del headers[b"proxy-connection"] - headers[b"connection"] = b"close" - headers.pop(b"keep-alive", None) - self.headers = headers - self.data = data - - def connectionMade(self): - self.sendCommand(self.command, self.rest) - for header, value in self.headers.items(): - self.sendHeader(header, value) - self.endHeaders() - self.transport.write(self.data) - - def handleStatus(self, version, code, message): - self.father.setResponseCode(int(code), message) - - def handleHeader(self, key, value): - if key.lower() in [b"server", b"date", b"content-type"]: - self.father.responseHeaders.setRawHeaders(key, [value]) - else: - self.father.responseHeaders.addRawHeader(key, value) - - def handleResponsePart(self, buffer): - self.father.write(buffer) - - def handleResponseEnd(self): - if not self._finished: - self._finished = True - self.father.notifyFinish().addErrback(lambda x: None) - self.transport.loseConnection() - -class ProxyClientFactory(ClientFactory): - protocol = ProxyClient - - def __init__(self, command, rest, version, headers, data, father): - self.father = father - self.command = command - self.rest = rest - self.headers = headers - self.data = data - self.version = version - - def buildProtocol(self, addr): - return self.protocol( - self.command, self.rest, self.version, self.headers, self.data, self.father - ) - - def clientConnectionFailed(self, connector, reason): - self.father.setResponseCode(501, b"Gateway error") - self.father.responseHeaders.addRawHeader(b"Content-Type", b"text/html") - self.father.write(b"

Could not connect

") - self.father.finish() - -class ReverseProxyResource(Resource): - def __init__(self, path, reactor=reactor): - Resource.__init__(self) - self.path = path - self.reactor = reactor - - def getChild(self, path, request): - return ReverseProxyResource( - self.path + b'/' + urlquote(path, safe=b'').encode("utf-8"), - self.reactor - ) - - def render_proxy_avatar(self, request, req_path): - portrait = req_path[14:] - - request.requestHeaders.setRawHeaders(b'host', [b'tb.himg.baidu.com']) - request.content.seek(0, 0) - - clientFactory = ProxyClientFactory( - b'GET', ('http://tb.himg.baidu.com/sys/portraith/item/' + portrait).encode('utf-8'), - request.clientproto, - request.getAllHeaders(), - request.content.read(), - request, - ) - - self.reactor.connectTCP('tb.himg.baidu.com', 80, clientFactory) - return server.NOT_DONE_YET - - def render_proxy_pic(self, request, req_path): - pic = req_path[11:] - - request.requestHeaders.setRawHeaders(b'host', [b'imgsa.baidu.com']) - request.content.seek(0, 0) - - clientFactory = ProxyClientFactory( - b'GET', ('http://imgsa.baidu.com/forum/pic/item/' + pic).encode('utf-8'), - request.clientproto, - request.getAllHeaders(), - request.content.read(), - request, - ) - - self.reactor.connectTCP('imgsa.baidu.com', 80, clientFactory) - return server.NOT_DONE_YET - - def render(self, request): - # Justify the request path. - req_path = self.path.decode('utf-8') - if req_path.startswith('/proxy/avatar/'): - return self.render_proxy_avatar(request, req_path) - elif req_path.startswith('/proxy/pic/'): - return self.render_proxy_pic(request, req_path) - else: - request.setResponseCode(418, b'I\'m a teapot') - return - -################################################################################ - -# To start this function for testing: python -c 'import main; main.twisted_start()' -def twisted_start(): - flask_port = int(app.config['SERVER_NAME'].split(':')[1]) - flask_res = proxy.ReverseProxyResource('127.0.0.1', flask_port, b'') - flask_res.putChild(b'proxy', ReverseProxyResource(b'/proxy')) - flask_res.putChild(b'static', File('static')) - - print(f' *** SERVER IS RUNNING ON PORT {flask_port-1} ***') - - site = server.Site(flask_res) - reactor.listenTCP(flask_port-1, site) - reactor.run() - -# To start this function for testing: python -c 'import main; main.flask_start()' -def flask_start(): - app.run() - -# If we're executed directly, also start the flask daemon. -if __name__ == '__main__': - flask_task = multiprocessing.Process(target=flask_start) - flask_task.daemon = True # Exit the child if the parent was killed :-( - flask_task.start() - twisted_start() diff --git a/proxify.py b/proxify.py new file mode 100644 index 0000000..17f47f3 --- /dev/null +++ b/proxify.py @@ -0,0 +1,90 @@ +import asyncio +import aiohttp + +class AsgiproxifyHandler(): + def __init__(self, scope): + self.scope = scope + + def make_request_url(self): + return 'http://example.org/' + + def make_request_cookies(self): + return {} + + def make_request_headers(self): + req_headers = {k.decode(): v.decode() for k, v in self.scope['headers']} + req_headers.pop('host', None) + return req_headers + + def make_response_headers(self, upstream_headers): + headers = dict(upstream_headers) + headers.pop('Server', None) + headers.pop('Date', None) + + resp_headers = [(k, v) for k, v in headers.items()] + return resp_headers + + def make_request(self, session): + return session.request('GET', self.make_request_url(), + cookies=self.make_request_cookies(), + headers=self.make_request_headers(),) + +class Asgiproxify(): + app = None + reg = {} + + def __init__(self, app=None): + self.to(app) + + def to(self, app): + self.app = app + + def install(self, leading_path, handler): + self.reg[leading_path] = handler + + def register(self, leading_path): + def decorator(c): + self.install(leading_path, c) + return decorator + + async def handle_proxy(self, scope, receive, send, handler): + handler_i = handler(scope) + request = await receive() + + async def reverse_proxy_task(): + async with aiohttp.ClientSession(auto_decompress=False) as session: + async with handler_i.make_request(session) as resp: + await send({ + 'type': 'http.response.start', + 'status': resp.status, + 'headers': handler_i.make_response_headers(resp.headers), + }) + async for chunk, end_of_resp in resp.content.iter_chunks(): + await send({ + 'type': 'http.response.body', + 'body': chunk, + 'more_body': True, + }) + await send({ 'type': 'http.response.body' }) + + task = asyncio.create_task(reverse_proxy_task()) + while True: + ev = await receive() + if ev['type'] == 'http.disconnect': + task.cancel() + return + + async def __call__(self, scope, receive, send): + if scope['type'] != 'http': + return await self.app(scope, receive, send) + + handler = None + + for leading_path, proxy_handler in self.reg.items(): + if scope['path'].startswith(leading_path): + handler = proxy_handler + + if not handler: + return await self.app(scope, receive, send) + else: + return await self.handle_proxy(scope, receive, send, handler) diff --git a/requirements.txt b/requirements.txt index 84ebd8c..f11108f 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,7 +1,5 @@ aioflask==0.4.0 flask==2.1.3 -aiotieba==3.5.0 +aiotieb +aiohttp Flask-Caching -beautifulsoup4 -requests -twisted diff --git a/shared.py b/shared.py index f954354..6b5bbac 100644 --- a/shared.py +++ b/shared.py @@ -3,6 +3,8 @@ from flask_caching import Cache from functools import wraps +from proxify import Asgiproxify + def awaitify(sync_func): """Wrap a synchronous callable to allow ``await``'ing it""" @wraps(sync_func) @@ -11,13 +13,17 @@ def awaitify(sync_func): return async_func app = Flask(__name__) +proxified = Asgiproxify(app) ###################################################################### -app.config['SERVER_NAME'] = '127.0.0.1:8886' +host = 'localhost' +port = 8885 should_fetch_comments = True -only_use_native_api = True + +app.config['DEBUG'] = False +app.config['TESTING'] = False ######################################################################