Optimize the builtin proxy, remove the extra non-native API, fix the port issue

This commit is contained in:
John Xina 2023-07-15 20:01:35 +08:00
parent c73d6408f6
commit b261576a74
7 changed files with 133 additions and 220 deletions

View File

@ -25,8 +25,7 @@ $ . venv/bin/activate
## 配置方法 ## 配置方法
打开 `shared.py` 修改 `app.config['SERVER_NAME']` 中冒号后的数字来改变端口号。 打开 `shared.py` 修改 `port` 后的数字来改变端口号。
因为 twisted 的原因,你需要将代理指向那个端口号 -1 的端口上。
其中的 `should_fetch_comments` 用于控制是否获取楼中楼。如果浏览性能不佳可以考虑 其中的 `should_fetch_comments` 用于控制是否获取楼中楼。如果浏览性能不佳可以考虑
关闭楼中楼获取。 关闭楼中楼获取。

47
app.py
View File

@ -1,5 +1,6 @@
import asyncio import asyncio
import aiotieba import aiotieba
import uvicorn
from aioflask import render_template, request, escape from aioflask import render_template, request, escape
from urllib.parse import quote_plus from urllib.parse import quote_plus
@ -8,6 +9,8 @@ from datetime import datetime
from aiotieba.api.get_posts._classdef import * from aiotieba.api.get_posts._classdef import *
from aiotieba.api._classdef.contents import * from aiotieba.api._classdef.contents import *
from proxify import AsgiproxifyHandler
from shared import * from shared import *
from extra import * from extra import *
@ -26,6 +29,10 @@ async def cache_name_from_id(c, i):
r = await c.get_user_info(i, require=aiotieba.enums.ReqUInfo.USER_NAME) r = await c.get_user_info(i, require=aiotieba.enums.ReqUInfo.USER_NAME)
cache.set(i, r) cache.set(i, r)
# Normalize unicode characters to ASCII form.
def normalize_utf8(s):
    """Return *s* with every non-ASCII character replaced by its
    backslash-free escape spelling (e.g. '你' -> 'u4f60'), producing a
    filesystem-safe ASCII name."""
    escaped = s.encode('unicode_escape').decode('ascii')
    return escaped.replace('\\', '')
###################################################################### ######################################################################
# Convert a timestamp to its simpliest readable date format. # Convert a timestamp to its simpliest readable date format.
@ -78,7 +85,7 @@ async def _jinja2_filter_translate(frags, reply_id=0):
elif isinstance(frag, FragEmoji_p): elif isinstance(frag, FragEmoji_p):
htmlfmt = append_with_leading_clean(htmlfmt, htmlfmt = append_with_leading_clean(htmlfmt,
f'<img class="emoticons" alt="[{ frag.desc }]"' f'<img class="emoticons" alt="[{ frag.desc }]"'
f'src="/static/emoticons/{ quote_plus(frag.desc) }.png">') f'src="/static/emoticons/{ normalize_utf8(frag.desc) }.png">')
if i+1 < len(frags) and isinstance(frags[i+1], FragImage_p): if i+1 < len(frags) and isinstance(frags[i+1], FragImage_p):
htmlfmt += '<br>' htmlfmt += '<br>'
elif isinstance(frag, FragLink): elif isinstance(frag, FragLink):
@ -136,21 +143,17 @@ async def forum_view():
sort = int(request.args.get('sort') or 0) sort = int(request.args.get('sort') or 0)
async with aiotieba.Client() as tieba: async with aiotieba.Client() as tieba:
if only_use_native_api: forum_info, threads = await asyncio.gather(tieba.get_forum_detail(fname),
forum_info, threads = await asyncio.gather(tieba.get_forum_detail(fname), tieba.get_threads(fname, pn=pn, sort=sort))
tieba.get_threads(fname, pn=pn, sort=sort)) if hasattr(forum_info, 'slogan'):
if hasattr(forum_info, 'slogan'): forum_info = { 'avatar': extract_image_name(forum_info.origin_avatar),
forum_info = { 'avatar': extract_image_name(forum_info.origin_avatar), 'topic': forum_info.post_num, 'thread': forum_info.post_num,
'topic': forum_info.post_num, 'thread': forum_info.post_num, 'member': forum_info.member_num, 'desc': forum_info.slogan,
'member': forum_info.member_num, 'desc': forum_info.slogan, 'name': forum_info.fname }
'name': forum_info.fname }
else:
forum_info = { 'avatar': 'a6efce1b9d16fdfa6291460ab98f8c5495ee7b51.jpg',
'topic': forum_info.post_num, 'thread': forum_info.post_num,
'member': forum_info.member_num, 'desc': '贴吧描述暂不可用', 'name': forum_info.fname }
else: else:
forum_info, threads = await asyncio.gather(awaitify(find_tieba_info)(fname), forum_info = { 'avatar': 'a6efce1b9d16fdfa6291460ab98f8c5495ee7b51.jpg',
tieba.get_threads(fname, pn=pn, sort=sort)) 'topic': forum_info.post_num, 'thread': forum_info.post_num,
'member': forum_info.member_num, 'desc': '贴吧描述暂不可用', 'name': forum_info.fname }
if threads.page.current_page > threads.page.total_page or pn < 1: if threads.page.current_page > threads.page.total_page or pn < 1:
return await render_template('error.html', msg = \ return await render_template('error.html', msg = \
@ -195,5 +198,17 @@ async def runtime_error_view(e):
async def general_error_view(e): async def general_error_view(e):
return await render_template('error.html', msg=e) return await render_template('error.html', msg=e)
######################################################################
@proxified.register('/proxy/avatar/')
class AvatarProxyHandler(AsgiproxifyHandler):
    # Reverse-proxies /proxy/avatar/<portrait> to Baidu's avatar host.
    def make_request_url(self):
        # Strip the leading '/proxy/avatar/' (14 characters) to get the
        # portrait identifier.
        return 'http://tb.himg.baidu.com/sys/portraith/item/' + self.scope['path'][14:]
@proxified.register('/proxy/pic/')
class PictureProxyHandler(AsgiproxifyHandler):
    # Reverse-proxies /proxy/pic/<pic> to Baidu's forum picture host.
    def make_request_url(self):
        # Strip the leading '/proxy/pic/' (11 characters) to get the
        # picture identifier.
        return 'http://imgsa.baidu.com/forum/pic/item/' + self.scope['path'][11:]
if __name__ == '__main__': if __name__ == '__main__':
app.run(debug=True) uvicorn.run(proxified, host=host, port=port)

View File

@ -12,38 +12,4 @@ def extract_image_name(url):
try: try:
return match.group(1) + '.jpg' return match.group(1) + '.jpg'
except: except:
return '404.jpg' return 'a6efce1b9d16fdfa6291460ab98f8c5495ee7b51.jpg'
@cache.memoize(timeout=60)
def find_tieba_info(tname):
    """Scrape basic info about a tieba forum by screen-scraping its page.

    :param tname: the name of the target forum.
    :returns: a dict with 'name', 'avatar', 'topic', 'thread', 'member'
              and 'desc' keys describing the forum.
    :raises ValueError: if the forum does not exist (Baidu answers with a
                        302 redirect to its search page instead).
    """
    info = { 'name': tname }
    res = requests.get('https://tieba.baidu.com/f',
        params={'kw': tname},
        allow_redirects=False)
    # Baidu will bring us to the search page, so we ignore it.
    if res.status_code == 302:
        raise ValueError('您搜索的贴吧不存在')
    soup = bs4.BeautifulSoup(res.text, 'html.parser')
    # Forum avatar image URL lives in the card header element.
    elems = soup.select('#forum-card-head')
    info['avatar'] = extract_image_name(elems[0]['src'])
    # The footer carries the topic / thread / member counters, in that
    # order — presumably stable markup; verify if scraping breaks.
    footer = soup.select('.th_footer_l')[0]
    stat_elems = footer.findAll('span', {'class': 'red_text'}, recursive=False)
    stats = list(map(lambda x: int(x.text), stat_elems))
    info |= { 'topic': stats[0], 'thread': stats[1], 'member': stats[2] }
    slogan = soup.select('.card_slogan')[0]
    info['desc'] = slogan.text
    return info

161
main.py
View File

@ -1,161 +0,0 @@
import multiprocessing
from app import app
from urllib.parse import quote as urlquote, urlparse, urlunparse
from twisted.web.http import _QUEUED_SENTINEL, HTTPChannel, HTTPClient, Request
from twisted.web.resource import Resource
from twisted.web import proxy, server
from twisted.web.static import File
from twisted.internet.protocol import ClientFactory
from twisted.internet import reactor, utils
plain_cookies = {}
################################################################################
# Modified Dynamic Proxy (from twisted)
################################################################################
class ProxyClient(HTTPClient):
    """Relays one upstream HTTP response back to the originating
    request object (``father``)."""

    # Set once the father's finish notification has been wired up, so the
    # response is never terminated twice.
    _finished = False

    def __init__(self, command, rest, version, headers, data, father):
        self.father = father
        self.command = command
        self.rest = rest
        # Strip hop-by-hop headers: the upstream connection is one-shot
        # and must not be kept alive on behalf of the client.
        if b"proxy-connection" in headers:
            del headers[b"proxy-connection"]
        headers[b"connection"] = b"close"
        headers.pop(b"keep-alive", None)
        self.headers = headers
        self.data = data

    def connectionMade(self):
        # Replay the original request line, headers and body upstream.
        self.sendCommand(self.command, self.rest)
        for header, value in self.headers.items():
            self.sendHeader(header, value)
        self.endHeaders()
        self.transport.write(self.data)

    def handleStatus(self, version, code, message):
        # Mirror the upstream status line onto the client response.
        self.father.setResponseCode(int(code), message)

    def handleHeader(self, key, value):
        # twisted's Request pre-populates these headers; overwrite rather
        # than append so each appears only once in the relayed response.
        if key.lower() in [b"server", b"date", b"content-type"]:
            self.father.responseHeaders.setRawHeaders(key, [value])
        else:
            self.father.responseHeaders.addRawHeader(key, value)

    def handleResponsePart(self, buffer):
        # Stream body chunks straight through to the client.
        self.father.write(buffer)

    def handleResponseEnd(self):
        if not self._finished:
            self._finished = True
            # Swallow errors from a client that already disconnected.
            # NOTE(review): stock twisted ProxyClient also calls
            # self.father.finish() here — confirm responses terminate.
            self.father.notifyFinish().addErrback(lambda x: None)
        self.transport.loseConnection()
class ProxyClientFactory(ClientFactory):
    """Builds ProxyClient connections for one relayed request and reports
    connection failures back to the originating request."""

    protocol = ProxyClient

    def __init__(self, command, rest, version, headers, data, father):
        self.command = command
        self.rest = rest
        self.version = version
        self.headers = headers
        self.data = data
        self.father = father

    def buildProtocol(self, addr):
        args = (self.command, self.rest, self.version,
                self.headers, self.data, self.father)
        return self.protocol(*args)

    def clientConnectionFailed(self, connector, reason):
        # Upstream unreachable: answer the waiting client with a 501 page.
        origin = self.father
        origin.setResponseCode(501, b"Gateway error")
        origin.responseHeaders.addRawHeader(b"Content-Type", b"text/html")
        origin.write(b"<H1>Could not connect</H1>")
        origin.finish()
class ReverseProxyResource(Resource):
    """Twisted resource that reverse-proxies /proxy/avatar/ and
    /proxy/pic/ requests to Baidu's image hosts."""

    def __init__(self, path, reactor=reactor):
        Resource.__init__(self)
        self.path = path
        self.reactor = reactor

    def getChild(self, path, request):
        # Accumulate the (url-quoted) child segments so render() can
        # inspect the full request path.
        return ReverseProxyResource(
            self.path + b'/' + urlquote(path, safe=b'').encode("utf-8"),
            self.reactor
        )

    def _proxy_to(self, request, host, url):
        """Forward *request* to http://*host*/ at *url* (port 80) and
        stream the upstream response back.

        :returns: twisted's NOT_DONE_YET sentinel — the response is
                  finished asynchronously by ProxyClient.
        """
        request.requestHeaders.setRawHeaders(b'host', [host.encode('ascii')])
        request.content.seek(0, 0)
        clientFactory = ProxyClientFactory(
            b'GET', url.encode('utf-8'),
            request.clientproto,
            request.getAllHeaders(),
            request.content.read(),
            request,
        )
        self.reactor.connectTCP(host, 80, clientFactory)
        return server.NOT_DONE_YET

    def render_proxy_avatar(self, request, req_path):
        # Strip the leading '/proxy/avatar/' (14 characters).
        portrait = req_path[14:]
        return self._proxy_to(
            request, 'tb.himg.baidu.com',
            'http://tb.himg.baidu.com/sys/portraith/item/' + portrait)

    def render_proxy_pic(self, request, req_path):
        # Strip the leading '/proxy/pic/' (11 characters).
        pic = req_path[11:]
        return self._proxy_to(
            request, 'imgsa.baidu.com',
            'http://imgsa.baidu.com/forum/pic/item/' + pic)

    def render(self, request):
        # Justify the request path.
        req_path = self.path.decode('utf-8')
        if req_path.startswith('/proxy/avatar/'):
            return self.render_proxy_avatar(request, req_path)
        elif req_path.startswith('/proxy/pic/'):
            return self.render_proxy_pic(request, req_path)
        else:
            request.setResponseCode(418, b'I\'m a teapot')
            # render() must return bytes (or NOT_DONE_YET); the previous
            # bare `return` handed twisted a None and broke the response.
            return b''
################################################################################
# To start this function for testing: python -c 'import main; main.twisted_start()'
def twisted_start():
    """Run the twisted site: static files and /proxy handled locally,
    everything else reverse-proxied to the flask backend."""
    backend_port = int(app.config['SERVER_NAME'].split(':')[1])
    root = proxy.ReverseProxyResource('127.0.0.1', backend_port, b'')
    root.putChild(b'proxy', ReverseProxyResource(b'/proxy'))
    root.putChild(b'static', File('static'))
    print(f' *** SERVER IS RUNNING ON PORT {backend_port-1} ***')
    # The public port sits one below the flask port.
    reactor.listenTCP(backend_port-1, server.Site(root))
    reactor.run()
# To start this function for testing: python -c 'import main; main.flask_start()'
def flask_start():
    """Run the bare flask app (backend only, no twisted front proxy)."""
    app.run()
# If we're executed directly, also start the flask daemon.
if __name__ == '__main__':
    # Flask serves in a child process; twisted proxies in the parent.
    flask_task = multiprocessing.Process(target=flask_start)
    flask_task.daemon = True # Exit the child if the parent was killed :-(
    flask_task.start()
    twisted_start()

90
proxify.py Normal file
View File

@ -0,0 +1,90 @@
import asyncio
import aiohttp
class AsgiproxifyHandler():
    """Describes how one proxied request is made.

    Subclasses override make_request_url() (and optionally the other
    make_* hooks) to point at the upstream resource for the incoming
    ASGI scope.
    """

    def __init__(self, scope):
        # The ASGI connection scope of the incoming request.
        self.scope = scope

    def make_request_url(self):
        """Return the upstream URL to fetch; meant to be overridden."""
        return 'http://example.org/'

    def make_request_cookies(self):
        """Cookies to send upstream; none by default."""
        return {}

    def make_request_headers(self):
        """Build upstream request headers from the incoming ASGI ones.

        Host is dropped so the HTTP client fills in the upstream host.
        """
        req_headers = {k.decode(): v.decode() for k, v in self.scope['headers']}
        req_headers.pop('host', None)
        return req_headers

    def make_response_headers(self, upstream_headers):
        """Build ASGI response headers from the upstream response headers.

        Server/Date are dropped — the serving ASGI server sets its own.
        ASGI requires header names and values to be byte strings, so each
        pair is encoded (latin-1, per the HTTP wire encoding); returning
        str pairs here breaks strict servers such as uvicorn+httptools.
        """
        headers = dict(upstream_headers)
        headers.pop('Server', None)
        headers.pop('Date', None)
        return [(k.encode('latin-1'), v.encode('latin-1'))
                for k, v in headers.items()]

    def make_request(self, session):
        """Create the (unawaited) aiohttp GET request for this handler."""
        return session.request('GET', self.make_request_url(),
                               cookies=self.make_request_cookies(),
                               headers=self.make_request_headers(),)
class Asgiproxify():
    """ASGI middleware: requests whose path starts with a registered
    prefix are reverse-proxied via their handler; everything else is
    forwarded to the wrapped ASGI app."""

    def __init__(self, app=None):
        # Per-instance registry mapping leading path -> handler class.
        # (Previously a class attribute, which every Asgiproxify instance
        # would have shared and mutated.)
        self.reg = {}
        self.to(app)

    def to(self, app):
        """Set the wrapped ASGI application."""
        self.app = app

    def install(self, leading_path, handler):
        """Register *handler* for requests under *leading_path*."""
        self.reg[leading_path] = handler

    def register(self, leading_path):
        """Class-decorator form of install()."""
        def decorator(c):
            self.install(leading_path, c)
            # Return the class so the decorated name stays bound to it
            # (the previous version implicitly returned None).
            return c
        return decorator

    async def handle_proxy(self, scope, receive, send, handler):
        handler_i = handler(scope)
        # Consume the initial http.request event before streaming.
        request = await receive()

        async def reverse_proxy_task():
            # auto_decompress=False: the upstream body is streamed
            # verbatim, matching the forwarded Content-Encoding header.
            async with aiohttp.ClientSession(auto_decompress=False) as session:
                async with handler_i.make_request(session) as resp:
                    await send({
                        'type': 'http.response.start',
                        'status': resp.status,
                        'headers': handler_i.make_response_headers(resp.headers),
                    })
                    async for chunk, end_of_resp in resp.content.iter_chunks():
                        await send({
                            'type': 'http.response.body',
                            'body': chunk,
                            'more_body': True,
                        })
                    # Terminating event: empty body, more_body defaults False.
                    await send({ 'type': 'http.response.body' })

        task = asyncio.create_task(reverse_proxy_task())
        while True:
            # Watch for client disconnect so the upstream fetch can be
            # cancelled instead of streaming into the void.
            ev = await receive()
            if ev['type'] == 'http.disconnect':
                task.cancel()
                return

    async def __call__(self, scope, receive, send):
        if scope['type'] != 'http':
            return await self.app(scope, receive, send)

        handler = None
        for leading_path, proxy_handler in self.reg.items():
            if scope['path'].startswith(leading_path):
                handler = proxy_handler

        if not handler:
            return await self.app(scope, receive, send)
        else:
            return await self.handle_proxy(scope, receive, send, handler)

View File

@ -1,7 +1,5 @@
aioflask==0.4.0 aioflask==0.4.0
flask==2.1.3 flask==2.1.3
aiotieba==3.5.0 aiotieba
aiohttp
Flask-Caching Flask-Caching
beautifulsoup4
requests
twisted

View File

@ -3,6 +3,8 @@ from flask_caching import Cache
from functools import wraps from functools import wraps
from proxify import Asgiproxify
def awaitify(sync_func): def awaitify(sync_func):
"""Wrap a synchronous callable to allow ``await``'ing it""" """Wrap a synchronous callable to allow ``await``'ing it"""
@wraps(sync_func) @wraps(sync_func)
@ -11,13 +13,17 @@ def awaitify(sync_func):
return async_func return async_func
app = Flask(__name__) app = Flask(__name__)
proxified = Asgiproxify(app)
###################################################################### ######################################################################
app.config['SERVER_NAME'] = '127.0.0.1:8886' host = 'localhost'
port = 8885
should_fetch_comments = True should_fetch_comments = True
only_use_native_api = True
app.config['DEBUG'] = False
app.config['TESTING'] = False
###################################################################### ######################################################################