Setting up an OpenResty forward proxy
1. Requirements
The requirement (originally shown in a diagram): the crawler points at one fixed, hard-coded proxy address and never handles proxy switching itself; the proxy layer rotates the upstream proxy automatically.
2. Choosing a solution
- 1. An earlier Python-based forward proxy (I forget its name; the project and this article were written at different times, so I could not find it again). Its memory usage was very high and it was not particularly fast, so I went with option 2.
- 2. OpenResty: nginx acts as the forward proxy, a Lua script switches the upstream proxy, and the proxy addresses are read from a Redis list (seeding example below).
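The configs below pop proxy IPs from a Redis list named vps with RPOPLPUSH (DB 15 in the standalone setup), which rotates the list in place. A minimal sketch of seeding and rotating that list with redis-cli, with made-up IPs for illustration:
# seed the list with the proxy machines' IPs (placeholder addresses)
redis-cli -n 15 RPUSH vps 10.0.0.11 10.0.0.12 10.0.0.13
# RPOPLPUSH with the same source and destination rotates the list:
# it returns the tail element (10.0.0.13) and pushes it back to the head,
# leaving 10.0.0.13 10.0.0.11 10.0.0.12
redis-cli -n 15 RPOPLPUSH vps vps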
Install OpenResty. On CentOS:
yum install yum-utils
yum-config-manager --add-repo https://openresty.org/package/centos/openresty.repo
yum install openresty
Or install it this way instead (either approach works):
wget https://openresty.org/package/centos/openresty.repo
sudo mv openresty.repo /etc/yum.repos.d/
sudo yum check-update
sudo yum install -y openresty
On macOS:
brew install openresty/brew/openresty
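A quick sanity check that the install worked (assuming the default CentOS package prefix /usr/local/openresty; the Homebrew build puts openresty on PATH):
/usr/local/openresty/bin/openresty -v
# should print something like: nginx version: openresty/1.xx.x.x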
Write the configuration file: create a new file named nginx_redis.conf.
Standalone Redis version:
worker_processes 16;                          # number of nginx workers
error_log /data/logs/openresty/error.log;     # error log path

events {
    worker_connections 1024;
}

stream {
    ## TCP proxy log format
    log_format tcp_proxy '$remote_addr [$time_local] '
                         '$protocol $status $bytes_sent $bytes_received '
                         '$session_time "$upstream_addr" '
                         '"$upstream_bytes_sent" "$upstream_bytes_received" "$upstream_connect_time"';

    ## TCP proxy access log
    access_log /data/logs/openresty/tcp-access.log tcp_proxy;
    open_log_file_cache off;

    ## TCP proxy upstream
    upstream backend {
        # any placeholder works here; balancer_by_lua_block below overrides it
        server 127.0.0.2:1101;

        balancer_by_lua_block {
            -- pick the upstream proxy for this connection
            local balancer = require "ngx.balancer"
            local host = "127.0.0.3"
            local port = 3888                 -- the port the proxy machines expose
            host = ngx.ctx.proxy_host         -- set in preread_by_lua_block below

            local ok, err = balancer.set_current_peer(host, port)
            if not ok then
                ngx.log(ngx.ERR, "failed to set the peer: ", err)
            end
        }
    }

    server {
        preread_by_lua_block {
            local redis = require "resty.redis"

            -- create an instance
            local redis_instance = redis:new()
            -- timeout in milliseconds
            redis_instance:set_timeout(3000)

            -- connect
            local rhost = "10.8.181.1"
            local rport = 6379
            local ok, err = redis_instance:connect(rhost, rport)
            if not ok then
                ngx.log(ngx.ERR, "connect to redis error: ", err)
                return
            end

            local oke, err = redis_instance:select(15)
            if not oke then
                ngx.log(ngx.ERR, "select redis db error: ", err)
                return redis_instance:close()
            end

            -- rotate the proxy list: pop one IP from the tail and push it back to the head
            local res, err = redis_instance:rpoplpush("vps", "vps")
            if not res then
                ngx.log(ngx.ERR, "res num error: ", err)
                return redis_instance:close()
            end
            -- ngx.log(ngx.ERR, "redis data = ", res .. ":3888")

            ngx.ctx.proxy_host = res
            redis_instance:close()
        }

        # the local port, i.e. the one the crawler hard-codes
        listen 0.0.0.0:3889;
        # listens on the local address and port; when using keepalived, use the keepalived VIP

        proxy_connect_timeout 3s;
        proxy_timeout 10s;

        #set_by_lua_file $backend set.lua;
        #proxy_pass $backend;
        proxy_pass backend;                   # the upstream defined above
    }
}
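If connections go through but the upstream never rotates, the first thing worth checking is the list this preread block reads; a quick look at the vps key in DB 15 on the Redis host configured above:
redis-cli -h 10.8.181.1 -p 6379 -n 15 LRANGE vps 0 -1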
Redis Cluster version:
worker_processes 16;                                   # number of nginx workers
error_log /data/logs/openresty/error-pa-redis.log;     # error log path

events {
    worker_connections 1024;
}

stream {
    ## TCP proxy log format
    log_format tcp_proxy '$remote_addr [$time_local] '
                         '$protocol $status $bytes_sent $bytes_received '
                         '$session_time "$upstream_addr" '
                         '"$upstream_bytes_sent" "$upstream_bytes_received" "$upstream_connect_time"';

    ## TCP proxy access log
    access_log /data/logs/openresty/tcp-access-pa-redis.log tcp_proxy;
    open_log_file_cache off;

    ## TCP proxy upstream
    upstream backend {
        # placeholder; balancer_by_lua_block below overrides it
        server 127.0.0.2:11201;

        balancer_by_lua_block {
            -- pick the upstream proxy for this connection
            local balancer = require "ngx.balancer"
            local host = "127.0.0.3"
            local port = 3888                 -- the port the proxy machines expose
            host = ngx.ctx.proxy_host         -- set in preread_by_lua_block below

            local ok, err = balancer.set_current_peer(host, port)
            if not ok then
                ngx.log(ngx.ERR, "failed to set the peer: ", err)
            end
        }
    }

    lua_shared_dict redis_cluster_slot_locks 100k;

    server {
        preread_by_lua_block {
            local config = {
                name = "mengmugai",                -- redis cluster name
                serv_list = {                      -- redis cluster node list (host and port)
                    { ip = "10.8.181.1", port = 16379 },
                    { ip = "10.8.181.2", port = 16379 },
                    { ip = "10.8.181.3", port = 16379 },
                    { ip = "10.8.181.4", port = 16379 }
                },
                keepalive_timeout = 60000,         -- redis connection pool idle timeout
                keepalive_cons = 1000,             -- redis connection pool size
                connect_timeout = 1000,            -- timeout while connecting
                read_timeout = 1000,               -- timeout while reading
                send_timeout = 1000,               -- timeout while sending
                max_redirection = 5,               -- maximum retry attempts on redirection
                max_connection_attempts = 1,       -- maximum retry attempts on connection
                auth = "renzhengmima"              -- password, if auth is enabled
            }

            local redis_cluster = require "rediscluster"
            local red_c = redis_cluster:new(config)

            -- rotate the proxy list: pop one IP from the tail and push it back to the head
            local res, err = red_c:rpoplpush("vps", "vps")
            if err then
                ngx.log(ngx.ERR, "pa redis err: ", err)
            else
                ngx.log(ngx.ERR, "redis data = ", res .. ":3888")
            end

            ngx.ctx.proxy_host = res
            red_c:close()
        }

        # the local port, i.e. the one the crawler hard-codes
        listen 0.0.0.0:3889;
        # listens on the local address and port; when using keepalived, use the keepalived VIP

        proxy_connect_timeout 3s;
        proxy_timeout 10s;

        #set_by_lua_file $backend set.lua;
        #proxy_pass $backend;
        proxy_pass backend;                   # the upstream defined above
    }
}
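For the cluster variant, a quick check that the vps list is reachable through the cluster (node address and password are the ones from the config above; -c enables cluster redirects):
redis-cli -c -h 10.8.181.1 -p 16379 -a renzhengmima LLEN vps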
Running it. For the standalone version, simply run:
/usr/local/openresty/nginx/sbin/nginx -c /data/openresty-proxy/conf/nginx_redis.conf
and that's it. Pick whatever config file path suits you, and adjust the log paths inside the config to match.
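Optionally, a few sanity checks around startup (same paths as above; ss and tail are just the usual system tools, nothing OpenResty-specific):
# validate the config without starting anything
/usr/local/openresty/nginx/sbin/nginx -t -c /data/openresty-proxy/conf/nginx_redis.conf
# after starting, confirm the crawler-facing port is listening
ss -lntp | grep 3889
# watch the logs declared in the config while sending test traffic
tail -f /data/logs/openresty/error.log /data/logs/openresty/tcp-access.log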
For the cluster version, connecting to a Redis cluster requires the lua-resty-redis-cluster module.
GitHub: https://github.com/cuiweixie/lua-resty-redis-cluster
After downloading it you need two files, rediscluster.lua and redis_slot.c, both under lib/.
Copy redis_slot.c and rediscluster.lua from the package into the lualib directory of your OpenResty installation.
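For example, assuming the default install prefix /usr/local/openresty and that you are inside the cloned repository:
cp lib/rediscluster.lua lib/redis_slot.c /usr/local/openresty/lualib/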
The .c file cannot be loaded from the nginx configuration directly; it has to be compiled into a .so first:
# install gcc, the C++ compiler and kernel headers
yum -y install gcc gcc-c++ kernel-devel
# if you use the Lua that ships with CentOS, install lua-devel before compiling; skip this if you installed Lua yourself
yum install lua-devel
# compile
gcc redis_slot.c -fPIC -shared -o libredis_slot.so
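The original post showed a screenshot of the result here; checking that the shared object was produced is enough:
ls -l libredis_slot.so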
Then start OpenResty the same way:
/usr/local/openresty/nginx/sbin/nginx -c /data/openresty-proxy/conf/nginx_redis.conf
and that's it. Finally, configure the crawler to use ip:3889 as its proxy and give it a try; parts of this article may read slightly oddly because it was sanitized before publishing.
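For a quick end-to-end test without the crawler, anything that can speak through an HTTP proxy will do; for example (hypothetical target URL, and assuming the upstream proxies listening on 3888 are plain HTTP proxies):
curl -x http://<proxy-server-ip>:3889 http://httpbin.org/ip
# run it a few times: the exit IP should change as the vps list rotates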
References:
https://blog.csdn.net/qq_22494169/article/details/109357667
https://blog.csdn.net/zyt425916200/article/details/78113547
https://github.com/openresty/lua-resty-redis#connect