作者 钟来

初始提交

正在显示 78 个修改的文件 包含 4745 行增加0 行删除

要显示太多修改。

为保证性能只显示 78 of 78+ 个文件。

  1 +.idea
  2 +**/*.iml
  3 +**/target
  4 +**/*.pyc
  1 +# 抖音爬虫
  2 +----
  3 +<p>用于爬取指定用户的'作品', 以及指定用户的'喜欢'。
  4 +<p>用手机挂代理刷抖音, 当你访问其他用户的个人空间时, 就自动把这些用户信息、头像、视频封面、视频mp4、mp3等都爬取下来了。
  5 +
  6 +
  7 +---
  8 +本项目使用语言及版本:
  9 +* jdk: 14-preview (idea请使用2020.1尝鲜版, 否则不支持jdk14新语法)
  10 +* python: 3.8
  11 +
  12 +
  13 +----
  14 +模块介绍:
  15 +* douyin-scanner \[python\] 本模块用于将抖音信息以mitmdump代理形式拦截, 然后以宽表形式写入到数据库中, 方便douyin-downloader模块做后续的处理。
  16 +* douyin-downloader \[java\] 使用vertx框架。本模块用于将爬取下来的信息做后续的分析、重组、下载。
  17 +
  18 +----
  19 +本项目使用技术:
  20 +* mitmdump + python做代理拦截
  21 +* vertx作为整个项目的主要框架
  22 +* 裸写sql不方便, 自己实现了一个sqlBuilder, 方便拼接sql (最开始是使用的第三方依赖[sqlBuilder](https://github.com/jkrasnay/sqlbuilder), 但是这个用起来有很多不足之处, 比如不支持limit, 不支持prepare等等)
  23 +* 自己用反射实现了一个对象关系映射工具类com/aries/crawler/tools/Orm.java , 弥补了vertx没有orm的不便利之处。美其名曰:几十行代码实现了一个orm。
  24 +
  25 +----
  26 +为什么不用spring和mybatis
  27 +1. 我不喜欢自己的项目里有一大堆眼花缭乱的第三方依赖(你可以看一下本项目的pom.xml, 目前只有vertx-core、vertx-jdbc-client、mysql-connector-java, 还有一个用于单元测试的junit)
  28 +1. 不喜欢无脑使用spring和mybatis的行为. 经常见到一些java工程师打算新建个项目写点东西时, 第一件事情就是想都不想就直接引入一套spring(醒醒啊喂, 你是java工程师, 不是spring工程师)。 并不是认为这些不好, 只是认为这并不是解决问题的通用方案, 更不是完美方案。
  29 +1. 这是我第一次使用vertx, 但不是第一次不使用spring。(netty/jFinal/play/akka都是很不错的框架呀)
  30 +----
  31 +本项目仅供学习研究, 不提供任何反爬虫等功能, 请不要恶意爬取。 恶意使用本代码者, 后果自负!
  1 +<?xml version="1.0" encoding="UTF-8"?>
  2 +
  3 +<project xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns="http://maven.apache.org/POM/4.0.0"
  4 + xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  5 + <modelVersion>4.0.0</modelVersion>
  6 +
  7 + <groupId>com.aries.crawler</groupId>
  8 + <artifactId>douyin-downloader</artifactId>
  9 + <version>1.0-SNAPSHOT</version>
  10 +
  11 + <name>douyin-downloader</name>
  12 +
  13 + <properties>
  14 + <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
  15 + <maven.compiler.source>14</maven.compiler.source>
  16 + <maven.compiler.target>14</maven.compiler.target>
  17 + </properties>
  18 +
  19 + <dependencies>
  20 + <dependency>
  21 + <groupId>io.vertx</groupId>
  22 + <artifactId>vertx-core</artifactId>
  23 + <version>3.8.5</version>
  24 + </dependency>
  25 + <dependency>
  26 + <groupId>mysql</groupId>
  27 + <artifactId>mysql-connector-java</artifactId>
  28 + <version>8.0.13</version>
  29 + </dependency>
  30 + <dependency>
  31 + <groupId>io.vertx</groupId>
  32 + <artifactId>vertx-jdbc-client</artifactId>
  33 + <version>3.8.5</version>
  34 + </dependency>
  35 + <dependency>
  36 + <groupId>junit</groupId>
  37 + <artifactId>junit</artifactId>
  38 + <version>4.13</version>
  39 + <scope>test</scope>
  40 + </dependency>
  41 + </dependencies>
  42 + <build>
  43 + <plugins>
  44 + <plugin>
  45 + <groupId>org.apache.maven.plugins</groupId>
  46 + <artifactId>maven-compiler-plugin</artifactId>
  47 + <version>3.8.1</version>
  48 + <configuration>
  49 + <release>14</release> <!-- <release>13</release> -->
  50 + <compilerArgs>--enable-preview</compilerArgs>
  51 + </configuration>
  52 + </plugin>
  53 + </plugins>
  54 + </build>
  55 +</project>
  1 +package com.aries.crawler;
  2 +
  3 +import com.aries.crawler.trans.codec.CommonMessageCodec;
  4 +import com.aries.crawler.trans.message.*;
  5 +import com.aries.crawler.verticles.*;
  6 +import io.vertx.core.CompositeFuture;
  7 +import io.vertx.core.DeploymentOptions;
  8 +import io.vertx.core.Future;
  9 +import io.vertx.core.Vertx;
  10 +import io.vertx.core.logging.Logger;
  11 +import io.vertx.core.logging.LoggerFactory;
  12 +
  13 +import java.util.ArrayList;
  14 +import java.util.List;
  15 +import java.util.concurrent.TimeUnit;
  16 +
  17 +/**
  18 + * @author arowana
  19 + */
  20 +public class Starter {
  21 + private static final Logger logger = LoggerFactory.getLogger(Starter.class);
  22 +
  23 + public static void main(String[] args) {
  24 + // Force to use slf4j. 参考自dgate:https://github.com/DTeam-Top/dgate/blob/master/src/main/java/top/dteam/dgate/Launcher.java
  25 + System.setProperty("vertx.logger-delegate-factory-class-name", "io.vertx.core.logging.SLF4JLogDelegateFactory");
  26 +
  27 + var vertx = Vertx.vertx();
  28 +
  29 + logger.info("register codec");
  30 + vertx.eventBus().registerCodec(new CommonMessageCodec<>());
  31 + vertx.eventBus().registerDefaultCodec(SimpleInt64Message.class, new CommonMessageCodec<>());
  32 + vertx.eventBus().registerDefaultCodec(DouyinUserInfoMessage.class, new CommonMessageCodec<>());
  33 + vertx.eventBus().registerDefaultCodec(DouyinVideoInfoMessage.class, new CommonMessageCodec<>());
  34 + vertx.eventBus().registerDefaultCodec(DouyinWideDataMessage.class, new CommonMessageCodec<>());
  35 + vertx.eventBus().registerDefaultCodec(CommonResponseMessage.class, new CommonMessageCodec<>());
  36 +
  37 + logger.info("deploying verticles");
  38 + List<Future> futures = new ArrayList<>() {
  39 + {
  40 + add(optionalDeploy(vertx, UserInsertVerticle.class, 1));
  41 + add(optionalDeploy(vertx, VideoInsertVerticle.class, 1));
  42 + add(optionalDeploy(vertx, UpdateDataVerticle.class, 1));
  43 + add(optionalDeploy(vertx, WideDataDispatchVerticle.class, 1));
  44 + add(optionalDeploy(vertx, WideDataPickUpVerticle.class, 1));
  45 + add(optionalDeploy(vertx, VideoDataPickUpVerticle.class, 1));
  46 + add(optionalDeploy(vertx, VideoDownloadVerticle.class, 1));
  47 + }
  48 + };
  49 +
  50 + CompositeFuture.all(futures).setHandler(ar -> {
  51 + if (ar.succeeded()) {
  52 + logger.info("all verticle start");
  53 + } else {
  54 + logger.error("verticle(s) failed: " + ar.cause());
  55 + }
  56 + });
  57 + }
  58 +
  59 + /**
  60 + * 简单部署
  61 + *
  62 + * @param vertx 全局vertx
  63 + * @param verticleType 部署的verticel类
  64 + * @return 部署后的Future回调
  65 + */
  66 + public static Future<Void> simpleDeploy(Vertx vertx, Class<?> verticleType) {
  67 + return Future.future(res -> {
  68 + vertx.deployVerticle(verticleType.getTypeName(), deployRes -> {
  69 + if (deployRes.succeeded()) {
  70 + res.complete();
  71 + } else {
  72 + res.fail(deployRes.cause());
  73 + }
  74 + });
  75 + });
  76 + }
  77 +
  78 + /**
  79 + * 配置化部署
  80 + *
  81 + * @param vertx 全局vertx
  82 + * @param verticleType 部署的verticel类
  83 + * @param workerSize 实例个数
  84 + * @return 部署后的Future回调
  85 + */
  86 + public static Future<Void> optionalDeploy(Vertx vertx, Class<?> verticleType, int workerSize) {
  87 + var options = new DeploymentOptions()
  88 + .setWorker(true)
  89 + .setWorkerPoolName(verticleType.getTypeName() + "-pool")
  90 + .setWorkerPoolSize(workerSize)
  91 + .setMaxWorkerExecuteTime(TimeUnit.SECONDS.toNanos(10))
  92 + .setInstances(workerSize);
  93 +
  94 + return Future.future(res -> {
  95 + vertx.deployVerticle(verticleType.getTypeName(), options, deployRes -> {
  96 + if (deployRes.succeeded()) {
  97 + res.complete();
  98 + } else {
  99 + res.fail(deployRes.cause());
  100 + }
  101 + });
  102 + });
  103 + }
  104 +}
  1 +package com.aries.crawler.annotation;
  2 +
  3 +import java.lang.annotation.ElementType;
  4 +import java.lang.annotation.Retention;
  5 +import java.lang.annotation.RetentionPolicy;
  6 +import java.lang.annotation.Target;
  7 +
  8 +/**
  9 + * @author arowana
  10 + */
  11 +@Retention(RetentionPolicy.RUNTIME)
  12 +@Target(value = {ElementType.FIELD})
  13 +public @interface MysqlField {
  14 + /**
  15 + * 对应的数据库字段名
  16 + */
  17 + String alias();
  18 +
  19 + /**
  20 + * 对应的类型
  21 + */
  22 + Class<?> type();
  23 +}
  1 +package com.aries.crawler.model;
  2 +
  3 +/**
  4 + * 数据模型的抽象类。
  5 + *
  6 + * @author arowana
  7 + */
  8 +public interface DataModelable {
  9 +
  10 +}
  1 +package com.aries.crawler.model.douyincrawler;
  2 +
  3 +import com.aries.crawler.annotation.MysqlField;
  4 +import com.aries.crawler.model.DataModelable;
  5 +
  6 +import java.math.BigInteger;
  7 +
  8 +/**
  9 + * 抖音数据宽表
  10 + *
  11 + * @author arowana
  12 + */
  13 +public class DouyinCrawlerLogModel implements DataModelable {
  14 + /**
  15 + * 提取用户数据完成
  16 + */
  17 + public static final Integer STATUS_USER_DONE = 1;
  18 +
  19 + /**
  20 + * 提取视频信息完成
  21 + */
  22 + public static final Integer STATUS_VIDEO_DONE = 2;
  23 +
  24 + /**
  25 + * 用户信息、视频信息 都提取完成
  26 + */
  27 + public static final Integer STATUS_ALL_DONE = 3;
  28 +
  29 + public static final String TABLE = "douyin_crawler_log";
  30 + @MysqlField(alias = "aweme_id", type = Long.class)
  31 + public Long awemeId;
  32 + @MysqlField(alias = "aweme_desc", type = String.class)
  33 + public String awemeDesc;
  34 + @MysqlField(alias = "aweme_create_time", type = Long.class)
  35 + public Long awemeCreateTime;
  36 + @MysqlField(alias = "author_uid", type = Long.class)
  37 + public Long authorUid;
  38 + @MysqlField(alias = "author_short_id", type = Long.class)
  39 + public Long authorShortId;
  40 + @MysqlField(alias = "author_nickname", type = String.class)
  41 + public String authorNickname;
  42 + @MysqlField(alias = "author_signature", type = String.class)
  43 + public String authorSignature;
  44 + @MysqlField(alias = "avatar_larger_url", type = String.class)
  45 + public String avatarLargerUrl;
  46 + @MysqlField(alias = "author_share_info_qrcode_url", type = String.class)
  47 + public String authorShareInfoQrcodeUrl;
  48 + @MysqlField(alias = "video_cover_url", type = String.class)
  49 + public String videoCoverUrl;
  50 + @MysqlField(alias = "video_dynamic_cover_url", type = String.class)
  51 + public String videoDynamicCoverUrl;
  52 + @MysqlField(alias = "video_download_addr_url", type = String.class)
  53 + public String videoDownloadAddrUrl;
  54 + @MysqlField(alias = "video_share_url", type = String.class)
  55 + public String videoShareUrl;
  56 + @MysqlField(alias = "video_tag", type = String.class)
  57 + public String videoVideoTag;
  58 + @MysqlField(alias = "video_duration", type = BigInteger.class)
  59 + public Long videoDuration;
  60 + @MysqlField(alias = "type", type = Integer.class)
  61 + public Integer type;
  62 + @MysqlField(alias = "status", type = Integer.class)
  63 + public Integer status;
  64 + @MysqlField(alias = "ct", type = String.class)
  65 + public String ct;
  66 + @MysqlField(alias = "ut", type = String.class)
  67 + public String ut;
  68 + @MysqlField(alias = "id", type = BigInteger.class)
  69 + private BigInteger id;
  70 +
  71 + public static Integer getStatusUserDone() {
  72 + return STATUS_USER_DONE;
  73 + }
  74 +
  75 + public static Integer getStatusVideoDone() {
  76 + return STATUS_VIDEO_DONE;
  77 + }
  78 +
  79 + public static Integer getStatusAllDone() {
  80 + return STATUS_ALL_DONE;
  81 + }
  82 +
  83 + public static String getTABLE() {
  84 + return TABLE;
  85 + }
  86 +
  87 + public BigInteger getId() {
  88 + return id;
  89 + }
  90 +
  91 + public void setId(BigInteger id) {
  92 + this.id = id;
  93 + }
  94 +
  95 + public Long getAwemeId() {
  96 + return awemeId;
  97 + }
  98 +
  99 + public void setAwemeId(Long awemeId) {
  100 + this.awemeId = awemeId;
  101 + }
  102 +
  103 + public String getAwemeDesc() {
  104 + return awemeDesc;
  105 + }
  106 +
  107 + public void setAwemeDesc(String awemeDesc) {
  108 + this.awemeDesc = awemeDesc;
  109 + }
  110 +
  111 + public Long getAwemeCreateTime() {
  112 + return awemeCreateTime;
  113 + }
  114 +
  115 + public void setAwemeCreateTime(Long awemeCreateTime) {
  116 + this.awemeCreateTime = awemeCreateTime;
  117 + }
  118 +
  119 + public Long getAuthorUid() {
  120 + return authorUid;
  121 + }
  122 +
  123 + public void setAuthorUid(Long authorUid) {
  124 + this.authorUid = authorUid;
  125 + }
  126 +
  127 + public Long getAuthorShortId() {
  128 + return authorShortId;
  129 + }
  130 +
  131 + public void setAuthorShortId(Long authorShortId) {
  132 + this.authorShortId = authorShortId;
  133 + }
  134 +
  135 + public String getAuthorNickname() {
  136 + return authorNickname;
  137 + }
  138 +
  139 + public void setAuthorNickname(String authorNickname) {
  140 + this.authorNickname = authorNickname;
  141 + }
  142 +
  143 + public String getAuthorSignature() {
  144 + return authorSignature;
  145 + }
  146 +
  147 + public void setAuthorSignature(String authorSignature) {
  148 + this.authorSignature = authorSignature;
  149 + }
  150 +
  151 + public String getAvatarLargerUrl() {
  152 + return avatarLargerUrl;
  153 + }
  154 +
  155 + public void setAvatarLargerUrl(String avatarLargerUrl) {
  156 + this.avatarLargerUrl = avatarLargerUrl;
  157 + }
  158 +
  159 + public String getAuthorShareInfoQrcodeUrl() {
  160 + return authorShareInfoQrcodeUrl;
  161 + }
  162 +
  163 + public void setAuthorShareInfoQrcodeUrl(String authorShareInfoQrcodeUrl) {
  164 + this.authorShareInfoQrcodeUrl = authorShareInfoQrcodeUrl;
  165 + }
  166 +
  167 + public String getVideoCoverUrl() {
  168 + return videoCoverUrl;
  169 + }
  170 +
  171 + public void setVideoCoverUrl(String videoCoverUrl) {
  172 + this.videoCoverUrl = videoCoverUrl;
  173 + }
  174 +
  175 + public String getVideoDynamicCoverUrl() {
  176 + return videoDynamicCoverUrl;
  177 + }
  178 +
  179 + public void setVideoDynamicCoverUrl(String videoDynamicCoverUrl) {
  180 + this.videoDynamicCoverUrl = videoDynamicCoverUrl;
  181 + }
  182 +
  183 + public String getVideoDownloadAddrUrl() {
  184 + return videoDownloadAddrUrl;
  185 + }
  186 +
  187 + public void setVideoDownloadAddrUrl(String videoDownloadAddrUrl) {
  188 + this.videoDownloadAddrUrl = videoDownloadAddrUrl;
  189 + }
  190 +
  191 + public String getVideoShareUrl() {
  192 + return videoShareUrl;
  193 + }
  194 +
  195 + public void setVideoShareUrl(String videoShareUrl) {
  196 + this.videoShareUrl = videoShareUrl;
  197 + }
  198 +
  199 + public String getVideoVideoTag() {
  200 + return videoVideoTag;
  201 + }
  202 +
  203 + public void setVideoVideoTag(String videoVideoTag) {
  204 + this.videoVideoTag = videoVideoTag;
  205 + }
  206 +
  207 + public Long getVideoDuration() {
  208 + return videoDuration;
  209 + }
  210 +
  211 + public void setVideoDuration(Long videoDuration) {
  212 + this.videoDuration = videoDuration;
  213 + }
  214 +
  215 + public Integer getType() {
  216 + return type;
  217 + }
  218 +
  219 + public void setType(Integer type) {
  220 + this.type = type;
  221 + }
  222 +
  223 + public Integer getStatus() {
  224 + return status;
  225 + }
  226 +
  227 + public void setStatus(Integer status) {
  228 + this.status = status;
  229 + }
  230 +
  231 + public String getCt() {
  232 + return ct;
  233 + }
  234 +
  235 + public void setCt(String ct) {
  236 + this.ct = ct;
  237 + }
  238 +
  239 + public String getUt() {
  240 + return ut;
  241 + }
  242 +
  243 + public void setUt(String ut) {
  244 + this.ut = ut;
  245 + }
  246 +
  247 + @Override
  248 + public String toString() {
  249 + return "DouyinCrawlerLogModel{" +
  250 + "id=" + id +
  251 + ", awemeId=" + awemeId +
  252 + ", awemeDesc='" + awemeDesc + '\'' +
  253 + ", awemeCreateTime=" + awemeCreateTime +
  254 + ", authorUid=" + authorUid +
  255 + ", authorShortId=" + authorShortId +
  256 + ", authorNickname='" + authorNickname + '\'' +
  257 + ", authorSignature='" + authorSignature + '\'' +
  258 + ", avatarLargerUrl='" + avatarLargerUrl + '\'' +
  259 + ", authorShareInfoQrcodeUrl='" + authorShareInfoQrcodeUrl + '\'' +
  260 + ", videoCoverUrl='" + videoCoverUrl + '\'' +
  261 + ", videoDynamicCoverUrl='" + videoDynamicCoverUrl + '\'' +
  262 + ", videoDownloadAddrUrl='" + videoDownloadAddrUrl + '\'' +
  263 + ", videoShareUrl='" + videoShareUrl + '\'' +
  264 + ", videoVideoTag='" + videoVideoTag + '\'' +
  265 + ", videoDuration=" + videoDuration +
  266 + ", type=" + type +
  267 + ", status=" + status +
  268 + ", ct=" + ct +
  269 + ", ut=" + ut +
  270 + '}';
  271 + }
  272 +}
  1 +package com.aries.crawler.model.douyincrawler;
  2 +
  3 +import com.aries.crawler.annotation.MysqlField;
  4 +import com.aries.crawler.model.DataModelable;
  5 +
  6 +import java.time.LocalDateTime;
  7 +
  8 +/**
  9 + * 抖音数据宽表
  10 + *
  11 + * @author arowana
  12 + */
  13 +public class DouyinUserModel implements DataModelable {
  14 + @MysqlField(alias = "uid", type = Long.class)
  15 + public Long uid;
  16 +
  17 + @MysqlField(alias = "short_id", type = Long.class)
  18 + public Long shortId;
  19 +
  20 + @MysqlField(alias = "nickname", type = String.class)
  21 + public String nickname;
  22 +
  23 + @MysqlField(alias = "signature", type = String.class)
  24 + public String signature;
  25 +
  26 + @MysqlField(alias = "avatar_larger_url", type = String.class)
  27 + public String avatarLargerUrl;
  28 +
  29 + @MysqlField(alias = "share_url", type = String.class)
  30 + public String shareUrl;
  31 +
  32 + @MysqlField(alias = "share_info_qrcode_url", type = String.class)
  33 + public String shareInfoQrCodeUrl;
  34 +
  35 + @MysqlField(alias = "ct", type = LocalDateTime.class)
  36 + public String ct;
  37 +
  38 + @MysqlField(alias = "ut", type = LocalDateTime.class)
  39 + public String ut;
  40 +
  41 + public Long getUid() {
  42 + return uid;
  43 + }
  44 +
  45 + public void setUid(Long uid) {
  46 + this.uid = uid;
  47 + }
  48 +
  49 + public Long getShortId() {
  50 + return shortId;
  51 + }
  52 +
  53 + public void setShortId(Long shortId) {
  54 + this.shortId = shortId;
  55 + }
  56 +
  57 + public String getNickname() {
  58 + return nickname;
  59 + }
  60 +
  61 + public void setNickname(String nickname) {
  62 + this.nickname = nickname;
  63 + }
  64 +
  65 + public String getSignature() {
  66 + return signature;
  67 + }
  68 +
  69 + public void setSignature(String signature) {
  70 + this.signature = signature;
  71 + }
  72 +
  73 + public String getAvatarLargerUrl() {
  74 + return avatarLargerUrl;
  75 + }
  76 +
  77 + public void setAvatarLargerUrl(String avatarLargerUrl) {
  78 + this.avatarLargerUrl = avatarLargerUrl;
  79 + }
  80 +
  81 + public String getShareUrl() {
  82 + return shareUrl;
  83 + }
  84 +
  85 + public void setShareUrl(String shareUrl) {
  86 + this.shareUrl = shareUrl;
  87 + }
  88 +
  89 + public String getShareInfoQrCodeUrl() {
  90 + return shareInfoQrCodeUrl;
  91 + }
  92 +
  93 + public void setShareInfoQrCodeUrl(String shareInfoQrCodeUrl) {
  94 + this.shareInfoQrCodeUrl = shareInfoQrCodeUrl;
  95 + }
  96 +
  97 + public String getCt() {
  98 + return ct;
  99 + }
  100 +
  101 + public void setCt(String ct) {
  102 + this.ct = ct;
  103 + }
  104 +
  105 + public String getUt() {
  106 + return ut;
  107 + }
  108 +
  109 + public void setUt(String ut) {
  110 + this.ut = ut;
  111 + }
  112 +
  113 + @Override
  114 + public String toString() {
  115 + return "DouyinUserModel{" +
  116 + "uid=" + uid +
  117 + ", shortId=" + shortId +
  118 + ", nickname='" + nickname + '\'' +
  119 + ", signature='" + signature + '\'' +
  120 + ", avatarLargerUrl='" + avatarLargerUrl + '\'' +
  121 + ", shareUrl='" + shareUrl + '\'' +
  122 + ", shareInfoQrCodeUrl='" + shareInfoQrCodeUrl + '\'' +
  123 + ", ct=" + ct +
  124 + ", ut=" + ut +
  125 + '}';
  126 + }
  127 +}
  1 +package com.aries.crawler.model.douyincrawler;
  2 +
  3 +import com.aries.crawler.annotation.MysqlField;
  4 +import com.aries.crawler.model.DataModelable;
  5 +
  6 +import java.time.LocalDateTime;
  7 +
  8 +/**
  9 + * 抖音数据宽表
  10 + *
  11 + * @author arowana
  12 + */
  13 +public class DouyinVideoModel implements DataModelable {
  14 + /**
  15 + * 默认值。未处理。
  16 + */
  17 + public static final Integer STATUS_VIDEO_DOWNLOAD_DEFAULT = 0;
  18 + /**
  19 + * 下载成功
  20 + */
  21 + public static final Integer STATUS_VIDEO_DOWNLOAD_SUCCESS = 1;
  22 +
  23 + /**
  24 + * 下载失败
  25 + */
  26 + public static final Integer STATUS_VIDEO_DOWNLOAD_FAILED = 2;
  27 +
  28 + public static final String TABLE = "douyin_video_info";
  29 +
  30 + @MysqlField(alias = "id", type = Long.class)
  31 + public Long id;
  32 +
  33 + @MysqlField(alias = "comments", type = String.class)
  34 + public String comments;
  35 +
  36 + @MysqlField(alias = "create_time", type = Long.class)
  37 + public Long createTime;
  38 +
  39 + @MysqlField(alias = "uid", type = Long.class)
  40 + public Long uid;
  41 +
  42 + @MysqlField(alias = "cover_url", type = String.class)
  43 + public String coverUrl;
  44 +
  45 + @MysqlField(alias = "dynamic_cover_url", type = String.class)
  46 + public String dynamicCoverUrl;
  47 +
  48 + @MysqlField(alias = "download_addr_url", type = String.class)
  49 + public String downloadAddrUrl;
  50 +
  51 + @MysqlField(alias = "share_url", type = String.class)
  52 + public String shareUrl;
  53 +
  54 + @MysqlField(alias = "tag", type = String.class)
  55 + public String tag;
  56 +
  57 + @MysqlField(alias = "duration", type = Long.class)
  58 + public Long duration;
  59 +
  60 + @MysqlField(alias = "type", type = Integer.class)
  61 + public Integer type;
  62 +
  63 + @MysqlField(alias = "status", type = Integer.class)
  64 + public Integer status;
  65 +
  66 + @MysqlField(alias = "ct", type = LocalDateTime.class)
  67 + public String ct;
  68 +
  69 + @MysqlField(alias = "ut", type = LocalDateTime.class)
  70 + public String ut;
  71 +
  72 + public Long getId() {
  73 + return id;
  74 + }
  75 +
  76 + public void setId(Long id) {
  77 + this.id = id;
  78 + }
  79 +
  80 + public String getComments() {
  81 + return comments;
  82 + }
  83 +
  84 + public void setComments(String comments) {
  85 + this.comments = comments;
  86 + }
  87 +
  88 + public Long getCreateTime() {
  89 + return createTime;
  90 + }
  91 +
  92 + public void setCreateTime(Long createTime) {
  93 + this.createTime = createTime;
  94 + }
  95 +
  96 + public Long getUid() {
  97 + return uid;
  98 + }
  99 +
  100 + public void setUid(Long uid) {
  101 + this.uid = uid;
  102 + }
  103 +
  104 + public String getCoverUrl() {
  105 + return coverUrl;
  106 + }
  107 +
  108 + public void setCoverUrl(String coverUrl) {
  109 + this.coverUrl = coverUrl;
  110 + }
  111 +
  112 + public String getDynamicCoverUrl() {
  113 + return dynamicCoverUrl;
  114 + }
  115 +
  116 + public void setDynamicCoverUrl(String dynamicCoverUrl) {
  117 + this.dynamicCoverUrl = dynamicCoverUrl;
  118 + }
  119 +
  120 + public String getDownloadAddrUrl() {
  121 + return downloadAddrUrl;
  122 + }
  123 +
  124 + public void setDownloadAddrUrl(String downloadAddrUrl) {
  125 + this.downloadAddrUrl = downloadAddrUrl;
  126 + }
  127 +
  128 + public String getShareUrl() {
  129 + return shareUrl;
  130 + }
  131 +
  132 + public void setShareUrl(String shareUrl) {
  133 + this.shareUrl = shareUrl;
  134 + }
  135 +
  136 + public String getTag() {
  137 + return tag;
  138 + }
  139 +
  140 + public void setTag(String tag) {
  141 + this.tag = tag;
  142 + }
  143 +
  144 + public Long getDuration() {
  145 + return duration;
  146 + }
  147 +
  148 + public void setDuration(Long duration) {
  149 + this.duration = duration;
  150 + }
  151 +
  152 + public Integer getType() {
  153 + return type;
  154 + }
  155 +
  156 + public void setType(Integer type) {
  157 + this.type = type;
  158 + }
  159 +
  160 + public Integer getStatus() {
  161 + return status;
  162 + }
  163 +
  164 + public void setStatus(Integer status) {
  165 + this.status = status;
  166 + }
  167 +
  168 + public String getCt() {
  169 + return ct;
  170 + }
  171 +
  172 + public void setCt(String ct) {
  173 + this.ct = ct;
  174 + }
  175 +
  176 + public String getUt() {
  177 + return ut;
  178 + }
  179 +
  180 + public void setUt(String ut) {
  181 + this.ut = ut;
  182 + }
  183 +
  184 + @Override
  185 + public String toString() {
  186 + return "DouyinVideoModel{" +
  187 + "id=" + id +
  188 + ", comments='" + comments + '\'' +
  189 + ", createTime=" + createTime +
  190 + ", uid=" + uid +
  191 + ", coverUrl='" + coverUrl + '\'' +
  192 + ", dynamicCoverUrl='" + dynamicCoverUrl + '\'' +
  193 + ", downloadAddrUrl='" + downloadAddrUrl + '\'' +
  194 + ", shareUrl='" + shareUrl + '\'' +
  195 + ", tag='" + tag + '\'' +
  196 + ", duration=" + duration +
  197 + ", type=" + type +
  198 + ", status=" + status +
  199 + ", ct=" + ct +
  200 + ", ut=" + ut +
  201 + '}';
  202 + }
  203 +}
  1 +/**
  2 + * 本包下存放的都是douyin_crawler数据库对应的orm映射关系
  3 + * <p>
  4 + * 一个子package对应一个数据库, 例如 douyincrawler包 对应 douyin_crawler数据库
  5 + * 一个子类对应一个数据表, 例如DouyinCrawlerLogModel类 对应 douyin_crawler_log数据表
  6 + */
  7 +package com.aries.crawler.model.douyincrawler;
  1 +/**
  2 + * 数据模型层的包
  3 + * 一个子package对应一个数据库, 例如 douyincrawler包 对应 douyin_crawler数据库
  4 + * 一个子类对应一个数据表, 例如DouyinCrawlerLogModel类 对应 douyin_crawler_log数据表
  5 + */
  6 +package com.aries.crawler.model;
  1 +package com.aries.crawler.sqlbuilder;
  2 +
  3 +import java.util.List;
  4 +
  5 +/**
  6 + * @author arowana
  7 + */
  8 +public abstract class AbstractSqlBuilder {
  9 +
  10 + protected void appendList(StringBuilder sql, List<?> list, String init, String sep) {
  11 +
  12 + boolean first = true;
  13 +
  14 + for (Object s : list) {
  15 + if (first) {
  16 + sql.append(init);
  17 + } else {
  18 + sql.append(sep);
  19 + }
  20 + sql.append(s);
  21 + first = false;
  22 + }
  23 + }
  24 +
  25 + protected String repeat(String ch, Integer times, String sep) {
  26 + StringBuilder result = new StringBuilder(ch);
  27 + if (times > 1) {
  28 + result.append((sep + ch).repeat(times - 1));
  29 + }
  30 + return result.toString();
  31 + }
  32 +
  33 +}
  1 +package com.aries.crawler.sqlbuilder;
  2 +
  3 +import java.util.ArrayList;
  4 +import java.util.List;
  5 +
  6 +/**
  7 + * @author arowana
  8 + */
  9 +public class DeleteBuilder extends AbstractSqlBuilder {
  10 +
  11 + private String table;
  12 +
  13 + private List<String> wheres = new ArrayList<>();
  14 +
  15 + public DeleteBuilder(String table) {
  16 + this.table = table;
  17 + }
  18 +
  19 + @Override
  20 + public String toString() {
  21 + StringBuilder sql = new StringBuilder("delete from ").append(table);
  22 + appendList(sql, wheres, " where ", " and ");
  23 + return sql.toString();
  24 + }
  25 +
  26 + public DeleteBuilder where(String expr) {
  27 + wheres.add(expr);
  28 + return this;
  29 + }
  30 +
  31 +}
  1 +package com.aries.crawler.sqlbuilder;
  2 +
  3 +import java.util.ArrayList;
  4 +import java.util.List;
  5 +
  6 +/**
  7 + * @author arowana
  8 + */
  9 +public class InsertBuilder extends AbstractSqlBuilder {
  10 + private String table;
  11 +
  12 + private List<String> columns = new ArrayList<>();
  13 + private List<Object> values = new ArrayList<>();
  14 +
  15 + private List<String> upColumns = new ArrayList<>();
  16 + private List<Object> upValues = new ArrayList<>();
  17 +
  18 + public InsertBuilder(String table) {
  19 + this.table = table;
  20 + }
  21 +
  22 + public InsertBuilder set(String column, Object value) {
  23 + columns.add(column);
  24 + values.add(value);
  25 + return this;
  26 + }
  27 +
  28 + public InsertBuilder onDuplicateKeyUpdate(String column, Object value) {
  29 + upColumns.add(column);
  30 + upValues.add(value);
  31 + return this;
  32 + }
  33 +
  34 + public List<Object> getValues() {
  35 + var result = new ArrayList<>();
  36 + result.addAll(values);
  37 + result.addAll(upValues);
  38 + return result;
  39 + }
  40 +
  41 + public String getSql() {
  42 + var sql = new StringBuilder("insert into ")
  43 + .append(table).append(" (");
  44 + appendList(sql, columns, "", ", ");
  45 + sql.append(") values (");
  46 + sql.append(repeat("?", columns.size(), ","));
  47 + sql.append(")");
  48 +
  49 + if (!upColumns.isEmpty()) {
  50 + sql.append(" on duplicate key update ");
  51 + for (int i = 0; i < upColumns.size(); i++) {
  52 + if (i == 0) {
  53 + sql.append(upColumns.get(i) + " = ?");
  54 + } else {
  55 + sql.append(", " + upColumns.get(i) + " = ?");
  56 + }
  57 + }
  58 + }
  59 + return sql.toString();
  60 + }
  61 +
  62 +}
  1 +package com.aries.crawler.sqlbuilder;
  2 +
  3 +import java.util.ArrayList;
  4 +import java.util.List;
  5 +
  6 +/**
  7 + * @author arowana
  8 + */
  9 +public class SelectBuilder extends AbstractSqlBuilder implements Cloneable {
  10 +
  11 + private boolean distinct;
  12 +
  13 + private List<Object> columns = new ArrayList<>();
  14 +
  15 + private List<String> tables = new ArrayList<>();
  16 +
  17 + private List<String> joins = new ArrayList<>();
  18 +
  19 + private List<String> leftJoins = new ArrayList<>();
  20 +
  21 + private List<String> wheres = new ArrayList<>();
  22 +
  23 + private List<String> groupBys = new ArrayList<>();
  24 +
  25 + private List<String> havings = new ArrayList<>();
  26 +
  27 + private List<SelectBuilder> unions = new ArrayList<>();
  28 +
  29 + private List<String> orderBys = new ArrayList<>();
  30 +
  31 + private Long offset = 0L;
  32 + private Long limit = 0L;
  33 +
  34 + private boolean forUpdate;
  35 +
  36 + private boolean noWait;
  37 +
  38 + public SelectBuilder() {
  39 +
  40 + }
  41 +
  42 + public SelectBuilder(String table) {
  43 + tables.add(table);
  44 + }
  45 +
  46 + /**
  47 + * Copy constructor. Used by {@link #clone()}.
  48 + *
  49 + * @param other SelectBuilder being cloned.
  50 + */
  51 + protected SelectBuilder(SelectBuilder other) {
  52 +
  53 + this.distinct = other.distinct;
  54 + this.forUpdate = other.forUpdate;
  55 + this.noWait = other.noWait;
  56 +
  57 + for (Object column : other.columns) {
  58 + if (column instanceof SubSelectBuilder) {
  59 + this.columns.add(((SubSelectBuilder) column).clone());
  60 + } else {
  61 + this.columns.add(column);
  62 + }
  63 + }
  64 +
  65 + this.tables.addAll(other.tables);
  66 + this.joins.addAll(other.joins);
  67 + this.leftJoins.addAll(other.leftJoins);
  68 + this.wheres.addAll(other.wheres);
  69 + this.groupBys.addAll(other.groupBys);
  70 + this.havings.addAll(other.havings);
  71 +
  72 + for (SelectBuilder sb : other.unions) {
  73 + this.unions.add(sb.clone());
  74 + }
  75 +
  76 + this.orderBys.addAll(other.orderBys);
  77 + }
  78 +
  79 + /**
  80 + * Alias for {@link #where(String)}.
  81 + */
  82 + public SelectBuilder and(String expr) {
  83 + return where(expr);
  84 + }
  85 +
  86 + public SelectBuilder column(String name) {
  87 + columns.add(name);
  88 + return this;
  89 + }
  90 +
  91 + public SelectBuilder column(SubSelectBuilder subSelect) {
  92 + columns.add(subSelect);
  93 + return this;
  94 + }
  95 +
  96 + public SelectBuilder column(String name, boolean groupBy) {
  97 + columns.add(name);
  98 + if (groupBy) {
  99 + groupBys.add(name);
  100 + }
  101 + return this;
  102 + }
  103 +
  104 + @Override
  105 + public SelectBuilder clone() {
  106 + return new SelectBuilder(this);
  107 + }
  108 +
  109 + public SelectBuilder distinct() {
  110 + this.distinct = true;
  111 + return this;
  112 + }
  113 +
  114 + public SelectBuilder forUpdate() {
  115 + forUpdate = true;
  116 + return this;
  117 + }
  118 +
  119 + public SelectBuilder from(String table) {
  120 + tables.add(table);
  121 + return this;
  122 + }
  123 +
  124 + public List<SelectBuilder> getUnions() {
  125 + return unions;
  126 + }
  127 +
  128 + public SelectBuilder groupBy(String expr) {
  129 + groupBys.add(expr);
  130 + return this;
  131 + }
  132 +
  133 + public SelectBuilder having(String expr) {
  134 + havings.add(expr);
  135 + return this;
  136 + }
  137 +
  138 + public SelectBuilder join(String join) {
  139 + joins.add(join);
  140 + return this;
  141 + }
  142 +
  143 + public SelectBuilder leftJoin(String join) {
  144 + leftJoins.add(join);
  145 + return this;
  146 + }
  147 +
  148 + public SelectBuilder noWait() {
  149 + if (!forUpdate) {
  150 + throw new RuntimeException("noWait without forUpdate cannot be called");
  151 + }
  152 + noWait = true;
  153 + return this;
  154 + }
  155 +
  156 + public SelectBuilder orderBy(String name) {
  157 + orderBys.add(name);
  158 + return this;
  159 + }
  160 +
  161 + /**
  162 + * Adds an ORDER BY item with a direction indicator.
  163 + *
  164 + * @param name Name of the column by which to sort.
  165 + * @param ascending If true, specifies the direction "asc", otherwise, specifies
  166 + * the direction "desc".
  167 + */
  168 + public SelectBuilder orderBy(String name, boolean ascending) {
  169 + if (ascending) {
  170 + orderBys.add(name + " asc");
  171 + } else {
  172 + orderBys.add(name + " desc");
  173 + }
  174 + return this;
  175 + }
  176 +
  177 + @Override
  178 + public String toString() {
  179 +
  180 + StringBuilder sql = new StringBuilder("select ");
  181 +
  182 + if (distinct) {
  183 + sql.append("distinct ");
  184 + }
  185 +
  186 + if (columns.size() == 0) {
  187 + sql.append("*");
  188 + } else {
  189 + appendList(sql, columns, "", ", ");
  190 + }
  191 +
  192 + appendList(sql, tables, " from ", ", ");
  193 + appendList(sql, joins, " join ", " join ");
  194 + appendList(sql, leftJoins, " left join ", " left join ");
  195 + appendList(sql, wheres, " where ", " and ");
  196 + appendList(sql, groupBys, " group by ", ", ");
  197 + appendList(sql, havings, " having ", " and ");
  198 + appendList(sql, unions, " union ", " union ");
  199 + appendList(sql, orderBys, " order by ", ", ");
  200 + if (limit > 0) {
  201 + sql.append(" limit ");
  202 + sql.append(limit);
  203 +
  204 + sql.append(" offset ");
  205 + sql.append(offset);
  206 + }
  207 +
  208 + if (forUpdate) {
  209 + sql.append(" for update");
  210 + if (noWait) {
  211 + sql.append(" nowait");
  212 + }
  213 + }
  214 +
  215 + return sql.toString();
  216 + }
  217 +
  218 + /**
  219 + * Adds a "union" select builder. The generated SQL will union this query
  220 + * with the result of the main query. The provided builder must have the
  221 + * same columns as the parent select builder and must not use "order by" or
  222 + * "for update".
  223 + */
  224 + public SelectBuilder union(SelectBuilder unionBuilder) {
  225 + unions.add(unionBuilder);
  226 + return this;
  227 + }
  228 +
  229 + public SelectBuilder where(String expr) {
  230 + wheres.add(expr);
  231 + return this;
  232 + }
  233 +
  234 + public SelectBuilder offset(Long offset) {
  235 + this.offset = offset;
  236 + return this;
  237 + }
  238 +
  239 + public SelectBuilder limit(Long limit) {
  240 + this.limit = limit;
  241 + return this;
  242 + }
  243 +}
  1 +package com.aries.crawler.sqlbuilder;
  2 +
  3 +/**
  4 + * @author arowana
  5 + */
  6 +public class SubSelectBuilder extends SelectBuilder {
  7 +
  8 + private String alias;
  9 +
  10 + public SubSelectBuilder(String alias) {
  11 + this.alias = alias;
  12 + }
  13 +
  14 + protected SubSelectBuilder(SubSelectBuilder other) {
  15 + super(other);
  16 + this.alias = other.alias;
  17 + }
  18 +
  19 + @Override
  20 + public SubSelectBuilder clone() {
  21 + return new SubSelectBuilder(this);
  22 + }
  23 +
  24 + @Override
  25 + public String toString() {
  26 + return new StringBuilder()
  27 + .append("(")
  28 + .append(super.toString())
  29 + .append(") as ")
  30 + .append(alias)
  31 + .toString();
  32 + }
  33 +}
  1 +package com.aries.crawler.sqlbuilder;
  2 +
  3 +import java.util.ArrayList;
  4 +import java.util.List;
  5 +
  6 +/**
  7 + * @author arowana
  8 + */
  9 +public class UpdateBuilder extends AbstractSqlBuilder {
  10 +
  11 + private String table;
  12 +
  13 + private List<String> sets = new ArrayList<String>();
  14 +
  15 + private List<String> wheres = new ArrayList<String>();
  16 +
  17 + public UpdateBuilder(String table) {
  18 + this.table = table;
  19 + }
  20 +
  21 + public UpdateBuilder set(String expr) {
  22 + sets.add(expr);
  23 + return this;
  24 + }
  25 +
  26 + @Override
  27 + public String toString() {
  28 + var sql = new StringBuilder("update ")
  29 + .append(table);
  30 + appendList(sql, sets, " set ", ", ");
  31 + appendList(sql, wheres, " where ", " and ");
  32 + return sql.toString();
  33 + }
  34 +
  35 + public UpdateBuilder where(String expr) {
  36 + wheres.add(expr);
  37 + return this;
  38 + }
  39 +
  40 +}
  1 +package com.aries.crawler.tools;
  2 +
  3 +import io.vertx.core.Vertx;
  4 +import io.vertx.core.json.JsonObject;
  5 +import io.vertx.ext.jdbc.JDBCClient;
  6 +
  7 +/**
  8 + * 用于获取mysql客户端、连接池
  9 + *
  10 + * @author arowana
  11 + */
  12 +public class MySqlClientHelper {
  13 + private static volatile JDBCClient jdbcClient;
  14 +
  15 + /**
  16 + * 防止实例化
  17 + */
  18 + private MySqlClientHelper() {
  19 + }
  20 +
  21 + /**
  22 + * 双重校验, 单例模式, 创建mysql连接池
  23 + *
  24 + * @param vertx 全局vertx
  25 + * @return 数据库连接池
  26 + */
  27 + public static JDBCClient getJDBcClient(Vertx vertx) {
  28 + if (jdbcClient == null) {
  29 + synchronized (MySqlClientHelper.class) {
  30 + if (jdbcClient == null) {
  31 + JsonObject dbConfig = new JsonObject();
  32 + dbConfig.put("url", "jdbc:mysql://localhost:3306/douyin_crawler");
  33 + dbConfig.put("driver_class", "com.mysql.jdbc.Driver");
  34 + dbConfig.put("user", "root");
  35 + dbConfig.put("password", "1qaz2wsx"); // 反正是localhost, 密码随便看
  36 +// dbConfig.put("provider_class", "io.vertx.ext.jdbc.spi.impl.HikariCPDataSourceProvider");
  37 + dbConfig.put("maximumPoolSize", 200);
  38 + dbConfig.put("cachePrepStmts", true);
  39 + dbConfig.put("prepStmtCacheSize", 250);
  40 + dbConfig.put("prepStmtCacheSqlLimit", 2048);
  41 + jdbcClient = JDBCClient.createShared(vertx, dbConfig, "my-data-pool¬");
  42 + }
  43 + }
  44 + }
  45 + return jdbcClient;
  46 + }
  47 +
  48 +}
  1 +package com.aries.crawler.tools;
  2 +
  3 +import io.vertx.core.AsyncResult;
  4 +import io.vertx.core.Handler;
  5 +import io.vertx.core.Vertx;
  6 +import io.vertx.core.json.JsonArray;
  7 +import io.vertx.core.logging.Logger;
  8 +import io.vertx.core.logging.LoggerFactory;
  9 +import io.vertx.ext.sql.ResultSet;
  10 +
  11 +import java.util.List;
  12 +
  13 +/**
  14 + * @author arowana
  15 + */
  16 +public class MySqlExecuteHelper {
  17 + private static final Logger logger = LoggerFactory.getLogger(MySqlExecuteHelper.class);
  18 +
  19 + /**
  20 + * 防止实例化
  21 + */
  22 + private MySqlExecuteHelper() {
  23 +
  24 + }
  25 +
  26 + /**
  27 + * @param vertx 全局vertx
  28 + * @param sql 要执行的sql语句
  29 + * @param arguments sql参数
  30 + * @param handler 回调
  31 + */
  32 + public static void prepareExecute(Vertx vertx, String sql, List<Object> arguments, Handler<AsyncResult<ResultSet>> handler) {
  33 + var jdbcClient = MySqlClientHelper.getJDBcClient(vertx);
  34 + // 构造参数
  35 + JsonArray params = new JsonArray();
  36 +
  37 + for (Object argument : arguments) {
  38 + params.add(argument);
  39 + }
  40 +
  41 + // 执行查询
  42 + jdbcClient.queryWithParams(sql, params, handler);
  43 + }
  44 +
  45 + public static void execute(Vertx vertx, String sql, Handler<AsyncResult<ResultSet>> handler) {
  46 + logger.debug("准备执行sql: " + sql);
  47 + MySqlClientHelper.getJDBcClient(vertx).getConnection(connectionHandlerRes -> {
  48 + if (connectionHandlerRes.succeeded()) {
  49 + var connection = connectionHandlerRes.result();
  50 + connection.query(sql, handler);
  51 + connection.close();
  52 + } else {
  53 + logger.error("Could not connect: " + connectionHandlerRes.cause().getMessage());
  54 + }
  55 + });
  56 + }
  57 +}
  1 +package com.aries.crawler.tools;
  2 +
  3 +import com.aries.crawler.annotation.MysqlField;
  4 +import com.aries.crawler.model.DataModelable;
  5 +import com.aries.crawler.verticles.WideDataPickUpVerticle;
  6 +import io.vertx.core.json.JsonObject;
  7 +import io.vertx.core.logging.Logger;
  8 +import io.vertx.core.logging.LoggerFactory;
  9 +
  10 +import java.lang.reflect.Constructor;
  11 +import java.lang.reflect.Field;
  12 +
  13 +/**
  14 + * vert.x没有orm, 使用mysql获取到的数据很难转化为对象。
  15 + * 平时只能这样使用:
  16 + * <code>
  17 + * DouYinCrawlerLogModel model = new DouYinCrawlerLogModel();
  18 + * model.setFirstName(row.getString("first_name"));
  19 + * model.setMale(row.getBoolean("male"));
  20 + * model.setAge(row.getInteger("age"));
  21 + * ...
  22 + * ...
  23 + * <code/>
  24 + * 可见, 想要把数据库对象row转化为model需要花费数行代码, 该表中的字段越多, 代码函数耗费的越多。
  25 + * 所以, 我实现了本方法, 只需一行代码即可将数据库对象Row转化为Model对象, 弥补了vert.x没有orm的不便之处:
  26 + * <code>
  27 + * DouYinCrawlerLogModel model = MySQLHelper.getModel(row, DouYinCrawlerLogModel.class);
  28 + * </code>
  29 + *
  30 + * @author arowana
  31 + */
  32 +public class Orm {
  33 +
  34 + private static final Logger logger = LoggerFactory.getLogger(WideDataPickUpVerticle.class);
  35 +
  36 + /**
  37 + * 防止实例化
  38 + */
  39 + private Orm() {
  40 +
  41 + }
  42 +
  43 + /**
  44 + * @param row 从vert.x-mysql获取到的数据数据行对象
  45 + * @param clazz 要转化的model类
  46 + * @param <T> 继承DataModel的某个类
  47 + * @return 返回clazz类型的一个实例
  48 + */
  49 + public static <T extends DataModelable> T getModel(JsonObject row, Class<T> clazz) {
  50 + try {
  51 + // 反射获取clazz的构造器
  52 + Constructor<? extends T> constructor = clazz.getConstructor();
  53 + // 利用反射的构造器实例化
  54 + T dataModel = constructor.newInstance();
  55 + // 获取所有字段
  56 + Field[] dataModelFields = clazz.getDeclaredFields();
  57 + // 如果存在字段
  58 + if (dataModelFields.length != 0) {
  59 + for (Field dataModelField : dataModelFields) {
  60 + dataModelField.setAccessible(true);
  61 + MysqlField annotation = dataModelField.getAnnotation(MysqlField.class);
  62 + if (annotation != null) {
  63 + Object columnValue = row.getValue(annotation.alias());
  64 + // 将值反射到dataModel里
  65 + dataModelField.set(dataModel, columnValue);
  66 + }
  67 + }
  68 + }
  69 + return dataModel;
  70 + } catch (Exception e) {
  71 + logger.error("exception in MysqlHelper.getModel(Row, Class): ", e);
  72 + }
  73 + return null;
  74 + }
  75 +}
  1 +package com.aries.crawler.tools;
  2 +
/**
 * URL helpers for the crawler.
 *
 * @author arowana
 */
public final class Urls {
    /** Separator between the scheme and the host part of a URL. */
    private static final String CM = "://";
    private static final String USER_SHARE_PAGE_TEMPLATE = "https://www.iesdouyin.com/share/user/%d";

    /**
     * Not instantiable.
     */
    private Urls() {

    }

    /**
     * Builds the URL of a user's share page.
     *
     * @param uid user id substituted into the URL template
     * @return the share-page URL
     */
    public static String getUserSharePage(final Long uid) {
        return String.format(USER_SHARE_PAGE_TEMPLATE, uid);
    }

    /**
     * Splits a URL into host and path.
     *
     * <p>The host is the text between {@code ://} and the next {@code /}; the path
     * is everything from that {@code /} onwards. A URL without a scheme is parsed
     * from its first character, and a URL without any path (for example
     * {@code https://www.kkk.com}) yields the path {@code "/"} instead of failing
     * with a {@link StringIndexOutOfBoundsException}.
     *
     * @param url for example {@code http(s)://www.kkk.com/xxx/yyy}
     * @return host and path of the URL
     */
    public static RequestInfo getInfo(String url) {
        var cmIndex = url.indexOf(CM);
        // No "://" at all: treat the whole string as starting with the host.
        var hostStart = cmIndex < 0 ? 0 : cmIndex + CM.length();
        var hostEnd = url.indexOf('/', hostStart);
        if (hostEnd < 0) {
            // Nothing after the host: request the root path.
            return new RequestInfo(url.substring(hostStart), "/");
        }
        var host = url.substring(hostStart, hostEnd);
        var path = url.substring(hostEnd);
        return new RequestInfo(host, path);
    }

    /**
     * Host and path extracted from a URL.
     *
     * @param host host part (including a port if the URL had one)
     * @param path path part, starting with {@code /}
     */
    public record RequestInfo(String host, String path) {

    }
}
  1 +package com.aries.crawler.trans;
  2 +
/**
 * Event-bus addresses used by the crawler. Each constant carries the address
 * string that is passed to the Vert.x event bus.
 *
 * @author arowana
 */

public enum EventBusTopic {
    /** Address {@code logic.douyin.widedata.dispatch}. */
    LOGIC_DOUYIN_WIDEDATA_DISPATCH("logic.douyin.widedata.dispatch"),
    /** Address {@code mysql.douyin.user.insert}. */
    MYSQL_DOUYIN_USER_INSERT("mysql.douyin.user.insert"),
    /** Address {@code mysql.douyin.video.insert}. */
    MYSQL_DOUYIN_VIDEO_INSERT("mysql.douyin.video.insert"),
    /** Address {@code mysql.douyin.widedata.update.status.video}. */
    MYSQL_DOUYIN_WIDEDATA_UPDATE_STATUS_VIDEO("mysql.douyin.widedata.update.status.video"),
    /** Address {@code mysql.douyin.widedata.update.status.user}. */
    MYSQL_DOUYIN_WIDEDATA_UPDATE_STATUS_USER("mysql.douyin.widedata.update.status.user"),
    /** Address {@code logic.douyin.video.url.parse} (note: differs from the constant name). */
    LOGIC_DOUYIN_VIDEO_DOWNLOAD("logic.douyin.video.url.parse"),
    /** Address {@code mysql.douyin.video.update.status.downloaded}. */
    MYSQL_DOUYIN_VIDEO__UPDATE_STATUS_DOWNLOADED("mysql.douyin.video.update.status.downloaded"),
    /** Address {@code mysql.douyin.video.update.status.failed}. */
    MYSQL_DOUYIN_VIDEO__UPDATE_STATUS_FAILED("mysql.douyin.video.update.status.failed");

    /** Event-bus address string of this topic. */
    private final String topic;

    EventBusTopic(String topic) {
        this.topic = topic;
    }

    /**
     * @return the event-bus address string
     */
    public String getTopic() {
        return this.topic;
    }
}
  1 +package com.aries.crawler.trans;
  2 +
/**
 * Marker interface with no members. It is implemented by the message records of
 * this project and serves as the type bound of {@code CommonMessageCodec}.
 *
 * @author arowana
 */
public interface Messagable {
}
  1 +package com.aries.crawler.trans.codec;
  2 +
  3 +import com.aries.crawler.trans.Messagable;
  4 +import io.vertx.core.buffer.Buffer;
  5 +import io.vertx.core.eventbus.MessageCodec;
  6 +import io.vertx.core.json.JsonObject;
  7 +
  8 +import java.lang.reflect.ParameterizedType;
  9 +import java.util.UUID;
  10 +
  11 +/**
  12 + * common message codec(通用的codec)
  13 + *
  14 + * @author arowana
  15 + */
  16 +public class CommonMessageCodec<T extends Messagable> implements MessageCodec<T, T> {
  17 + @Override
  18 + public void encodeToWire(Buffer buffer, T message) {
  19 + // Easiest ways is using JSON object
  20 + JsonObject jsonToEncode = JsonObject.mapFrom(message);
  21 +
  22 + // Encode object to string
  23 + String jsonToStr = jsonToEncode.encode();
  24 +
  25 + // Length of JSON: is NOT characters count
  26 + int length = jsonToStr.getBytes().length;
  27 +
  28 + // Write data into given buffer
  29 + buffer.appendInt(length);
  30 + buffer.appendString(jsonToStr);
  31 + }
  32 +
  33 + @Override
  34 + public T decodeFromWire(int position, Buffer buffer) {
  35 + // My custom message starting from this *position* of buffer
  36 + int _pos = position;
  37 +
  38 + // Length of JSON
  39 + int length = buffer.getInt(_pos);
  40 +
  41 + // Get JSON string by it`s length
  42 + // Jump 4 because getInt() == 4 bytes
  43 + String jsonStr = buffer.getString(_pos += 4, _pos += length);
  44 + JsonObject jsonObject = new JsonObject(jsonStr);
  45 +
  46 + // Get fields
  47 + @SuppressWarnings("unchecked")
  48 + Class<T> entityClass = (Class<T>) ((ParameterizedType) getClass().getGenericSuperclass()).getActualTypeArguments()[0];
  49 +
  50 + return jsonObject.mapTo(entityClass);
  51 + }
  52 +
  53 + @Override
  54 + public T transform(T commonMessage) {
  55 + return commonMessage;
  56 + }
  57 +
  58 + @Override
  59 + public String name() {
  60 + return this.getClass().getSimpleName() + UUID.randomUUID();
  61 + }
  62 +
  63 + @Override
  64 + public byte systemCodecID() {
  65 + return -1;
  66 + }
  67 +}
  1 +package com.aries.crawler.trans.message;
  2 +
  3 +import com.aries.crawler.trans.Messagable;
  4 +
  5 +/**
  6 + * immutable,通用message
  7 + *
  8 + * @author arowana
  9 + */
  10 +public record CommonResponseMessage<T>(Integer code, T message) implements Messagable {
  11 + public static final CommonResponseMessage<Object> COMMON_SUCCESS_MESSAGE = CommonResponseMessage.CommonResponseMessageBuilder
  12 + .aCommonResponseMessage()
  13 + .code(100)
  14 + .message("success")
  15 + .build();
  16 +
  17 + public static final CommonResponseMessage<Object> COMMON_FAILED_MESSAGE = CommonResponseMessage.CommonResponseMessageBuilder
  18 + .aCommonResponseMessage()
  19 + .code(1000)
  20 + .message("failed")
  21 + .build();
  22 +
  23 + public static final class CommonResponseMessageBuilder<T> {
  24 + private Integer code;
  25 + private T message;
  26 +
  27 + private CommonResponseMessageBuilder() {
  28 + }
  29 +
  30 + public static <T> CommonResponseMessageBuilder<T> aCommonResponseMessage() {
  31 + return new CommonResponseMessageBuilder<T>();
  32 + }
  33 +
  34 + public CommonResponseMessageBuilder<T> code(Integer code) {
  35 + this.code = code;
  36 + return this;
  37 + }
  38 +
  39 + public CommonResponseMessageBuilder<T> message(T message) {
  40 + this.message = message;
  41 + return this;
  42 + }
  43 +
  44 + public CommonResponseMessage<T> build() {
  45 + return new CommonResponseMessage<>(code, message);
  46 + }
  47 + }
  48 +}
  1 +package com.aries.crawler.trans.message;
  2 +
  3 +import com.aries.crawler.tools.Urls;
  4 +import com.aries.crawler.trans.Messagable;
  5 +
  6 +/**
  7 + * immutable message, 用户数据message
  8 + *
  9 + * @author arowana
  10 + */
  11 +
  12 +public record DouyinUserInfoMessage(Long uid, Long shortId, String nickname, String signature,
  13 + String avatarLargerUrl, String shareUrl,
  14 + String shareInfoQrCodeUrl) implements Messagable {
  15 +
  16 + public static DouyinUserInfoMessage of(DouyinWideDataMessage wideDataMessage) {
  17 + return new DouyinUserInfoMessageBuilder()
  18 + .uid(wideDataMessage.authorUid())
  19 + .shortId(wideDataMessage.authorShortId())
  20 + .nickname(wideDataMessage.authorNickname())
  21 + .signature(wideDataMessage.authorSignature())
  22 + .avatarLargerUrl(wideDataMessage.avatarLargerUrl())
  23 + .shareUrl(Urls.getUserSharePage(wideDataMessage.authorUid()))
  24 + .shareInfoQrCodeUrl(wideDataMessage.authorShareInfoQrcodeUrl())
  25 + .build();
  26 + }
  27 +
  28 + public static final class DouyinUserInfoMessageBuilder {
  29 + private Long uid;
  30 + private Long shortId;
  31 + private String nickname;
  32 + private String signature;
  33 + private String avatarLargerUrl;
  34 + private String shareUrl;
  35 + private String shareInfoQrCodeUrl;
  36 +
  37 + private DouyinUserInfoMessageBuilder() {
  38 + }
  39 +
  40 + public static DouyinUserInfoMessageBuilder aDouyinUserInfoMessage() {
  41 + return new DouyinUserInfoMessageBuilder();
  42 + }
  43 +
  44 + public DouyinUserInfoMessageBuilder uid(Long uid) {
  45 + this.uid = uid;
  46 + return this;
  47 + }
  48 +
  49 + public DouyinUserInfoMessageBuilder shortId(Long shortId) {
  50 + this.shortId = shortId;
  51 + return this;
  52 + }
  53 +
  54 + public DouyinUserInfoMessageBuilder nickname(String nickname) {
  55 + this.nickname = nickname;
  56 + return this;
  57 + }
  58 +
  59 + public DouyinUserInfoMessageBuilder signature(String signature) {
  60 + this.signature = signature;
  61 + return this;
  62 + }
  63 +
  64 + public DouyinUserInfoMessageBuilder avatarLargerUrl(String avatarLargerUrl) {
  65 + this.avatarLargerUrl = avatarLargerUrl;
  66 + return this;
  67 + }
  68 +
  69 + public DouyinUserInfoMessageBuilder shareUrl(String shareUrl) {
  70 + this.shareUrl = shareUrl;
  71 + return this;
  72 + }
  73 +
  74 + public DouyinUserInfoMessageBuilder shareInfoQrCodeUrl(String shareInfoQrCodeUrl) {
  75 + this.shareInfoQrCodeUrl = shareInfoQrCodeUrl;
  76 + return this;
  77 + }
  78 +
  79 + public DouyinUserInfoMessage build() {
  80 + return new DouyinUserInfoMessage(uid, shortId, nickname, signature, avatarLargerUrl, shareUrl, shareInfoQrCodeUrl);
  81 + }
  82 + }
  83 +}
  1 +package com.aries.crawler.trans.message;
  2 +
  3 +import com.aries.crawler.model.douyincrawler.DouyinVideoModel;
  4 +import com.aries.crawler.trans.Messagable;
  5 +
  6 +/**
  7 + * immutable message, 视频数据message
  8 + *
  9 + * @author arowana
  10 + */
  11 +public record DouyinVideoInfoMessage(Long awemeId, Long authorUid, String awemeDesc, Long awemeCreateTime,
  12 + String videoCoverUrl, String videoDynamicCoverUrl, String videoDownloadAddrUrl,
  13 + String videoShareUrl, String videoVideoTag, Long videoDuration,
  14 + Integer type) implements Messagable {
  15 +
  16 + public static DouyinVideoInfoMessage of(DouyinWideDataMessage wideDataMessage) {
  17 + return new DouyinVideoInfoMessageBuilder()
  18 + .awemeId(wideDataMessage.awemeId())
  19 + .awemeDesc(wideDataMessage.awemeDesc())
  20 + .awemeCreateTime(wideDataMessage.awemeCreateTime())
  21 + .authorUid(wideDataMessage.authorUid())
  22 + .videoCoverUrl(wideDataMessage.videoCoverUrl())
  23 + .videoDynamicCoverUrl(wideDataMessage.videoDynamicCoverUrl())
  24 + .videoDownloadAddrUrl(wideDataMessage.videoDownloadAddrUrl())
  25 + .videoShareUrl(wideDataMessage.videoShareUrl())
  26 + .videoVideoTag(wideDataMessage.videoVideoTag())
  27 + .videoDuration(wideDataMessage.videoDuration())
  28 + .type(wideDataMessage.type())
  29 + .build();
  30 + }
  31 +
  32 + public static DouyinVideoInfoMessage of(DouyinVideoModel douyinVideoModel) {
  33 + return new DouyinVideoInfoMessageBuilder()
  34 + .awemeId(douyinVideoModel.getId())
  35 + .awemeDesc(douyinVideoModel.getComments())
  36 + .awemeCreateTime(douyinVideoModel.getCreateTime())
  37 + .authorUid(douyinVideoModel.getUid())
  38 + .videoCoverUrl(douyinVideoModel.getCoverUrl())
  39 + .videoDynamicCoverUrl(douyinVideoModel.getDynamicCoverUrl())
  40 + .videoDownloadAddrUrl(douyinVideoModel.getDownloadAddrUrl())
  41 + .videoShareUrl(douyinVideoModel.getShareUrl())
  42 + .videoVideoTag(douyinVideoModel.getTag())
  43 + .videoDuration(douyinVideoModel.getDuration())
  44 + .type(douyinVideoModel.getType())
  45 + .build();
  46 + }
  47 +
  48 + public static final class DouyinVideoInfoMessageBuilder {
  49 + private Long awemeId;
  50 + private Long authorUid;
  51 + private String awemeDesc;
  52 + private Long awemeCreateTime;
  53 + private String videoCoverUrl;
  54 + private String videoDynamicCoverUrl;
  55 + private String videoDownloadAddrUrl;
  56 + private String videoShareUrl;
  57 + private String videoVideoTag;
  58 + private Long videoDuration;
  59 + private Integer type;
  60 +
  61 + private DouyinVideoInfoMessageBuilder() {
  62 + }
  63 +
  64 + public static DouyinVideoInfoMessageBuilder aDouyinVideoInfoMessage() {
  65 + return new DouyinVideoInfoMessageBuilder();
  66 + }
  67 +
  68 + public DouyinVideoInfoMessageBuilder awemeId(Long awemeId) {
  69 + this.awemeId = awemeId;
  70 + return this;
  71 + }
  72 +
  73 + public DouyinVideoInfoMessageBuilder authorUid(Long authorUid) {
  74 + this.authorUid = authorUid;
  75 + return this;
  76 + }
  77 +
  78 + public DouyinVideoInfoMessageBuilder awemeDesc(String awemeDesc) {
  79 + this.awemeDesc = awemeDesc;
  80 + return this;
  81 + }
  82 +
  83 + public DouyinVideoInfoMessageBuilder awemeCreateTime(Long awemeCreateTime) {
  84 + this.awemeCreateTime = awemeCreateTime;
  85 + return this;
  86 + }
  87 +
  88 + public DouyinVideoInfoMessageBuilder videoCoverUrl(String videoCoverUrl) {
  89 + this.videoCoverUrl = videoCoverUrl;
  90 + return this;
  91 + }
  92 +
  93 + public DouyinVideoInfoMessageBuilder videoDynamicCoverUrl(String videoDynamicCoverUrl) {
  94 + this.videoDynamicCoverUrl = videoDynamicCoverUrl;
  95 + return this;
  96 + }
  97 +
  98 + public DouyinVideoInfoMessageBuilder videoDownloadAddrUrl(String videoDownloadAddrUrl) {
  99 + this.videoDownloadAddrUrl = videoDownloadAddrUrl;
  100 + return this;
  101 + }
  102 +
  103 + public DouyinVideoInfoMessageBuilder videoShareUrl(String videoShareUrl) {
  104 + this.videoShareUrl = videoShareUrl;
  105 + return this;
  106 + }
  107 +
  108 + public DouyinVideoInfoMessageBuilder videoVideoTag(String videoVideoTag) {
  109 + this.videoVideoTag = videoVideoTag;
  110 + return this;
  111 + }
  112 +
  113 + public DouyinVideoInfoMessageBuilder videoDuration(Long videoDuration) {
  114 + this.videoDuration = videoDuration;
  115 + return this;
  116 + }
  117 +
  118 + public DouyinVideoInfoMessageBuilder type(Integer type) {
  119 + this.type = type;
  120 + return this;
  121 + }
  122 +
  123 + public DouyinVideoInfoMessage build() {
  124 + return new DouyinVideoInfoMessage(awemeId, authorUid, awemeDesc, awemeCreateTime, videoCoverUrl, videoDynamicCoverUrl, videoDownloadAddrUrl, videoShareUrl, videoVideoTag, videoDuration, type);
  125 + }
  126 + }
  127 +}
  1 +package com.aries.crawler.trans.message;
  2 +
  3 +import com.aries.crawler.model.douyincrawler.DouyinCrawlerLogModel;
  4 +import com.aries.crawler.trans.Messagable;
  5 +
  6 +import java.math.BigInteger;
  7 +
  8 +/**
  9 + * immutable message, 宽表数据message
  10 + *
  11 + * @author arowana
  12 + */
  13 +public record DouyinWideDataMessage(BigInteger id, Long awemeId, String awemeDesc, Long awemeCreateTime,
  14 + Long authorUid, Long authorShortId, String authorNickname, String authorSignature,
  15 + String avatarLargerUrl, String authorShareInfoQrcodeUrl,
  16 + String videoCoverUrl, String videoDynamicCoverUrl, String videoDownloadAddrUrl,
  17 + String videoShareUrl, String videoVideoTag, Long videoDuration,
  18 + Integer type, Integer status, String ct, String ut) implements Messagable {
  19 +
  20 + public static DouyinWideDataMessage of(DouyinCrawlerLogModel douyinCrawlerLogModel) {
  21 + return new DouyinWideDataMessageBuilder()
  22 + .id(douyinCrawlerLogModel.getId())
  23 + .awemeId(douyinCrawlerLogModel.getAwemeId())
  24 + .awemeDesc(douyinCrawlerLogModel.getAwemeDesc())
  25 + .awemeCreateTime(douyinCrawlerLogModel.getAwemeCreateTime())
  26 + .authorUid(douyinCrawlerLogModel.getAuthorUid())
  27 + .authorShortId(douyinCrawlerLogModel.getAuthorShortId())
  28 + .authorNickname(douyinCrawlerLogModel.getAuthorNickname())
  29 + .authorSignature(douyinCrawlerLogModel.getAuthorSignature())
  30 + .avatarLargerUrl(douyinCrawlerLogModel.getAvatarLargerUrl())
  31 + .authorShareInfoQrcodeUrl(douyinCrawlerLogModel.getAuthorShareInfoQrcodeUrl())
  32 + .videoCoverUrl(douyinCrawlerLogModel.getVideoCoverUrl())
  33 + .videoDynamicCoverUrl(douyinCrawlerLogModel.getVideoDynamicCoverUrl())
  34 + .videoDownloadAddrUrl(douyinCrawlerLogModel.getVideoDownloadAddrUrl())
  35 + .videoShareUrl(douyinCrawlerLogModel.getVideoShareUrl())
  36 + .videoVideoTag(douyinCrawlerLogModel.getVideoVideoTag())
  37 + .videoDuration(douyinCrawlerLogModel.getVideoDuration())
  38 + .type(douyinCrawlerLogModel.getType())
  39 + .status(douyinCrawlerLogModel.getStatus())
  40 + .ct(douyinCrawlerLogModel.getCt())
  41 + .ut(douyinCrawlerLogModel.getUt())
  42 + .build();
  43 + }
  44 +
  45 + public static final class DouyinWideDataMessageBuilder {
  46 + private BigInteger id;
  47 + private Long awemeId;
  48 + private String awemeDesc;
  49 + private Long awemeCreateTime;
  50 + private Long authorUid;
  51 + private Long authorShortId;
  52 + private String authorNickname;
  53 + private String authorSignature;
  54 + private String avatarLargerUrl;
  55 + private String authorShareInfoQrcodeUrl;
  56 + private String videoCoverUrl;
  57 + private String videoDynamicCoverUrl;
  58 + private String videoDownloadAddrUrl;
  59 + private String videoShareUrl;
  60 + private String videoVideoTag;
  61 + private Long videoDuration;
  62 + private Integer type;
  63 + private Integer status;
  64 + private String ct;
  65 + private String ut;
  66 +
  67 + private DouyinWideDataMessageBuilder() {
  68 + }
  69 +
  70 + public static DouyinWideDataMessageBuilder aDouyinWideDataMessage() {
  71 + return new DouyinWideDataMessageBuilder();
  72 + }
  73 +
  74 + public DouyinWideDataMessageBuilder id(BigInteger id) {
  75 + this.id = id;
  76 + return this;
  77 + }
  78 +
  79 + public DouyinWideDataMessageBuilder awemeId(Long awemeId) {
  80 + this.awemeId = awemeId;
  81 + return this;
  82 + }
  83 +
  84 + public DouyinWideDataMessageBuilder awemeDesc(String awemeDesc) {
  85 + this.awemeDesc = awemeDesc;
  86 + return this;
  87 + }
  88 +
  89 + public DouyinWideDataMessageBuilder awemeCreateTime(Long awemeCreateTime) {
  90 + this.awemeCreateTime = awemeCreateTime;
  91 + return this;
  92 + }
  93 +
  94 + public DouyinWideDataMessageBuilder authorUid(Long authorUid) {
  95 + this.authorUid = authorUid;
  96 + return this;
  97 + }
  98 +
  99 + public DouyinWideDataMessageBuilder authorShortId(Long authorShortId) {
  100 + this.authorShortId = authorShortId;
  101 + return this;
  102 + }
  103 +
  104 + public DouyinWideDataMessageBuilder authorNickname(String authorNickname) {
  105 + this.authorNickname = authorNickname;
  106 + return this;
  107 + }
  108 +
  109 + public DouyinWideDataMessageBuilder authorSignature(String authorSignature) {
  110 + this.authorSignature = authorSignature;
  111 + return this;
  112 + }
  113 +
  114 + public DouyinWideDataMessageBuilder avatarLargerUrl(String avatarLargerUrl) {
  115 + this.avatarLargerUrl = avatarLargerUrl;
  116 + return this;
  117 + }
  118 +
  119 + public DouyinWideDataMessageBuilder authorShareInfoQrcodeUrl(String authorShareInfoQrcodeUrl) {
  120 + this.authorShareInfoQrcodeUrl = authorShareInfoQrcodeUrl;
  121 + return this;
  122 + }
  123 +
  124 + public DouyinWideDataMessageBuilder videoCoverUrl(String videoCoverUrl) {
  125 + this.videoCoverUrl = videoCoverUrl;
  126 + return this;
  127 + }
  128 +
  129 + public DouyinWideDataMessageBuilder videoDynamicCoverUrl(String videoDynamicCoverUrl) {
  130 + this.videoDynamicCoverUrl = videoDynamicCoverUrl;
  131 + return this;
  132 + }
  133 +
  134 + public DouyinWideDataMessageBuilder videoDownloadAddrUrl(String videoDownloadAddrUrl) {
  135 + this.videoDownloadAddrUrl = videoDownloadAddrUrl;
  136 + return this;
  137 + }
  138 +
  139 + public DouyinWideDataMessageBuilder videoShareUrl(String videoShareUrl) {
  140 + this.videoShareUrl = videoShareUrl;
  141 + return this;
  142 + }
  143 +
  144 + public DouyinWideDataMessageBuilder videoVideoTag(String videoVideoTag) {
  145 + this.videoVideoTag = videoVideoTag;
  146 + return this;
  147 + }
  148 +
  149 + public DouyinWideDataMessageBuilder videoDuration(Long videoDuration) {
  150 + this.videoDuration = videoDuration;
  151 + return this;
  152 + }
  153 +
  154 + public DouyinWideDataMessageBuilder type(Integer type) {
  155 + this.type = type;
  156 + return this;
  157 + }
  158 +
  159 + public DouyinWideDataMessageBuilder status(Integer status) {
  160 + this.status = status;
  161 + return this;
  162 + }
  163 +
  164 + public DouyinWideDataMessageBuilder ct(String ct) {
  165 + this.ct = ct;
  166 + return this;
  167 + }
  168 +
  169 + public DouyinWideDataMessageBuilder ut(String ut) {
  170 + this.ut = ut;
  171 + return this;
  172 + }
  173 +
  174 + public DouyinWideDataMessage build() {
  175 + return new DouyinWideDataMessage(id, awemeId, awemeDesc, awemeCreateTime, authorUid, authorShortId, authorNickname, authorSignature, avatarLargerUrl, authorShareInfoQrcodeUrl, videoCoverUrl, videoDynamicCoverUrl, videoDownloadAddrUrl, videoShareUrl, videoVideoTag, videoDuration, type, status, ct, ut);
  176 + }
  177 + }
  178 +}
  1 +package com.aries.crawler.trans.message;
  2 +
  3 +import com.aries.crawler.trans.Messagable;
  4 +
  5 +import java.math.BigInteger;
  6 +
/**
 * Immutable message that carries a single id.
 *
 * <p>Despite the "Int64" in the name, the id is held as a {@link BigInteger}.
 *
 * @param id the id being transported
 * @author arowana
 */
public record SimpleInt64Message(BigInteger id) implements Messagable {
}
  1 +package com.aries.crawler.verticles;
  2 +
  3 +import com.aries.crawler.sqlbuilder.UpdateBuilder;
  4 +import com.aries.crawler.tools.MySqlExecuteHelper;
  5 +import com.aries.crawler.trans.message.SimpleInt64Message;
  6 +import io.vertx.core.AbstractVerticle;
  7 +import io.vertx.core.eventbus.Message;
  8 +import io.vertx.core.logging.Logger;
  9 +import io.vertx.core.logging.LoggerFactory;
  10 +
  11 +import java.time.LocalDateTime;
  12 +import java.time.format.DateTimeFormatter;
  13 +
  14 +import static com.aries.crawler.trans.EventBusTopic.*;
  15 +import static com.aries.crawler.trans.message.CommonResponseMessage.COMMON_FAILED_MESSAGE;
  16 +import static com.aries.crawler.trans.message.CommonResponseMessage.COMMON_SUCCESS_MESSAGE;
  17 +
  18 +/**
  19 + * @author arowana
  20 + */
  21 +public class UpdateDataVerticle extends AbstractVerticle {
  22 + private static final Logger logger = LoggerFactory.getLogger(UpdateDataVerticle.class);
  23 +
  24 + public static String getDateTimeAsString(LocalDateTime localDateTime, String format) {
  25 + DateTimeFormatter formatter = DateTimeFormatter.ofPattern(format);
  26 + return localDateTime.format(formatter);
  27 + }
  28 +
  29 + @Override
  30 + public void start() {
  31 + // 更新款表的status状态为'已处理用户数据'状态
  32 + vertx.eventBus().consumer(MYSQL_DOUYIN_WIDEDATA_UPDATE_STATUS_USER.getTopic(), this::mysqlDouyinWideDataUpdateStatusUser).setMaxBufferedMessages(4000);
  33 + // 更新款表的status状态为'已处理视频数据'状态
  34 + vertx.eventBus().consumer(MYSQL_DOUYIN_WIDEDATA_UPDATE_STATUS_VIDEO.getTopic(), this::mysqlDouyinWideDataUpdateStatusVideo).setMaxBufferedMessages(4000);
  35 + // 更新完成下载的视频
  36 + vertx.eventBus().consumer(MYSQL_DOUYIN_VIDEO__UPDATE_STATUS_DOWNLOADED.getTopic(), this::mysqlDouyinVideoDataUpdateStatusDownloaded).setMaxBufferedMessages(4000);
  37 + // 更新下载失败的视频
  38 + vertx.eventBus().consumer(MYSQL_DOUYIN_VIDEO__UPDATE_STATUS_FAILED.getTopic(), this::mysqlDouyinVideoDataUpdateStatusFailed).setMaxBufferedMessages(4000);
  39 + }
  40 +
  41 + private void mysqlDouyinWideDataUpdateStatusUser(Message<Object> message) {
  42 + var idMessage = (SimpleInt64Message) message.body();
  43 +
  44 + // 构建sql数据, 插入用户信息。
  45 + var insertBuilder = new UpdateBuilder("douyin_crawler_log")
  46 + .set("status = status | 1")
  47 + .where("id=" + idMessage.id())
  48 + .toString();
  49 +
  50 + MySqlExecuteHelper.execute(vertx, insertBuilder, mysqlExecutorRes -> {
  51 + if (mysqlExecutorRes.succeeded()) {
  52 + message.reply(COMMON_SUCCESS_MESSAGE);
  53 + } else {
  54 + message.reply(COMMON_FAILED_MESSAGE);
  55 + }
  56 + });
  57 + }
  58 +
  59 + private void mysqlDouyinWideDataUpdateStatusVideo(Message<Object> message) {
  60 + var idMessage = (SimpleInt64Message) message.body();
  61 +
  62 + // 构建sql数据, 插入用户信息。
  63 + var insertBuilder = new UpdateBuilder("douyin_crawler_log")
  64 + .set("status = status | 2")
  65 + .where("id=" + idMessage.id())
  66 + .toString();
  67 +
  68 + MySqlExecuteHelper.execute(vertx, insertBuilder, mysqlExecutorRes -> {
  69 + if (mysqlExecutorRes.succeeded()) {
  70 + message.reply(COMMON_SUCCESS_MESSAGE);
  71 + } else {
  72 + message.reply(COMMON_FAILED_MESSAGE);
  73 + }
  74 + });
  75 + }
  76 +
  77 + private void mysqlDouyinVideoDataUpdateStatusDownloaded(Message<Object> message) {
  78 + var idMessage = (SimpleInt64Message) message.body();
  79 +
  80 + // 构建sql数据, 插入用户信息。
  81 + var insertBuilder = new UpdateBuilder("douyin_video_info")
  82 + .set("status = status | 1")
  83 + .where("id=" + idMessage.id())
  84 + .toString();
  85 +
  86 + MySqlExecuteHelper.execute(vertx, insertBuilder, mysqlExecutorRes -> {
  87 + if (mysqlExecutorRes.succeeded()) {
  88 + message.reply(COMMON_SUCCESS_MESSAGE);
  89 + } else {
  90 + message.reply(COMMON_FAILED_MESSAGE);
  91 + }
  92 + });
  93 + }
  94 +
  95 + private void mysqlDouyinVideoDataUpdateStatusFailed(Message<Object> message) {
  96 + var idMessage = (SimpleInt64Message) message.body();
  97 +
  98 + // 构建sql数据, 插入用户信息。
  99 + var insertBuilder = new UpdateBuilder("douyin_video_info")
  100 + .set("status = status | 2")
  101 + .where("id=" + idMessage.id())
  102 + .toString();
  103 +
  104 + MySqlExecuteHelper.execute(vertx, insertBuilder, mysqlExecutorRes -> {
  105 + if (mysqlExecutorRes.succeeded()) {
  106 + message.reply(COMMON_SUCCESS_MESSAGE);
  107 + } else {
  108 + message.reply(COMMON_FAILED_MESSAGE);
  109 + }
  110 + });
  111 + }
  112 +}
  1 +package com.aries.crawler.verticles;
  2 +
  3 +import com.aries.crawler.sqlbuilder.InsertBuilder;
  4 +import com.aries.crawler.tools.MySqlExecuteHelper;
  5 +import com.aries.crawler.trans.message.DouyinUserInfoMessage;
  6 +import io.vertx.core.AbstractVerticle;
  7 +import io.vertx.core.eventbus.Message;
  8 +import io.vertx.core.logging.Logger;
  9 +import io.vertx.core.logging.LoggerFactory;
  10 +
  11 +import java.time.LocalDateTime;
  12 +import java.time.format.DateTimeFormatter;
  13 +
  14 +import static com.aries.crawler.trans.EventBusTopic.MYSQL_DOUYIN_USER_INSERT;
  15 +import static com.aries.crawler.trans.message.CommonResponseMessage.COMMON_FAILED_MESSAGE;
  16 +import static com.aries.crawler.trans.message.CommonResponseMessage.COMMON_SUCCESS_MESSAGE;
  17 +
  18 +/**
  19 + * @author arowana
  20 + */
  21 +public class UserInsertVerticle extends AbstractVerticle {
  22 + private static final Logger logger = LoggerFactory.getLogger(UserInsertVerticle.class);
  23 +
  24 + @Override
  25 + public void start() {
  26 + // 用于插入用户数据
  27 + vertx.eventBus().consumer(MYSQL_DOUYIN_USER_INSERT.getTopic(), this::mysqlDouyinUserInsertHandler).setMaxBufferedMessages(4000);
  28 + }
  29 +
  30 + public static String getDateTimeAsString(LocalDateTime localDateTime, String format) {
  31 + DateTimeFormatter formatter = DateTimeFormatter.ofPattern(format);
  32 + return localDateTime.format(formatter);
  33 + }
  34 +
  35 + private void mysqlDouyinUserInsertHandler(Message<Object> message) {
  36 + var userInfoMessage = (DouyinUserInfoMessage) message.body();
  37 +
  38 + // 构建sql数据, 插入用户信息。
  39 + var insertBuilder = new InsertBuilder("douyin_user_info")
  40 + .set("uid", userInfoMessage.uid())
  41 + .set("short_id", userInfoMessage.shortId())
  42 + .set("nickname", userInfoMessage.nickname())
  43 + .set("signature", userInfoMessage.signature())
  44 + .set("avatar_larger_url", userInfoMessage.avatarLargerUrl())
  45 + .set("share_url", userInfoMessage.shareUrl())
  46 + .set("share_info_qrcode_url", userInfoMessage.shareInfoQrCodeUrl())
  47 + .onDuplicateKeyUpdate("ut", getDateTimeAsString(LocalDateTime.now(), "yyyy-MM-dd HH:mm:ss"));
  48 +
  49 + logger.info("user sql:" + insertBuilder.getSql() + "---values:" + insertBuilder.getValues());
  50 +
  51 + MySqlExecuteHelper.prepareExecute(vertx, insertBuilder.getSql(), insertBuilder.getValues(), mysqlExecutorRes -> {
  52 + if (mysqlExecutorRes.succeeded()) {
  53 + logger.info("insert user succ, uid:" + userInfoMessage.uid());
  54 + message.reply(COMMON_SUCCESS_MESSAGE);
  55 + } else {
  56 + logger.info("insert user. failed" + mysqlExecutorRes.cause());
  57 + message.reply(COMMON_FAILED_MESSAGE);
  58 + }
  59 + });
  60 + }
  61 +}
  1 +package com.aries.crawler.verticles;
  2 +
  3 +import com.aries.crawler.model.douyincrawler.DouyinVideoModel;
  4 +import com.aries.crawler.sqlbuilder.SelectBuilder;
  5 +import com.aries.crawler.tools.MySqlExecuteHelper;
  6 +import com.aries.crawler.tools.Orm;
  7 +import com.aries.crawler.trans.message.DouyinVideoInfoMessage;
  8 +import io.vertx.core.AbstractVerticle;
  9 +import io.vertx.core.json.JsonObject;
  10 +import io.vertx.core.logging.Logger;
  11 +import io.vertx.core.logging.LoggerFactory;
  12 +
  13 +import java.util.ArrayList;
  14 +import java.util.List;
  15 +import java.util.function.Consumer;
  16 +
  17 +import static com.aries.crawler.trans.EventBusTopic.LOGIC_DOUYIN_VIDEO_DOWNLOAD;
  18 +
  19 +/**
  20 + * @author arowana
  21 + */
  22 +public class VideoDataPickUpVerticle extends AbstractVerticle {
  23 + private static final Logger logger = LoggerFactory.getLogger(WideDataPickUpVerticle.class);
  24 +
  25 + private Consumer<Long> consumer = (offset) -> {
  26 + var sql = new SelectBuilder()
  27 + .column("*")
  28 + .from(DouyinVideoModel.TABLE)
  29 + .where(" status = " + DouyinVideoModel.STATUS_VIDEO_DOWNLOAD_DEFAULT)
  30 + .limit(1L)
  31 + .offset(offset)
  32 + .orderBy("ct", false)
  33 + .toString();
  34 +
  35 + logger.info("构建pick up sql: " + sql);
  36 + MySqlExecuteHelper.prepareExecute(vertx, sql, new ArrayList<>(), mysqlExecutorRes -> {
  37 + if (mysqlExecutorRes.succeeded()) {
  38 + List<JsonObject> rows = mysqlExecutorRes.result().getRows();
  39 + for (JsonObject row : rows) {
  40 + var model = Orm.getModel(row, DouyinVideoModel.class);
  41 + DouyinVideoInfoMessage douyinVideoInfoMessage = DouyinVideoInfoMessage.of(model);
  42 + vertx.eventBus().request(LOGIC_DOUYIN_VIDEO_DOWNLOAD.getTopic(), douyinVideoInfoMessage, updateReply -> {
  43 + if (updateReply.succeeded()) {
  44 + logger.info("download video succ ...");
  45 + } else {
  46 + logger.info("download video fail ...");
  47 + }
  48 + });
  49 + }
  50 + } else {
  51 + logger.error("execute download video failed: " + mysqlExecutorRes.cause());
  52 + }
  53 + });
  54 +
  55 + };
  56 +
  57 +
  58 + @Override
  59 + public void start() {
  60 + vertx.setPeriodic(5000, id -> {
  61 + consumer.accept(0L);
  62 + });
  63 +
  64 +
  65 +// vertx.setPeriodic(2000, id -> vertx.executeBlocking(future -> {
  66 +// consumer.accept(0L);
  67 +// consumer.accept(5L);
  68 +// consumer.accept(10L);
  69 +// }, res -> {
  70 + // nothing
  71 +// }));
  72 + }
  73 +}
  1 +package com.aries.crawler.verticles;
  2 +
  3 +import com.aries.crawler.tools.Urls;
  4 +import com.aries.crawler.trans.message.DouyinVideoInfoMessage;
  5 +import com.aries.crawler.trans.message.SimpleInt64Message;
  6 +import io.vertx.core.AbstractVerticle;
  7 +import io.vertx.core.buffer.Buffer;
  8 +import io.vertx.core.eventbus.Message;
  9 +import io.vertx.core.http.HttpClient;
  10 +import io.vertx.core.http.RequestOptions;
  11 +import io.vertx.core.logging.Logger;
  12 +import io.vertx.core.logging.LoggerFactory;
  13 +
  14 +import java.math.BigInteger;
  15 +import java.util.regex.Matcher;
  16 +import java.util.regex.Pattern;
  17 +
  18 +import static com.aries.crawler.trans.EventBusTopic.*;
  19 +
  20 +public class VideoDownloadVerticle extends AbstractVerticle {
  21 + private static final Logger logger = LoggerFactory.getLogger(VideoDownloadVerticle.class);
  22 + private static final String MY_UA = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.148 Safari/537.36";
  23 +
  24 + @Override
  25 + public void start() {
  26 + // 下载视频
  27 + vertx.eventBus().consumer(LOGIC_DOUYIN_VIDEO_DOWNLOAD.getTopic(), this::mysqlDouyinVideoInsertHandler).setMaxBufferedMessages(4000);
  28 + }
  29 +
  30 + private void mysqlDouyinVideoInsertHandler(Message<Object> message) {
  31 + var videoInfoMessage = (DouyinVideoInfoMessage) message.body();
  32 + HttpClient client = vertx.createHttpClient();
  33 +
  34 + var videoPageUrlInfo = Urls.getInfo(videoInfoMessage.videoShareUrl());
  35 + System.out.println(videoInfoMessage.videoShareUrl());
  36 + client.get(new RequestOptions().setHost(videoPageUrlInfo.host()).setURI(videoPageUrlInfo.path()).addHeader("user-agent", MY_UA), response -> {
  37 + response.bodyHandler(body -> {
  38 + var bodyStr = new String(body.getBytes());
  39 + var addrUrl = getPlayAddrUrl(bodyStr);
  40 + System.out.println(addrUrl);
  41 + downloadMp4(addrUrl, "/Users/arowana/Movies/douyin/", videoInfoMessage.awemeId());
  42 + });
  43 + }).setFollowRedirects(true).end();
  44 + }
  45 +
  46 + private void downloadMp4(String addrUrl, String filePath, Long awemeId) {
  47 + vertx.executeBlocking(future -> {
  48 + if (addrUrl == null || addrUrl.equals("")) {
  49 + future.complete(200);
  50 + } else {
  51 + HttpClient client = vertx.createHttpClient();
  52 + var videoDownloadUrlInfo = Urls.getInfo(addrUrl);
  53 + client.get(new RequestOptions().setHost(videoDownloadUrlInfo.host()).setURI(videoDownloadUrlInfo.path()).addHeader("user-agent", MY_UA), httpResponse -> {
  54 + httpResponse.bodyHandler(httpBody -> {
  55 + final String mp4PathName = filePath + awemeId + ".mp4";
  56 + vertx.fileSystem().exists(mp4PathName, fileRes -> {
  57 + if (!fileRes.result()) {
  58 + vertx.fileSystem().writeFile(mp4PathName, Buffer.buffer(httpBody.getBytes()), fileStoreRes -> {
  59 + if (fileStoreRes.succeeded()) {
  60 + logger.info("file written. pathname:" + mp4PathName);
  61 + } else {
  62 + logger.info("file written failed. pathname: " + mp4PathName + ", cause: " + fileStoreRes.cause());
  63 + }
  64 + });
  65 + }
  66 + });
  67 +
  68 + });
  69 + }).setFollowRedirects(true).end();
  70 + future.complete(100);
  71 + }
  72 + }, res -> {
  73 + if (res.succeeded() && res.result() instanceof Integer s) {
  74 + switch (s) {
  75 + case 100 -> vertx.eventBus().request(MYSQL_DOUYIN_VIDEO__UPDATE_STATUS_DOWNLOADED.getTopic(), new SimpleInt64Message(BigInteger.valueOf(awemeId)), updateReply -> {
  76 + if (updateReply.succeeded()) {
  77 + logger.info("update status video downloaded succ ...");
  78 + } else {
  79 + logger.info("update status video downloaded fail ...");
  80 + }
  81 + });
  82 + case 200 -> vertx.eventBus().request(MYSQL_DOUYIN_VIDEO__UPDATE_STATUS_FAILED.getTopic(), new SimpleInt64Message(BigInteger.valueOf(awemeId)), updateReply -> {
  83 + if (updateReply.succeeded()) {
  84 + logger.info("update status video failed-status succ ...");
  85 + } else {
  86 + logger.info("update status video failed-status fail ...");
  87 + }
  88 + });
  89 + }
  90 + } else {
  91 + logger.error(res);
  92 + }
  93 + });
  94 + }
  95 +
  96 + public String getPlayAddrUrl(String body) {
  97 + int indexOfPlayAddr = body.indexOf("playAddr: ");
  98 + int indexOfCover = body.indexOf("cover: ");
  99 + String subBody = body.substring(indexOfPlayAddr, indexOfCover);
  100 + Pattern r = Pattern.compile("playAddr: \"(.*)\"");
  101 + Matcher m = r.matcher(subBody);
  102 + if (m.find()) {
  103 + return m.group(1);
  104 + } else {
  105 + return "";
  106 + }
  107 + }
  108 +
  109 +}
  1 +package com.aries.crawler.verticles;
  2 +
  3 +import com.aries.crawler.sqlbuilder.InsertBuilder;
  4 +import com.aries.crawler.tools.MySqlExecuteHelper;
  5 +import com.aries.crawler.trans.message.DouyinVideoInfoMessage;
  6 +import io.vertx.core.AbstractVerticle;
  7 +import io.vertx.core.eventbus.Message;
  8 +import io.vertx.core.logging.Logger;
  9 +import io.vertx.core.logging.LoggerFactory;
  10 +
  11 +import java.time.LocalDateTime;
  12 +import java.time.format.DateTimeFormatter;
  13 +
  14 +import static com.aries.crawler.trans.EventBusTopic.MYSQL_DOUYIN_VIDEO_INSERT;
  15 +import static com.aries.crawler.trans.message.CommonResponseMessage.COMMON_FAILED_MESSAGE;
  16 +import static com.aries.crawler.trans.message.CommonResponseMessage.COMMON_SUCCESS_MESSAGE;
  17 +
  18 +/**
  19 + * @author arowana
  20 + */
  21 +public class VideoInsertVerticle extends AbstractVerticle {
  22 + private static final Logger logger = LoggerFactory.getLogger(VideoInsertVerticle.class);
  23 +
  24 +
  25 + @Override
  26 + public void start() {
  27 + // 用于插入视频数据
  28 + vertx.eventBus().consumer(MYSQL_DOUYIN_VIDEO_INSERT.getTopic(), this::mysqlDouyinVideoInsertHandler).setMaxBufferedMessages(4000);
  29 + }
  30 +
  31 + public static String getDateTimeAsString(LocalDateTime localDateTime, String format) {
  32 + DateTimeFormatter formatter = DateTimeFormatter.ofPattern(format);
  33 + return localDateTime.format(formatter);
  34 + }
  35 +
  36 + private void mysqlDouyinVideoInsertHandler(Message<Object> message) {
  37 + var videoInfoMessage = (DouyinVideoInfoMessage) message.body();
  38 + // 构建sql数据, 插入视频信息。
  39 + var insertBuilder = new InsertBuilder("douyin_video_info")
  40 + .set("id", videoInfoMessage.awemeId())
  41 + .set("comments", videoInfoMessage.awemeDesc())
  42 + .set("create_time", videoInfoMessage.awemeCreateTime())
  43 + .set("uid", videoInfoMessage.authorUid())
  44 + .set("cover_url", videoInfoMessage.videoCoverUrl())
  45 + .set("dynamic_cover_url", videoInfoMessage.videoDynamicCoverUrl())
  46 + .set("download_addr_url", videoInfoMessage.videoDownloadAddrUrl())
  47 + .set("share_url", videoInfoMessage.videoShareUrl())
  48 + .set("tag", videoInfoMessage.videoVideoTag())
  49 + .set("duration", videoInfoMessage.videoDuration())
  50 + .set("type", videoInfoMessage.type())
  51 + .onDuplicateKeyUpdate("ut", getDateTimeAsString(LocalDateTime.now(), "yyyy-MM-dd HH:mm:ss"));
  52 +
  53 + logger.info("video sql:" + insertBuilder.getSql() + "---values:" + insertBuilder.getValues());
  54 +
  55 + MySqlExecuteHelper.prepareExecute(vertx, insertBuilder.getSql(), insertBuilder.getValues(), mysqlExecutorRes -> {
  56 + if (mysqlExecutorRes.succeeded()) {
  57 + logger.info("insert video succ, awemeid:" + videoInfoMessage.awemeId());
  58 + message.reply(COMMON_SUCCESS_MESSAGE);
  59 + } else {
  60 + logger.info("insert video. failed:" + mysqlExecutorRes.cause());
  61 + message.reply(COMMON_FAILED_MESSAGE);
  62 + }
  63 + });
  64 + }
  65 +
  66 +}
  1 +package com.aries.crawler.verticles;
  2 +
  3 +import com.aries.crawler.model.douyincrawler.DouyinCrawlerLogModel;
  4 +import com.aries.crawler.trans.message.DouyinUserInfoMessage;
  5 +import com.aries.crawler.trans.message.DouyinVideoInfoMessage;
  6 +import com.aries.crawler.trans.message.DouyinWideDataMessage;
  7 +import com.aries.crawler.trans.message.SimpleInt64Message;
  8 +import io.vertx.core.AbstractVerticle;
  9 +import io.vertx.core.eventbus.DeliveryOptions;
  10 +import io.vertx.core.eventbus.Message;
  11 +import io.vertx.core.logging.Logger;
  12 +import io.vertx.core.logging.LoggerFactory;
  13 +
  14 +import java.math.BigInteger;
  15 +import java.util.concurrent.TimeUnit;
  16 +
  17 +import static com.aries.crawler.trans.EventBusTopic.*;
  18 +import static com.aries.crawler.trans.message.CommonResponseMessage.COMMON_SUCCESS_MESSAGE;
  19 +
  20 +/**
  21 + * 这个verticle的职责是:
  22 + * 将受到的数据, 根据状态做不同的处理。
  23 + * <p> status = 0 表示这个数据没被处理过, 要把宽表中的数据拆成两部分, 分别交给用户数据插入器和视频数据插入器来处理。
  24 + * <p> status = 1 表示这个款数据中的用户数据部分已经处理过, 但是视频数据的部分还没处理。需要发给视频数据插入器来处理。
  25 + * <p> status = 2 表示这个款数据中的视频数据部分已经处理过, 但是用户数据的部分还没处理。需要发给用户数据插入器来处理。
  26 + * <p> status = 3 表示这个数据已经处理过, 没有剩余价值了, 不必处理, 忽略就可以了。
  27 + *
  28 + * @author arowana
  29 + */
  30 +public class WideDataDispatchVerticle extends AbstractVerticle {
  31 + private static final Logger logger = LoggerFactory.getLogger(WideDataDispatchVerticle.class);
  32 +
  33 + @Override
  34 + public void start() {
  35 + vertx.eventBus().consumer(LOGIC_DOUYIN_WIDEDATA_DISPATCH.getTopic(), this::dispatch).setMaxBufferedMessages(4000);
  36 + }
  37 +
  38 + private void dispatch(Message<Object> message) {
  39 + var wideDataMessage = (DouyinWideDataMessage) message.body();
  40 +
  41 + // 如果 用户部分的数据 未处于已处理状态
  42 + if ((wideDataMessage.status() & DouyinCrawlerLogModel.STATUS_USER_DONE) == 0) {
  43 + logger.info("user data need to be parsed, uid:" + wideDataMessage.authorUid());
  44 + processUser(DouyinUserInfoMessage.of(wideDataMessage), wideDataMessage.id());
  45 + }
  46 +
  47 + // 如果 视频部分的数据 未处于已处理状态
  48 + if ((wideDataMessage.status() & DouyinCrawlerLogModel.STATUS_VIDEO_DONE) == 0) {
  49 + logger.info("video data need to be parsed, awemeid:" + wideDataMessage.awemeId());
  50 + processVideo(DouyinVideoInfoMessage.of(wideDataMessage), wideDataMessage.id());
  51 + }
  52 +
  53 + message.reply(COMMON_SUCCESS_MESSAGE);
  54 +
  55 + }
  56 +
  57 + private void processVideo(DouyinVideoInfoMessage douyinVideoInfoMessage, BigInteger wideDataId) {
  58 + logger.info("prepare to insert video, awemeid: " + douyinVideoInfoMessage.awemeId());
  59 + vertx.eventBus().request(MYSQL_DOUYIN_VIDEO_INSERT.getTopic(), douyinVideoInfoMessage, new DeliveryOptions().setSendTimeout(TimeUnit.SECONDS.toMillis(20)), insertReply -> {
  60 + vertx.executeBlocking(future -> {
  61 + if (insertReply.succeeded()) {
  62 + logger.info("insert video reply succeeded, awemeid: " + douyinVideoInfoMessage.awemeId());
  63 + future.complete(100);
  64 + } else {
  65 + logger.error("insert video reply failed, awemeid: " + douyinVideoInfoMessage.awemeId() + ",cause:" + insertReply.cause());
  66 + }
  67 + }, res -> {
  68 + if (res.result() instanceof Integer s) {
  69 + if (s.equals(100)) {
  70 + vertx.eventBus().request(MYSQL_DOUYIN_WIDEDATA_UPDATE_STATUS_VIDEO.getTopic(), new SimpleInt64Message(wideDataId), updateReply -> {
  71 + if (updateReply.succeeded()) {
  72 + logger.info("update status video succ ...");
  73 + } else {
  74 + logger.info("update status video fail ...");
  75 + }
  76 + });
  77 + }
  78 + }
  79 + });
  80 + });
  81 + }
  82 +
  83 + private void processUser(DouyinUserInfoMessage douyinUserInfoMessage, BigInteger wideDataId) {
  84 + logger.info("prepare to insert user, uid:" + douyinUserInfoMessage.uid());
  85 + vertx.eventBus().request(MYSQL_DOUYIN_USER_INSERT.getTopic(), douyinUserInfoMessage, new DeliveryOptions().setSendTimeout(TimeUnit.SECONDS.toMillis(20)), insertReply -> {
  86 + vertx.executeBlocking(future -> {
  87 + if (insertReply.succeeded()) {
  88 + logger.info("insert user reply succeeded, uid: " + douyinUserInfoMessage.uid());
  89 + future.complete(100);
  90 + } else {
  91 + logger.error("insert user reply failed, uid: " + douyinUserInfoMessage.uid() + ",cause:" + insertReply.cause());
  92 + }
  93 + }, res -> {
  94 + if (res.result() instanceof Integer s) {
  95 + if (s.equals(100)) {
  96 + vertx.eventBus().request(MYSQL_DOUYIN_WIDEDATA_UPDATE_STATUS_USER.getTopic(), new SimpleInt64Message(wideDataId), updateReply -> {
  97 + if (updateReply.succeeded()) {
  98 + logger.info("update status user succ ...");
  99 + } else {
  100 + logger.info("update status user fail ...");
  101 + }
  102 + });
  103 + }
  104 + }
  105 + });
  106 + });
  107 + }
  108 +}
  1 +package com.aries.crawler.verticles;
  2 +
  3 +import com.aries.crawler.model.douyincrawler.DouyinCrawlerLogModel;
  4 +import com.aries.crawler.sqlbuilder.SelectBuilder;
  5 +import com.aries.crawler.tools.MySqlExecuteHelper;
  6 +import com.aries.crawler.tools.Orm;
  7 +import com.aries.crawler.trans.message.DouyinWideDataMessage;
  8 +import io.vertx.core.AbstractVerticle;
  9 +import io.vertx.core.json.JsonObject;
  10 +import io.vertx.core.logging.Logger;
  11 +import io.vertx.core.logging.LoggerFactory;
  12 +
  13 +import java.util.ArrayList;
  14 +import java.util.List;
  15 +import java.util.function.Supplier;
  16 +
  17 +import static com.aries.crawler.trans.EventBusTopic.LOGIC_DOUYIN_WIDEDATA_DISPATCH;
  18 +
  19 +
  20 +/**
  21 + * 这个verticle的职责是:
  22 + * 从宽表douyin_crawler_log中读取数据, 然后将数据派发给WideDataDispatchVerticle来做分派处理
  23 + *
  24 + * @author arowana
  25 + */
  26 +public class WideDataPickUpVerticle extends AbstractVerticle {
  27 + private static final Logger logger = LoggerFactory.getLogger(WideDataPickUpVerticle.class);
  28 +
  29 + private final Supplier<Void> pickUpWideDataSupplier = () -> {
  30 + var sql = new SelectBuilder()
  31 + .column("*")
  32 + .from(DouyinCrawlerLogModel.TABLE)
  33 + .where(" status != " + DouyinCrawlerLogModel.STATUS_ALL_DONE)
  34 + .limit(1000L)
  35 + .orderBy("ct", false)
  36 + .toString();
  37 +
  38 + MySqlExecuteHelper.prepareExecute(vertx, sql, new ArrayList<>(), mysqlExecutorRes -> {
  39 + logger.info("prepare to pick up wide data. sql: " + sql);
  40 + if (mysqlExecutorRes.succeeded()) {
  41 + vertx.executeBlocking(future -> {
  42 + List<JsonObject> rows = mysqlExecutorRes.result().getRows();
  43 + for (JsonObject row : rows) {
  44 + var model = Orm.getModel(row, DouyinCrawlerLogModel.class);
  45 + processWideData(model);
  46 + }
  47 + }, res -> {
  48 + // ignore
  49 + });
  50 + } else {
  51 + logger.error("pick wide data failed, sql:" + sql + ", cause: " + mysqlExecutorRes.cause());
  52 + }
  53 + });
  54 +
  55 + return null;
  56 + };
  57 +
  58 + @Override
  59 + public void start() {
  60 + vertx.setPeriodic(10000, id -> {
  61 + pickUpWideDataSupplier.get();
  62 + });
  63 + }
  64 +
  65 + public void processWideData(DouyinCrawlerLogModel model) {
  66 + if (model == null) {
  67 + logger.info("model is null, do nothing");
  68 + return;
  69 + }
  70 +
  71 + var douyinWideDataMessage = DouyinWideDataMessage.of(model);
  72 + vertx.eventBus().request(LOGIC_DOUYIN_WIDEDATA_DISPATCH.getTopic(), douyinWideDataMessage, reply -> {
  73 + if (reply.succeeded()) {
  74 + logger.info("reply success from topic: " + LOGIC_DOUYIN_WIDEDATA_DISPATCH.getTopic() +
  75 + ", wide data id: " + douyinWideDataMessage.id() +
  76 + ", authorUid:" + douyinWideDataMessage.authorUid() +
  77 + ", awemeid: " + douyinWideDataMessage.awemeId());
  78 + } else {
  79 + logger.info("reply success from topic: " + LOGIC_DOUYIN_WIDEDATA_DISPATCH.getTopic() +
  80 + ", wide data id: " + douyinWideDataMessage.id() +
  81 + ", authorUid:" + douyinWideDataMessage.authorUid() +
  82 + ", awemeid: " + douyinWideDataMessage.awemeId() +
  83 + ". cause:" + reply.cause());
  84 + }
  85 + });
  86 + }
  87 +
  88 +
  89 +}
  1 +package com.aries.crawler;
  2 +
  3 +import com.aries.crawler.sqlbuilder.InsertBuilder;
  4 +import io.vertx.core.Vertx;
  5 +import io.vertx.core.buffer.Buffer;
  6 +import io.vertx.core.http.HttpClient;
  7 +import io.vertx.core.http.RequestOptions;
  8 +import org.junit.Test;
  9 +
  10 +import java.time.LocalDateTime;
  11 +import java.util.List;
  12 +import java.util.Map;
  13 +
  14 +import static com.aries.crawler.verticles.UpdateDataVerticle.getDateTimeAsString;
  15 +
  16 +public class AppTest {
  17 + @Test
  18 + public void shouldAnswerWithTrue() {
  19 + Vertx vertx = Vertx.vertx();
  20 +
  21 + HttpClient client = vertx.createHttpClient();
  22 +// client.getNow("aweme.snssdk.com", "/aweme/v1/playwm/?s_vid=93f1b41336a8b7a442dbf1c29c6bbc560cdca46fc197329a17cb02eef09b72493338e49045f75c3a6cd886d97de228c6e6a1f93d3b9a63a26a63e40654c6655e&line=0", response -> {
  23 +// System.out.println("Received response with status code " + response.statusCode());
  24 +// response.bodyHandler(x -> {
  25 +// vertx.fileSystem().writeFile("target/classes/a.mp4", Buffer.buffer(x.getBytes()), result -> {
  26 +// if (result.succeeded()) {
  27 +// System.out.println("File written");
  28 +// } else {
  29 +// System.err.println("Oh oh ..." + result.cause());
  30 +// }
  31 +// });
  32 +// });
  33 +// });
  34 +
  35 + client.get(new RequestOptions().setHost("aweme.snssdk.com").setURI("/aweme/v1/playwm/?s_vid=93f1b41336a8b7a442dbf1c29c6bbc560cdca46fc197329a17cb02eef09b72493338e49045f75c3a6cd886d97de228c6e6a1f93d3b9a63a26a63e40654c6655e&line=0").addHeader("user-agent", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.149 Safari/537.36"), response -> {
  36 + for (Map.Entry<String, String> header : response.headers()) {
  37 + System.out.println(header);
  38 + }
  39 + System.out.println("Received response with status code " + response.statusCode());
  40 + response.bodyHandler(x -> {
  41 + vertx.fileSystem().writeFile("target/classes/a.mp4", Buffer.buffer(x.getBytes()), result -> {
  42 + if (result.succeeded()) {
  43 + System.out.println("File written");
  44 + } else {
  45 + System.err.println("Oh oh ..." + result.cause());
  46 + }
  47 + });
  48 + });
  49 + })
  50 + .setFollowRedirects(true)
  51 + .end();
  52 +
  53 + try {
  54 + Thread.sleep(10000);
  55 + } catch (InterruptedException e) {
  56 + e.printStackTrace();
  57 + }
  58 +
  59 + }
  60 +
  61 + @Test
  62 + public void TestInsertSql() {
  63 + InsertBuilder ib = new InsertBuilder("douyin_user_info")
  64 + .set("uid", 123)
  65 + .set("short_id", 234234)
  66 + .set("nickname", "jinlong")
  67 + .onDuplicateKeyUpdate("ut", getDateTimeAsString(LocalDateTime.now(), "yyyy-MM-dd HH:mm:ss"));
  68 + String sql = ib.getSql();
  69 + List<Object> values = ib.getValues();
  70 +
  71 + System.out.println(sql);
  72 + System.out.println(values);
  73 + }
  74 +
  75 +}
  1 +package com.aries.crawler;
  2 +
  3 +import io.vertx.core.AbstractVerticle;
  4 +import io.vertx.core.Vertx;
  5 +import io.vertx.core.http.HttpClient;
  6 +import io.vertx.core.http.RequestOptions;
  7 +
  8 +import java.util.regex.Matcher;
  9 +import java.util.regex.Pattern;
  10 +
  11 +public class HttpParserTestVerticle extends AbstractVerticle {
  12 +
  13 + public static void main(String[] args) {
  14 + Vertx vertx = Vertx.vertx();
  15 + vertx.deployVerticle(new HttpParserTestVerticle());
  16 + }
  17 +
  18 + @Override
  19 + public void start() throws Exception {
  20 + HttpClient client = vertx.createHttpClient();
  21 + client.get(new RequestOptions()
  22 +
  23 + .setHost("www.iesdouyin.com")
  24 + .setURI("/share/video/6772821096413580548/?region=CN&mid=6772787703437069070&u_code=19h7agc1k&titleType=title")
  25 + .addHeader("user-agent", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.149 Safari/537.36")
  26 + , response -> {
  27 + response.bodyHandler(body -> {
  28 + var bodyStr = new String(body.getBytes());
  29 + var addrUrl = getPlayAddrUrl(bodyStr);
  30 + System.out.println(addrUrl);
  31 + });
  32 + }).setFollowRedirects(true).end();
  33 + }
  34 +
  35 + public String getPlayAddrUrl(String body) {
  36 + int indexOfPlayAddr = body.indexOf("playAddr: ");
  37 + int indexOfCover = body.indexOf("cover: ");
  38 + String subBody = body.substring(indexOfPlayAddr, indexOfCover);
  39 + Pattern r = Pattern.compile("playAddr: \"(.*)\"");
  40 + Matcher m = r.matcher(subBody);
  41 + if (m.find()) {
  42 + return m.group(1);
  43 + } else {
  44 + return "";
  45 + }
  46 + }
  47 +
  48 +}
  1 +package com.aries.crawler;
  2 +
  3 +import com.aries.crawler.tools.MySqlClientHelper;
  4 +import io.vertx.core.AbstractVerticle;
  5 +import io.vertx.core.Vertx;
  6 +import io.vertx.core.json.JsonArray;
  7 +import io.vertx.core.json.JsonObject;
  8 +import io.vertx.ext.jdbc.JDBCClient;
  9 +import io.vertx.ext.sql.ResultSet;
  10 +
  11 +import java.util.List;
  12 +
  13 +public class JdbcTestVerticle extends AbstractVerticle {
  14 +
  15 + public static void main(String[] args) {
  16 + Vertx vertx = Vertx.vertx();
  17 + vertx.deployVerticle(new JdbcTestVerticle());
  18 + }
  19 +
  20 + @Override
  21 + public void start() throws Exception {
  22 +
  23 + // 获取到数据库连接的客户端
  24 + JDBCClient jdbcClient = MySqlClientHelper.getJDBcClient(vertx);
  25 + String sql = "SELECT * from douyin_crawler_log WHERE id=?";
  26 + // 构造参数
  27 + JsonArray params = new JsonArray().add(139719);
  28 + // 执行查询
  29 + jdbcClient.queryWithParams(sql, params, qryRes -> {
  30 + if (qryRes.succeeded()) {
  31 + // 获取到查询的结果,Vert.x对ResultSet进行了封装
  32 + ResultSet resultSet = qryRes.result();
  33 + // 把ResultSet转为List<JsonObject>形式
  34 + List<JsonObject> rows = resultSet.getRows();
  35 + // 输出结果
  36 + System.out.println(rows);
  37 + } else {
  38 + System.out.println("查询数据库出错!");
  39 + }
  40 + });
  41 +
  42 + }
  43 +
  44 +}
  1 +package com.aries.crawler.sqlbuilder;
  2 +
  3 +import com.aries.crawler.model.douyincrawler.DouyinCrawlerLogModel;
  4 +import org.junit.Assert;
  5 +import org.junit.Test;
  6 +
  7 +public class TestSqlBuilder {
  8 + @Test
  9 + public void sqlSelectTest() {
  10 + String sql = new SelectBuilder()
  11 + .column("*")
  12 + .from("douyin_crawler_log")
  13 + .where("status != " + DouyinCrawlerLogModel.STATUS_ALL_DONE)
  14 + .limit(10L)
  15 + .toString();
  16 +
  17 + Assert.assertEquals(sql, "select * from douyin_crawler_log where status != 3 limit 10 offset 0");
  18 + }
  19 +}
  1 +import json
  2 +
  3 +# The hook function defined below must be named exactly `response`.
  4 +import sys
  5 +
  6 +sys.path.append('/usr/local/lib/python3.8/site-packages')
  7 +sys.path.append('/usr/local/lib/python3.7/site-packages')
  8 +
  9 +import pymysql as pymysql
  10 +
# Module-level connection reused by every call of the hook below.
# Keyword arguments are used because PyMySQL 1.0+ no longer accepts positional ones.
# NOTE(review): credentials are hard-coded; consider reading them from the environment.
db = pymysql.connect(host="localhost", user="root", password="1qaz2wsx", database="douyin_crawler")
  12 +
  13 +
def response(flow):
    """Store every video of a favorite/post list response in ``douyin_crawler_log``.

    Only responses whose request url contains ``/aweme/favorite`` or ``/aweme/post`` are
    handled. Each entry of the payload's ``aweme_list`` is written as one row; when the row
    already exists only ``video_download_addr_url`` is updated. A failing insert is rolled
    back and reported on stdout, and the remaining entries are still processed.

    :param flow: object exposing ``request.url`` and ``response.text``
    """
    # Endpoints of interest, as found with a packet-capture tool.
    url = flow.request.url
    if '/aweme/favorite' not in url and '/aweme/post' not in url:
        return

    payload = json.loads(flow.response.text)
    if not isinstance(payload, dict):
        return
    # A payload without a list (missing key or null) simply has nothing to store.
    aweme_list = payload.get('aweme_list') or []

    sql = """INSERT INTO douyin_crawler_log(aweme_id,aweme_desc,aweme_create_time,author_uid,author_short_id,author_nickname,author_signature,avatar_larger_url,
    author_share_info_qrcode_url,video_cover_url,video_dynamic_cover_url,video_download_addr_url ,video_share_url ,
    video_tag ,video_duration)
    VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)
    on duplicate key update video_download_addr_url = %s"""

    for aweme in aweme_list:
        author = aweme['author']
        video = aweme['video']
        download_addr = video['download_addr']['url_list'][0]

        # Tags are stored as JSON text; "{}" stands for "no tags".
        text_extra = aweme['text_extra']
        if len(text_extra) > 0:
            t_tag = json.dumps(text_extra, ensure_ascii=False)
        else:
            t_tag = "{}"

        values = (aweme['aweme_id'], aweme['desc'], aweme['create_time'],
                  author['uid'], author['short_id'],
                  author['nickname'], author['signature'],
                  author['avatar_larger']['url_list'][0],
                  author['share_info']['share_qrcode_url']['url_list'][0],
                  video['cover']['url_list'][0], video['dynamic_cover']['url_list'][0],
                  download_addr, aweme['share_info']['share_url'],
                  t_tag,
                  aweme['duration'],
                  # Bound a second time for the "on duplicate key update" clause.
                  download_addr)

        try:
            # The context manager closes the cursor even when execute() raises.
            with db.cursor() as cursor:
                cursor.execute(sql, values)
            db.commit()
            print("here is succ")
        except Exception:
            # Roll back the failed statement and keep going with the next entry.
            db.rollback()
            print("here is err:", sys.exc_info())

    # The connection is deliberately left open: it is created once at module level and
    # reused by every call of this hook.
  1 +import pymysql
  2 +import sys
  3 +
  4 +
  5 +def main():
  6 + print("start-----")
  7 + db = pymysql.connect("localhost", "root", "1qaz2wsx", "douyin_crawler")
  8 + sql = "INSERT INTO douyin_crawler_log(aweme_id) values(123)"
  9 + cursor = db.cursor()
  10 + try:
  11 + cursor.execute(sql)
  12 + db.commit()
  13 + print("commit-----")
  14 + except:
  15 + # 如果发生错误则回滚
  16 + db.rollback()
  17 + print("rollback-----")
  18 + print(sys.exc_info())
  19 +
  20 + # 关闭数据库连接
  21 + db.close()
  22 +
  23 +
  24 +if __name__ == '__main__':
  25 + main()
  1 +.idea/
  2 +.idea/*
  3 +*.iml
  4 +target/
  5 +*.class
  6 +.project
  7 +.settings/
  8 +.settings/*
  1 +# netty-proxy-server
  2 +基于Netty实现的代理服务器,Web Proxy Server(普通Web代理和SSL隧道代理),Socks5 Proxy Server和混合模式(同时支持以上两种,自动选择)
  3 +
  4 +
  5 +虽然是个玩具,但麻雀虽小五脏俱全,基本的都有。不管是用来做 netty 的学习,还是代理协议的学习都是不错的参考资料
  1 +<?xml version="1.0" encoding="UTF-8"?>
  2 +<project xmlns="http://maven.apache.org/POM/4.0.0"
  3 + xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
  4 + xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  5 + <parent>
  6 + <artifactId>netty-proxy-server</artifactId>
  7 + <groupId>cc.leevi.common</groupId>
  8 + <version>1.0-SNAPSHOT</version>
  9 + </parent>
  10 + <modelVersion>4.0.0</modelVersion>
  11 +
  12 + <artifactId>http-proxy</artifactId>
  13 + <build>
  14 + <plugins>
  15 + <plugin>
  16 + <groupId>org.apache.maven.plugins</groupId>
  17 + <artifactId>maven-compiler-plugin</artifactId>
  18 + <configuration>
  19 + <source>7</source>
  20 + <target>7</target>
  21 + </configuration>
  22 + </plugin>
  23 + </plugins>
  24 + </build>
  25 +
  26 + <dependencies>
  27 + <dependency>
  28 + <groupId>io.netty</groupId>
  29 + <artifactId>netty-all</artifactId>
  30 + </dependency>
  31 + <dependency>
  32 + <groupId>org.apache.commons</groupId>
  33 + <artifactId>commons-lang3</artifactId>
  34 + </dependency>
  35 + <dependency>
  36 + <groupId>org.slf4j</groupId>
  37 + <artifactId>slf4j-api</artifactId>
  38 + </dependency>
  39 + <dependency>
  40 + <groupId>junit</groupId>
  41 + <artifactId>junit</artifactId>
  42 + <version>4.13.1</version>
  43 + <scope>test</scope>
  44 + </dependency>
  45 + <dependency>
  46 + <groupId>org.apache.logging.log4j</groupId>
  47 + <artifactId>log4j-core</artifactId>
  48 + <version>2.14.0</version>
  49 + <scope>test</scope>
  50 + </dependency>
  51 + <dependency>
  52 + <groupId>org.apache.logging.log4j</groupId>
  53 + <artifactId>log4j-slf4j-impl</artifactId>
  54 + <version>2.14.0</version>
  55 + <scope>test</scope>
  56 + </dependency>
  57 + <dependency>
  58 + <groupId>com.google.guava</groupId>
  59 + <artifactId>guava</artifactId>
  60 + <version>30.0-jre</version>
  61 + </dependency>
  62 +
  63 + </dependencies>
  64 +
  65 +</project>
  1 +package cc.leevi.common.httpproxy;
  2 +
  3 +import io.netty.channel.Channel;
  4 +import io.netty.channel.ChannelHandlerContext;
  5 +import io.netty.channel.ChannelInboundHandlerAdapter;
  6 +import io.netty.util.concurrent.Promise;
  7 +
  8 +public final class DirectClientHandler extends ChannelInboundHandlerAdapter {
  9 +
  10 + private final Promise<Channel> promise;
  11 +
  12 + public DirectClientHandler(Promise<Channel> promise) {
  13 + this.promise = promise;
  14 + }
  15 +
  16 + @Override
  17 + public void channelActive(ChannelHandlerContext ctx) {
  18 + ctx.pipeline().remove(this);
  19 + promise.setSuccess(ctx.channel());
  20 + }
  21 +
  22 + @Override
  23 + public void exceptionCaught(ChannelHandlerContext ctx, Throwable throwable) {
  24 + promise.setFailure(throwable);
  25 + }
  26 +}
  1 +package cc.leevi.common.httpproxy;
  2 +
  3 +import io.netty.buffer.ByteBuf;
  4 +
  5 +public class HttpProxyRequestHead {
  6 + private String host;
  7 + private int port;
  8 + private String proxyType;//tunnel or web
  9 + private String protocolVersion;
  10 +
  11 + private ByteBuf byteBuf;
  12 +
  13 + public HttpProxyRequestHead(String host, int port, String proxyType, String protocolVersion, ByteBuf byteBuf) {
  14 + this.host = host;
  15 + this.port = port;
  16 + this.proxyType = proxyType;
  17 + this.protocolVersion = protocolVersion;
  18 + this.byteBuf = byteBuf;
  19 + }
  20 +
  21 + public String getHost() {
  22 + return host;
  23 + }
  24 +
  25 + public void setHost(String host) {
  26 + this.host = host;
  27 + }
  28 +
  29 + public int getPort() {
  30 + return port;
  31 + }
  32 +
  33 + public void setPort(int port) {
  34 + this.port = port;
  35 + }
  36 +
  37 + public String getProxyType() {
  38 + return proxyType;
  39 + }
  40 +
  41 + public void setProxyType(String proxyType) {
  42 + this.proxyType = proxyType;
  43 + }
  44 +
  45 + public ByteBuf getByteBuf() {
  46 + return byteBuf;
  47 + }
  48 +
  49 + public void setByteBuf(ByteBuf byteBuf) {
  50 + this.byteBuf = byteBuf;
  51 + }
  52 +
  53 + public String getProtocolVersion() {
  54 + return protocolVersion;
  55 + }
  56 +
  57 + public void setProtocolVersion(String protocolVersion) {
  58 + this.protocolVersion = protocolVersion;
  59 + }
  60 +}
  1 +package cc.leevi.common.httpproxy;
  2 +
  3 +import io.netty.channel.Channel;
  4 +import io.netty.channel.ChannelInitializer;
  5 +import io.netty.channel.ChannelPipeline;
  6 +import io.netty.handler.logging.LoggingHandler;
  7 +
  8 +public class HttpProxyServerInitializer extends ChannelInitializer {
  9 +
  10 + protected void initChannel(Channel channel) throws Exception {
  11 + ChannelPipeline p = channel.pipeline();
  12 + p.addLast(new LoggingHandler());
  13 + p.addLast(new HttpServerHeadDecoder());
  14 + }
  15 +}
  1 +package cc.leevi.common.httpproxy;
  2 +
  3 +import io.netty.bootstrap.ServerBootstrap;
  4 +import io.netty.channel.*;
  5 +import io.netty.channel.nio.NioEventLoopGroup;
  6 +import io.netty.channel.socket.nio.NioServerSocketChannel;
  7 +import org.slf4j.Logger;
  8 +import org.slf4j.LoggerFactory;
  9 +
  10 +public class HttpServer {
  11 +
  12 + private Logger logger = LoggerFactory.getLogger(HttpServer.class);
  13 +
  14 + private ServerBootstrap serverBootstrap;
  15 +
  16 + private EventLoopGroup serverEventLoopGroup;
  17 +
  18 + private Channel acceptorChannel;
  19 +
  20 + public void startServer(){
  21 + logger.info("Proxy Server starting...");
  22 +
  23 + serverEventLoopGroup = new NioEventLoopGroup(4);
  24 +
  25 + serverBootstrap = new ServerBootstrap()
  26 + .channel(NioServerSocketChannel.class)
  27 + .childHandler(new HttpProxyServerInitializer())
  28 + .group(serverEventLoopGroup);
  29 + acceptorChannel = serverBootstrap.bind(17891).syncUninterruptibly().channel();
  30 + }
  31 +
  32 + public void shutdown(){
  33 + logger.info("Proxy Server shutting down...");
  34 + acceptorChannel.close().syncUninterruptibly();
  35 + serverEventLoopGroup.shutdownGracefully().syncUninterruptibly();
  36 + logger.info("shutdown completed!");
  37 + }
  38 +}
  1 +/*
  2 + * Copyright 2012 The Netty Project
  3 + *
  4 + * The Netty Project licenses this file to you under the Apache License,
  5 + * version 2.0 (the "License"); you may not use this file except in compliance
  6 + * with the License. You may obtain a copy of the License at:
  7 + *
  8 + * https://www.apache.org/licenses/LICENSE-2.0
  9 + *
  10 + * Unless required by applicable law or agreed to in writing, software
  11 + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
  12 + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
  13 + * License for the specific language governing permissions and limitations
  14 + * under the License.
  15 + */
  16 +package cc.leevi.common.httpproxy;
  17 +
  18 +import io.netty.bootstrap.Bootstrap;
  19 +import io.netty.buffer.Unpooled;
  20 +import io.netty.channel.Channel;
  21 +import io.netty.channel.ChannelFuture;
  22 +import io.netty.channel.ChannelFutureListener;
  23 +import io.netty.channel.ChannelHandler;
  24 +import io.netty.channel.ChannelHandlerContext;
  25 +import io.netty.channel.ChannelOption;
  26 +import io.netty.channel.SimpleChannelInboundHandler;
  27 +import io.netty.channel.socket.nio.NioSocketChannel;
  28 +import io.netty.util.concurrent.Future;
  29 +import io.netty.util.concurrent.FutureListener;
  30 +import io.netty.util.concurrent.Promise;
  31 +
  32 +@ChannelHandler.Sharable
  33 +public final class HttpServerConnectHandler extends SimpleChannelInboundHandler<HttpProxyRequestHead> {
  34 +
  35 + private final Bootstrap b = new Bootstrap();
  36 +
  37 + @Override
  38 + public void channelRead0(final ChannelHandlerContext ctx, final HttpProxyRequestHead requestHead) throws Exception {
  39 +
  40 + Promise<Channel> promise = ctx.executor().newPromise();
  41 + final Channel inboundChannel = ctx.channel();
  42 + promise.addListener(
  43 + new FutureListener<Channel>() {
  44 + @Override
  45 + public void operationComplete(final Future<Channel> future) throws Exception {
  46 + final Channel outboundChannel = future.getNow();
  47 + if (future.isSuccess()) {
  48 + ChannelFuture responseFuture;
  49 + if("TUNNEL".equals(requestHead.getProxyType())){
  50 + responseFuture = inboundChannel.writeAndFlush(Unpooled.wrappedBuffer((requestHead.getProtocolVersion() + " 200 Connection Established\r\n\r\n").getBytes()));
  51 + }else if("WEB".equals(requestHead.getProxyType())){
  52 + responseFuture = outboundChannel.writeAndFlush(requestHead.getByteBuf());
  53 + }else{
  54 + HttpServerUtils.closeOnFlush(inboundChannel);
  55 + return;
  56 + }
  57 + responseFuture.addListener(new ChannelFutureListener() {
  58 + @Override
  59 + public void operationComplete(ChannelFuture channelFuture) {
  60 + ctx.pipeline().remove(HttpServerConnectHandler.this);
  61 + outboundChannel.pipeline().addLast(new RelayHandler(inboundChannel));
  62 + ctx.pipeline().addLast(new RelayHandler(outboundChannel));
  63 + }
  64 + });
  65 + } else {
  66 + HttpServerUtils.closeOnFlush(inboundChannel);
  67 + }
  68 + }
  69 + });
  70 +
  71 + b.group(inboundChannel.eventLoop())
  72 + .channel(NioSocketChannel.class)
  73 + .option(ChannelOption.CONNECT_TIMEOUT_MILLIS, 10000)
  74 + .option(ChannelOption.SO_KEEPALIVE, true)
  75 + .handler(new DirectClientHandler(promise));
  76 +
  77 + b.connect(requestHead.getHost(), requestHead.getPort()).addListener(new ChannelFutureListener() {
  78 + @Override
  79 + public void operationComplete(ChannelFuture future) throws Exception {
  80 + if (future.isSuccess()) {
  81 + // Connection established use handler provided results
  82 + } else {
  83 + // Close the connection if the connection attempt has failed.
  84 + HttpServerUtils.closeOnFlush(inboundChannel);
  85 + }
  86 + }
  87 + });
  88 + }
  89 +
  90 + @Override
  91 + public void exceptionCaught(ChannelHandlerContext ctx, Throwable cause) throws Exception {
  92 + HttpServerUtils.closeOnFlush(ctx.channel());
  93 + }
  94 +}
  1 +package cc.leevi.common.httpproxy;
  2 +
  3 +import com.google.common.net.HostAndPort;
  4 +import io.netty.buffer.ByteBuf;
  5 +import io.netty.channel.ChannelHandlerContext;
  6 +import io.netty.channel.SimpleChannelInboundHandler;
  7 +import io.netty.handler.codec.http.HttpConstants;
  8 +import io.netty.handler.codec.http.HttpMethod;
  9 +import io.netty.util.ByteProcessor;
  10 +import io.netty.util.internal.AppendableCharSequence;
  11 +
  12 +import java.net.URL;
  13 +
  14 +public class HttpServerHeadDecoder extends SimpleChannelInboundHandler<ByteBuf> {
  15 +
  16 + private HttpServerHeadDecoder.HeadLineByteProcessor headLineByteProcessor = new HttpServerHeadDecoder.HeadLineByteProcessor();
  17 +
  18 + private
  19 +
  20 + class HeadLineByteProcessor implements ByteProcessor{
  21 + private AppendableCharSequence seq;
  22 +
  23 + public HeadLineByteProcessor() {
  24 + this.seq = new AppendableCharSequence(4096);
  25 + }
  26 +
  27 + public AppendableCharSequence parse(ByteBuf buffer) {
  28 + seq.reset();
  29 + int i = buffer.forEachByte(this);
  30 + if (i == -1) {
  31 + return null;
  32 + }
  33 + buffer.readerIndex(i + 1);
  34 + return seq;
  35 + }
  36 +
  37 + @Override
  38 + public boolean process(byte value) throws Exception {
  39 + char nextByte = (char) (value & 0xFF);
  40 + if (nextByte == HttpConstants.LF) {
  41 + int len = seq.length();
  42 + if (len >= 1 && seq.charAtUnsafe(len - 1) == HttpConstants.CR) {
  43 + seq.append(nextByte);
  44 + }
  45 + return false;
  46 + }
  47 + //continue loop byte
  48 + seq.append(nextByte);
  49 + return true;
  50 + }
  51 + }
  52 +
  53 + @Override
  54 + protected void channelRead0(ChannelHandlerContext ctx, ByteBuf in) throws Exception {
  55 + AppendableCharSequence seq = headLineByteProcessor.parse(in);
  56 + if(seq.charAt(seq.length()-1) == HttpConstants.LF){
  57 + HttpProxyRequestHead httpProxyRequestHead;
  58 + String[] splitInitialLine = splitInitialLine(seq);
  59 + String method = splitInitialLine[0];
  60 + String uri = splitInitialLine[1];
  61 + String protocolVersion = splitInitialLine[2];
  62 + String host;
  63 + int port;
  64 + if(HttpMethod.CONNECT.name().equals(method)){
  65 + //https tunnel proxy
  66 + HostAndPort hostAndPort = HostAndPort.fromString(uri);
  67 + host = hostAndPort.getHost();
  68 + port = hostAndPort.getPort();
  69 +
  70 + httpProxyRequestHead = new HttpProxyRequestHead(host, port, "TUNNEL",protocolVersion,null);
  71 + }else{
  72 + //http proxy
  73 + URL url = new URL(uri);
  74 + host = url.getHost();
  75 + port = url.getPort();
  76 + if(port == -1){
  77 + port = 80;
  78 + }
  79 +
  80 + httpProxyRequestHead = new HttpProxyRequestHead(host, port,"WEB",protocolVersion,in.retain().resetReaderIndex());
  81 + }
  82 + ctx.pipeline().addLast(new HttpServerConnectHandler()).remove(this);
  83 + ctx.fireChannelRead(httpProxyRequestHead);
  84 + }
  85 + }
  86 +
  87 + private static String[] splitInitialLine(AppendableCharSequence sb) {
  88 + int aStart;
  89 + int aEnd;
  90 + int bStart;
  91 + int bEnd;
  92 + int cStart;
  93 + int cEnd;
  94 +
  95 + aStart = findNonSPLenient(sb, 0);
  96 + aEnd = findSPLenient(sb, aStart);
  97 +
  98 + bStart = findNonSPLenient(sb, aEnd);
  99 + bEnd = findSPLenient(sb, bStart);
  100 +
  101 + cStart = findNonSPLenient(sb, bEnd);
  102 + cEnd = findEndOfString(sb);
  103 +
  104 + return new String[] {
  105 + sb.subStringUnsafe(aStart, aEnd),
  106 + sb.subStringUnsafe(bStart, bEnd),
  107 + cStart < cEnd? sb.subStringUnsafe(cStart, cEnd) : "" };
  108 + }
  109 +
  110 + private static int findNonSPLenient(AppendableCharSequence sb, int offset) {
  111 + for (int result = offset; result < sb.length(); ++result) {
  112 + char c = sb.charAtUnsafe(result);
  113 + // See https://tools.ietf.org/html/rfc7230#section-3.5
  114 + if (isSPLenient(c)) {
  115 + continue;
  116 + }
  117 + if (Character.isWhitespace(c)) {
  118 + // Any other whitespace delimiter is invalid
  119 + throw new IllegalArgumentException("Invalid separator");
  120 + }
  121 + return result;
  122 + }
  123 + return sb.length();
  124 + }
  125 +
  126 + private static int findSPLenient(AppendableCharSequence sb, int offset) {
  127 + for (int result = offset; result < sb.length(); ++result) {
  128 + if (isSPLenient(sb.charAtUnsafe(result))) {
  129 + return result;
  130 + }
  131 + }
  132 + return sb.length();
  133 + }
  134 +
  135 + private static boolean isSPLenient(char c) {
  136 + // See https://tools.ietf.org/html/rfc7230#section-3.5
  137 + return c == ' ' || c == (char) 0x09 || c == (char) 0x0B || c == (char) 0x0C || c == (char) 0x0D;
  138 + }
  139 +
  140 + private static int findNonWhitespace(AppendableCharSequence sb, int offset, boolean validateOWS) {
  141 + for (int result = offset; result < sb.length(); ++result) {
  142 + char c = sb.charAtUnsafe(result);
  143 + if (!Character.isWhitespace(c)) {
  144 + return result;
  145 + } else if (validateOWS && !isOWS(c)) {
  146 + // Only OWS is supported for whitespace
  147 + throw new IllegalArgumentException("Invalid separator, only a single space or horizontal tab allowed," +
  148 + " but received a '" + c + "'");
  149 + }
  150 + }
  151 + return sb.length();
  152 + }
  153 +
  154 + private static int findEndOfString(AppendableCharSequence sb) {
  155 + for (int result = sb.length() - 1; result > 0; --result) {
  156 + if (!Character.isWhitespace(sb.charAtUnsafe(result))) {
  157 + return result + 1;
  158 + }
  159 + }
  160 + return 0;
  161 + }
  162 +
  163 + private static boolean isOWS(char ch) {
  164 + return ch == ' ' || ch == (char) 0x09;
  165 + }
  166 +
  167 +}
  1 +package cc.leevi.common.httpproxy;
  2 +
  3 +
  4 +import io.netty.buffer.Unpooled;
  5 +import io.netty.channel.Channel;
  6 +import io.netty.channel.ChannelFutureListener;
  7 +
  8 +public final class HttpServerUtils {
  9 +
  10 + /**
  11 + * Closes the specified channel after all queued write requests are flushed.
  12 + */
  13 + public static void closeOnFlush(Channel ch) {
  14 + if (ch.isActive()) {
  15 + ch.writeAndFlush(Unpooled.EMPTY_BUFFER).addListener(ChannelFutureListener.CLOSE);
  16 + }
  17 + }
  18 +
  19 + private HttpServerUtils() { }
  20 +}
  1 +package cc.leevi.common.httpproxy;
  2 +
  3 +import io.netty.buffer.Unpooled;
  4 +import io.netty.channel.Channel;
  5 +import io.netty.channel.ChannelFutureListener;
  6 +import io.netty.channel.ChannelHandlerContext;
  7 +import io.netty.channel.ChannelInboundHandlerAdapter;
  8 +import io.netty.util.ReferenceCountUtil;
  9 +
  10 +public class RelayHandler extends ChannelInboundHandlerAdapter {
  11 + private final Channel relayChannel;
  12 +
  13 + public RelayHandler(Channel relayChannel) {
  14 + this.relayChannel = relayChannel;
  15 + }
  16 +
  17 + @Override
  18 + public void channelActive(ChannelHandlerContext ctx) {
  19 + ctx.writeAndFlush(Unpooled.EMPTY_BUFFER);
  20 + }
  21 +
  22 + @Override
  23 + public void channelRead(ChannelHandlerContext ctx, Object msg) {
  24 + if (relayChannel.isActive()) {
  25 + relayChannel.writeAndFlush(msg);
  26 + } else {
  27 + ReferenceCountUtil.release(msg);
  28 + }
  29 + }
  30 +
  31 + @Override
  32 + public void channelInactive(ChannelHandlerContext ctx) {
  33 + HttpServerUtils.closeOnFlush(ctx.channel());
  34 + }
  35 +
  36 + @Override
  37 + public void exceptionCaught(ChannelHandlerContext ctx, Throwable cause) {
  38 + ctx.close();
  39 + }
  40 +}
  1 +package cc.leevi.common.httpproxy;
  2 +
  3 +import com.google.common.net.HostAndPort;
  4 +import org.junit.Before;
  5 +import org.junit.Test;
  6 +
  7 +import java.io.IOException;
  8 +
  9 +public class HttpProxyClientTest {
  10 +
  11 + @Before
  12 + public void setUp() throws Exception {
  13 + }
  14 +
  15 + @Test
  16 + public void startServer() throws IOException {
  17 + HttpServer httpServer = new HttpServer();
  18 + httpServer.startServer();
  19 + System.in.read();
  20 + }
  21 +
  22 + @Test
  23 + public void parseURI(){
  24 + HostAndPort hostAndPort = HostAndPort.fromString("cdn.segmentfault.com:443");
  25 + System.out.println(hostAndPort.getHost());
  26 + System.out.println(hostAndPort.getPort());
  27 + }
  28 +}
  1 +<?xml version="1.0" encoding="UTF-8"?>
  2 +<Configuration status="error">
  3 + <Appenders>
  4 + <Console name="Console" target="SYSTEM_OUT">
  5 + <PatternLayout pattern="%d{HH:mm:ss.SSS} [%t] %-5level %logger{36} - %msg%n"/>
  6 + </Console>
  7 + </Appenders>
  8 + <Loggers>
  9 + <Root level="TRACE">
  10 + <AppenderRef ref="Console" />
  11 + </Root>
  12 + </Loggers>
  13 +</Configuration>
  1 +<?xml version="1.0" encoding="UTF-8"?>
  2 +
  3 +<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
  4 + xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  5 + <modelVersion>4.0.0</modelVersion>
  6 +
  7 +
  8 + <parent>
  9 + <artifactId>netty-proxy-server</artifactId>
  10 + <groupId>cc.leevi.common</groupId>
  11 + <version>1.0-SNAPSHOT</version>
  12 + </parent>
  13 +
  14 + <artifactId>mixin-proxy</artifactId>
  15 +
  16 + <name>socks5-proxy</name>
  17 +
  18 + <properties>
  19 + <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
  20 + <maven.compiler.source>1.7</maven.compiler.source>
  21 + <maven.compiler.target>1.7</maven.compiler.target>
  22 + </properties>
  23 +
  24 + <dependencies>
  25 + <dependency>
  26 + <groupId>io.netty</groupId>
  27 + <artifactId>netty-all</artifactId>
  28 + </dependency>
  29 + <dependency>
  30 + <groupId>org.apache.commons</groupId>
  31 + <artifactId>commons-lang3</artifactId>
  32 + </dependency>
  33 + <dependency>
  34 + <groupId>org.slf4j</groupId>
  35 + <artifactId>slf4j-api</artifactId>
  36 + </dependency>
  37 + <dependency>
  38 + <groupId>junit</groupId>
  39 + <artifactId>junit</artifactId>
  40 + <version>4.13.1</version>
  41 + <scope>test</scope>
  42 + </dependency>
  43 + <dependency>
  44 + <groupId>org.apache.logging.log4j</groupId>
  45 + <artifactId>log4j-core</artifactId>
  46 + <version>2.14.0</version>
  47 + <scope>test</scope>
  48 + </dependency>
  49 + <dependency>
  50 + <groupId>org.apache.logging.log4j</groupId>
  51 + <artifactId>log4j-slf4j-impl</artifactId>
  52 + <version>2.14.0</version>
  53 + <scope>test</scope>
  54 + </dependency>
  55 + <dependency>
  56 + <groupId>com.google.guava</groupId>
  57 + <artifactId>guava</artifactId>
  58 + <version>30.0-jre</version>
  59 + </dependency>
  60 + </dependencies>
  61 +
  62 +</project>
  1 +package cc.leevi.common.socks5proxy;
  2 +
  3 +import io.netty.channel.Channel;
  4 +import io.netty.channel.ChannelHandlerContext;
  5 +import io.netty.channel.ChannelInboundHandlerAdapter;
  6 +import io.netty.util.concurrent.Promise;
  7 +
  8 +public final class DirectClientHandler extends ChannelInboundHandlerAdapter {
  9 +
  10 + private final Promise<Channel> promise;
  11 +
  12 + public DirectClientHandler(Promise<Channel> promise) {
  13 + this.promise = promise;
  14 + }
  15 +
  16 + @Override
  17 + public void channelActive(ChannelHandlerContext ctx) {
  18 + ctx.pipeline().remove(this);
  19 + promise.setSuccess(ctx.channel());
  20 + }
  21 +
  22 + @Override
  23 + public void exceptionCaught(ChannelHandlerContext ctx, Throwable throwable) {
  24 + promise.setFailure(throwable);
  25 + }
  26 +}
  1 +package cc.leevi.common.socks5proxy;
  2 +
  3 +import io.netty.buffer.ByteBuf;
  4 +
  5 +public class HttpProxyRequestHead {
  6 + private String host;
  7 + private int port;
  8 + private String proxyType;//tunnel or web
  9 + private String protocolVersion;
  10 +
  11 + private ByteBuf byteBuf;
  12 +
  13 + public HttpProxyRequestHead(String host, int port, String proxyType, String protocolVersion, ByteBuf byteBuf) {
  14 + this.host = host;
  15 + this.port = port;
  16 + this.proxyType = proxyType;
  17 + this.protocolVersion = protocolVersion;
  18 + this.byteBuf = byteBuf;
  19 + }
  20 +
  21 + public String getHost() {
  22 + return host;
  23 + }
  24 +
  25 + public void setHost(String host) {
  26 + this.host = host;
  27 + }
  28 +
  29 + public int getPort() {
  30 + return port;
  31 + }
  32 +
  33 + public void setPort(int port) {
  34 + this.port = port;
  35 + }
  36 +
  37 + public String getProxyType() {
  38 + return proxyType;
  39 + }
  40 +
  41 + public void setProxyType(String proxyType) {
  42 + this.proxyType = proxyType;
  43 + }
  44 +
  45 + public ByteBuf getByteBuf() {
  46 + return byteBuf;
  47 + }
  48 +
  49 + public void setByteBuf(ByteBuf byteBuf) {
  50 + this.byteBuf = byteBuf;
  51 + }
  52 +
  53 + public String getProtocolVersion() {
  54 + return protocolVersion;
  55 + }
  56 +
  57 + public void setProtocolVersion(String protocolVersion) {
  58 + this.protocolVersion = protocolVersion;
  59 + }
  60 +}
  1 +/*
  2 + * Copyright 2012 The Netty Project
  3 + *
  4 + * The Netty Project licenses this file to you under the Apache License,
  5 + * version 2.0 (the "License"); you may not use this file except in compliance
  6 + * with the License. You may obtain a copy of the License at:
  7 + *
  8 + * https://www.apache.org/licenses/LICENSE-2.0
  9 + *
  10 + * Unless required by applicable law or agreed to in writing, software
  11 + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
  12 + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
  13 + * License for the specific language governing permissions and limitations
  14 + * under the License.
  15 + */
  16 +package cc.leevi.common.socks5proxy;
  17 +
  18 +import io.netty.bootstrap.Bootstrap;
  19 +import io.netty.buffer.Unpooled;
  20 +import io.netty.channel.*;
  21 +import io.netty.channel.socket.nio.NioSocketChannel;
  22 +import io.netty.util.concurrent.Future;
  23 +import io.netty.util.concurrent.FutureListener;
  24 +import io.netty.util.concurrent.Promise;
  25 +
  26 +@ChannelHandler.Sharable
  27 +public final class HttpServerConnectHandler extends SimpleChannelInboundHandler<HttpProxyRequestHead> {
  28 +
  29 + private final Bootstrap b = new Bootstrap();
  30 +
  31 + @Override
  32 + public void channelRead0(final ChannelHandlerContext ctx, final HttpProxyRequestHead requestHead) throws Exception {
  33 +
  34 +
  35 + Promise<Channel> promise = ctx.executor().newPromise();
  36 + final Channel inboundChannel = ctx.channel();
  37 + promise.addListener(
  38 + new FutureListener<Channel>() {
  39 + @Override
  40 + public void operationComplete(final Future<Channel> future) throws Exception {
  41 + final Channel outboundChannel = future.getNow();
  42 + if (future.isSuccess()) {
  43 + ChannelFuture responseFuture;
  44 + if("TUNNEL".equals(requestHead.getProxyType())){
  45 + responseFuture = inboundChannel.writeAndFlush(Unpooled.wrappedBuffer((requestHead.getProtocolVersion() + " 200 Connection Established\r\n\r\n").getBytes()));
  46 + }else if("WEB".equals(requestHead.getProxyType())){
  47 + responseFuture = outboundChannel.writeAndFlush(requestHead.getByteBuf());
  48 + }else{
  49 + MixinServerUtils.closeOnFlush(inboundChannel);
  50 + return;
  51 + }
  52 + responseFuture.addListener(new ChannelFutureListener() {
  53 + @Override
  54 + public void operationComplete(ChannelFuture channelFuture) {
  55 + ctx.pipeline().remove(HttpServerConnectHandler.this);
  56 + outboundChannel.pipeline().addLast(new RelayHandler(inboundChannel));
  57 + ctx.pipeline().addLast(new RelayHandler(outboundChannel));
  58 + }
  59 + });
  60 + } else {
  61 + MixinServerUtils.closeOnFlush(inboundChannel);
  62 + }
  63 + }
  64 + });
  65 +
  66 + b.group(inboundChannel.eventLoop())
  67 + .channel(NioSocketChannel.class)
  68 + .option(ChannelOption.CONNECT_TIMEOUT_MILLIS, 10000)
  69 + .option(ChannelOption.SO_KEEPALIVE, true)
  70 + .handler(new DirectClientHandler(promise));
  71 +
  72 + b.connect(requestHead.getHost(), requestHead.getPort()).addListener(new ChannelFutureListener() {
  73 + @Override
  74 + public void operationComplete(ChannelFuture future) throws Exception {
  75 + if (future.isSuccess()) {
  76 + // Connection established use handler provided results
  77 + } else {
  78 + // Close the connection if the connection attempt has failed.
  79 + MixinServerUtils.closeOnFlush(inboundChannel);
  80 + }
  81 + }
  82 + });
  83 + }
  84 +
  85 + @Override
  86 + public void exceptionCaught(ChannelHandlerContext ctx, Throwable cause) throws Exception {
  87 + MixinServerUtils.closeOnFlush(ctx.channel());
  88 + }
  89 +}
  1 +package cc.leevi.common.socks5proxy;
  2 +
  3 +import com.google.common.net.HostAndPort;
  4 +import io.netty.buffer.ByteBuf;
  5 +import io.netty.channel.ChannelHandlerContext;
  6 +import io.netty.channel.SimpleChannelInboundHandler;
  7 +import io.netty.handler.codec.http.HttpConstants;
  8 +import io.netty.handler.codec.http.HttpMethod;
  9 +import io.netty.util.ByteProcessor;
  10 +import io.netty.util.internal.AppendableCharSequence;
  11 +
  12 +import java.net.URL;
  13 +
  14 +public class HttpServerHeadDecoder extends SimpleChannelInboundHandler<ByteBuf> {
  15 +
  16 + private HeadLineByteProcessor headLineByteProcessor = new HeadLineByteProcessor();
  17 +
  18 + private
  19 +
  20 + class HeadLineByteProcessor implements ByteProcessor{
  21 + private AppendableCharSequence seq;
  22 +
  23 + public HeadLineByteProcessor() {
  24 + this.seq = new AppendableCharSequence(4096);
  25 + }
  26 +
  27 + public AppendableCharSequence parse(ByteBuf buffer) {
  28 + seq.reset();
  29 + int i = buffer.forEachByte(this);
  30 + if (i == -1) {
  31 + return null;
  32 + }
  33 + buffer.readerIndex(i + 1);
  34 + return seq;
  35 + }
  36 +
  37 + @Override
  38 + public boolean process(byte value) throws Exception {
  39 + char nextByte = (char) (value & 0xFF);
  40 + if (nextByte == HttpConstants.LF) {
  41 + int len = seq.length();
  42 + if (len >= 1 && seq.charAtUnsafe(len - 1) == HttpConstants.CR) {
  43 + seq.append(nextByte);
  44 + }
  45 + return false;
  46 + }
  47 + //continue loop byte
  48 + seq.append(nextByte);
  49 + return true;
  50 + }
  51 + }
  52 +
  53 + @Override
  54 + protected void channelRead0(ChannelHandlerContext ctx, ByteBuf in) throws Exception {
  55 + AppendableCharSequence seq = headLineByteProcessor.parse(in);
  56 + if(seq.charAt(seq.length()-1) == HttpConstants.LF){
  57 + HttpProxyRequestHead httpProxyRequestHead;
  58 + String[] splitInitialLine = splitInitialLine(seq);
  59 + String method = splitInitialLine[0];
  60 + String uri = splitInitialLine[1];
  61 + String protocolVersion = splitInitialLine[2];
  62 + String host;
  63 + int port;
  64 + if(HttpMethod.CONNECT.name().equals(method)){
  65 + //https tunnel proxy
  66 + HostAndPort hostAndPort = HostAndPort.fromString(uri);
  67 + host = hostAndPort.getHost();
  68 + port = hostAndPort.getPort();
  69 +
  70 + httpProxyRequestHead = new HttpProxyRequestHead(host, port, "TUNNEL",protocolVersion,null);
  71 + }else{
  72 + //http proxy
  73 + URL url = new URL(uri);
  74 + host = url.getHost();
  75 + port = url.getPort();
  76 + if(port == -1){
  77 + port = 80;
  78 + }
  79 +
  80 + httpProxyRequestHead = new HttpProxyRequestHead(host, port, protocolVersion,"WEB",in.resetReaderIndex());
  81 + }
  82 + ctx.pipeline().addLast(new HttpServerConnectHandler()).remove(this);
  83 + ctx.fireChannelRead(httpProxyRequestHead);
  84 + }
  85 + }
  86 +
  87 + private static String[] splitInitialLine(AppendableCharSequence sb) {
  88 + int aStart;
  89 + int aEnd;
  90 + int bStart;
  91 + int bEnd;
  92 + int cStart;
  93 + int cEnd;
  94 +
  95 + aStart = findNonSPLenient(sb, 0);
  96 + aEnd = findSPLenient(sb, aStart);
  97 +
  98 + bStart = findNonSPLenient(sb, aEnd);
  99 + bEnd = findSPLenient(sb, bStart);
  100 +
  101 + cStart = findNonSPLenient(sb, bEnd);
  102 + cEnd = findEndOfString(sb);
  103 +
  104 + return new String[] {
  105 + sb.subStringUnsafe(aStart, aEnd),
  106 + sb.subStringUnsafe(bStart, bEnd),
  107 + cStart < cEnd? sb.subStringUnsafe(cStart, cEnd) : "" };
  108 + }
  109 +
  110 + private static int findNonSPLenient(AppendableCharSequence sb, int offset) {
  111 + for (int result = offset; result < sb.length(); ++result) {
  112 + char c = sb.charAtUnsafe(result);
  113 + // See https://tools.ietf.org/html/rfc7230#section-3.5
  114 + if (isSPLenient(c)) {
  115 + continue;
  116 + }
  117 + if (Character.isWhitespace(c)) {
  118 + // Any other whitespace delimiter is invalid
  119 + throw new IllegalArgumentException("Invalid separator");
  120 + }
  121 + return result;
  122 + }
  123 + return sb.length();
  124 + }
  125 +
  126 + private static int findSPLenient(AppendableCharSequence sb, int offset) {
  127 + for (int result = offset; result < sb.length(); ++result) {
  128 + if (isSPLenient(sb.charAtUnsafe(result))) {
  129 + return result;
  130 + }
  131 + }
  132 + return sb.length();
  133 + }
  134 +
  135 + private static boolean isSPLenient(char c) {
  136 + // See https://tools.ietf.org/html/rfc7230#section-3.5
  137 + return c == ' ' || c == (char) 0x09 || c == (char) 0x0B || c == (char) 0x0C || c == (char) 0x0D;
  138 + }
  139 +
  140 + private static int findNonWhitespace(AppendableCharSequence sb, int offset, boolean validateOWS) {
  141 + for (int result = offset; result < sb.length(); ++result) {
  142 + char c = sb.charAtUnsafe(result);
  143 + if (!Character.isWhitespace(c)) {
  144 + return result;
  145 + } else if (validateOWS && !isOWS(c)) {
  146 + // Only OWS is supported for whitespace
  147 + throw new IllegalArgumentException("Invalid separator, only a single space or horizontal tab allowed," +
  148 + " but received a '" + c + "'");
  149 + }
  150 + }
  151 + return sb.length();
  152 + }
  153 +
  154 + private static int findEndOfString(AppendableCharSequence sb) {
  155 + for (int result = sb.length() - 1; result > 0; --result) {
  156 + if (!Character.isWhitespace(sb.charAtUnsafe(result))) {
  157 + return result + 1;
  158 + }
  159 + }
  160 + return 0;
  161 + }
  162 +
  163 + private static boolean isOWS(char ch) {
  164 + return ch == ' ' || ch == (char) 0x09;
  165 + }
  166 +
  167 +}
  1 +package cc.leevi.common.socks5proxy;
  2 +
  3 +import io.netty.bootstrap.ServerBootstrap;
  4 +import io.netty.channel.Channel;
  5 +import io.netty.channel.EventLoopGroup;
  6 +import io.netty.channel.nio.NioEventLoopGroup;
  7 +import io.netty.channel.socket.nio.NioServerSocketChannel;
  8 +import org.slf4j.Logger;
  9 +import org.slf4j.LoggerFactory;
  10 +
  11 +public class MixinProxyServer {
  12 + private Logger logger = LoggerFactory.getLogger(MixinProxyServer.class);
  13 +
  14 + private ServerBootstrap serverBootstrap;
  15 +
  16 + private EventLoopGroup serverEventLoopGroup;
  17 +
  18 + private Channel acceptorChannel;
  19 +
  20 + public void startServer(){
  21 + logger.info("Proxy Server starting...");
  22 +
  23 + serverEventLoopGroup = new NioEventLoopGroup(4);
  24 +
  25 + serverBootstrap = new ServerBootstrap()
  26 + .channel(NioServerSocketChannel.class)
  27 + .childHandler(new MixinServerInitializer())
  28 + .group(serverEventLoopGroup);
  29 + acceptorChannel = serverBootstrap.bind(8065).syncUninterruptibly().channel();
  30 + }
  31 +
  32 + public void shutdown(){
  33 + logger.info("Proxy Server shutting down...");
  34 + acceptorChannel.close().syncUninterruptibly();
  35 + serverEventLoopGroup.shutdownGracefully().syncUninterruptibly();
  36 + logger.info("shutdown completed!");
  37 + }
  38 +}
  1 +package cc.leevi.common.socks5proxy;
  2 +
  3 +import io.netty.buffer.ByteBuf;
  4 +import io.netty.channel.ChannelHandlerContext;
  5 +import io.netty.channel.ChannelPipeline;
  6 +import io.netty.channel.SimpleChannelInboundHandler;
  7 +import io.netty.handler.codec.socksx.SocksPortUnificationServerHandler;
  8 +import io.netty.handler.codec.socksx.SocksVersion;
  9 +
  10 +public class MixinSelectHandler extends SimpleChannelInboundHandler<ByteBuf> {
  11 +
  12 + @Override
  13 + protected void channelRead0(ChannelHandlerContext ctx, ByteBuf msg) {
  14 + final int readerIndex = msg.readerIndex();
  15 + if (msg.writerIndex() == readerIndex) {
  16 + return;
  17 + }
  18 +
  19 + ChannelPipeline p = ctx.pipeline();
  20 + final byte versionVal = msg.getByte(readerIndex);
  21 +
  22 + SocksVersion version = SocksVersion.valueOf(versionVal);
  23 + if(version.equals(SocksVersion.SOCKS4a) || version.equals(SocksVersion.SOCKS5)){
  24 + //socks proxy
  25 + p.addLast(new SocksPortUnificationServerHandler(),
  26 + SocksServerHandler.INSTANCE).remove(this);
  27 + }else{
  28 + //http/tunnel proxy
  29 + p.addLast(new HttpServerHeadDecoder()).remove(this);
  30 + }
  31 + msg.retain();
  32 + ctx.fireChannelRead(msg);
  33 + }
  34 +}
  1 +package cc.leevi.common.socks5proxy;
  2 +
  3 +import io.netty.channel.ChannelInitializer;
  4 +import io.netty.channel.socket.SocketChannel;
  5 +import io.netty.handler.codec.socksx.SocksPortUnificationServerHandler;
  6 +import io.netty.handler.logging.LogLevel;
  7 +import io.netty.handler.logging.LoggingHandler;
  8 +
  9 +public final class MixinServerInitializer extends ChannelInitializer<SocketChannel> {
  10 + @Override
  11 + public void initChannel(SocketChannel ch) throws Exception {
  12 + ch.pipeline().addLast(
  13 + new LoggingHandler(LogLevel.DEBUG),
  14 + new MixinSelectHandler());
  15 + }
  16 +}
  1 +package cc.leevi.common.socks5proxy;
  2 +
  3 +
  4 +import io.netty.buffer.Unpooled;
  5 +import io.netty.channel.Channel;
  6 +import io.netty.channel.ChannelFutureListener;
  7 +
  8 +public final class MixinServerUtils {
  9 +
  10 + /**
  11 + * Closes the specified channel after all queued write requests are flushed.
  12 + */
  13 + public static void closeOnFlush(Channel ch) {
  14 + if (ch.isActive()) {
  15 + ch.writeAndFlush(Unpooled.EMPTY_BUFFER).addListener(ChannelFutureListener.CLOSE);
  16 + }
  17 + }
  18 +
  19 + private MixinServerUtils() { }
  20 +}
  1 +package cc.leevi.common.socks5proxy;
  2 +
  3 +import io.netty.buffer.Unpooled;
  4 +import io.netty.channel.Channel;
  5 +import io.netty.channel.ChannelFutureListener;
  6 +import io.netty.channel.ChannelHandlerContext;
  7 +import io.netty.channel.ChannelInboundHandlerAdapter;
  8 +import io.netty.util.ReferenceCountUtil;
  9 +
  10 +public class RelayHandler extends ChannelInboundHandlerAdapter {
  11 + private final Channel relayChannel;
  12 +
  13 + public RelayHandler(Channel relayChannel) {
  14 + this.relayChannel = relayChannel;
  15 + }
  16 +
  17 + @Override
  18 + public void channelActive(ChannelHandlerContext ctx) {
  19 + ctx.writeAndFlush(Unpooled.EMPTY_BUFFER);
  20 + }
  21 +
  22 + @Override
  23 + public void channelRead(ChannelHandlerContext ctx, Object msg) {
  24 + if (relayChannel.isActive()) {
  25 + relayChannel.writeAndFlush(msg);
  26 + } else {
  27 + ReferenceCountUtil.release(msg);
  28 + }
  29 + }
  30 +
  31 + @Override
  32 + public void channelInactive(ChannelHandlerContext ctx) {
  33 + if (relayChannel.isActive()) {
  34 + relayChannel.writeAndFlush(Unpooled.EMPTY_BUFFER).addListener(ChannelFutureListener.CLOSE);
  35 + }
  36 + }
  37 +
  38 + @Override
  39 + public void exceptionCaught(ChannelHandlerContext ctx, Throwable cause) {
  40 + cause.printStackTrace();
  41 + ctx.close();
  42 + }
  43 +}
  1 +/*
  2 + * Copyright 2012 The Netty Project
  3 + *
  4 + * The Netty Project licenses this file to you under the Apache License,
  5 + * version 2.0 (the "License"); you may not use this file except in compliance
  6 + * with the License. You may obtain a copy of the License at:
  7 + *
  8 + * https://www.apache.org/licenses/LICENSE-2.0
  9 + *
  10 + * Unless required by applicable law or agreed to in writing, software
  11 + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
  12 + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
  13 + * License for the specific language governing permissions and limitations
  14 + * under the License.
  15 + */
  16 +package cc.leevi.common.socks5proxy;
  17 +
  18 +import io.netty.bootstrap.Bootstrap;
  19 +import io.netty.channel.*;
  20 +import io.netty.channel.socket.nio.NioSocketChannel;
  21 +import io.netty.handler.codec.socksx.SocksMessage;
  22 +import io.netty.handler.codec.socksx.v5.DefaultSocks5CommandResponse;
  23 +import io.netty.handler.codec.socksx.v5.Socks5CommandRequest;
  24 +import io.netty.handler.codec.socksx.v5.Socks5CommandStatus;
  25 +import io.netty.util.concurrent.Future;
  26 +import io.netty.util.concurrent.FutureListener;
  27 +import io.netty.util.concurrent.Promise;
  28 +
  29 +@ChannelHandler.Sharable
  30 +public final class SocksServerConnectHandler extends SimpleChannelInboundHandler<SocksMessage> {
  31 +
  32 + private final Bootstrap b = new Bootstrap();
  33 +
  34 + @Override
  35 + public void channelRead0(final ChannelHandlerContext ctx, final SocksMessage message) throws Exception {
  36 + final Socks5CommandRequest request = (Socks5CommandRequest) message;
  37 +
  38 + Promise<Channel> promise = ctx.executor().newPromise();
  39 + promise.addListener(
  40 + new FutureListener<Channel>() {
  41 + @Override
  42 + public void operationComplete(final Future<Channel> future) throws Exception {
  43 + final Channel outboundChannel = future.getNow();
  44 + if (future.isSuccess()) {
  45 + ChannelFuture responseFuture =
  46 + ctx.channel().writeAndFlush(new DefaultSocks5CommandResponse(
  47 + Socks5CommandStatus.SUCCESS,
  48 + request.dstAddrType(),
  49 + request.dstAddr(),
  50 + request.dstPort()));
  51 +
  52 + responseFuture.addListener(new ChannelFutureListener() {
  53 + @Override
  54 + public void operationComplete(ChannelFuture channelFuture) {
  55 + ctx.pipeline().remove(SocksServerConnectHandler.this);
  56 + outboundChannel.pipeline().addLast(new RelayHandler(ctx.channel()));
  57 + ctx.pipeline().addLast(new RelayHandler(outboundChannel));
  58 + }
  59 + });
  60 + } else {
  61 + ctx.channel().writeAndFlush(new DefaultSocks5CommandResponse(
  62 + Socks5CommandStatus.FAILURE, request.dstAddrType()));
  63 + MixinServerUtils.closeOnFlush(ctx.channel());
  64 + }
  65 + }
  66 + });
  67 +
  68 + final Channel inboundChannel = ctx.channel();
  69 + b.group(inboundChannel.eventLoop())
  70 + .channel(NioSocketChannel.class)
  71 + .option(ChannelOption.CONNECT_TIMEOUT_MILLIS, 10000)
  72 + .option(ChannelOption.SO_KEEPALIVE, true)
  73 + .handler(new DirectClientHandler(promise));
  74 +
  75 + b.connect(request.dstAddr(), request.dstPort()).addListener(new ChannelFutureListener() {
  76 + @Override
  77 + public void operationComplete(ChannelFuture future) throws Exception {
  78 + if (future.isSuccess()) {
  79 + // Connection established use handler provided results
  80 + } else {
  81 + // Close the connection if the connection attempt has failed.
  82 + ctx.channel().writeAndFlush(
  83 + new DefaultSocks5CommandResponse(Socks5CommandStatus.FAILURE, request.dstAddrType()));
  84 + MixinServerUtils.closeOnFlush(ctx.channel());
  85 + }
  86 + }
  87 + });
  88 + }
  89 +
  90 + @Override
  91 + public void exceptionCaught(ChannelHandlerContext ctx, Throwable cause) throws Exception {
  92 + MixinServerUtils.closeOnFlush(ctx.channel());
  93 + }
  94 +}
  1 +/*
  2 + * Copyright 2012 The Netty Project
  3 + *
  4 + * The Netty Project licenses this file to you under the Apache License,
  5 + * version 2.0 (the "License"); you may not use this file except in compliance
  6 + * with the License. You may obtain a copy of the License at:
  7 + *
  8 + * https://www.apache.org/licenses/LICENSE-2.0
  9 + *
  10 + * Unless required by applicable law or agreed to in writing, software
  11 + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
  12 + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
  13 + * License for the specific language governing permissions and limitations
  14 + * under the License.
  15 + */
  16 +package cc.leevi.common.socks5proxy;
  17 +
  18 +import io.netty.buffer.Unpooled;
  19 +import io.netty.channel.ChannelHandler;
  20 +import io.netty.channel.ChannelHandlerContext;
  21 +import io.netty.channel.SimpleChannelInboundHandler;
  22 +import io.netty.handler.codec.socksx.SocksMessage;
  23 +import io.netty.handler.codec.socksx.SocksVersion;
  24 +import io.netty.handler.codec.socksx.v5.*;
  25 +import org.slf4j.Logger;
  26 +import org.slf4j.LoggerFactory;
  27 +
  28 +@ChannelHandler.Sharable
  29 +public final class SocksServerHandler extends SimpleChannelInboundHandler<SocksMessage> {
  30 +
  31 + private Logger logger = LoggerFactory.getLogger(SocksServerHandler.class);
  32 +
  33 + public static final SocksServerHandler INSTANCE = new SocksServerHandler();
  34 +
  35 + private SocksServerHandler() { }
  36 +
  37 + @Override
  38 + public void channelRead0(ChannelHandlerContext ctx, SocksMessage socksRequest) throws Exception {
  39 + if(!socksRequest.version().equals(SocksVersion.SOCKS5)){
  40 + logger.error("only supports socks5 protocol!");
  41 + ctx.writeAndFlush(Unpooled.wrappedBuffer("protocol version illegal!".getBytes()));
  42 + return ;
  43 + }
  44 + if (socksRequest instanceof Socks5InitialRequest) {
  45 + ctx.pipeline().addFirst(new Socks5CommandRequestDecoder());
  46 + ctx.write(new DefaultSocks5InitialResponse(Socks5AuthMethod.NO_AUTH));
  47 + //如果需要密码,这里可以换成
  48 +// ctx.write(new DefaultSocks5InitialResponse(Socks5AuthMethod.PASSWORD));
  49 + } else if (socksRequest instanceof Socks5PasswordAuthRequest) {
  50 + //如果需要密码,这里需要验证密码
  51 + ctx.pipeline().addFirst(new Socks5CommandRequestDecoder());
  52 + ctx.write(new DefaultSocks5PasswordAuthResponse(Socks5PasswordAuthStatus.SUCCESS));
  53 + } else if (socksRequest instanceof Socks5CommandRequest) {
  54 + Socks5CommandRequest socks5CmdRequest = (Socks5CommandRequest) socksRequest;
  55 + if (socks5CmdRequest.type() == Socks5CommandType.CONNECT) {
  56 + ctx.pipeline().addLast(new SocksServerConnectHandler());
  57 + ctx.pipeline().remove(this);
  58 + ctx.fireChannelRead(socksRequest);
  59 + } else {
  60 + ctx.close();
  61 + }
  62 + } else {
  63 + ctx.close();
  64 + }
  65 + }
  66 +
  67 + @Override
  68 + public void channelReadComplete(ChannelHandlerContext ctx) {
  69 + ctx.flush();
  70 + }
  71 +
  72 + @Override
  73 + public void exceptionCaught(ChannelHandlerContext ctx, Throwable throwable) {
  74 + logger.error("exceptionCaught",throwable);
  75 + MixinServerUtils.closeOnFlush(ctx.channel());
  76 + }
  77 +}
  1 +package cc.leevi.common.socks5proxy;
  2 +
  3 +import org.junit.Before;
  4 +import org.junit.Test;
  5 +
  6 +import java.io.IOException;
  7 +
  8 +public class MixinProxyServerTest {
  9 +
  10 + @Before
  11 + public void setUp() throws Exception {
  12 + }
  13 +
  14 + @Test
  15 + public void startServer() throws IOException {
  16 + MixinProxyServer mixinProxyServer = new MixinProxyServer();
  17 + mixinProxyServer.startServer();
  18 + System.in.read();
  19 + }
  20 +}
  1 +<?xml version="1.0" encoding="UTF-8"?>
  2 +<Configuration status="error">
  3 + <Appenders>
  4 + <Console name="Console" target="SYSTEM_OUT">
  5 + <PatternLayout pattern="%d{HH:mm:ss.SSS} [%t] %-5level %logger{36} - %msg%n"/>
  6 + </Console>
  7 + </Appenders>
  8 + <Loggers>
  9 + <Root level="TRACE">
  10 + <AppenderRef ref="Console" />
  11 + </Root>
  12 + </Loggers>
  13 +</Configuration>
  1 +<?xml version="1.0" encoding="UTF-8"?>
  2 +<project xmlns="http://maven.apache.org/POM/4.0.0"
  3 + xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
  4 + xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  5 + <modelVersion>4.0.0</modelVersion>
  6 +
  7 + <groupId>cc.leevi.common</groupId>
  8 + <artifactId>netty-proxy-server</artifactId>
  9 + <packaging>pom</packaging>
  10 + <version>1.0-SNAPSHOT</version>
  11 + <modules>
  12 + <module>http-proxy</module>
  13 + <module>socks5-proxy</module>
  14 + <module>mixin-proxy</module>
  15 + </modules>
  16 +
  17 + <dependencyManagement>
  18 + <dependencies>
  19 + <dependency>
  20 + <groupId>io.netty</groupId>
  21 + <artifactId>netty-all</artifactId>
  22 + <version>4.1.54.Final</version>
  23 + </dependency>
  24 + <dependency>
  25 + <groupId>org.apache.commons</groupId>
  26 + <artifactId>commons-lang3</artifactId>
  27 + <version>3.11</version>
  28 + </dependency>
  29 + <dependency>
  30 + <groupId>org.slf4j</groupId>
  31 + <artifactId>slf4j-api</artifactId>
  32 + <version>1.7.30</version>
  33 + </dependency>
  34 + <dependency>
  35 + <groupId>org.apache.logging.log4j</groupId>
  36 + <artifactId>log4j-core</artifactId>
  37 + <version>2.14.0</version>
  38 + </dependency>
  39 + <dependency>
  40 + <groupId>org.apache.logging.log4j</groupId>
  41 + <artifactId>log4j-slf4j-impl</artifactId>
  42 + <version>2.14.0</version>
  43 + <scope>test</scope>
  44 + </dependency>
  45 + <dependency>
  46 + <groupId>junit</groupId>
  47 + <artifactId>junit</artifactId>
  48 + <version>4.13.1</version>
  49 + <scope>test</scope>
  50 + </dependency>
  51 + <dependency>
  52 + <groupId>com.google.guava</groupId>
  53 + <artifactId>guava</artifactId>
  54 + <version>30.0-jre</version>
  55 + </dependency>
  56 +
  57 +
  58 + </dependencies>
  59 + </dependencyManagement>
  60 +
  61 +</project>
  1 +<?xml version="1.0" encoding="UTF-8"?>
  2 +
  3 +<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
  4 + xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  5 + <modelVersion>4.0.0</modelVersion>
  6 +
  7 +
  8 + <parent>
  9 + <artifactId>netty-proxy-server</artifactId>
  10 + <groupId>cc.leevi.common</groupId>
  11 + <version>1.0-SNAPSHOT</version>
  12 + </parent>
  13 +
  14 + <artifactId>socks5-proxy</artifactId>
  15 +
  16 + <name>socks5-proxy</name>
  17 +
  18 + <properties>
  19 + <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
  20 + <maven.compiler.source>1.7</maven.compiler.source>
  21 + <maven.compiler.target>1.7</maven.compiler.target>
  22 + </properties>
  23 +
  24 + <dependencies>
  25 + <dependency>
  26 + <groupId>io.netty</groupId>
  27 + <artifactId>netty-all</artifactId>
  28 + </dependency>
  29 + <dependency>
  30 + <groupId>org.apache.commons</groupId>
  31 + <artifactId>commons-lang3</artifactId>
  32 + </dependency>
  33 + <dependency>
  34 + <groupId>org.slf4j</groupId>
  35 + <artifactId>slf4j-api</artifactId>
  36 + </dependency>
  37 + <dependency>
  38 + <groupId>junit</groupId>
  39 + <artifactId>junit</artifactId>
  40 + <version>4.13.1</version>
  41 + <scope>test</scope>
  42 + </dependency>
  43 + <dependency>
  44 + <groupId>org.apache.logging.log4j</groupId>
  45 + <artifactId>log4j-core</artifactId>
  46 + <version>2.14.0</version>
  47 + <scope>test</scope>
  48 + </dependency>
  49 + <dependency>
  50 + <groupId>org.apache.logging.log4j</groupId>
  51 + <artifactId>log4j-slf4j-impl</artifactId>
  52 + <version>2.14.0</version>
  53 + <scope>test</scope>
  54 + </dependency>
  55 + <dependency>
  56 + <groupId>com.google.guava</groupId>
  57 + <artifactId>guava</artifactId>
  58 + <version>30.0-jre</version>
  59 + </dependency>
  60 + </dependencies>
  61 +
  62 +</project>
  1 +package cc.leevi.common.socks5proxy;
  2 +
  3 +import io.netty.channel.Channel;
  4 +import io.netty.channel.ChannelHandlerContext;
  5 +import io.netty.channel.ChannelInboundHandlerAdapter;
  6 +import io.netty.util.concurrent.Promise;
  7 +
  8 +public final class DirectClientHandler extends ChannelInboundHandlerAdapter {
  9 +
  10 + private final Promise<Channel> promise;
  11 +
  12 + public DirectClientHandler(Promise<Channel> promise) {
  13 + this.promise = promise;
  14 + }
  15 +
  16 + @Override
  17 + public void channelActive(ChannelHandlerContext ctx) {
  18 + ctx.pipeline().remove(this);
  19 + promise.setSuccess(ctx.channel());
  20 + }
  21 +
  22 + @Override
  23 + public void exceptionCaught(ChannelHandlerContext ctx, Throwable throwable) {
  24 + promise.setFailure(throwable);
  25 + }
  26 +}
  1 +package cc.leevi.common.socks5proxy;
  2 +
  3 +import io.netty.buffer.Unpooled;
  4 +import io.netty.channel.Channel;
  5 +import io.netty.channel.ChannelFutureListener;
  6 +import io.netty.channel.ChannelHandlerContext;
  7 +import io.netty.channel.ChannelInboundHandlerAdapter;
  8 +import io.netty.util.ReferenceCountUtil;
  9 +
  10 +public class RelayHandler extends ChannelInboundHandlerAdapter {
  11 + private final Channel relayChannel;
  12 +
  13 + public RelayHandler(Channel relayChannel) {
  14 + this.relayChannel = relayChannel;
  15 + }
  16 +
  17 + @Override
  18 + public void channelActive(ChannelHandlerContext ctx) {
  19 + ctx.writeAndFlush(Unpooled.EMPTY_BUFFER);
  20 + }
  21 +
  22 + @Override
  23 + public void channelRead(ChannelHandlerContext ctx, Object msg) {
  24 + if (relayChannel.isActive()) {
  25 + relayChannel.writeAndFlush(msg);
  26 + } else {
  27 + ReferenceCountUtil.release(msg);
  28 + }
  29 + }
  30 +
  31 + @Override
  32 + public void channelInactive(ChannelHandlerContext ctx) {
  33 + if (relayChannel.isActive()) {
  34 + relayChannel.writeAndFlush(Unpooled.EMPTY_BUFFER).addListener(ChannelFutureListener.CLOSE);
  35 + }
  36 + }
  37 +
  38 + @Override
  39 + public void exceptionCaught(ChannelHandlerContext ctx, Throwable cause) {
  40 + cause.printStackTrace();
  41 + ctx.close();
  42 + }
  43 +}
  1 +package cc.leevi.common.socks5proxy;
  2 +
  3 +import io.netty.bootstrap.ServerBootstrap;
  4 +import io.netty.channel.Channel;
  5 +import io.netty.channel.EventLoopGroup;
  6 +import io.netty.channel.nio.NioEventLoopGroup;
  7 +import io.netty.channel.socket.nio.NioServerSocketChannel;
  8 +import org.slf4j.Logger;
  9 +import org.slf4j.LoggerFactory;
  10 +
  11 +public class Socks5ProxyServer {
  12 + private Logger logger = LoggerFactory.getLogger(Socks5ProxyServer.class);
  13 +
  14 + private ServerBootstrap serverBootstrap;
  15 +
  16 + private EventLoopGroup serverEventLoopGroup;
  17 +
  18 + private Channel acceptorChannel;
  19 +
  20 + public void startServer(){
  21 + logger.info("Proxy Server starting...");
  22 +
  23 + serverEventLoopGroup = new NioEventLoopGroup(4);
  24 +
  25 + serverBootstrap = new ServerBootstrap()
  26 + .channel(NioServerSocketChannel.class)
  27 + .childHandler(new SocksServerInitializer())
  28 + .group(serverEventLoopGroup);
  29 + acceptorChannel = serverBootstrap.bind(1080).syncUninterruptibly().channel();
  30 + }
  31 +
  32 + public void shutdown(){
  33 + logger.info("Proxy Server shutting down...");
  34 + acceptorChannel.close().syncUninterruptibly();
  35 + serverEventLoopGroup.shutdownGracefully().syncUninterruptibly();
  36 + logger.info("shutdown completed!");
  37 + }
  38 +}
  1 +/*
  2 + * Copyright 2012 The Netty Project
  3 + *
  4 + * The Netty Project licenses this file to you under the Apache License,
  5 + * version 2.0 (the "License"); you may not use this file except in compliance
  6 + * with the License. You may obtain a copy of the License at:
  7 + *
  8 + * https://www.apache.org/licenses/LICENSE-2.0
  9 + *
  10 + * Unless required by applicable law or agreed to in writing, software
  11 + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
  12 + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
  13 + * License for the specific language governing permissions and limitations
  14 + * under the License.
  15 + */
  16 +package cc.leevi.common.socks5proxy;
  17 +
  18 +import io.netty.bootstrap.Bootstrap;
  19 +import io.netty.buffer.Unpooled;
  20 +import io.netty.channel.Channel;
  21 +import io.netty.channel.ChannelFuture;
  22 +import io.netty.channel.ChannelFutureListener;
  23 +import io.netty.channel.ChannelHandler;
  24 +import io.netty.channel.ChannelHandlerContext;
  25 +import io.netty.channel.ChannelOption;
  26 +import io.netty.channel.SimpleChannelInboundHandler;
  27 +import io.netty.channel.socket.nio.NioSocketChannel;
  28 +import io.netty.handler.codec.socksx.SocksMessage;
  29 +import io.netty.handler.codec.socksx.v4.DefaultSocks4CommandResponse;
  30 +import io.netty.handler.codec.socksx.v4.Socks4CommandRequest;
  31 +import io.netty.handler.codec.socksx.v4.Socks4CommandStatus;
  32 +import io.netty.handler.codec.socksx.v5.DefaultSocks5CommandResponse;
  33 +import io.netty.handler.codec.socksx.v5.Socks5CommandRequest;
  34 +import io.netty.handler.codec.socksx.v5.Socks5CommandStatus;
  35 +import io.netty.util.concurrent.Future;
  36 +import io.netty.util.concurrent.FutureListener;
  37 +import io.netty.util.concurrent.Promise;
  38 +
  39 +@ChannelHandler.Sharable
  40 +public final class SocksServerConnectHandler extends SimpleChannelInboundHandler<SocksMessage> {
  41 +
  42 + private final Bootstrap b = new Bootstrap();
  43 +
  44 + @Override
  45 + public void channelRead0(final ChannelHandlerContext ctx, final SocksMessage message) throws Exception {
  46 + final Socks5CommandRequest request = (Socks5CommandRequest) message;
  47 +
  48 + Promise<Channel> promise = ctx.executor().newPromise();
  49 + promise.addListener(
  50 + new FutureListener<Channel>() {
  51 + @Override
  52 + public void operationComplete(final Future<Channel> future) throws Exception {
  53 + final Channel outboundChannel = future.getNow();
  54 + if (future.isSuccess()) {
  55 + ChannelFuture responseFuture =
  56 + ctx.channel().writeAndFlush(new DefaultSocks5CommandResponse(
  57 + Socks5CommandStatus.SUCCESS,
  58 + request.dstAddrType(),
  59 + request.dstAddr(),
  60 + request.dstPort()));
  61 +
  62 + responseFuture.addListener(new ChannelFutureListener() {
  63 + @Override
  64 + public void operationComplete(ChannelFuture channelFuture) {
  65 + ctx.pipeline().remove(SocksServerConnectHandler.this);
  66 + outboundChannel.pipeline().addLast(new RelayHandler(ctx.channel()));
  67 + ctx.pipeline().addLast(new RelayHandler(outboundChannel));
  68 + }
  69 + });
  70 + } else {
  71 + ctx.channel().writeAndFlush(new DefaultSocks5CommandResponse(
  72 + Socks5CommandStatus.FAILURE, request.dstAddrType()));
  73 + SocksServerUtils.closeOnFlush(ctx.channel());
  74 + }
  75 + }
  76 + });
  77 +
  78 + final Channel inboundChannel = ctx.channel();
  79 + b.group(inboundChannel.eventLoop())
  80 + .channel(NioSocketChannel.class)
  81 + .option(ChannelOption.CONNECT_TIMEOUT_MILLIS, 10000)
  82 + .option(ChannelOption.SO_KEEPALIVE, true)
  83 + .handler(new DirectClientHandler(promise));
  84 +
  85 + b.connect(request.dstAddr(), request.dstPort()).addListener(new ChannelFutureListener() {
  86 + @Override
  87 + public void operationComplete(ChannelFuture future) throws Exception {
  88 + if (future.isSuccess()) {
  89 + // Connection established use handler provided results
  90 + } else {
  91 + // Close the connection if the connection attempt has failed.
  92 + ctx.channel().writeAndFlush(
  93 + new DefaultSocks5CommandResponse(Socks5CommandStatus.FAILURE, request.dstAddrType()));
  94 + SocksServerUtils.closeOnFlush(ctx.channel());
  95 + }
  96 + }
  97 + });
  98 + }
  99 +
  100 + @Override
  101 + public void exceptionCaught(ChannelHandlerContext ctx, Throwable cause) throws Exception {
  102 + SocksServerUtils.closeOnFlush(ctx.channel());
  103 + }
  104 +}
  1 +/*
  2 + * Copyright 2012 The Netty Project
  3 + *
  4 + * The Netty Project licenses this file to you under the Apache License,
  5 + * version 2.0 (the "License"); you may not use this file except in compliance
  6 + * with the License. You may obtain a copy of the License at:
  7 + *
  8 + * https://www.apache.org/licenses/LICENSE-2.0
  9 + *
  10 + * Unless required by applicable law or agreed to in writing, software
  11 + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
  12 + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
  13 + * License for the specific language governing permissions and limitations
  14 + * under the License.
  15 + */
  16 +package cc.leevi.common.socks5proxy;
  17 +
  18 +import io.netty.buffer.Unpooled;
  19 +import io.netty.channel.ChannelHandler;
  20 +import io.netty.channel.ChannelHandlerContext;
  21 +import io.netty.channel.SimpleChannelInboundHandler;
  22 +import io.netty.handler.codec.socksx.SocksMessage;
  23 +import io.netty.handler.codec.socksx.SocksVersion;
  24 +import io.netty.handler.codec.socksx.v4.Socks4CommandRequest;
  25 +import io.netty.handler.codec.socksx.v4.Socks4CommandType;
  26 +import io.netty.handler.codec.socksx.v5.*;
  27 +import org.slf4j.Logger;
  28 +import org.slf4j.LoggerFactory;
  29 +
  30 +@ChannelHandler.Sharable
  31 +public final class SocksServerHandler extends SimpleChannelInboundHandler<SocksMessage> {
  32 +
  33 + private Logger logger = LoggerFactory.getLogger(SocksServerHandler.class);
  34 +
  35 + public static final SocksServerHandler INSTANCE = new SocksServerHandler();
  36 +
  37 + private SocksServerHandler() { }
  38 +
  39 + @Override
  40 + public void channelRead0(ChannelHandlerContext ctx, SocksMessage socksRequest) throws Exception {
  41 + if(!socksRequest.version().equals(SocksVersion.SOCKS5)){
  42 + logger.error("only supports socks5 protocol!");
  43 + ctx.writeAndFlush(Unpooled.wrappedBuffer("protocol version illegal!".getBytes()));
  44 + return ;
  45 + }
  46 + if (socksRequest instanceof Socks5InitialRequest) {
  47 + ctx.pipeline().addFirst(new Socks5CommandRequestDecoder());
  48 + ctx.write(new DefaultSocks5InitialResponse(Socks5AuthMethod.NO_AUTH));
  49 + //如果需要密码,这里可以换成
  50 +// ctx.write(new DefaultSocks5InitialResponse(Socks5AuthMethod.PASSWORD));
  51 + } else if (socksRequest instanceof Socks5PasswordAuthRequest) {
  52 + //如果需要密码,这里需要验证密码
  53 + ctx.pipeline().addFirst(new Socks5CommandRequestDecoder());
  54 + ctx.write(new DefaultSocks5PasswordAuthResponse(Socks5PasswordAuthStatus.SUCCESS));
  55 + } else if (socksRequest instanceof Socks5CommandRequest) {
  56 + Socks5CommandRequest socks5CmdRequest = (Socks5CommandRequest) socksRequest;
  57 + if (socks5CmdRequest.type() == Socks5CommandType.CONNECT) {
  58 + ctx.pipeline().addLast(new SocksServerConnectHandler());
  59 + ctx.pipeline().remove(this);
  60 + ctx.fireChannelRead(socksRequest);
  61 + } else {
  62 + ctx.close();
  63 + }
  64 + } else {
  65 + ctx.close();
  66 + }
  67 + }
  68 +
  69 + @Override
  70 + public void channelReadComplete(ChannelHandlerContext ctx) {
  71 + ctx.flush();
  72 + }
  73 +
  74 + @Override
  75 + public void exceptionCaught(ChannelHandlerContext ctx, Throwable throwable) {
  76 + logger.error("exceptionCaught",throwable);
  77 + SocksServerUtils.closeOnFlush(ctx.channel());
  78 + }
  79 +}
  1 +package cc.leevi.common.socks5proxy;
  2 +
  3 +import io.netty.channel.ChannelInitializer;
  4 +import io.netty.channel.socket.SocketChannel;
  5 +import io.netty.handler.codec.socksx.SocksPortUnificationServerHandler;
  6 +import io.netty.handler.logging.LogLevel;
  7 +import io.netty.handler.logging.LoggingHandler;
  8 +
  9 +public final class SocksServerInitializer extends ChannelInitializer<SocketChannel> {
  10 + @Override
  11 + public void initChannel(SocketChannel ch) throws Exception {
  12 + ch.pipeline().addLast(
  13 + new LoggingHandler(LogLevel.DEBUG),
  14 + new SocksPortUnificationServerHandler(),
  15 + SocksServerHandler.INSTANCE);
  16 + }
  17 +}
  1 +package cc.leevi.common.socks5proxy;
  2 +
  3 +
  4 +import io.netty.buffer.Unpooled;
  5 +import io.netty.channel.Channel;
  6 +import io.netty.channel.ChannelFutureListener;
  7 +
  8 +public final class SocksServerUtils {
  9 +
  10 + /**
  11 + * Closes the specified channel after all queued write requests are flushed.
  12 + */
  13 + public static void closeOnFlush(Channel ch) {
  14 + if (ch.isActive()) {
  15 + ch.writeAndFlush(Unpooled.EMPTY_BUFFER).addListener(ChannelFutureListener.CLOSE);
  16 + }
  17 + }
  18 +
  19 + private SocksServerUtils() { }
  20 +}