golang模拟新浪微博登录

1.基于幽灵蛛pholcus开源项目的规则

下面直接贴出完整代码;稍作修改后也可用于其他爬虫项目。

package pholcus_lib

// 基础包
import (
	// "github.com/henrylee2cn/pholcus/common/goquery"                          //DOM解析
	"github.com/henrylee2cn/pholcus/app/downloader/request" //必需
	. "github.com/henrylee2cn/pholcus/app/spider"           //必需
	// . "github.com/henrylee2cn/pholcus/app/spider/common" //选用
	// "github.com/henrylee2cn/pholcus/logs"
	// net包
	// "net/http" //设置http.Header
	// "net/url"
	// 编码包
	// "encoding/xml"
	//"encoding/json"
	// 字符串处理包
	//"regexp"
	// "strconv"
	// "fmt"
	// "math"

	//"net/http"
	"strconv"
	"regexp"
	"fmt"
	"encoding/json"
	"net/url"
	//"strings"
	//"strings"
	"strings"
	"github.com/henrylee2cn/pholcus/common/goquery"
	//"net/http"
)
// Package-level state filled in by init and read by the crawl rules.
var (
	// millisecond is the timestamp taken once in init; the prelogin request
	// appends it to its URL as the "_" query parameter.
	millisecond int64

	// name is the account user name. init overwrites it with the encoded form
	// that is sent as the "su" parameter.
	name string

	// password is the account password in plain text; it is encrypted right
	// before the login POST is built.
	password string
)
// Info receives the JSON reply of the login POST request.
//
// NOTE(review): Retcode is declared as int; if the endpoint sends the code as
// a JSON string, json.Unmarshal reports a type error for that field while the
// other fields are still filled in — confirm against a real reply.
type Info struct {
	Retcode int    // result code of the login attempt
	Uid     string // account id
	Nick    string // account nickname
	// CrossDomainUrlList holds the follow-up URLs returned by the login
	// endpoint; the spider requests one of them next.
	CrossDomainUrlList []string
}
// init registers the spider and prepares the login state: the start-up
// timestamp, the encoded user name and the plain-text password.
func init() {
	FileTest.Register()

	millisecond = getMillisecond()

	// Placeholder credentials; the user name is stored already encoded
	// because the rules only ever send the encoded form.
	name = encryptUname("88888888")
	password = "8888888"
}

var FileTest = &Spider{
	Name:        "微博登录测试",
	Description: "测试 [s.weibo.com/user/]",
	Pausetime: 1500,
	Keyin:   KEYIN,
	// Limit:        LIMIT,
	EnableCookie: true,
	RuleTree: &RuleTree{
		Root: func(ctx *Context) {
			//https://weibo.cn/
			ctx.AddQueue(&request.Request{
				Url:          "https://login.sina.com.cn/sso/prelogin.php?entry=account&callback=sinaSSOController.preloginCallBack&su="+name+"&rsakt=mod&client=ssologin.js(v1.4.15)&_="+strconv.FormatInt(millisecond,10),
				Rule:         "登录一",
				//DownloaderID:1,
			})
		},

		Trunk: map[string]*Rule{
			"登录一": {
				ParseFunc: func(ctx *Context) {

					str := ctx.GetText()
					println("-----1-----" + str)

					compile1, _ := regexp.Compile("{.*}")
					match1 := compile1.FindString(str)
					fmt.Println(match1)
					//json str 转map
					var dat map[string]interface{}
					if err := json.Unmarshal([]byte(match1), &dat); err == nil {
						if err != nil{
							println("转换异常!")
						}
					}
					servertime := dat["servertime"]
					servertime= strconv.FormatFloat(servertime.(float64), 'f', -1, 64)
					nonce:=dat["nonce"]
					pubkey:=dat["pubkey"]
					rsakv := dat["rsakv"]

					//加密密码

					ep := encryptPassword(pubkey.(string), servertime.(string), nonce.(string), password)

					postDict := map[string]string{}
					postDict["entry"] = "account"
					postDict["gateway"] = "1"
					postDict["from"] = ""
					postDict["savestate"] = "30"
					postDict["qrcode_flag"] = "true"
					postDict["useticket"] = "0"
					postDict["pagerefer"] = ""
					postDict["vsnf"] = "1"
					postDict["su"] = name
					postDict["service"] = "account"
					postDict["servertime"] = servertime.(string)
					postDict["nonce"] = nonce.(string)
					postDict["pwencode"] = "rsa2"
					postDict["rsakv"] = rsakv.(string)
					postDict["sp"] = ep
					postDict["sr"] = "1395*822"
					postDict["cdult"] = "3"
					postDict["domain"] = "sina.com.cn"
					postDict["prelt"] = "170"
					postDict["returntype"] = "TEXT"

					postValues := url.Values{}
					for postKey, PostValue := range postDict{
						postValues.Set(postKey, PostValue)
					}

					//post参数编码
					postDataStr := postValues.Encode()
					ctx.AddQueue(&request.Request{
						Url:          "https://login.sina.com.cn/sso/login.php?client=ssologin.js(v1.4.15)&_="+strconv.FormatInt(getMillisecond(),10),
						Method:       "POST",
						EnableCookie: true,
						PostData:     postDataStr,
						Rule:         "登录二",
						//DownloaderID:1,
					})
				},
			},

			"登录二": {
				ParseFunc: func(ctx *Context) {

					str := ctx.GetText()

					println("-----2-----" + str)

					var dat Info
					json.Unmarshal([]byte(str), &dat)
					//此处获取2个链接,包含普通版和移动版
					//print(dat.CrossDomainUrlList[2])

					ctx.AddQueue(&request.Request{
						Url:          dat.CrossDomainUrlList[2],
						Method:       "GET",
						EnableCookie: true,
						Rule:         "登录三",
					})
				},
			},

			"登录三": {
				ParseFunc: func(ctx *Context) {

					ctx.AddQueue(&request.Request{
						Url:          "https://weibo.cn/",
						Method:       "GET",
						EnableCookie: true,
						Rule:         "重定向一",
					})
				},
			},

			"重定向一": {
				ParseFunc: func(ctx *Context) {

					compile2, _ := regexp.Compile("[a-zA-z]+://[^\s]*")

					string := compile2.FindAllString(ctx.GetText(), 2)

					ctx.AddQueue(&request.Request{
						Url:          string[1],
						Method:       "GET",
						EnableCookie: true,
						Rule:         "重定向二",
					})

				},
			},

			"重定向二": {
				ParseFunc: func(ctx *Context) {

					compile2, _ := regexp.Compile("[a-zA-z]+://[^\s]*")

					string := compile2.FindAllString(ctx.GetText(), 3)
					ctx.AddQueue(&request.Request{
						Url:          string[2],
						Method:       "GET",
						EnableCookie: true,
						Rule:         "进入首页",
					})

				},
			},

			"进入首页": {
				ParseFunc: func(ctx *Context) {
					for z := 1;z<=2;z++{
						ctx.AddQueue(&request.Request{
							Url:          "https://weibo.cn/search/user/?keyword="+ ctx.GetKeyin() + "&page=" + strconv.Itoa(z),// //,
							Rule:         "查找微博",
							Method:		  "GET",
							EnableCookie: true,
							//PostData:"keyword=财经&suser=找人",
							//DownloaderID:1,smblog
						})
					}

				},
			},

			"查找微博": {

				ParseFunc: func(ctx *Context) {

					println("---------------查找微博-------------")

					query := ctx.GetDom()

					navBox := query.Find("table")

					navBox.Each(func(i int, s *goquery.Selection) {

						str := s.Find("tr").Text()
						j := strings.LastIndex(str,"粉丝")
						z := strings.LastIndex(str,"人")

						//昵称
						name := str[0:j]
						//粉丝数
						fansNum := str[j+6:z]
						//地区
						city := str[z+5:len(str)]

						println("name" + name)

						//链接
						if url, ok := s.Find("table tr td a").Attr("href"); ok {

							ctx.AddQueue(&request.Request{
								Url:  "https://weibo.cn" + url,
								Rule: "博主首页",
								Temp: map[string]interface{}{
									"name":  name,
									"fansNum": fansNum,
									"city":  city,
								},
							})
						}
					})

				},
			},

			"博主首页": {
				ParseFunc: func(ctx *Context) {

					//昵称
					name := ctx.GetTemp("name","").(string)
					//粉丝数
					fansNum := ctx.GetTemp("fansNum","").(string)
					//地区
					city := ctx.GetTemp("city","").(string)

					//微博数
					weiboNum := ctx.GetDom().Find(".tc").Text()
					j := strings.LastIndex(weiboNum,"[")
					z := strings.LastIndex(weiboNum,"]")
					weiboNum = weiboNum[j+1:z]

					//关注数
					attentionNum := ctx.GetDom().Find(".tip2 a").Eq(0).Text()
					j = strings.LastIndex(attentionNum,"[")
					z = strings.LastIndex(attentionNum,"]")
					attentionNum = attentionNum[j+1:z]

					a :=ctx.GetDom().Find(".ut a").Eq(1)


					if a.Text() == "加关注"{
						if url, ok := ctx.GetDom().Find(".ut a").Eq(3).Attr("href"); ok {

							ctx.AddQueue(&request.Request{
								Url:  "https://weibo.cn" + url,
								Rule: "资料页",
								EnableCookie: true,
								Temp: map[string]interface{}{
									"name":  name,
									"fansNum": fansNum,
									"city":  city,
									"weiboNum":  weiboNum,
									"attentionNum":  attentionNum,
								},
							})
						}
					} else{
						if url, ok := ctx.GetDom().Find(".ut a").Eq(2).Attr("href"); ok {

							ctx.AddQueue(&request.Request{
								Url:  "https://weibo.cn" + url,
								Rule: "资料页",
								EnableCookie: true,
								Temp: map[string]interface{}{
									"name":  name,
									"fansNum": fansNum,
									"city":  city,
									"weiboNum":  weiboNum,
									"attentionNum":  attentionNum,
								},
							})
						}
					}

				},
			},

			"资料页": {

				ItemFields: []string{
					"昵称",
					"粉丝数",
					"地区",
					"微博数",
					"关注数",
					"标签",
					"详细信息",
				},
				ParseFunc: func(ctx *Context) {

					//昵称
					name := ctx.GetTemp("name","").(string)
					//粉丝数
					fansNum := ctx.GetTemp("fansNum","").(string)
					//地区
					city := ctx.GetTemp("city","").(string)
					//微博数
					weiboNum := ctx.GetTemp("weiboNum","").(string)
					//关注数
					attentionNum := ctx.GetTemp("attentionNum","").(string)

					str := ctx.GetDom().Find("div").Eq(5).Text()

					i := strings.LastIndex(str,"标签")
					z := strings.LastIndex(str,"更多")

					var str2,str3 string
					if i == -1{
						str2 = ""
						str3 = str
					}else{
						//标签
						str2 = str[i+7:z]

						//详细信息
						str3 = str[0:i]
					}
					ctx.Output(map[int]interface{}{
						0: name,
						1: fansNum,
						2: city,
						3: weiboNum,
						4: attentionNum,
						5: str2,
						6: str3,
					})


				},
			},

		},
	},
}

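2.相关方法(这些方法还需要在上面的 import 中加入 "time"、"encoding/base64"、"math/big"、"crypto/rsa"、"crypto/rand"、"encoding/hex")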

// getMillisecond returns the current Unix time in milliseconds.
func getMillisecond() int64 {
	// UnixNano counts nanoseconds; one millisecond is 1e6 of them.
	return time.Now().UnixNano() / int64(time.Millisecond)
}

// encryptUname returns uname encoded with the padded, URL-safe base64
// alphabet.
func encryptUname(uname string) string {
	raw := []byte(uname)
	encoded := make([]byte, base64.URLEncoding.EncodedLen(len(raw)))
	base64.URLEncoding.Encode(encoded, raw)
	return string(encoded)
}

// string2big parses s as a hexadecimal integer. It returns zero when s is not
// valid hexadecimal, so callers never see a half-parsed value.
func string2big(s string) *big.Int {
	n, ok := new(big.Int).SetString(s, 16)
	if !ok {
		return new(big.Int)
	}
	return n
}

// encryptPassword encrypts the login password with RSA PKCS#1 v1.5.
//
// pubkey is the RSA modulus as a hexadecimal string; the public exponent is
// fixed at 65537 (hexadecimal 10001). The encrypted message is
// servertime + "\t" + nonce + "\n" + password. The result is the ciphertext
// as lowercase hexadecimal, or "" when the modulus is invalid or encryption
// fails.
func encryptPassword(pubkey string, servertime string, nonce string, password string) string {
	modulus := string2big(pubkey)
	if modulus.Sign() <= 0 {
		// Unparsable or non-positive modulus: no usable key.
		return ""
	}
	pub := rsa.PublicKey{
		N: modulus,
		E: 65537,
	}

	// servertime and nonce are joined by a tab, then a newline precedes the
	// password.
	plain := servertime + "\t" + nonce + "\n" + password

	cipher, err := rsa.EncryptPKCS1v15(rand.Reader, &pub, []byte(plain))
	if err != nil {
		// The signature has no error result, so failure is reported as "".
		return ""
	}
	return hex.EncodeToString(cipher)
}