doupafu6980 2018-05-29 04:19
浏览 191

使用Colly框架,我无法登录Evernote帐户

I am using the colly framework to scrape a website. I am trying to log in to my Evernote account so that I can scrape some data, but the login does not go through. I passed the credentials as form fields named "username" and "password". Is this the right way?

Thank you in advance.

package main

import (
 "log"
 "github.com/gocolly/colly"
)

// main posts the credentials to Evernote's login URL and then visits the
// Evernote home page, logging the status code of every response received
// once the response callback has been attached.
func main() {
    // Create a new collector.
    c := colly.NewCollector()

    // Authenticate by posting the credentials as form data.
    err := c.Post("https://www.evernote.com/Login.action", map[string]string{
        "username": "XXXXXX@XXX.com",
        "password": "*********",
    })
    if err != nil {
        log.Fatal("Error : ", err)
    }

    // Attach callbacks after login. The callback is registered only after
    // the POST above has been issued.
    c.OnResponse(func(r *colly.Response) {
        log.Println("response received", r.StatusCode)
    })

    // Start scraping. Visit reports failures through its returned error,
    // so check it instead of discarding it.
    if err := c.Visit("https://www.evernote.com/"); err != nil {
        log.Fatal("Error : ", err)
    }
}
  • 写回答

1条回答 默认 最新

  • dov6891 2018-05-29 07:35
    关注

    You should try to mimic the browser's behavior. Take a look at this implementation; I've added comments on each step:

    package evernote
    
    import (
        "bytes"
        "errors"
        "fmt"
        "io/ioutil"
        "net/http"
        "net/http/cookiejar"
        "net/url"
        "regexp"
        "strings"
    )
    
    // evernoteLoginURL is the Evernote login endpoint. Login fetches it with
    // GET and then posts the login form back to the same address.
    const evernoteLoginURL = "https://www.evernote.com/Login.action"
    
    // Patterns applied to the HTML returned by the login endpoint.
    var (
        // evernoteJSParamsExpr captures the element ID and the assigned value
        // of statements shaped like: document.getElementById("id").value = "value"
        evernoteJSParamsExpr = regexp.MustCompile(`document.getElementById\("(.*)"\).value = "(.*)"`)

        // evernoteRedirectExpr captures the href of the link that follows the
        // text "Redirecting to".
        evernoteRedirectExpr = regexp.MustCompile(`Redirecting to <a href="(.*)">`)
    )

    // Sentinel errors returned by the login flow.
    var (
        // errNoMatches is returned by extractJSParams when the page contains
        // no JavaScript parameter assignment.
        errNoMatches = errors.New("No matches")

        // errRedirectURL is returned by Login when the final response holds
        // no redirect link.
        errRedirectURL = errors.New("Redirect URL not found")
    )
    
    // EvernoteClient bundles the credentials and HTTP state used to interact
    // with the Evernote website.
    type EvernoteClient struct {
        // Username and Password are the account credentials submitted by Login.
        Username, Password string

        // httpClient performs the requests issued by Login.
        httpClient *http.Client

        // hpts and hptsh are read from the login page and sent back with the
        // login form; they persist for the duration of the login process.
        hpts, hptsh string
    }
    
    // NewEvernoteClient returns an EvernoteClient for the given credentials.
    // Its HTTP client copies the settings of http.DefaultClient but is given
    // its own cookie jar, mimicking the way a browser keeps cookies between
    // requests.
    func NewEvernoteClient(username, password string) *EvernoteClient {
        // NOTE(review): the error result of cookiejar.New is ignored here.
        jar, _ := cookiejar.New(nil)

        return &EvernoteClient{
            Username: username,
            Password: password,
            // Same transport, redirect policy and timeout as the default
            // client; only the jar differs.
            httpClient: &http.Client{
                Transport:     http.DefaultTransport,
                CheckRedirect: http.DefaultClient.CheckRedirect,
                Timeout:       http.DefaultClient.Timeout,
                Jar:           jar,
            },
        }
    }
    
    // extractJSParams scans body for the JavaScript assignments matched by
    // evernoteJSParamsExpr and stores the values assigned to the "hpts" and
    // "hptsh" elements in e.hpts and e.hptsh.
    //
    // It returns errNoMatches when body contains no such assignment, or when
    // a match is missing the element ID or value capture.
    func (e *EvernoteClient) extractJSParams(body []byte) error {
        matches := evernoteJSParamsExpr.FindAllSubmatch(body, -1)
        if len(matches) == 0 {
            return errNoMatches
        }
        for _, submatches := range matches {
            // A complete match holds the full text plus both capture groups.
            // Return the error directly so it is not lost on the way out.
            if len(submatches) < 3 {
                return errNoMatches
            }

            // NOTE(review): the element IDs are assumed to equal the form
            // field names that Login sends ("hpts" and "hptsh"); confirm
            // against the login page markup.
            switch string(submatches[1]) {
            case "hpts":
                e.hpts = string(submatches[2])
            case "hptsh":
                e.hptsh = string(submatches[2])
            }
        }
        return nil
    }
    
    // Login signs in to Evernote with e.Username and e.Password, mimicking
    // the steps a browser performs:
    //
    //  1. GET the login page and extract the "hpts" and "hptsh" parameters.
    //  2. POST the username alone with "evaluateUsername" set, and require the
    //     reply to contain "usePasswordAuth":true.
    //  3. POST the full form including the password, and look for the
    //     redirect link in the reply.
    //
    // On success it prints the redirect URL and returns nil. It returns any
    // request or read error, the error from extractJSParams, an error when
    // password authentication is not offered, or errRedirectURL when the
    // final reply holds no redirect link.
    func (e *EvernoteClient) Login() error {
        // First step: fetch the login page as a browser visitor would do.
        res, err := e.httpClient.Get(evernoteLoginURL)
        if err != nil {
            return err
        }
        body, err := readLoginBody(res)
        if err != nil {
            return err
        }
        if err = e.extractJSParams(body); err != nil {
            return err
        }

        // Second step: we have extracted the "hpts" and "hptsh" parameters.
        // Send a request using only the username and setting "evaluateUsername".
        values := url.Values{}
        values.Set("username", e.Username)
        values.Set("evaluateUsername", "")
        values.Set("analyticsLoginOrigin", "login_action")
        values.Set("clipperFlow", "false")
        values.Set("showSwitchService", "true")
        values.Set("hpts", e.hpts)
        values.Set("hptsh", e.hptsh)

        body, err = e.postLoginForm(values, "application/json")
        if err != nil {
            return err
        }
        if !strings.Contains(string(body), `"usePasswordAuth":true`) {
            return errors.New("Password auth not enabled")
        }

        // Third step: do the final request, appending the password to the
        // form data.
        values.Del("evaluateUsername")
        values.Set("password", e.Password)
        values.Set("login", "Sign in")

        body, err = e.postLoginForm(values, "text/html")
        if err != nil {
            return err
        }

        // Check the body in order to find the redirect URL. Only the first
        // match is used; m[1] is the captured href.
        m := evernoteRedirectExpr.FindStringSubmatch(string(body))
        if len(m) < 2 {
            return errRedirectURL
        }
        fmt.Println("Login is ok, redirect URL:", m[1])
        return nil
    }

    // postLoginForm posts values, form-encoded, to evernoteLoginURL with the
    // given Accept header and the headers a browser's XMLHttpRequest would
    // send, and returns the response body.
    func (e *EvernoteClient) postLoginForm(values url.Values, accept string) ([]byte, error) {
        req, err := http.NewRequest(http.MethodPost, evernoteLoginURL, bytes.NewBufferString(values.Encode()))
        if err != nil {
            return nil, err
        }
        req.Header.Set("Accept", accept)
        req.Header.Set("Content-Type", "application/x-www-form-urlencoded; charset=UTF-8")
        req.Header.Set("x-requested-with", "XMLHttpRequest")
        req.Header.Set("referer", evernoteLoginURL)

        res, err := e.httpClient.Do(req)
        if err != nil {
            return nil, err
        }
        return readLoginBody(res)
    }

    // readLoginBody reads res.Body to the end and closes it, so the body is
    // not leaked once its content has been consumed.
    func readLoginBody(res *http.Response) ([]byte, error) {
        if res.Body == nil {
            return nil, errors.New("No response body")
        }
        defer res.Body.Close()
        return ioutil.ReadAll(res.Body)
    }
    

    After you successfully get the redirect URL, you should be able to send authenticated requests as long as you keep using the same HTTP client that was used for the login process. The cookie jar plays a very important role here.

    To call this code use:

    // main creates a client with placeholder credentials, attempts to log
    // in, and panics if the login fails.
    func main() {
        client := NewEvernoteClient("user@company", "password")
        if err := client.Login(); err != nil {
            panic(err)
        }
    }
    
    评论

报告相同问题?

悬赏问题

  • ¥15 网络设备配置与管理这个该怎么弄
  • ¥20 机器学习能否像多层线性模型一样处理嵌套数据
  • ¥20 西门子S7-Graph,S7-300,梯形图
  • ¥50 用易语言http 访问不了网页
  • ¥50 safari浏览器fetch提交数据后数据丢失问题
  • ¥15 matlab不知道怎么改,求解答!!
  • ¥15 永磁直线电机的电流环pi调不出来
  • ¥15 用stata实现聚类的代码
  • ¥15 请问paddlehub能支持移动端开发吗?在Android studio上该如何部署?
  • ¥20 docker里部署springboot项目,访问不到扬声器