douxiajia6309 2019-02-22 23:28
浏览 150
已采纳

HTTPS GETs using libcurl vs golang

Say, I've a list of urls:

$ for i in `seq 1 90`; do echo "$RANDOM$RANDOM.blogspot.com" ; done >> /tmp/urls.txt

My GETs in C take much longer than the same GETs in my Go code.

Here's the C code:

n_memory.c

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <curl/curl.h>

/*
 * Growable in-memory buffer that accumulates a response body.
 * After each successful WriteMemoryCallback() call, `memory` holds `size`
 * payload bytes followed by a terminating NUL byte.
 */
struct MemoryStruct {
  char *memory;  /* heap buffer, grown with realloc(); freed by the owner */
  size_t size;   /* payload bytes stored so far, not counting the NUL */
};

/*
 * Write callback in the shape expected by CURLOPT_WRITEFUNCTION: appends
 * `size * nmemb` bytes from `contents` to the MemoryStruct behind `userp`.
 *
 * Returns the number of bytes consumed. On allocation failure it prints a
 * message and returns 0; the existing buffer is left untouched and still
 * has to be freed by its owner.
 */
static size_t
WriteMemoryCallback(void *contents, size_t size, size_t nmemb, void *userp)
{
  size_t realsize = size * nmemb;
  struct MemoryStruct *mem = (struct MemoryStruct *)userp;

  /* +1 leaves room for the NUL terminator written below. Going through a
     temporary keeps mem->memory valid if realloc() fails. */
  char *ptr = realloc(mem->memory, mem->size + realsize + 1);
  if(ptr == NULL) {
    printf("not enough memory (realloc returned NULL)\n");
    return 0;
  }

  mem->memory = ptr;
  memcpy(&(mem->memory[mem->size]), contents, realsize);
  mem->size += realsize;
  mem->memory[mem->size] = 0;
  return realsize;
}

int try_url(char *url);

/*
 * Reads host names (one per line) from the file named by argv[1], wraps each
 * in a dns.google.com "resolve" query URL and fetches it with try_url().
 * At most 80 URLs are fetched.
 *
 * Returns 1 when the file argument is missing or cannot be opened,
 * 0 otherwise (individual fetch failures do not change the exit status).
 */
int main(int argc, char **argv){

        enum { MAX_URLS = 80 };
        static const char prefix[] = "https://dns.google.com/resolve?name=";
        static const char suffix[] = "&type=A";

        if (argc < 2){
                fprintf(stderr, "error, argc\n");
                return 1;
        }
        FILE *fp = fopen(argv[1],"r");
        if (!fp){
                fprintf(stderr, "fopen: cannot open %s\n", argv[1]);
                return 1;
        }

        char line[2048];
        /* Large enough for prefix + a full line + suffix, so snprintf cannot truncate. */
        char url[8192];
        int count = 0;
        while ( count < MAX_URLS && fgets(line, sizeof line, fp) ){
                count++;
                /* fgets keeps the line terminator; cut it off (also handles CRLF files). */
                line[strcspn(line, "\r\n")] = 0;
                snprintf(url, sizeof url, "%s%s%s", prefix, line, suffix);
                printf("%d %s\n", count, url);
                try_url(url);
        }
        fclose(fp);

        puts("Done");
        return 0;
}



int try_url(char *url)
{
  CURL *hnd;
  CURLcode res;
  struct curl_slist *slist1;
  struct MemoryStruct chunk;

  chunk.memory = malloc(1);  /* will be grown as needed by the realloc above */
  chunk.size = 0;    /* no data at this point */

  curl_global_init(CURL_GLOBAL_ALL);
  hnd = curl_easy_init();
  curl_easy_setopt(hnd, CURLOPT_NOPROGRESS, 1L);
  curl_easy_setopt(hnd, CURLOPT_MAXREDIRS, 50L);
  curl_easy_setopt(hnd, CURLOPT_HTTP_VERSION, (long)CURL_HTTP_VERSION_2TLS);
  curl_easy_setopt(hnd, CURLOPT_SSL_VERIFYPEER, 0L);
  curl_easy_setopt(hnd, CURLOPT_SSL_VERIFYHOST, 0L);
  curl_easy_setopt(hnd, CURLOPT_TCP_KEEPALIVE, 1L);
  curl_easy_setopt(hnd, CURLOPT_RESOLVE, slist1);

  slist1 = NULL;
  slist1 = curl_slist_append(slist1, "dns.google.com:443:172.217.5.110");

  curl_easy_setopt(hnd, CURLOPT_SSL_VERIFYPEER, 0);
  curl_easy_setopt(hnd, CURLOPT_SSL_VERIFYHOST, 0);
  curl_easy_setopt(hnd, CURLOPT_URL, url);
  curl_easy_setopt(hnd, CURLOPT_WRITEFUNCTION, WriteMemoryCallback);
  curl_easy_setopt(hnd, CURLOPT_WRITEDATA, (void *)&chunk);
  curl_easy_setopt(hnd, CURLOPT_USERAGENT, "libcurl-agent/1.0");
  res = curl_easy_perform(hnd);
  if(res != CURLE_OK) {
    fprintf(stderr, "curl_easy_perform() failed: %s
",
            curl_easy_strerror(res));
  }
  else {
    printf("%lu bytes retrieved
", (unsigned long)chunk.size);
  }

  curl_easy_cleanup(hnd);
  free(chunk.memory);
  curl_global_cleanup();
  return 0;
}

and here's the go code:

n_get.go

package main

import (
        "bufio"
        "fmt"
        "log"
        "net/http"
        "os"
        "time"
)

// main reads host names (one per line) from the file named by the first
// command-line argument and issues a dns.google.com "resolve" query for each
// of them via get_url, stopping after 80 lines.
func main() {
        if len(os.Args) < 2 {
                fmt.Println("Invalid usage")
                os.Exit(1)
        }

        f, err := os.Open(os.Args[1])
        checkerr(err)
        defer f.Close()

        const maxLookups = 80
        fscanner := bufio.NewScanner(f)
        for i := 1; i <= maxLookups && fscanner.Scan(); i++ {
                // e.g. https://dns.google.com/resolve?name=1.bp.blogspot.com&type=A
                url := "https://dns.google.com/resolve?name=" + fscanner.Text() + "&type=A"
                get_url(url)
        }
        // Scan reports false for read errors as well as for end of input;
        // surface the former instead of treating them as a short file.
        checkerr(fscanner.Err())

        fmt.Println("Hello!")
}

// checkerr is a no-op for a nil error. For any other error it prints the
// error with fmt.Println and then calls log.Fatal with it, which logs the
// error again and terminates the program.
func checkerr(err error) {
        if err == nil {
                return
        }
        fmt.Println(err)
        log.Fatal(err)
}

// get_url performs an HTTP GET on url, prints the response status and how
// long the request took, and returns 0 for a 200 response and 1 for any
// other status. A transport error is passed to checkerr.
func get_url(url string) int {
        fmt.Println(url)
        t1 := time.Now()
        resp, err := http.Get(url)
        t2 := time.Now()
        checkerr(err)
        // The caller owns the response body and must close it; leaving it
        // open leaks the underlying connection.
        defer resp.Body.Close()

        fmt.Println(resp.Status)
        diff := t2.Sub(t1)
        fmt.Println(url, "Took us", diff)
        if resp.StatusCode == 200 {
                fmt.Println("OK")
                return 0
        }
        fmt.Println("Failed")
        return 1
}

I even tried to assist libcurl with a --resolve option to pass in an IP address it can use, thus saving it from having to do name lookups. However this does not seem to help much.

I even tried curl's --insecure option, but that still does not make much of a dent.

Here's the time it took to do 80 HTTPS GETs:

+------------------+-----------------+
|      golang      |   c             |
+------------------------------------+
| real    0m2.670s |real    0m20.024s|
| user    0m0.555s |user    0m13.393s|
| sys     0m0.086s |sys     0m0.242s |
+------------------------------------+

This is a bit lopsided, and I am looking for pointers to close the gap. How can I improve my C code's speed? Any pointers would be much appreciated.

  • 写回答

1条回答 默认 最新

  • douying7289 2019-02-22 23:59
    关注

    First off, do not run all of curl init for every try. Do that one time.

    I don't think you need to do all of the options each time either.

    And don't do that malloc of 1 byte either. Just leave it NULL. Realloc knows how to handle it.

    本回答被题主选为最佳回答 , 对您是否有帮助呢?
    评论

报告相同问题?

悬赏问题

  • ¥15 关于#linux#的问题(输入输出错误):出现这个界面接着我重新装系统,又让修电脑的师傅帮我扫描硬盘(没有问题)用着用着又卡死(相关搜索:固态硬盘)
  • ¥15 cv::resize不同线程时间不同
  • ¥15 web课程,怎么做啊😭没好好听课 根本不知道怎么下手
  • ¥15 做一个关于单片机的比较难的代码,然后搞一个PPT进行解释
  • ¥15 python提取.csv文件中的链接会经常出现爬取失败
  • ¥15 数据结构中的数组地址问题
  • ¥15 maya的mel里,怎样先选择模型A,然后利用mel脚本自动选择有相同名字的模型B呢。
  • ¥15 Python题,根本不会啊
  • ¥15 会会信号与系统和python的来
  • ¥15 关于#python#的问题