cURL:处理多个异步请求

cURL: Handle multiple asynchronous requests

本文关键字:异步 请求 处理 cURL      更新时间:2023-10-16

我从来没有在c++中真正做过任何多线程或异步的事情,到目前为止我只使用cURL来做单个同步请求
为了更直观地说明我想做的事情,我写了一段简单的JavaScript,它实现的正是我想在C++中用cURL完成的功能。

/**
 * Starts one HTTP request without waiting for it to finish.
 *
 * method, url and data are handed to the XMLHTTP object as-is; id is an opaque
 * tag that is passed back to ResponseCallback() so the caller can tell the
 * finished requests apart.
 */
function AddRequest( method, url, data, id ) {
    var DONE = 4; // readyState value that triggers the callback
    var request = new ActiveXObject("Msxml2.XMLHTTP.6.0"); // in a browser: new XMLHttpRequest()

    function onStateChange() {
        if (request.readyState != DONE)
            return;
        ResponseCallback( request, id );
    }

    request.onreadystatechange = onStateChange;
    request.Open( method, url, true ); // third argument: asynchronous flag
    request.Send( data );
}
/**
 * Reports a finished request: echoes the request tag first, then the
 * response text of the XMLHTTP object that completed.
 * (In a browser, alert() would take the place of WScript.Echo.)
 */
function ResponseCallback( httpObj, id ) {
    var host = WScript;
    host.Echo( id );
    var text = httpObj.ResponseText;
    host.Echo( text );
}
// Usage example: queue two requests, tagged 1 and 2, then print a message.
AddRequest("GET","http://example.com/","",1);
AddRequest("GET","https://www.facebook.com","",2);
WScript.Echo( "all requests sent" ); // in a browser: alert( "all requests sent" );
// Both requests are opened with the asynchronous flag, so they are in flight at
// the same time. Each one calls ResponseCallback() once it has finished, passing
// its tag so the callback knows which request completed.

cURL似乎与XmlHttpRequest完全不同,而且比XmlHttpRequest复杂得多(复杂得毫无必要),尽管两者都只是发送HTTP请求。
这是我的第一个方法(基于hogren的回答):

#include "stdafx.hpp"
#include <iostream> //#include <stdio.h>
#include <curl.h>
#include <pthread.h>
#include <map>
#include <string>
using namespace std;
// Flag meant to keep the output of different threads from overlapping.
// NOTE(review): nothing in this file waits on it (the wait loops are commented
// out), and as a plain bool it would not be safe for cross-thread signalling anyway.
bool printing = false; //will allow us to prevent prints overlapping each other
/* Everything one request needs, bundled so it can travel through the single
 * void* argument that a pthread start routine receives. */
struct requestStruct {
    int         id;       // caller-chosen tag, printed when the response arrives
    const char *url;      // target URL
    const char *method;   // HTTP verb, e.g. "GET"
    const char *body;     // request payload
    std::map<const char*, const char*> headers; // header name -> header value
    const char *proxy;    // proxy address; "" means no proxy
    int         timeout;  // forwarded to HttpRequest() as its timeout argument
};
/* Result of one HTTP exchange; lets HttpRequest() hand back several values at once. */
struct responseStruct {
    long        statusCode;  // HTTP status code of the response
    // std::map<const char*, const char*> headers;   (response headers: not collected yet)
    const char *body;        // response text; a raw pointer, the struct does not own the characters
};
// Write callback in the shape libcurl expects for CURLOPT_WRITEFUNCTION: appends
// the chunk at `ptr` to the std::string passed as `stream` and returns size*count.
// NOTE(review): with three arguments this call resolves to
// append(const string&, pos, len), so `ptr` is first converted to a temporary
// std::string as if it were a NUL-terminated C string. A chunk without a
// terminator is read past its end, and a chunk containing a 0 byte is cut short.
size_t writeToString(void *ptr, size_t size, size_t count, void *stream) {
    ((string*)stream)->append((char*)ptr, 0, size* count);
    return size* count;
}
// Prints "<id> response received! Code: <status>" followed by the response body.
// Always returns NULL.
// NOTE(review): `printing` is set and cleared around the print, but the loop that
// would wait on it is commented out, so concurrent callers are not held back.
static void *ResponseCallback(int id, struct responseStruct *response) {
    long statusCode = response -> statusCode;
    //map<const char*, const char*> headers = response -> headers;
    const char* body = response -> body;
    //while (printing) {} //wait for other threads to stop printing
    printing = true; //tell other threads to not print anything
      cout << id << " response received! Code: " << statusCode << endl << body << endl;
    printing = false; //tell other threads printing is okay again
    return NULL;
}
// Performs one blocking request with a libcurl easy handle and returns the status
// code together with a pointer to the response body.
//   url, method - passed to CURLOPT_URL / CURLOPT_CUSTOMREQUEST
//   body        - attached only for POST, PUT and DELETE
//   headers     - each entry becomes a "name: value" request header
//   proxy       - used when it is not the empty string
//   timeout     - passed to CURLOPT_TIMEOUT when it is not 0
// NOTE(review): three things to look at in this function:
//   1. response.body points into the local `respBody`, which is destroyed when the
//      function returns, so the caller receives a dangling pointer (this matches
//      the "I only receive garbage" symptom described in the question).
//   2. The list built with curl_slist_append is never released.
//   3. The result of curl_easy_init / curl_easy_perform is never checked.
struct responseStruct HttpRequest(const char* url, const char* method, const char* body, map<const char*, const char*> &headers, const char* proxy, long timeout) {
    CURL *curl;
    curl = curl_easy_init();
    long statusCode = 0;
    map<const char*, const char*> respHeaders; // never used below
    string respBody;
    string _url(url);
    string _method(method);
    string _proxy(proxy);
    struct curl_slist *headerList = NULL;
    string headerString;
    curl_easy_setopt(curl, CURLOPT_URL, url); //set url
    curl_easy_setopt(curl, CURLOPT_CUSTOMREQUEST, method); //set method
    for (auto header=headers.begin(); header!=headers.end(); ++header) { //make header list
        headerString = header->first;
        headerString.append(": ").append(header->second);
        headerList = curl_slist_append(headerList, headerString.c_str()); 
        //cout << headerString << '\n';
    }
    curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headerList); //set headers
    if (_method == "POST" || _method == "PUT" || _method == "DELETE") //set body if the request method would allow it
        curl_easy_setopt(curl, CURLOPT_POSTFIELDS, body);
    // Value 0 switches peer certificate verification OFF for URLs containing "https://".
    if (_url.find(string("https://")) != string::npos) //set ssl verifypeer if it's an https url
        curl_easy_setopt(curl, CURLOPT_SSL_VERIFYPEER, 0L);
    if (_proxy != "") //set proxy
        curl_easy_setopt(curl, CURLOPT_PROXY, proxy);
    if (timeout != 0) //set timeout
        curl_easy_setopt(curl, CURLOPT_TIMEOUT, timeout);
    curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L); //follow redirects
    //curl_easy_setopt(curl, CURLOPT_HEADERFUNCTION, writeToString);
    //curl_easy_setopt(curl, CURLOPT_WRITEHEADER, &respHeaders); //to receive response headers
    //?? (open question from the author: how to collect the response headers)
    curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, writeToString);
    curl_easy_setopt(curl, CURLOPT_WRITEDATA, &respBody); //to receive response body
    curl_easy_perform(curl); //send the request
    curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &statusCode); //get status code
    struct responseStruct response;
    response.statusCode = statusCode;
    //response.headers;
    response.body = respBody.c_str(); // pointer into a local string, see note 1 above
    curl_easy_cleanup(curl);
    return response;
}
// Thread start routine: unpacks the requestStruct behind `arguments`, performs
// the request with HttpRequest() and reports the result via ResponseCallback().
// NOTE(review): the follow-up answer on this page reports that the pthread_exit(0)
// call at the end caused wrong responses; the `return NULL` after it is never
// reached because pthread_exit does not return.
static void *AddRequest( void *arguments ) {
    // get arguments:
      struct requestStruct *args = (struct requestStruct*)arguments;
      int id = args->id; 
      const char* url = args->url; 
      const char* method = args->method; 
      const char* body = args->body; 
      map<const char*, const char*> headers = args->headers; // full copy of the header map
      const char* proxy = args->proxy; 
      int timeout = args->timeout;
    // print arguments:
      //while (printing) {} //wait for other threads to stop printing
      //printing = true; //tell other threads to not print anything
      //  cout << id << endl << url << endl << method << endl;
      //printing = false; //tell the other threads it's okay to print again now
    struct responseStruct response = HttpRequest(url, method, body, headers, proxy, timeout);
    ResponseCallback(id,&response);
    pthread_exit(0);
    return NULL;
}
// Starts three threads, each running AddRequest() on its own requestStruct, then
// blocks in getchar() so the console stays open.
// NOTE(review): the threads are never joined and the pthread_create results are
// not checked; the process only stays alive because of getchar().
// NOTE(review): the timeout value 6000 ends up in CURLOPT_TIMEOUT, which libcurl
// documents in seconds — confirm that 6000 s (not 6000 ms) is intended.
int main() {
    // Commented-out single-threaded call, kept by the author for comparison:
    //map<const char*, const char*> headers;
    //headers["User-Agent"] = "Mozilla/5.0 (Windows NT 6.2; WOW64; rv:32.0) Gecko/20100101 Firefox/32.0";
    //struct responseStruct response = HttpRequest("https://facebook.com", "GET", "", headers, "localhost:8888", 6000);
    //cout << response.body << endl;
    pthread_t threads[3];
    struct requestStruct reqArguments[3];
    map<const char*, const char*> headers;
    headers["User-Agent"] = "Mozilla/5.0 (Windows NT 6.2; WOW64; rv:32.0) Gecko/20100101 Firefox/32.0";
    const char* proxy = ""; // empty string: HttpRequest() then sets no proxy
    // Request 0
    reqArguments[0].id = 0;
    reqArguments[0].url = "https://www.facebook.com/";
    reqArguments[0].method = "GET";
    reqArguments[0].headers = headers;
    reqArguments[0].body = "";
    reqArguments[0].proxy = proxy;
    reqArguments[0].timeout = 6000;
    pthread_create(&threads[0], NULL, &AddRequest, (void *)&reqArguments[0]); //create a thread on AddRequest() passing a full struct of arguments
    // Request 1
    reqArguments[1].id = 1;
    reqArguments[1].url = "https://www.facebook.com/";
    reqArguments[1].method = "GET";
    reqArguments[1].headers = headers;
    reqArguments[1].body = "";
    reqArguments[1].proxy = proxy;
    reqArguments[1].timeout = 6000;
    pthread_create(&threads[1], NULL, &AddRequest, (void *)&reqArguments[1]); //create a thread on AddRequest() passing a full struct of arguments
    // Request 2
    reqArguments[2].id = 2;
    reqArguments[2].url = "https://www.facebook.com/";
    reqArguments[2].method = "GET";
    reqArguments[2].headers = headers;
    reqArguments[2].body = "";
    reqArguments[2].proxy = proxy;
    reqArguments[2].timeout = 6000;
    pthread_create(&threads[2], NULL, &AddRequest, (void *)&reqArguments[2]); //create a thread on AddRequest() passing a full struct of arguments
    getchar(); //prevent console from closing instantly
    return 0;
}

我真的不确定我是否正确地完成了整个pthread
存在一些问题:
1.由于某些原因,只有第一个请求成功,其他请求甚至没有发送
除非我取消注释主函数的前4行,否则它将在没有新线程的情况下进行直接请求,但我显然不想使用该代码
2.HttpRequest()函数没有正确返回响应html代码,我只收到垃圾
我认为问题2可能与HttpRequest()返回的结构体中的指针有关,但我无法修复它
3.我最后一个、也不那么重要的问题是:我不知道如何接收响应标头并把它们放进一个map中
顺便说一句:我正在用Visual C++2010进行编译,我正在用Fiddler调试http流量。

EDIT:这是我更正的代码。

实际上并没有什么错误。但经过几次测试,我发现同时启动几个curl_perform会导致问题。所以我增加了一个延迟(5000ms是很大的,你可以减少它)。

pthread_exit()会导致问题,并使响应出错。

#include "stdafx.hpp"
#include <iostream> //#include <stdio.h>
#include <curl/curl.h>
#include <pthread.h>
#include <map>
#include <string>
using namespace std;
// Flag meant to keep the output of different threads from overlapping.
// NOTE(review): nothing in this file waits on it (the wait loops are commented
// out), and as a plain bool it would not be safe for cross-thread signalling anyway.
bool printing = false; //will allow us to prevent prints overlapping each other
#if defined(__WIN32__) || defined(_WIN32) || defined(WIN32) || defined(__WINDOWS__) || defined(__TOS_WIN__)
  #include <windows.h>
  /* Blocks the calling thread for `ms` milliseconds using the Win32 Sleep call. */
  inline void delay( unsigned long ms )
    {
    Sleep( ms );
    }
#else  /* presume POSIX */
  #include <errno.h>
  #include <time.h>
  #include <unistd.h>
  /* Blocks the calling thread for at least `ms` milliseconds.
   *
   * nanosleep is used instead of usleep: POSIX only specifies usleep for
   * arguments below 1,000,000 microseconds, and `ms * 1000` could overflow.
   * Seconds and nanoseconds are passed separately, so neither limit applies. */
  inline void delay( unsigned long ms )
    {
    struct timespec remaining;
    remaining.tv_sec  = static_cast<time_t>( ms / 1000UL );
    remaining.tv_nsec = static_cast<long>( ms % 1000UL ) * 1000000L;
    /* A signal can end the sleep early; nanosleep then stores the time that is
     * still left in its second argument, so simply resume with that. */
    while ( nanosleep( &remaining, &remaining ) == -1 && errno == EINTR )
      {
      }
    }
#endif

/**
 * Argument pack for one request.
 *
 * A thread start routine only receives one void*, so all per-request values
 * are collected here and the struct's address is handed to the thread.
 */
struct requestStruct {
    int id;                  /* tag chosen by the caller; printed with the response */
    const char* url;         /* where to send the request */
    const char* method;      /* HTTP verb such as "GET" */
    const char* body;        /* request payload */
    std::map<const char*, const char*> headers;  /* request headers: name -> value */
    const char* proxy;       /* proxy address, "" for none */
    int timeout;             /* passed on as HttpRequest()'s timeout argument */
};
/**
 * Return value of HttpRequest(): groups the values produced by one request.
 */
struct responseStruct {
    long statusCode;         /* HTTP status code */
    /* std::map<const char*, const char*> headers;  -- response headers, not filled in yet */
    const char* body;        /* response text; raw pointer, not owned by this struct */
};
/**
 * libcurl write callback (CURLOPT_WRITEFUNCTION): appends one received chunk
 * to a std::string.
 *
 * @param ptr    start of the chunk; NOT NUL-terminated and may contain 0 bytes
 * @param size   size of one element
 * @param count  number of elements, so the chunk is size*count bytes long
 * @param stream the std::string* registered through CURLOPT_WRITEDATA
 * @return number of bytes consumed (size*count); 0 if a pointer is missing,
 *         which libcurl treats as a write error for a non-empty chunk
 */
size_t writeToString(void *ptr, size_t size, size_t count, void *stream) {
    const size_t bytes = size * count;
    if (ptr == NULL || stream == NULL)
        return 0;
    // Use the (pointer, length) overload. The previous three-argument call built
    // a temporary std::string from `ptr` as if it were a C string, reading until
    // the first 0 byte instead of exactly `bytes` bytes.
    static_cast<std::string*>(stream)->append(static_cast<const char*>(ptr), bytes);
    return bytes;
}
/**
 * Prints the outcome of one request:
 *   "<id> response received! Code: <status>" followed by the response body.
 *
 * The print is done while holding a mutex, so messages from threads that call
 * this function concurrently cannot interleave. This replaces the former
 * `printing` flag, whose wait loop was commented out and which, as a plain
 * bool written from several threads, could not serialise anything.
 *
 * @param id       tag of the request that finished
 * @param response result to print; a NULL pointer is ignored
 * @return always NULL
 */
static void *ResponseCallback(int id, struct responseStruct *response) {
    // Statically initialised, so no separate setup call is needed.
    static pthread_mutex_t printLock = PTHREAD_MUTEX_INITIALIZER;

    if (response == NULL)
        return NULL;

    const long statusCode = response->statusCode;
    // Streaming a null char pointer is undefined; print an empty body instead.
    const char* body = (response->body != NULL) ? response->body : "";

    pthread_mutex_lock(&printLock);
    std::cout << id << " response received! Code: " << statusCode << std::endl << body << std::endl;
    pthread_mutex_unlock(&printLock);
    return NULL;
}
struct responseStruct HttpRequest(const char* url, const char* method, const char* body, map<const char*, const char*> &headers, const char* proxy, long timeout) {
    CURL *curl;
    curl = curl_easy_init();
    long statusCode = 0;
    map<const char*, const char*> respHeaders;
    string respBody;
    string _url(url);
    string _method(method);
    string _proxy(proxy);
    struct curl_slist *headerList = NULL;
    string headerString;
    curl_easy_setopt(curl, CURLOPT_URL, url); //set url
    curl_easy_setopt(curl, CURLOPT_CUSTOMREQUEST, method); //set method
    for (std::map<const char*, const char*>::iterator header=headers.begin(); header!=headers.end(); ++header) { //make header list
        headerString = header->first;
        headerString.append(": ").append(header->second);
        headerList = curl_slist_append(headerList, headerString.c_str()); 
        //cout << headerString << 'n';
    }
    curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headerList); //set headers
    if (_method == "POST" || _method == "PUT" || _method == "DELETE") //set body if the request method would allow it
        curl_easy_setopt(curl, CURLOPT_POSTFIELDS, body);
    if (_url.find(string("https://")) != string::npos) //set ssl verifypeer if it's an https url
        curl_easy_setopt(curl, CURLOPT_SSL_VERIFYPEER, 0L);
    if (_proxy != "") //set proxy
        curl_easy_setopt(curl, CURLOPT_PROXY, proxy);
    if (timeout != 0) //set timeout
        curl_easy_setopt(curl, CURLOPT_TIMEOUT, timeout);
    curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L); //follow redirects
    //curl_easy_setopt(curl, CURLOPT_HEADERFUNCTION, writeToString);
    //curl_easy_setopt(curl, CURLOPT_WRITEHEADER, &respHeaders); //to receive response headers
    //??
    curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, writeToString);
    curl_easy_setopt(curl, CURLOPT_WRITEDATA, &respBody); //to receive response body
    static int i=0;
    delay(5000*(i++));
    std::cout << "url: " << _url << ";" << std::endl;
    curl_easy_perform(curl); //send the request
    curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &statusCode); //get status code
    struct responseStruct response;
    response.statusCode = statusCode;
    //response.headers;
    response.body = respBody.c_str();
    curl_easy_cleanup(curl);
    return response;
}
/**
 * Thread start routine for one request.
 *
 * @param arguments pointer to the requestStruct describing the request; it must
 *                  stay alive and unmodified until the thread has finished.
 *                  A NULL pointer makes the thread end immediately.
 * @return always NULL
 *
 * The header map is passed to HttpRequest() directly instead of being copied
 * first; the copy served no purpose.
 */
static void *AddRequest( void *arguments ) {
    if (arguments == NULL)
        return NULL;

    struct requestStruct *args = static_cast<struct requestStruct*>(arguments);

    struct responseStruct response = HttpRequest(args->url, args->method, args->body,
                                                 args->headers, args->proxy, args->timeout);
    ResponseCallback(args->id, &response);

    // End the thread by returning. The author found that calling pthread_exit(0)
    // here caused trouble (no response code), so it is deliberately not used.
    return NULL;
}
int main() {
    //map<const char*, const char*> headers;
    //headers["User-Agent"] = "Mozilla/5.0 (Windows NT 6.2; WOW64; rv:32.0) Gecko/20100101 Firefox/32.0";
    //struct responseStruct response = HttpRequest("https://facebook.com", "GET", "", headers, "localhost:8888", 6000);
    //cout << response.body << endl;
    pthread_t threads[3];
    struct requestStruct reqArguments[3];
    map<const char*, const char*> headers;
    headers["User-Agent"] = "Mozilla/5.0 (Windows NT 6.2; WOW64; rv:32.0) Gecko/20100101 Firefox/32.0";
    const char* proxy = "";
    reqArguments[0].id = 0;
    reqArguments[0].url = "https://www.duckduckgo.com/";
    reqArguments[0].method = "GET";
    reqArguments[0].headers = headers;
    reqArguments[0].body = "";
    reqArguments[0].proxy = proxy;
    reqArguments[0].timeout = 6000;
    pthread_create(&threads[0], NULL, &AddRequest, (void *)&reqArguments[0]); //create a thread on AddRequest() passing a full struct of arguments
    reqArguments[1].id = 1;
    reqArguments[1].url = "https://www.google.com/";
    reqArguments[1].method = "GET";
    reqArguments[1].headers = headers;
    reqArguments[1].body = "";
    reqArguments[1].proxy = proxy;
    reqArguments[1].timeout = 6000;
    pthread_create(&threads[1], NULL, &AddRequest, (void *)&reqArguments[1]); //create a thread on AddRequest() passing a full struct of arguments
    reqArguments[2].id = 2;
    reqArguments[2].url = "https://www.facebook.com/";
    reqArguments[2].method = "GET";
    reqArguments[2].headers = headers;
    reqArguments[2].body = "";
    reqArguments[2].proxy = proxy;
    reqArguments[2].timeout = 6000;
    pthread_create(&threads[2], NULL, &AddRequest, (void *)&reqArguments[2]); //create a thread on AddRequest() passing a full struct of arguments
    //        getchar();
    // that is cleaner
    for (int i=0; i<3; ++i) {
      int rc = pthread_join(threads[i], NULL);
      printf("In main: thread %d is completen", i);
    }
    return 0;
}

至于最后那个关于响应标头的问题,请在stackoverflow上另开一个问题发布,因为(我认为)这一个问题里涉及的主题已经太多了。

还有一点建议:使用对象(面向对象的写法)会让源代码更容易编写和阅读。

结束编辑

这是使用libcurl进行多线程处理的官方示例的副本:http://curl.haxx.se/libcurl/c/multithread.html

/***************************************************************************
 *                                  _   _ ____  _
 *  Project                     ___| | | |  _ \| |
 *                             / __| | | | |_) | |
 *                            | (__| |_| |  _ <| |___
 *                             \___|\___/|_| \_\_____|
 *
 * Copyright (C) 1998 - 2011, Daniel Stenberg, <daniel@haxx.se>, et al.
 *
 * This software is licensed as described in the file COPYING, which
 * you should have received as part of this distribution. The terms
 * are also available at http://curl.haxx.se/docs/copyright.html.
 *
 * You may opt to use, copy, modify, merge, publish, distribute and/or sell
 * copies of the Software, and permit persons to whom the Software is
 * furnished to do so, under the terms of the COPYING file.
 *
 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
 * KIND, either express or implied.
 *
 ***************************************************************************/ 
/* A multi-threaded example that uses pthreads extensively to fetch
 * X remote files at once */ 
#include <stdio.h>
#include <pthread.h>
#include <curl/curl.h>
#define NUMT 4
/*
  List of URLs to fetch.
  If you intend to use a SSL-based protocol here you MUST setup the OpenSSL
  callback functions as described here:
  http://www.openssl.org/docs/crypto/threads.html#DESCRIPTION
*/ 
/* One entry per worker thread: main() starts NUMT threads and passes urls[i] to
 * thread i. The entries mix schemes (http, ftp); the last one has no scheme. */
const char * const urls[NUMT]= {
  "http://curl.haxx.se/",
  "ftp://cool.haxx.se/",
  "http://www.contactor.se/",
  "www.haxx.se"
};
/* Thread start routine: fetches the URL passed as the thread argument with a
 * fresh easy handle and releases the handle again. No write callback is set
 * and the transfer result is not inspected. Always returns NULL. */
static void *pull_one_url(void *url)
{
  CURL *handle = curl_easy_init();

  curl_easy_setopt(handle, CURLOPT_URL, url);
  (void)curl_easy_perform(handle); /* result deliberately ignored */
  curl_easy_cleanup(handle);

  return NULL;
}

/*
   int pthread_create(pthread_t *new_thread_ID,
   const pthread_attr_t *attr,
   void * (*start_func)(void *), void *arg);
*/ 
/* Starts NUMT threads, each fetching one entry of urls[], and waits for them.
 *
 * The "\n" escapes in the messages are restored (they had been reduced to a
 * trailing 'n'), and a thread is only joined if its pthread_create succeeded,
 * because tid[i] is not a valid thread id otherwise. Always returns 0. */
int main(int argc, char **argv)
{
  pthread_t tid[NUMT];
  int started[NUMT];
  int i;
  int error;

  (void)argc;
  (void)argv;

  /* Must initialize libcurl before any threads are started */ 
  curl_global_init(CURL_GLOBAL_ALL);
  for(i=0; i< NUMT; i++) {
    error = pthread_create(&tid[i],
                           NULL, /* default attributes please */ 
                           pull_one_url,
                           (void *)urls[i]);
    started[i] = (0 == error);
    if(0 != error)
      fprintf(stderr, "Couldn't run thread number %d, errno %d\n", i, error);
    else
      fprintf(stderr, "Thread %d, gets %s\n", i, urls[i]);
  }
  /* now wait for all threads to terminate */ 
  for(i=0; i< NUMT; i++) {
    if(!started[i])
      continue;
    error = pthread_join(tid[i], NULL);
    fprintf(stderr, "Thread %d terminated\n", i);
  }
  return 0;
}

对于交互式使用,您可以将urlsArray转换为向量。

我希望它能帮助你!