bes Updated for version 3.20.10
CurlHandlePool.cc
1// -*- mode: c++; c-basic-offset:4 -*-
2
3// This file is part of the BES
4
5// Copyright (c) 2018 OPeNDAP, Inc.
6// Author: James Gallagher<jgallagher@opendap.org>
7//
8// This library is free software; you can redistribute it and/or
9// modify it under the terms of the GNU Lesser General Public
10// License as published by the Free Software Foundation; either
11// version 2.1 of the License, or (at your option) any later version.
12//
13// This library is distributed in the hope that it will be useful,
14// but WITHOUT ANY WARRANTY; without even the implied warranty of
15// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16// Lesser General Public License for more details.
17//
18// You should have received a copy of the GNU Lesser General Public
19// License along with this library; if not, write to the Free Software
20// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21//
22// You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
23
24#include "config.h"
25
26#include <string>
27#include <locale>
28#include <sstream>
29
30#include <cstring>
31#include <unistd.h>
32
33#include <curl/curl.h>
34
35#include "CurlUtils.h"
36#include "HttpNames.h"
37
38#include <time.h>
39
40#include <libdap/util.h> // long_to_string()
41
42#include "BESLog.h"
43#include "BESDebug.h"
44#include "BESInternalError.h"
45#include "BESForbiddenError.h"
46#include "AllowedHosts.h"
47
48#include "DmrppCommon.h"
49#include "DmrppNames.h"
50#include "awsv4.h"
51#include "CurlHandlePool.h"
52#include "Chunk.h"
53#include "CredentialsManager.h"
54#include "AccessCredentials.h"
55
56#define KEEP_ALIVE 1 // Reuse libcurl easy handles (1) or not (0).
57#define CURL_VERBOSE 0 // Logs curl info to the bes.log
58
59#define prolog std::string("CurlHandlePool::").append(__func__).append("() - ")
60
61#if 0
62// Shutdown this block of unused variables. ndp - 3/1/21
63//static const int MAX_WAIT_MSECS = 30 * 1000; // Wait max. 30 seconds
64//static const unsigned int retry_limit = 10; // Amazon's suggestion
65//static const useconds_t uone_second = 1000 * 1000; // one second, in microseconds
66//namespace dmrpp {
67//const bool have_curl_multi_api = false;
68//}
69#endif
70
71using namespace dmrpp;
72using namespace std;
73
/**
 * @brief Map an errno-style pthread mutex status code to explanatory text.
 *
 * @param err Status value returned by a pthread mutex call.
 * @return The message for EINVAL, EBUSY, EAGAIN, EDEADLK or EPERM; any other
 * value yields a generic "unknown" message.
 */
std::string pthread_error(unsigned int err){
    switch (err) {
    case EINVAL:
        return "The mutex was either created with the "
               "protocol attribute having the value "
               "PTHREAD_PRIO_PROTECT and the calling "
               "thread's priority is higher than the "
               "mutex's current priority ceiling."
               "OR The value specified by mutex does not "
               "refer to an initialized mutex object.";

    case EBUSY:
        return "The mutex could not be acquired "
               "because it was already locked.";

    case EAGAIN:
        return "The mutex could not be acquired because "
               "the maximum number of recursive locks "
               "for mutex has been exceeded.";

    case EDEADLK:
        return "The current thread already owns the mutex";

    case EPERM:
        return "The current thread does not own the mutex.";

    default:
        // Anything this table does not know about.
        return "Unknown pthread error type.";
    }
}
113
114Lock::Lock(pthread_mutex_t &lock) : m_mutex(lock) {
115 int status = pthread_mutex_lock(&m_mutex);
116 if (status != 0)
117 throw BESInternalError(prolog + "Failed to acquire mutex lock. msg: " + pthread_error(status), __FILE__, __LINE__);
118}
119
120Lock::~Lock() {
121 int status = pthread_mutex_unlock(&m_mutex);
122 if (status != 0)
123 ERROR_LOG(prolog + "Failed to release mutex lock. msg: " + pthread_error(status));
124}
125
#if 0
/**
 * Build a multi-line text rendering of a byte buffer: a heading line followed
 * by one row per 16 bytes, each row holding the offset, a per-byte column and
 * a printable-character column.
 *
 * NOTE(review): the per-byte column inserts an unsigned char into the stream,
 * which writes the character itself rather than a two-digit hex value; the
 * fprintf() calls this was ported from used "%02x". Confirm before enabling.
 *
 * @param text Label written at the start of the heading line.
 * @param ptr The bytes to render.
 * @param size Number of bytes available at ptr.
 * @return The rendered text.
 */
static
string dump(const char *text, unsigned char *ptr, size_t size)
{
    const unsigned int bytes_per_row = 0x10;

    ostringstream out;
    out << text << ", " << std::setw(10) << (long)size << std::setbase(16) << (long)size << endl;

    for (size_t row = 0; row < size; row += bytes_per_row) {
        // Offset of the first byte in this row.
        out << std::setw(4) << (long)row;

        // Left-hand column: one entry per byte, filler once past the end.
        for (size_t col = 0; col < bytes_per_row; ++col) {
            const size_t at = row + col;
            if (at >= size)
                out << " ";
            else
                out << std::setw(2) << ptr[at];
        }

        // Right-hand column: the byte if it is in [0x20, 0x80), otherwise '.'.
        for (size_t col = 0; col < bytes_per_row && row + col < size; ++col) {
            const unsigned char byte = ptr[row + col];
            const char shown = (byte >= 0x20 && byte < 0x80) ? byte : '.';
            out << std::setw(1) << shown;
        }

        out << endl;
    }

    return out.str();
}
#endif
172
#if CURL_VERBOSE
/**
 * @brief libcurl debug callback that writes informational text and request/
 * response headers to the BES log.
 *
 * Only CURLINFO_TEXT, CURLINFO_HEADER_OUT and CURLINFO_HEADER_IN are logged;
 * body and SSL data callbacks are ignored. The logged text is cut at the
 * first newline.
 *
 * @param type Kind of debug information supplied by libcurl.
 * @param data Start of the debug bytes. libcurl does not null-terminate this
 * buffer, so it is always read together with \c size.
 * @param size Number of bytes available at \c data.
 * @return Always 0.
 */
static
int curl_trace(CURL */*handle*/, curl_infotype type, char *data, size_t size, void */*userp*/)
{
    // Decide up front whether this callback carries something worth logging.
    const bool loggable = (type == CURLINFO_TEXT)
                          || (type == CURLINFO_HEADER_OUT)
                          || (type == CURLINFO_HEADER_IN);
    if (!loggable)
        return 0;

    // Copy exactly 'size' bytes; the buffer is not a C string.
    std::string text;
    if (data != nullptr && size > 0)
        text.assign(data, size);

    // Keep only what precedes the first newline.
    const std::string::size_type eol = text.find('\n');
    if (eol != std::string::npos)
        text.erase(eol);

    switch (type) {
    case CURLINFO_TEXT:
        LOG("libcurl == Info: " << text << std::endl);
        break;

    case CURLINFO_HEADER_OUT:
        LOG("libcurl == Send header: " << text << std::endl);
        break;

    case CURLINFO_HEADER_IN:
        LOG("libcurl == Recv header: " << text << std::endl);
        break;

    default: /* unreachable: filtered by 'loggable' above */
        break;
    }

    return 0;
}
#endif
229
230 // FIXME - This code does not make a cURL handle that follows links and I think that's a bug!
231dmrpp_easy_handle::dmrpp_easy_handle() : d_url(nullptr), d_request_headers(nullptr) {
232
233 CURLcode res;
234
235 d_handle = curl_easy_init();
236 if (!d_handle) throw BESInternalError("Could not allocate CURL handle", __FILE__, __LINE__);
237
238 curl::set_error_buffer(d_handle, d_errbuf);
239
240 res = curl_easy_setopt(d_handle, CURLOPT_SSLVERSION, CURL_SSLVERSION_TLSv1_2);
241 curl::eval_curl_easy_setopt_result(res, prolog, "CURLOPT_SSLVERSION", d_errbuf, __FILE__, __LINE__);
242
243
244#if CURL_VERBOSE
245 res = curl_easy_setopt(d_handle, CURLOPT_DEBUGFUNCTION, curl_trace);
246 curl::check_setopt_result(res, prolog, "CURLOPT_DEBUGFUNCTION", d_errbuf, __FILE__, __LINE__);
247 // Many tests fail with this option, but it's still useful to see how connections
248 // are treated. jhrg 10/2/18
249 res = curl_easy_setopt(d_handle, CURLOPT_VERBOSE, 1L);
250 curl::check_setopt_result(res, prolog, "CURLOPT_VERBOSE", d_errbuf, __FILE__, __LINE__);
251#endif
252
253 res = curl_easy_setopt(d_handle, CURLOPT_HEADERFUNCTION, chunk_header_callback);
254 curl::eval_curl_easy_setopt_result(res, prolog, "CURLOPT_HEADERFUNCTION", d_errbuf, __FILE__, __LINE__);
255
256 // Pass all data to the 'write_data' function
257 res = curl_easy_setopt(d_handle, CURLOPT_WRITEFUNCTION, chunk_write_data);
258 curl::eval_curl_easy_setopt_result(res, prolog, "CURLOPT_WRITEFUNCTION", d_errbuf, __FILE__, __LINE__);
259
260#ifdef CURLOPT_TCP_KEEPALIVE
261 /* enable TCP keep-alive for this transfer */
262 res = curl_easy_setopt(d_handle, CURLOPT_TCP_KEEPALIVE, 1L);
263 curl::check_setopt_result(res, prolog, "CURLOPT_TCP_KEEPALIVE", d_errbuf, __FILE__, __LINE__);
264#endif
265
266#ifdef CURLOPT_TCP_KEEPIDLE
267 /* keep-alive idle time to 120 seconds */
268 res = curl_easy_setopt(d_handle, CURLOPT_TCP_KEEPIDLE, 120L);
269 curl::check_setopt_result(res, prolog, "CURLOPT_TCP_KEEPIDLE", d_errbuf, __FILE__, __LINE__);
270#endif
271
272#ifdef CURLOPT_TCP_KEEPINTVL
273 /* interval time between keep-alive probes: 120 seconds */
274 res = curl_easy_setopt(d_handle, CURLOPT_TCP_KEEPINTVL, 120L)
275 curl::check_setopt_result(res, prolog, "CURLOPT_TCP_KEEPINTVL", d_errbuf, __FILE__, __LINE__);
276#endif
277
278 d_in_use = false;
279 d_chunk = 0;
280}
281
282dmrpp_easy_handle::~dmrpp_easy_handle() {
283 if (d_handle) curl_easy_cleanup(d_handle);
284 if (d_request_headers) curl_slist_free_all(d_request_headers);
285}
286
300 // Treat HTTP/S requests specially; retry some kinds of failures.
301 if (d_url->protocol() == HTTPS_PROTOCOL || d_url->protocol() == HTTP_PROTOCOL) {
302 curl::super_easy_perform(d_handle);
303 }
304 else {
305 CURLcode curl_code = curl_easy_perform(d_handle);
306 if (CURLE_OK != curl_code) {
307 string msg = prolog + "ERROR - Data transfer error: ";
308 throw BESInternalError(msg.append(curl::error_message(curl_code, d_errbuf)), __FILE__, __LINE__);
309 }
310 }
311
312 d_chunk->set_is_read(true);
313}
314
315#if 0
316// This implementation of the default constructor should have:
317// a) Utilized the other constructor:
318// CurlHandlePool::CurlHandlePool() { CurlHandlePool(DmrppRequestHandler::d_max_transfer_threads); }
319// rather than duplicating the logic.
320// b) Been skipped, because the only code that called it in the first place was DmrppRequestHandler::DmrppRequestHandler()
321// which already owns DmrppRequestHandler::d_max_transfer_threads and can pass it in.
322//
323//
324// Old default constructor. Duplicates logic.
325//
326CurlHandlePool::CurlHandlePool() {
327 d_max_easy_handles = DmrppRequestHandler::d_max_transfer_threads;
328
329 for (unsigned int i = 0; i < d_max_easy_handles; ++i) {
330 d_easy_handles.push_back(new dmrpp_easy_handle());
331 }
332 unsigned int status = pthread_mutex_init(&d_get_easy_handle_mutex, 0);
333 if (status != 0)
334 throw BESInternalError("Could not initialize mutex in CurlHandlePool. msg: " + pthread_error(status), __FILE__, __LINE__);
335}
336//
337// One alternate would be to do this for the default constructor:
338CurlHandlePool::CurlHandlePool() {
340}
341//
342// - ndp 12/02/20
343#endif
344
345
346CurlHandlePool::CurlHandlePool(unsigned int max_handles) : d_max_easy_handles(max_handles) {
347 for (unsigned int i = 0; i < d_max_easy_handles; ++i) {
348 d_easy_handles.push_back(new dmrpp_easy_handle());
349 }
350
351 unsigned int status = pthread_mutex_init(&d_get_easy_handle_mutex, 0);
352 if (status != 0)
353 throw BESInternalError("Could not initialize mutex in CurlHandlePool. msg: " + pthread_error(status), __FILE__, __LINE__);
354}
355
#if 0
/**
 * Append "<header> <value>" to a curl_slist.
 *
 * @param slist The list to extend; passed straight to curl_slist_append().
 * @param header Text placed before the separating space.
 * @param value Text placed after the separating space.
 * @return Whatever curl_slist_append() returns.
 */
static struct curl_slist *append_http_header(curl_slist *slist, const string &header, const string &value) {
    const string line = header + " " + value;
    return curl_slist_append(slist, line.c_str());
}
#endif
376
377
394 // Here we check to make sure that we are only going to
395 // access an approved location with this easy_handle
396 string reason = "The requested resource does not match any of the AllowedHost rules.";
397; if (!http::AllowedHosts::theHosts()->is_allowed(chunk->get_data_url(),reason)) {
398 stringstream ss;
399 ss << "ERROR! The chunk url "<< chunk->get_data_url()->str() << " was rejected because: " << reason;
400 throw BESForbiddenError(ss.str(), __FILE__, __LINE__);
401 }
402
403 Lock lock(d_get_easy_handle_mutex); // RAII
404
405 dmrpp_easy_handle *handle = 0;
406 for (auto i = d_easy_handles.begin(), e = d_easy_handles.end(); i != e; ++i) {
407 if (!(*i)->d_in_use) {
408 handle = *i;
409 break;
410 }
411 }
412
413 if (handle) {
414 // Once here, d_easy_handle holds a CURL* we can use.
415 handle->d_in_use = true;
416 handle->d_url = chunk->get_data_url();
417
418 handle->d_chunk = chunk;
419
420 CURLcode res = curl_easy_setopt(handle->d_handle, CURLOPT_URL, chunk->get_data_url()->str().c_str());
421 curl::eval_curl_easy_setopt_result(res, prolog, "CURLOPT_URL", handle->d_errbuf, __FILE__, __LINE__);
422
423 // get the offset to offset + size bytes
424 res = curl_easy_setopt(handle->d_handle, CURLOPT_RANGE, chunk->get_curl_range_arg_string().c_str());
425 curl::eval_curl_easy_setopt_result(res, prolog, "CURLOPT_RANGE", handle->d_errbuf, __FILE__, __LINE__);
426
427 // Pass this to chunk_header_callback as the fourth argument
428 res = curl_easy_setopt(handle->d_handle, CURLOPT_HEADERDATA, reinterpret_cast<void *>(chunk));
429 curl::eval_curl_easy_setopt_result(res, prolog, "CURLOPT_HEADERDATA", handle->d_errbuf, __FILE__, __LINE__);
430
431 // Pass this to chunk_write_data as the fourth argument
432 res = curl_easy_setopt(handle->d_handle, CURLOPT_WRITEDATA, reinterpret_cast<void *>(chunk));
433 curl::eval_curl_easy_setopt_result(res, prolog, "CURLOPT_WRITEDATA", handle->d_errbuf, __FILE__, __LINE__);
434
435 // store the easy_handle so that we can call release_handle in multi_handle::read_data()
436 res = curl_easy_setopt(handle->d_handle, CURLOPT_PRIVATE, reinterpret_cast<void *>(handle));
437 curl::eval_curl_easy_setopt_result(res, prolog, "CURLOPT_PRIVATE", handle->d_errbuf, __FILE__, __LINE__);
438
439 // Enabled cookies
440 res = curl_easy_setopt(handle->d_handle, CURLOPT_COOKIEFILE, curl::get_cookie_filename().c_str());
441 curl::eval_curl_easy_setopt_result(res, prolog, "CURLOPT_COOKIEFILE", handle->d_errbuf, __FILE__, __LINE__);
442
443 res = curl_easy_setopt(handle->d_handle, CURLOPT_COOKIEJAR, curl::get_cookie_filename().c_str());
444 curl::eval_curl_easy_setopt_result(res, prolog, "CURLOPT_COOKIEJAR", handle->d_errbuf, __FILE__, __LINE__);
445
446 // Follow 302 (redirect) responses
447 res = curl_easy_setopt(handle->d_handle, CURLOPT_FOLLOWLOCATION, 1);
448 curl::eval_curl_easy_setopt_result(res, prolog, "CURLOPT_FOLLOWLOCATION", handle->d_errbuf, __FILE__, __LINE__);
449
450 res = curl_easy_setopt(handle->d_handle, CURLOPT_MAXREDIRS, curl::max_redirects());
451 curl::eval_curl_easy_setopt_result(res, prolog, "CURLOPT_MAXREDIRS", handle->d_errbuf, __FILE__, __LINE__);
452
453 // Set the user agent something otherwise TEA will never redirect to URS.
454 res = curl_easy_setopt(handle->d_handle, CURLOPT_USERAGENT, curl::hyrax_user_agent().c_str());
455 curl::eval_curl_easy_setopt_result(res, prolog, "CURLOPT_USERAGENT", handle->d_errbuf, __FILE__, __LINE__);
456
457 // This means libcurl will use Basic, Digest, GSS Negotiate, or NTLM,
458 // choosing the 'safest' one supported by the server.
459 // This requires curl 7.10.6 which is still in pre-release. 07/25/03 jhrg
460 res = curl_easy_setopt(handle->d_handle, CURLOPT_HTTPAUTH, (long) CURLAUTH_ANY);
461 curl::eval_curl_easy_setopt_result(res, prolog, "CURLOPT_HTTPAUTH", handle->d_errbuf, __FILE__, __LINE__);
462
463 // Enable using the .netrc credentials file.
464 res = curl_easy_setopt(handle->d_handle, CURLOPT_NETRC, CURL_NETRC_OPTIONAL);
465 curl::eval_curl_easy_setopt_result(res, prolog, "CURLOPT_NETRC", handle->d_errbuf, __FILE__, __LINE__);
466
467 // If the configuration specifies a particular .netrc credentials file, use it.
468 // TODO move this operation into constructor and stash the value.
469 string netrc_file = curl::get_netrc_filename();
470 if (!netrc_file.empty()) {
471 res = curl_easy_setopt(handle->d_handle, CURLOPT_NETRC_FILE, netrc_file.c_str());
472 curl::eval_curl_easy_setopt_result(res, prolog, "CURLOPT_NETRC_FILE", handle->d_errbuf, __FILE__, __LINE__);
473 }
474 //VERBOSE(__FILE__ << "::get_easy_handle() is using the netrc file '"
475 //<< ((!netrc_file.empty()) ? netrc_file : "~/.netrc") << "'" << endl);
476
477 AccessCredentials *credentials = CredentialsManager::theCM()->get(handle->d_url);
478 if (credentials && credentials->is_s3_cred()) {
479 BESDEBUG(DMRPP_CURL,
480 prolog << "Got AccessCredentials instance: " << endl << credentials->to_json() << endl);
481 // If there are available credentials, and they are S3 credentials then we need to sign
482 // the request
483 const std::time_t request_time = std::time(0);
484
485 const std::string auth_header =
486 AWSV4::compute_awsv4_signature(
487 handle->d_url,
488 request_time,
489 credentials->get(AccessCredentials::ID_KEY),
490 credentials->get(AccessCredentials::KEY_KEY),
491 credentials->get(AccessCredentials::REGION_KEY),
492 "s3");
493
494
495 handle->d_request_headers = curl::append_http_header((curl_slist *)0, "Authorization", auth_header);
496 handle->d_request_headers = curl::append_http_header(handle->d_request_headers, "x-amz-content-sha256",
497 "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855");
498 handle->d_request_headers = curl::append_http_header(handle->d_request_headers, "x-amz-date", AWSV4::ISO8601_date(request_time));
499#if 0
500
501 // passing nullptr for the first call allocates the curl_slist
502 // The following code builds the slist that holds the headers. This slist is freed
503 // once the URL is dereferenced in dmrpp_easy_handle::read_data(). jhrg 11/26/19
504 handle->d_request_headers = append_http_header(0, "Authorization:", auth_header);
505 if (!handle->d_request_headers)
506 throw BESInternalError(
507 string("CURL Error setting Authorization header: ").append(
508 curl::error_message(res, handle->d_errbuf)), __FILE__, __LINE__);
509
510 // We pre-compute the sha256 hash of a null message body
511 curl_slist *temp = append_http_header(handle->d_request_headers, "x-amz-content-sha256:",
512 "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855");
513 if (!temp)
514 throw BESInternalError(
515 string("CURL Error setting x-amz-content-sha256: ").append(
516 curl::error_message(res, handle->d_errbuf)),
517 __FILE__, __LINE__);
518 handle->d_request_headers = temp;
519
520 temp = append_http_header(handle->d_request_headers, "x-amz-date:", AWSV4::ISO8601_date(request_time));
521 if (!temp)
522 throw BESInternalError(
523 string("CURL Error setting x-amz-date header: ").append(
524 curl::error_message(res, handle->d_errbuf)),
525 __FILE__, __LINE__);
526 handle->d_request_headers = temp;
527#endif
528
529 // handle->d_request_headers = curl::add_edl_auth_headers(handle->d_request_headers);
530
531 res = curl_easy_setopt(handle->d_handle, CURLOPT_HTTPHEADER, handle->d_request_headers);
532 curl::eval_curl_easy_setopt_result(res, prolog, "CURLOPT_HTTPHEADER", handle->d_errbuf, __FILE__, __LINE__);
533 }
534 }
535
536 return handle;
537}
538
546 // In get_easy_handle, it's possible that d_in_use could be false and d_chunk
547 // could not be set to 0 (because a separate thread could be running these
548 // methods). In that case, the thread running get_easy_handle could set d_chunk,
549 // and then this thread could clear it (... unlikely, but an optimizing compiler is
550 // free to reorder statements so long as they don't alter the function's behavior).
551 // Timing tests indicate this lock does not cost anything that can be measured.
552 // jhrg 8/21/18
553 Lock lock(d_get_easy_handle_mutex);
554
555 // TODO Add a call to curl reset() here. jhrg 9/23/20
556
557#if KEEP_ALIVE
558 handle->d_url = nullptr;
559 handle->d_chunk = 0;
560 handle->d_in_use = false;
561#else
562 // This is to test the effect of libcurl Keep Alive support
563 // Find the handle; erase from the vector; delete; allocate a new handle and push it back on
564 for (std::vector<dmrpp_easy_handle *>::iterator i = d_easy_handles.begin(), e = d_easy_handles.end(); i != e; ++i) {
565 if (*i == handle) {
566 BESDEBUG("dmrpp:5", "Found a handle match for the " << i - d_easy_handles.begin() << "th easy handle." << endl);
567 delete handle;
568 *i = new dmrpp_easy_handle();
569 break;
570 }
571 }
572#endif
573}
574
581 for (std::vector<dmrpp_easy_handle *>::iterator i = d_easy_handles.begin(), e = d_easy_handles.end(); i != e; ++i) {
582 if ((*i)->d_chunk == chunk) {
583 release_handle(*i);
584 break;
585 }
586 }
587}
588
597 for (std::vector<dmrpp_easy_handle *>::iterator i = d_easy_handles.begin(), e = d_easy_handles.end(); i != e; ++i) {
598 release_handle(*i);
599 }
600}
virtual std::string get(const std::string &key)
virtual bool is_s3_cred()
Do the URL, ID, Key and Region items make up an S3 Credential?
error thrown if the BES is not allowed to access the resource requested
exception thrown if internal error encountered
AccessCredentials * get(std::shared_ptr< http::url > &url)
static CredentialsManager * theCM()
Returns the singleton instance of the CredentialsManager.
virtual std::string get_curl_range_arg_string()
Returns a curl range argument. libcurl requires a string argument for range-get activities,...
Definition: Chunk.cc:509
virtual std::shared_ptr< http::url > get_data_url() const
Get the data url for this Chunk's data block.
Definition: Chunk.cc:903
void release_handle(dmrpp_easy_handle *h)
dmrpp_easy_handle * get_easy_handle(Chunk *chunk)
Add the given header & value to the curl slist.
Bundle a libcurl easy handle with other information.
void read_data()
This is the read_data() method for all transfers.
dmrpp_easy_handle()
Build a string with hex info about stuff libcurl gets.
static AllowedHosts * theHosts()
Static accessor for the singleton.
Definition: AllowedHosts.cc:69