libdap++  Updated for version 3.14.0
HTTPCache.cc
Go to the documentation of this file.
1 
2 // -*- mode: c++; c-basic-offset:4 -*-
3 
4 // This file is part of libdap, A C++ implementation of the OPeNDAP Data
5 // Access Protocol.
6 
7 // Copyright (c) 2002,2003 OPeNDAP, Inc.
8 // Author: James Gallagher <jgallagher@opendap.org>
9 //
10 // This library is free software; you can redistribute it and/or
11 // modify it under the terms of the GNU Lesser General Public
12 // License as published by the Free Software Foundation; either
13 // version 2.1 of the License, or (at your option) any later version.
14 //
15 // This library is distributed in the hope that it will be useful,
16 // but WITHOUT ANY WARRANTY; without even the implied warranty of
17 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 // Lesser General Public License for more details.
19 //
20 // You should have received a copy of the GNU Lesser General Public
21 // License along with this library; if not, write to the Free Software
22 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23 //
24 // You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
25 
26 #include "config.h"
27 
28 //#define DODS_DEBUG
29 // #define DODS_DEBUG2
30 #undef USE_GETENV
31 
32 #include <pthread.h>
33 #include <limits.h>
34 #include <unistd.h> // for stat
35 #include <sys/types.h> // for stat and mkdir
36 #include <sys/stat.h>
37 
38 #include <cstring>
39 #include <iostream>
40 #include <sstream>
41 #include <algorithm>
42 #include <iterator>
43 #include <set>
44 
45 #include "Error.h"
46 #include "InternalErr.h"
47 #include "ResponseTooBigErr.h"
48 #ifndef WIN32
49 #include "SignalHandler.h"
50 #endif
52 #include "HTTPCacheTable.h"
53 #include "HTTPCache.h"
54 #include "HTTPCacheMacros.h"
55 
56 #include "util_mit.h"
57 #include "debug.h"
58 
59 using namespace std;
60 
61 namespace libdap {
62 
63 HTTPCache *HTTPCache::_instance = 0;
64 
// instance_mutex is used to ensure that only one instance is created.
// That is, it protects the body of the HTTPCache::instance() method. This
// mutex is initialized from within the static function once_init_routine(),
// and that function is invoked through pthread_once(), where the
// once-control variable once_block guarantees it runs only one time. All of
// this ensures that no matter how many threads call the instance() method,
// only one instance is ever made.
72 static pthread_mutex_t instance_mutex;
73 static pthread_once_t once_block = PTHREAD_ONCE_INIT;
74 
75 
76 #define NO_LM_EXPIRATION 24*3600 // 24 hours
77 
78 #define DUMP_FREQUENCY 10 // Dump index every x loads
79 
80 #define MEGA 0x100000L
81 #define CACHE_TOTAL_SIZE 20 // Default cache size is 20M
82 #define CACHE_FOLDER_PCT 10 // 10% of cache size for metainfo etc.
83 #define CACHE_GC_PCT 10 // 10% of cache size free after GC
84 #define MIN_CACHE_TOTAL_SIZE 5 // 5M Min cache size
85 #define MAX_CACHE_ENTRY_SIZE 3 // 3M Max size of single cached entry
86 
87 static void
88 once_init_routine()
89 {
90  int status;
91  status = INIT(&instance_mutex);
92 
93  if (status != 0)
94  throw InternalErr(__FILE__, __LINE__, "Could not initialize the HTTP Cache mutex. Exiting.");
95 }
96 
125 HTTPCache *
126 HTTPCache::instance(const string &cache_root, bool force)
127 {
128  int status = pthread_once(&once_block, once_init_routine);
129  if (status != 0)
130  throw InternalErr(__FILE__, __LINE__, "Could not initialize the HTTP Cache mutex. Exiting.");
131 
132  LOCK(&instance_mutex);
133 
134  DBG(cerr << "Entering instance(); (" << hex << _instance << dec << ")"
135  << "... ");
136 
137  try {
138  if (!_instance) {
139  _instance = new HTTPCache(cache_root, force);
140 
141  DBG(cerr << "New instance: " << _instance << ", cache root: "
142  << _instance->d_cache_root << endl);
143 
144  atexit(delete_instance);
145 
146 #ifndef WIN32
147  // Register the interrupt handler. If we've already registered
148  // one, barf. If this becomes a problem, hack SignalHandler so
149  // that we can chain these handlers... 02/10/04 jhrg
150  //
151  // Technically we're leaking memory here. However, since this
152  // class is a singleton, we know that only three objects will
153  // ever be created and they will all exist until the process
154  // exits. We can let this slide... 02/12/04 jhrg
155  EventHandler *old_eh = SignalHandler::instance()->register_handler(SIGINT, new HTTPCacheInterruptHandler, true);
156  if (old_eh) {
157  SignalHandler::instance()->register_handler(SIGINT, old_eh);
159  "Could not register event handler for SIGINT without superseding an existing one.");
160  }
161 
162  old_eh = SignalHandler::instance()->register_handler(SIGPIPE, new HTTPCacheInterruptHandler, true);
163  if (old_eh) {
164  SignalHandler::instance()->register_handler(SIGPIPE, old_eh);
166  "Could not register event handler for SIGPIPE without superseding an existing one.");
167  }
168 
169  old_eh = SignalHandler::instance()->register_handler(SIGTERM, new HTTPCacheInterruptHandler, true);
170  if (old_eh) {
171  SignalHandler::instance()->register_handler(SIGTERM, old_eh);
173  "Could not register event handler for SIGTERM without superseding an existing one.");
174  }
175 #endif
176  }
177  }
178  catch (...) {
179  DBG2(cerr << "The constructor threw an Error!" << endl);
180  UNLOCK(&instance_mutex);
181  throw;
182  }
183 
184  UNLOCK(&instance_mutex);
185  DBGN(cerr << "returning " << hex << _instance << dec << endl);
186 
187  return _instance;
188 }
189 
193 void
194 HTTPCache::delete_instance()
195 {
196  DBG(cerr << "Entering delete_instance()..." << endl);
197 
198  if (HTTPCache::_instance) {
199  DBG(cerr << "Deleting the cache: " << HTTPCache::_instance << endl);
200  delete HTTPCache::_instance;
201  HTTPCache::_instance = 0;
202 
203  //Now remove the signal handlers
204  delete SignalHandler::instance()->remove_handler(SIGINT);
205  delete SignalHandler::instance()->remove_handler(SIGPIPE);
206  delete SignalHandler::instance()->remove_handler(SIGTERM);
207  }
208 
209  DBG(cerr << "Exiting delete_instance()" << endl);
210 }
211 
226 HTTPCache::HTTPCache(string cache_root, bool force) :
227  d_locked_open_file(0),
228  d_cache_enabled(false),
229  d_cache_protected(false),
230  d_expire_ignored(false),
231  d_always_validate(false),
232  d_total_size(CACHE_TOTAL_SIZE * MEGA),
233  d_folder_size(CACHE_TOTAL_SIZE / CACHE_FOLDER_PCT),
234  d_gc_buffer(CACHE_TOTAL_SIZE / CACHE_GC_PCT),
235  d_max_entry_size(MAX_CACHE_ENTRY_SIZE * MEGA),
236  d_default_expiration(NO_LM_EXPIRATION),
237  d_max_age(-1),
238  d_max_stale(-1),
239  d_min_fresh(-1),
240  d_http_cache_table(0)
241 {
242  DBG(cerr << "Entering the constructor for " << this << "... ");
243 #if 0
244  int status = pthread_once(&once_block, once_init_routine);
245  if (status != 0)
246  throw InternalErr(__FILE__, __LINE__, "Could not initialize the HTTP Cache mutex. Exiting.");
247 #endif
248  INIT(&d_cache_mutex);
249 
250  // This used to throw an Error object if we could not get the
251  // single user lock. However, that results in an invalid object. It's
252  // better to have an instance that has default values. If we cannot get
253  // the lock, make sure to set the cache as *disabled*. 03/12/03 jhrg
254  //
255  // I fixed this block so that the cache root is set before we try to get
256  // the single user lock. That was the fix for bug #661. To make that
257  // work, I had to move the call to create_cache_root out of
258  // set_cache_root(). 09/08/03 jhrg
259 
260  set_cache_root(cache_root);
261  int block_size;
262 
263  if (!get_single_user_lock(force))
264  throw Error("Could not get single user lock for the cache");
265 
266 #ifdef WIN32
267  // Windows is unable to provide us this information. 4096 appears
268  // a best guess. It is likely to be in the range [2048, 8192] on
269  // windows, but will the level of truth of that statement vary over
270  // time ?
271  block_size = 4096;
272 #else
273  struct stat s;
274  if (stat(cache_root.c_str(), &s) == 0)
275  block_size = s.st_blksize;
276  else
277  throw Error("Could not set file system block size.");
278 #endif
279  d_http_cache_table = new HTTPCacheTable(d_cache_root, block_size);
280  d_cache_enabled = true;
281 
282  DBGN(cerr << "exiting" << endl);
283 }
284 
298 {
299  DBG(cerr << "Entering the destructor for " << this << "... ");
300 
301  try {
302  if (startGC())
303  perform_garbage_collection();
304 
305  d_http_cache_table->cache_index_write();
306  }
307  catch (Error &e) {
308  // If the cache index cannot be written, we've got problems. However,
309  // unless we're debugging, still free up the cache table in memory.
310  // How should we let users know they cache index is not being
311  // written?? 10/03/02 jhrg
312  DBG(cerr << e.get_error_message() << endl);
313  }
314 
315  delete d_http_cache_table;
316 
317  release_single_user_lock();
318 
319  DBGN(cerr << "exiting destructor." << endl);
320  DESTROY(&d_cache_mutex);
321 }
322 
323 
327 
331 bool
332 HTTPCache::stopGC() const
333 {
334  return (d_http_cache_table->get_current_size() + d_folder_size < d_total_size - d_gc_buffer);
335 }
336 
343 bool
344 HTTPCache::startGC() const
345 {
346  DBG(cerr << "startGC, current_size: " << d_http_cache_table->get_current_size() << endl);
347  return (d_http_cache_table->get_current_size() + d_folder_size > d_total_size);
348 }
349 
364 void
365 HTTPCache::perform_garbage_collection()
366 {
367  DBG(cerr << "Performing garbage collection" << endl);
368 
369  // Remove all the expired responses.
370  expired_gc();
371 
372  // Remove entries larger than max_entry_size.
373  too_big_gc();
374 
375  // Remove entries starting with zero hits, 1, ..., until stopGC()
376  // returns true.
377  hits_gc();
378 }
379 
385 void
386 HTTPCache::expired_gc()
387 {
388  if (!d_expire_ignored) {
389  d_http_cache_table->delete_expired_entries();
390  }
391 }
392 
409 void
410 HTTPCache::hits_gc()
411 {
412  int hits = 0;
413 
414  if (startGC()) {
415  while (!stopGC()) {
416  d_http_cache_table->delete_by_hits(hits);
417  hits++;
418  }
419  }
420 }
421 
426 void HTTPCache::too_big_gc() {
427  if (startGC())
428  d_http_cache_table->delete_by_size(d_max_entry_size);
429 }
430 
432 
443 bool HTTPCache::get_single_user_lock(bool force)
444 {
445  if (!d_locked_open_file) {
446  FILE * fp = NULL;
447 
448  try {
449  // It's OK to call create_cache_root if the directory already
450  // exists.
451  create_cache_root(d_cache_root);
452  }
453  catch (Error &e) {
454  // We need to catch and return false because this method is
455  // called from a ctor and throwing at this point will result in a
456  // partially constructed object. 01/22/04 jhrg
457  DBG(cerr << "Failure to create the cache root" << endl);
458  return false;
459  }
460 
461  // Try to read the lock file. If we can open for reading, it exists.
462  string lock = d_cache_root + CACHE_LOCK;
463  if ((fp = fopen(lock.c_str(), "r")) != NULL) {
464  int res = fclose(fp);
465  if (res) {
466  DBG(cerr << "Failed to close " << (void *)fp << endl);
467  }
468  if (force)
469  REMOVE(lock.c_str());
470  else
471  return false;
472  }
473 
474  if ((fp = fopen(lock.c_str(), "w")) == NULL) {
475  DBG(cerr << "Could not open for write access" << endl);
476  return false;
477  }
478 
479  d_locked_open_file = fp;
480  return true;
481  }
482 
483  DBG(cerr << "locked_open_file is true" << endl);
484  return false;
485 }
486 
489 void
490 HTTPCache::release_single_user_lock()
491 {
492  if (d_locked_open_file) {
493  int res = fclose(d_locked_open_file);
494  if (res) {
495  DBG(cerr << "Failed to close " << (void *)d_locked_open_file << endl) ;
496  }
497  d_locked_open_file = 0;
498  }
499 
500  string lock = d_cache_root + CACHE_LOCK;
501  REMOVE(lock.c_str());
502 }
503 
506 
510 string
512 {
513  return d_cache_root;
514 }
515 
516 
525 void
526 HTTPCache::create_cache_root(const string &cache_root)
527 {
528  struct stat stat_info;
529  string::size_type cur = 0;
530 
531 #ifdef WIN32
532  cur = cache_root[1] == ':' ? 3 : 1;
533  typedef int mode_t;
534 #else
535  cur = 1;
536 #endif
537  while ((cur = cache_root.find(DIR_SEPARATOR_CHAR, cur)) != string::npos) {
538  string dir = cache_root.substr(0, cur);
539  if (stat(dir.c_str(), &stat_info) == -1) {
540  DBG2(cerr << "Cache....... Creating " << dir << endl);
541  mode_t mask = UMASK(0);
542  if (MKDIR(dir.c_str(), 0777) < 0) {
543  DBG2(cerr << "Error: can't create." << endl);
544  UMASK(mask);
545  throw Error(string("Could not create the directory for the cache. Failed when building path at ") + dir + string("."));
546  }
547  UMASK(mask);
548  }
549  else {
550  DBG2(cerr << "Cache....... Found " << dir << endl);
551  }
552  cur++;
553  }
554 }
555 
570 void
571 HTTPCache::set_cache_root(const string &root)
572 {
573  if (root != "") {
574  d_cache_root = root;
575  // cache root should end in /.
576  if (d_cache_root[d_cache_root.size()-1] != DIR_SEPARATOR_CHAR)
577  d_cache_root += DIR_SEPARATOR_CHAR;
578  }
579  else {
580  // If no cache root has been indicated then look for a suitable
581  // location.
582 #ifdef USE_GETENV
583  char * cr = (char *) getenv("DODS_CACHE");
584  if (!cr) cr = (char *) getenv("TMP");
585  if (!cr) cr = (char *) getenv("TEMP");
586  if (!cr) cr = (char*)CACHE_LOCATION;
587  d_cache_root = cr;
588 #else
589  d_cache_root = CACHE_LOCATION;
590 #endif
591 
592  if (d_cache_root[d_cache_root.size()-1] != DIR_SEPARATOR_CHAR)
593  d_cache_root += DIR_SEPARATOR_CHAR;
594 
595  d_cache_root += CACHE_ROOT;
596  }
597 
598  // Test d_hhtp_cache_table because this method can be called before that
599  // instance is created and also can be called later to cahnge the cache
600  // root. jhrg 05.14.08
601  if (d_http_cache_table)
602  d_http_cache_table->set_cache_root(d_cache_root);
603 }
604 
616 void
618 {
620 
621  d_cache_enabled = mode;
622 
624 }
625 
628 bool
630 {
631  DBG2(cerr << "In HTTPCache::is_cache_enabled: (" << d_cache_enabled << ")"
632  << endl);
633  return d_cache_enabled;
634 }
635 
646 void
648 {
650 
651  d_cache_disconnected = mode;
652 
654 }
655 
660 {
661  return d_cache_disconnected;
662 }
663 
672 void
674 {
676 
677  d_expire_ignored = mode;
678 
680 }
681 
682 /* Is the cache ignoring Expires headers returned with responses that have
683  been cached? */
684 
685 bool
687 {
688  return d_expire_ignored;
689 }
690 
706 void
707 HTTPCache::set_max_size(unsigned long size)
708 {
710 
711  try {
712  unsigned long new_size = size < MIN_CACHE_TOTAL_SIZE ?
714  (size > ULONG_MAX ? ULONG_MAX : size * MEGA);
715  unsigned long old_size = d_total_size;
716  d_total_size = new_size;
717  d_folder_size = d_total_size / CACHE_FOLDER_PCT;
718  d_gc_buffer = d_total_size / CACHE_GC_PCT;
719 
720  if (new_size < old_size && startGC()) {
721  perform_garbage_collection();
722  d_http_cache_table->cache_index_write();
723  }
724  }
725  catch (...) {
727  DBGN(cerr << "Unlocking interface." << endl);
728  throw;
729  }
730 
731  DBG2(cerr << "Cache....... Total cache size: " << d_total_size
732  << " with " << d_folder_size
733  << " bytes for meta information and folders and at least "
734  << d_gc_buffer << " bytes free after every gc" << endl);
735 
737 }
738 
741 unsigned long
743 {
744  return d_total_size / MEGA;
745 }
746 
755 void
756 HTTPCache::set_max_entry_size(unsigned long size)
757 {
759 
760  try {
761  unsigned long new_size = size * MEGA;
762  if (new_size > 0 && new_size < d_total_size - d_folder_size) {
763  unsigned long old_size = d_max_entry_size;
764  d_max_entry_size = new_size;
765  if (new_size < old_size && startGC()) {
766  perform_garbage_collection();
767  d_http_cache_table->cache_index_write();
768  }
769  }
770  }
771  catch (...) {
773  throw;
774  }
775 
776  DBG2(cerr << "Cache...... Max entry cache size is "
777  << d_max_entry_size << endl);
778 
780 }
781 
786 unsigned long
788 {
789  return d_max_entry_size / MEGA;
790 }
791 
802 void
804 {
806 
807  d_default_expiration = exp_time;
808 
810 }
811 
814 int
816 {
817  return d_default_expiration;
818 }
819 
824 void
826 {
827  d_always_validate = validate;
828 }
829 
833 bool
835 {
836  return d_always_validate;
837 }
838 
855 void
856 HTTPCache::set_cache_control(const vector<string> &cc)
857 {
859 
860  try {
861  d_cache_control = cc;
862 
863  vector<string>::const_iterator i;
864  for (i = cc.begin(); i != cc.end(); ++i) {
865  string header = (*i).substr(0, (*i).find(':'));
866  string value = (*i).substr((*i).find(": ") + 2);
867  if (header != "Cache-Control") {
868  throw InternalErr(__FILE__, __LINE__, "Expected cache control header not found.");
869  }
870  else {
871  if (value == "no-cache" || value == "no-store")
872  d_cache_enabled = false;
873  else if (value.find("max-age") != string::npos) {
874  string max_age = value.substr(value.find("=" + 1));
875  d_max_age = parse_time(max_age.c_str());
876  }
877  else if (value == "max-stale")
878  d_max_stale = 0; // indicates will take anything;
879  else if (value.find("max-stale") != string::npos) {
880  string max_stale = value.substr(value.find("=" + 1));
881  d_max_stale = parse_time(max_stale.c_str());
882  }
883  else if (value.find("min-fresh") != string::npos) {
884  string min_fresh = value.substr(value.find("=" + 1));
885  d_min_fresh = parse_time(min_fresh.c_str());
886  }
887  }
888  }
889  }
890  catch (...) {
892  throw;
893  }
894 
896 }
897 
898 
903 vector<string>
905 {
906  return d_cache_control;
907 }
908 
910 
919 bool
920 HTTPCache::is_url_in_cache(const string &url)
921 {
922  DBG(cerr << "Is this url in the cache? (" << url << ")" << endl);
923 
924  HTTPCacheTable::CacheEntry *entry = d_http_cache_table->get_locked_entry_from_cache_table(url);
925  bool status = entry != 0;
926  if (entry) {
927  entry->unlock_read_response();
928  }
929  return status;
930 }
931 
/** Is this a hop-by-hop header? Only the field name (the text before the
    first ':', with surrounding blanks removed) is examined, and it is
    compared without regard to case, so text in a header's value can never
    make it look like a hop-by-hop header.

    @param header A header line of the form "Name: value".
    @return True if the field name is Connection, Keep-Alive,
    Proxy-Authenticate, Proxy-Authorization, Transfer-Encoding or
    Upgrade. */
bool
is_hop_by_hop_header(const std::string &header)
{
    static const char *const hop_by_hop[] = {
        "connection",
        "keep-alive",
        "proxy-authenticate",
        "proxy-authorization",
        "transfer-encoding",
        "upgrade"
    };

    // Find the extent of the field name.
    std::string::size_type end = header.find(':');
    if (end == std::string::npos)
        end = header.size();
    std::string::size_type begin = 0;
    while (begin < end && (header[begin] == ' ' || header[begin] == '\t'))
        ++begin;
    while (end > begin && (header[end - 1] == ' ' || header[end - 1] == '\t'))
        --end;

    // Fold ASCII upper case to lower case; field names are ASCII.
    std::string name(header, begin, end - begin);
    for (std::string::size_type k = 0; k < name.size(); ++k) {
        if (name[k] >= 'A' && name[k] <= 'Z')
            name[k] = static_cast<char>(name[k] - 'A' + 'a');
    }

    for (unsigned int k = 0; k < sizeof(hop_by_hop) / sizeof(hop_by_hop[0]); ++k) {
        if (name == hop_by_hop[k])
            return true;
    }

    return false;
}
947 
959 void
960 HTTPCache::write_metadata(const string &cachename, const vector<string> &headers)
961 {
962  string fname = cachename + CACHE_META;
963  d_open_files.push_back(fname);
964 
965  FILE *dest = fopen(fname.c_str(), "w");
966  if (!dest) {
967  throw InternalErr(__FILE__, __LINE__,
968  "Could not open named cache entry file.");
969  }
970 
971  vector<string>::const_iterator i;
972  for (i = headers.begin(); i != headers.end(); ++i) {
973  if (!is_hop_by_hop_header(*i)) {
974  int s = fwrite((*i).c_str(), (*i).size(), 1, dest);
975  if (s != 1) {
976  fclose(dest);
977  throw InternalErr(__FILE__, __LINE__, "could not write header: '" + (*i) + "' " + long_to_string(s));
978  }
979  s = fwrite("\n", 1, 1, dest);
980  if (s != 1) {
981  fclose(dest);
982  throw InternalErr(__FILE__, __LINE__, "could not write header: " + long_to_string(s));
983  }
984  }
985  }
986 
987  int res = fclose(dest);
988  if (res) {
989  DBG(cerr << "HTTPCache::write_metadata - Failed to close "
990  << dest << endl);
991  }
992 
993  d_open_files.pop_back();
994 }
995 
1006 void
1007 HTTPCache::read_metadata(const string &cachename, vector<string> &headers)
1008 {
1009  FILE *md = fopen(string(cachename + CACHE_META).c_str(), "r");
1010  if (!md) {
1011  throw InternalErr(__FILE__, __LINE__,
1012  "Could not open named cache entry meta data file.");
1013  }
1014 
1015  char line[1024];
1016  while (!feof(md) && fgets(line, 1024, md)) {
1017  line[min(1024, static_cast<int>(strlen(line)))-1] = '\0'; // erase newline
1018  headers.push_back(string(line));
1019  }
1020 
1021  int res = fclose(md);
1022  if (res) {
1023  DBG(cerr << "HTTPCache::read_metadata - Failed to close "
1024  << md << endl);
1025  }
1026 }
1027 
1049 int
1050 HTTPCache::write_body(const string &cachename, const FILE *src)
1051 {
1052  d_open_files.push_back(cachename);
1053 
1054  FILE *dest = fopen(cachename.c_str(), "wb");
1055  if (!dest) {
1056  throw InternalErr(__FILE__, __LINE__,
1057  "Could not open named cache entry file.");
1058  }
1059 
1060  // Read and write in 1k blocks; an attempt at doing this efficiently.
1061  // 09/30/02 jhrg
1062  char line[1024];
1063  size_t n;
1064  int total = 0;
1065  while ((n = fread(line, 1, 1024, const_cast<FILE *>(src))) > 0) {
1066  total += fwrite(line, 1, n, dest);
1067  DBG2(sleep(3));
1068  }
1069 
1070  if (ferror(const_cast<FILE *>(src)) || ferror(dest)) {
1071  int res = fclose(dest);
1072  res = res & unlink(cachename.c_str());
1073  if (res) {
1074  DBG(cerr << "HTTPCache::write_body - Failed to close/unlink "
1075  << dest << endl);
1076  }
1077  throw InternalErr(__FILE__, __LINE__,
1078  "I/O error transferring data to the cache.");
1079  }
1080 
1081  rewind(const_cast<FILE *>(src));
1082 
1083  int res = fclose(dest);
1084  if (res) {
1085  DBG(cerr << "HTTPCache::write_body - Failed to close "
1086  << dest << endl);
1087  }
1088 
1089  d_open_files.pop_back();
1090 
1091  return total;
1092 }
1093 
1102 FILE *
1103 HTTPCache::open_body(const string &cachename)
1104 {
1105  DBG(cerr << "cachename: " << cachename << endl);
1106 
1107  FILE *src = fopen(cachename.c_str(), "rb"); // Read only
1108  if (!src)
1109  throw InternalErr(__FILE__, __LINE__, "Could not open cache file.");
1110 
1111  return src;
1112 }
1113 
1139 bool
1140 HTTPCache::cache_response(const string &url, time_t request_time,
1141  const vector<string> &headers, const FILE *body)
1142 {
1144 
1145  DBG(cerr << "Caching url: " << url << "." << endl);
1146 
1147  try {
1148  // If this is not an http or https URL, don't cache.
1149  if (url.find("http:") == string::npos &&
1150  url.find("https:") == string::npos) {
1152  return false;
1153  }
1154 
1155  // This does nothing if url is not already in the cache. It's
1156  // more efficient to do this than to first check and see if the entry
1157  // exists. 10/10/02 jhrg
1158  d_http_cache_table->remove_entry_from_cache_table(url);
1159 
1161  entry->lock_write_response();
1162 
1163  try {
1164  d_http_cache_table->parse_headers(entry, d_max_entry_size, headers); // etag, lm, date, age, expires, max_age.
1165  if (entry->is_no_cache()) {
1166  DBG(cerr << "Not cache-able; deleting HTTPCacheTable::CacheEntry: " << entry
1167  << "(" << url << ")" << endl);
1168  entry->unlock_write_response();
1169  delete entry; entry = 0;
1171  return false;
1172  }
1173 
1174  // corrected_initial_age, freshness_lifetime, response_time.
1175  d_http_cache_table->calculate_time(entry, d_default_expiration, request_time);
1176 
1177  d_http_cache_table->create_location(entry); // cachename, cache_body_fd
1178  // move these write function to cache table
1179  entry->set_size(write_body(entry->get_cachename(), body));
1180  write_metadata(entry->get_cachename(), headers);
1181  d_http_cache_table->add_entry_to_cache_table(entry);
1182  entry->unlock_write_response();
1183  }
1184  catch (ResponseTooBigErr &e) {
1185  // Oops. Bummer. Clean up and exit.
1186  DBG(cerr << e.get_error_message() << endl);
1187  REMOVE(entry->get_cachename().c_str());
1188  REMOVE(string(entry->get_cachename() + CACHE_META).c_str());
1189  DBG(cerr << "Too big; deleting HTTPCacheTable::CacheEntry: " << entry << "(" << url
1190  << ")" << endl);
1191  entry->unlock_write_response();
1192  delete entry; entry = 0;
1194  return false;
1195  }
1196 
1197  if (d_http_cache_table->get_new_entries() > DUMP_FREQUENCY) {
1198  if (startGC())
1199  perform_garbage_collection();
1200 
1201  d_http_cache_table->cache_index_write(); // resets new_entries
1202  }
1203  }
1204  catch (...) {
1206  throw;
1207  }
1208 
1210 
1211  return true;
1212 }
1213 
1232 vector<string>
1234 {
1236 
1237  HTTPCacheTable::CacheEntry *entry = 0;
1238  vector<string> headers;
1239 
1240  DBG(cerr << "Getting conditional request headers for " << url << endl);
1241 
1242  try {
1243  entry = d_http_cache_table->get_locked_entry_from_cache_table(url);
1244  if (!entry)
1245  throw Error("There is no cache entry for the URL: " + url);
1246 
1247  if (entry->get_etag() != "")
1248  headers.push_back(string("If-None-Match: ") + entry->get_etag());
1249 
1250  if (entry->get_lm() > 0) {
1251  time_t lm = entry->get_lm();
1252  headers.push_back(string("If-Modified-Since: ")
1253  + date_time_str(&lm));
1254  }
1255  else if (entry->get_max_age() > 0) {
1256  time_t max_age = entry->get_max_age();
1257  headers.push_back(string("If-Modified-Since: ")
1258  + date_time_str(&max_age));
1259  }
1260  else if (entry->get_expires() > 0) {
1261  time_t expires = entry->get_expires();
1262  headers.push_back(string("If-Modified-Since: ")
1263  + date_time_str(&expires));
1264  }
1265  entry->unlock_read_response();
1267  }
1268  catch (...) {
1270  if (entry) {
1271  entry->unlock_read_response();
1272  }
1273  throw;
1274  }
1275 
1276  return headers;
1277 }
1278 
/** Strict weak ordering for header lines that looks only at the header
    name, i.e. the text before the first ':' (the whole string when there
    is no ':'). Two headers with the same name and different values compare
    as equivalent, so a std::set using this functor keeps one header per
    name.

    The member typedefs are the ones std::binary_function used to supply;
    they are spelled out because that base class is not available in
    C++17. */
struct HeaderLess
{
    typedef const std::string &first_argument_type;
    typedef const std::string &second_argument_type;
    typedef bool result_type;

    bool operator()(const std::string &s1, const std::string &s2) const {
        // compare() clamps a length of npos to the end of the string, so
        // this orders the name prefixes without building temporaries.
        return s1.compare(0, s1.find(':'), s2, 0, s2.find(':')) < 0;
    }
};
1288 
1302 void
1303 HTTPCache::update_response(const string &url, time_t request_time,
1304  const vector<string> &headers)
1305 {
1307 
1308  HTTPCacheTable::CacheEntry *entry = 0;
1309  DBG(cerr << "Updating the response headers for: " << url << endl);
1310 
1311  try {
1312  entry = d_http_cache_table->get_write_locked_entry_from_cache_table(url);
1313  if (!entry)
1314  throw Error("There is no cache entry for the URL: " + url);
1315 
1316  // Merge the new headers with the exiting HTTPCacheTable::CacheEntry object.
1317  d_http_cache_table->parse_headers(entry, d_max_entry_size, headers);
1318 
1319  // Update corrected_initial_age, freshness_lifetime, response_time.
1320  d_http_cache_table->calculate_time(entry, d_default_expiration, request_time);
1321 
1322  // Merge the new headers with those in the persistent store. How:
1323  // Load the new headers into a set, then merge the old headers. Since
1324  // set<> ignores duplicates, old headers with the same name as a new
1325  // header will got into the bit bucket. Define a special compare
1326  // functor to make sure that headers are compared using only their
1327  // name and not their value too.
1328  set<string, HeaderLess> merged_headers;
1329 
1330  // Load in the new headers
1331  copy(headers.begin(), headers.end(),
1332  inserter(merged_headers, merged_headers.begin()));
1333 
1334  // Get the old headers and load them in.
1335  vector<string> old_headers;
1336  read_metadata(entry->get_cachename(), old_headers);
1337  copy(old_headers.begin(), old_headers.end(),
1338  inserter(merged_headers, merged_headers.begin()));
1339 
1340  // Read the values back out. Use reverse iterators with back_inserter
1341  // to preserve header order. NB: vector<> does not support push_front
1342  // so we can't use front_inserter(). 01/09/03 jhrg
1343  vector<string> result;
1344  copy(merged_headers.rbegin(), merged_headers.rend(),
1345  back_inserter(result));
1346 
1347  write_metadata(entry->get_cachename(), result);
1348  entry->unlock_write_response();
1350  }
1351  catch (...) {
1352  if (entry) {
1353  entry->unlock_read_response();
1354  }
1356  throw;
1357  }
1358 }
1359 
1371 bool
1372 HTTPCache::is_url_valid(const string &url)
1373 {
1375 
1376  bool freshness;
1377  HTTPCacheTable::CacheEntry *entry = 0;
1378 
1379  DBG(cerr << "Is this URL valid? (" << url << ")" << endl);
1380 
1381  try {
1382  if (d_always_validate) {
1384  return false; // force re-validation.
1385  }
1386 
1387  entry = d_http_cache_table->get_locked_entry_from_cache_table(url);
1388  if (!entry)
1389  throw Error("There is no cache entry for the URL: " + url);
1390 
1391  // If we supported range requests, we'd need code here to check if
1392  // there was only a partial response in the cache. 10/02/02 jhrg
1393 
1394  // In case this entry is of type "must-revalidate" then we consider it
1395  // invalid.
1396  if (entry->get_must_revalidate()) {
1397  entry->unlock_read_response();
1399  return false;
1400  }
1401 
1402  time_t resident_time = time(NULL) - entry->get_response_time();
1403  time_t current_age = entry->get_corrected_initial_age() + resident_time;
1404 
1405  // Check that the max-age, max-stale, and min-fresh directives
1406  // given in the request cache control header is followed.
1407  if (d_max_age >= 0 && current_age > d_max_age) {
1408  DBG(cerr << "Cache....... Max-age validation" << endl);
1409  entry->unlock_read_response();
1411  return false;
1412  }
1413  if (d_min_fresh >= 0
1414  && entry->get_freshness_lifetime() < current_age + d_min_fresh) {
1415  DBG(cerr << "Cache....... Min-fresh validation" << endl);
1416  entry->unlock_read_response();
1418  return false;
1419  }
1420 
1421  freshness = (entry->get_freshness_lifetime()
1422  + (d_max_stale >= 0 ? d_max_stale : 0) > current_age);
1423  entry->unlock_read_response();
1425  }
1426  catch (...) {
1427  if (entry) {
1428  entry->unlock_read_response();
1429  }
1431  throw;
1432  }
1433 
1434  return freshness;
1435 }
1436 
1464 FILE * HTTPCache::get_cached_response(const string &url,
1465  vector<string> &headers, string &cacheName) {
1467 
1468  FILE *body = 0;
1469  HTTPCacheTable::CacheEntry *entry = 0;
1470 
1471  DBG(cerr << "Getting the cached response for " << url << endl);
1472 
1473  try {
1474  entry = d_http_cache_table->get_locked_entry_from_cache_table(url);
1475  if (!entry) {
1477  return 0;
1478  }
1479 
1480  cacheName = entry->get_cachename();
1481  read_metadata(entry->get_cachename(), headers);
1482 
1483  DBG(cerr << "Headers just read from cache: " << endl);
1484  DBGN(copy(headers.begin(), headers.end(), ostream_iterator<string>(cerr, "\n")));
1485 
1486  body = open_body(entry->get_cachename());
1487 
1488  DBG(cerr << "Returning: " << url << " from the cache." << endl);
1489 
1490  d_http_cache_table->bind_entry_to_data(entry, body);
1491  }
1492  catch (...) {
1493  // Why make this unlock operation conditional on entry?
1494  if (entry)
1496  if (body != 0)
1497  fclose(body);
1498  throw;
1499  }
1500 
1502 
1503  return body;
1504 }
1505 
1517 FILE *
1518 HTTPCache::get_cached_response(const string &url, vector<string> &headers)
1519 {
1520  string discard_name;
1521  return get_cached_response(url, headers, discard_name);
1522 }
1523 
1534 FILE *
1536 {
1537  string discard_name;
1538  vector<string> discard_headers;
1539  return get_cached_response(url, discard_headers, discard_name);
1540 }
1541 
1554 void
1556 {
1558 
1559  try {
1560  // fclose(body); This results in a seg fault on linux jhrg 8/27/13
1561  d_http_cache_table->uncouple_entry_from_data(body);
1562  }
1563  catch (...) {
1565  throw;
1566  }
1567 
1569 }
1570 
1583 void
1585 {
1587 
1588  try {
1589  if (d_http_cache_table->is_locked_read_responses())
1590  throw Error("Attempt to purge the cache with entries in use.");
1591 
1592  d_http_cache_table->delete_all_entries();
1593  }
1594  catch (...) {
1596  throw;
1597  }
1598 
1600 }
1601 
1602 } // namespace libdap
vector< string > get_conditional_request_headers(const string &url)
Definition: HTTPCache.cc:1233
bool is_cache_enabled() const
Definition: HTTPCache.cc:629
vector< string > get_cache_control()
Definition: HTTPCache.cc:904
void set_cache_enabled(bool mode)
Definition: HTTPCache.cc:617
time_t parse_time(const char *str, bool expand)
Definition: util_mit.cc:129
bool is_url_valid(const string &url)
Definition: HTTPCache.cc:1372
string get_error_message() const
Definition: Error.cc:275
void create_location(CacheEntry *entry)
virtual ~HTTPCache()
Definition: HTTPCache.cc:297
void set_max_size(unsigned long size)
Definition: HTTPCache.cc:707
void parse_headers(HTTPCacheTable::CacheEntry *entry, unsigned long max_entry_size, const vector< string > &headers)
#define UMASK(a)
#define DBGN(x)
Definition: debug.h:59
void add_entry_to_cache_table(CacheEntry *entry)
#define UNLOCK(m)
FILE * get_cached_response(const string &url, vector< string > &headers, string &cacheName)
Definition: HTTPCache.cc:1464
void calculate_time(HTTPCacheTable::CacheEntry *entry, int default_expiration, time_t request_time)
void delete_by_size(unsigned int size)
#define DESTROY(m)
int get_new_entries() const
#define DBG2(x)
Definition: debug.h:73
int get_default_expiration() const
Definition: HTTPCache.cc:815
A class for software fault reporting.
Definition: InternalErr.h:64
#define DUMP_FREQUENCY
Definition: HTTPCache.cc:78
bool is_hop_by_hop_header(const string &header)
Definition: HTTPCache.cc:938
unsigned long get_max_entry_size() const
Definition: HTTPCache.cc:787
#define DBG(x)
Definition: debug.h:58
#define CACHE_GC_PCT
Definition: HTTPCache.cc:83
#define CACHE_TOTAL_SIZE
Definition: HTTPCache.cc:81
bool is_expire_ignored() const
Definition: HTTPCache.cc:686
#define MAX_CACHE_ENTRY_SIZE
Definition: HTTPCache.cc:85
void set_size(unsigned long sz)
#define LOCK(m)
CacheDisconnectedMode get_cache_disconnected() const
Definition: HTTPCache.cc:659
#define DIR_SEPARATOR_CHAR
void update_response(const string &url, time_t request_time, const vector< string > &headers)
Definition: HTTPCache.cc:1303
#define MEGA
Definition: HTTPCache.cc:80
bool get_always_validate() const
Definition: HTTPCache.cc:834
#define CACHE_ROOT
#define NO_LM_EXPIRATION
Definition: HTTPCache.cc:76
void bind_entry_to_data(CacheEntry *entry, FILE *body)
string get_cache_root() const
Definition: HTTPCache.cc:511
void set_cache_control(const vector< string > &cc)
Definition: HTTPCache.cc:856
void delete_expired_entries(time_t time=0)
string date_time_str(time_t *calendar, bool local)
Definition: util_mit.cc:281
unsigned long get_current_size() const
bool cache_response(const string &url, time_t request_time, const vector< string > &headers, const FILE *body)
Definition: HTTPCache.cc:1140
#define MIN_CACHE_TOTAL_SIZE
Definition: HTTPCache.cc:84
string long_to_string(long val, int base)
Definition: util.cc:1012
#define INIT(m)
#define CACHE_FOLDER_PCT
Definition: HTTPCache.cc:82
void set_always_validate(bool validate)
Definition: HTTPCache.cc:825
void set_default_expiration(int exp_time)
Definition: HTTPCache.cc:803
#define CACHE_META
void remove_entry_from_cache_table(const string &url)
#define CACHE_LOCK
void release_cached_response(FILE *response)
Definition: HTTPCache.cc:1555
#define REMOVE(a)
void unlock_cache_interface()
Definition: HTTPCache.h:248
CacheEntry * get_write_locked_entry_from_cache_table(const string &url)
void lock_cache_interface()
Definition: HTTPCache.h:243
unsigned long get_max_size() const
Definition: HTTPCache.cc:742
void delete_by_hits(int hits)
A class for error processing.
Definition: Error.h:90
void set_expire_ignored(bool mode)
Definition: HTTPCache.cc:673
#define MKDIR(a, b)
#define CACHE_LOCATION
void set_max_entry_size(unsigned long size)
Definition: HTTPCache.cc:756
void uncouple_entry_from_data(FILE *body)
void set_cache_root(const string &cr)
void set_cache_disconnected(CacheDisconnectedMode mode)
Definition: HTTPCache.cc:647