libdap++  Updated for version 3.8.2
HTTPCache.cc
Go to the documentation of this file.
1 
2 // -*- mode: c++; c-basic-offset:4 -*-
3 
4 // This file is part of libdap, A C++ implementation of the OPeNDAP Data
5 // Access Protocol.
6 
7 // Copyright (c) 2002,2003 OPeNDAP, Inc.
8 // Author: James Gallagher <jgallagher@opendap.org>
9 //
10 // This library is free software; you can redistribute it and/or
11 // modify it under the terms of the GNU Lesser General Public
12 // License as published by the Free Software Foundation; either
13 // version 2.1 of the License, or (at your option) any later version.
14 //
15 // This library is distributed in the hope that it will be useful,
16 // but WITHOUT ANY WARRANTY; without even the implied warranty of
17 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 // Lesser General Public License for more details.
19 //
20 // You should have received a copy of the GNU Lesser General Public
21 // License along with this library; if not, write to the Free Software
22 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23 //
24 // You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
25 
26 #include "config.h"
27 
28 // #define DODS_DEBUG
29 // #define DODS_DEBUG2
30 #undef USE_GETENV
31 
32 #include <pthread.h>
33 #include <limits.h>
34 #include <unistd.h> // for stat
35 #include <sys/types.h> // for stat and mkdir
36 #include <sys/stat.h>
37 
38 #include <cstring>
39 #include <iostream>
40 #include <sstream>
41 #include <algorithm>
42 #include <iterator>
43 #include <set>
44 
45 #include "Error.h"
46 #include "InternalErr.h"
47 #include "ResponseTooBigErr.h"
48 #ifndef WIN32
49 #include "SignalHandler.h"
50 #endif
52 #include "HTTPCacheTable.h"
53 #include "HTTPCache.h"
54 
55 #include "util_mit.h"
56 #include "debug.h"
57 
58 using namespace std;
59 
60 namespace libdap {
61 
62 HTTPCache *HTTPCache::_instance = 0;
63 
64 // instance_mutex is used to ensure that only one instance is created.
65 // That is, it protects the body of the HTTPCache::instance() method. This
66 // mutex is initialized from within the static function once_init_routine(),
67 // which is invoked through pthread_once() using the once-control block
68 // once_block, so the initialization runs exactly once. All of this ensures
69 // that no matter how many threads call the instance() method, only one
70 // instance is ever made.
71 static pthread_mutex_t instance_mutex;
72 static pthread_once_t once_block = PTHREAD_ONCE_INIT;
73 
74 #ifdef WIN32
75 #include <direct.h>
76 #include <time.h>
77 #include <fcntl.h>
78 #define MKDIR(a,b) _mkdir((a))
79 #define UMASK(a) _umask((a))
80 #define REMOVE(a) remove((a))
81 #define MKSTEMP(a) _open(_mktemp((a)),_O_CREAT,_S_IREAD|_S_IWRITE)
82 #define DIR_SEPARATOR_CHAR '\\'
83 #define DIR_SEPARATOR_STR "\\"
84 #else
85 #define MKDIR(a,b) mkdir((a), (b))
86 #define UMASK(a) umask((a))
87 #define REMOVE(a) remove((a))
88 #define MKSTEMP(a) mkstemp((a))
89 #define DIR_SEPARATOR_CHAR '/'
90 #define DIR_SEPARATOR_STR "/"
91 #endif
92 
93 #ifdef WIN32
94 #define CACHE_LOCATION "\\tmp\\"
95 #define CACHE_ROOT "dods-cache\\"
96 #else
97 #define CACHE_LOCATION "/tmp/"
98 #define CACHE_ROOT "dods-cache/"
99 #endif
100 #define CACHE_INDEX ".index"
101 #define CACHE_LOCK ".lock"
102 #define CACHE_META ".meta"
103 //#define CACHE_EMPTY_ETAG "@cache@"
104 
// Default expiration, in seconds, stored in d_default_expiration by the
// constructor. Parenthesized so the product stays intact when the macro is
// used inside a larger expression (e.g. x / NO_LM_EXPIRATION).
#define NO_LM_EXPIRATION (24*3600) // 24 hours
106 
107 #define DUMP_FREQUENCY 10 // Dump index every x loads
108 
109 #define MEGA 0x100000L
110 #define CACHE_TOTAL_SIZE 20 // Default cache size is 20M
111 #define CACHE_FOLDER_PCT 10 // 10% of cache size for metainfo etc.
112 #define CACHE_GC_PCT 10 // 10% of cache size free after GC
113 #define MIN_CACHE_TOTAL_SIZE 5 // 5M Min cache size
114 #define MAX_CACHE_ENTRY_SIZE 3 // 3M Max size of single cached entry
115 
116 static void
117 once_init_routine()
118 {
119  int status;
120  status = INIT(&instance_mutex);
121 
122  if (status != 0)
123  throw InternalErr(__FILE__, __LINE__, "Could not initialize the HTTP Cache mutex. Exiting.");
124 }
125 
154 HTTPCache *
155 HTTPCache::instance(const string &cache_root, bool force)
156 {
157  int status = pthread_once(&once_block, once_init_routine);
158  if (status != 0)
159  throw InternalErr(__FILE__, __LINE__, "Could not initialize the HTTP Cache mutex. Exiting.");
160 
161  LOCK(&instance_mutex);
162 
163  DBG(cerr << "Entering instance(); (" << hex << _instance << dec << ")"
164  << "... ");
165 
166  try {
167  if (!_instance) {
168  _instance = new HTTPCache(cache_root, force);
169 
170  DBG(cerr << "New instance: " << _instance << ", cache root: "
171  << _instance->d_cache_root << endl);
172 
173  atexit(delete_instance);
174 
175 #ifndef WIN32
176  // Register the interrupt handler. If we've already registered
177  // one, barf. If this becomes a problem, hack SignalHandler so
178  // that we can chain these handlers... 02/10/04 jhrg
179  //
180  // Technically we're leaking memory here. However, since this
181  // class is a singleton, we know that only three objects will
182  // ever be created and they will all exist until the process
183  // exits. We can let this slide... 02/12/04 jhrg
184  EventHandler *old_eh = SignalHandler::instance()->register_handler
185  (SIGINT, new HTTPCacheInterruptHandler);
186  if (old_eh) {
187  SignalHandler::instance()->register_handler(SIGINT, old_eh);
189  "Could not register event handler for SIGINT without superseding an existing one.");
190  }
191 
192  old_eh = SignalHandler::instance()->register_handler
193  (SIGPIPE, new HTTPCacheInterruptHandler);
194  if (old_eh) {
195  SignalHandler::instance()->register_handler(SIGPIPE, old_eh);
197  "Could not register event handler for SIGPIPE without superseding an existing one.");
198  }
199 
200  old_eh = SignalHandler::instance()->register_handler
201  (SIGTERM, new HTTPCacheInterruptHandler);
202  if (old_eh) {
203  SignalHandler::instance()->register_handler(SIGTERM, old_eh);
205  "Could not register event handler for SIGTERM without superseding an existing one.");
206  }
207 #endif
208  }
209  }
210  catch (...) {
211  DBG2(cerr << "The constructor threw an Error!" << endl);
212  UNLOCK(&instance_mutex);
213  throw;
214  }
215 
216  UNLOCK(&instance_mutex);
217  DBGN(cerr << "returning " << hex << _instance << dec << endl);
218 
219  return _instance;
220 }
221 
225 void
226 HTTPCache::delete_instance()
227 {
228  DBG(cerr << "Entering delete_instance()..." << endl);
229  if (HTTPCache::_instance) {
230  DBG(cerr << "Deleting the cache: " << HTTPCache::_instance << endl);
231  delete HTTPCache::_instance;
232  HTTPCache::_instance = 0;
233  }
234 
235  DBG(cerr << "Exiting delete_instance()" << endl);
236 }
237 
252 HTTPCache::HTTPCache(string cache_root, bool force) :
253  d_locked_open_file(0),
254  d_cache_enabled(false),
255  d_cache_protected(false),
256  d_expire_ignored(false),
257  d_always_validate(false),
258  d_total_size(CACHE_TOTAL_SIZE * MEGA),
259  d_folder_size(CACHE_TOTAL_SIZE / CACHE_FOLDER_PCT),
260  d_gc_buffer(CACHE_TOTAL_SIZE / CACHE_GC_PCT),
261  d_max_entry_size(MAX_CACHE_ENTRY_SIZE * MEGA),
262  d_default_expiration(NO_LM_EXPIRATION),
263  d_max_age(-1),
264  d_max_stale(-1),
265  d_min_fresh(-1),
266  d_http_cache_table(0)
267 {
268  DBG(cerr << "Entering the constructor for " << this << "... ");
269 #if 0
270  int status = pthread_once(&once_block, once_init_routine);
271  if (status != 0)
272  throw InternalErr(__FILE__, __LINE__, "Could not initialize the HTTP Cache mutex. Exiting.");
273 #endif
274  INIT(&d_cache_mutex);
275 
276  // This used to throw an Error object if we could not get the
277  // single user lock. However, that results in an invalid object. It's
278  // better to have an instance that has default values. If we cannot get
279  // the lock, make sure to set the cache as *disabled*. 03/12/03 jhrg
280  //
281  // I fixed this block so that the cache root is set before we try to get
282  // the single user lock. That was the fix for bug #661. To make that
283  // work, I had to move the call to create_cache_root out of
284  // set_cache_root(). 09/08/03 jhrg
285 
286  set_cache_root(cache_root);
287  int block_size;
288 
289  if (!get_single_user_lock(force))
290  throw Error("Could not get single user lock for the cache");
291 
292 #ifdef WIN32
293  // Windows is unable to provide us this information. 4096 appears
294  // a best guess. It is likely to be in the range [2048, 8192] on
295  // windows, but will the level of truth of that statement vary over
296  // time ?
297  block_size = 4096;
298 #else
299  struct stat s;
300  if (stat(cache_root.c_str(), &s) == 0)
301  block_size = s.st_blksize;
302  else
303  throw Error("Could not set file system block size.");
304 #endif
305  d_http_cache_table = new HTTPCacheTable(d_cache_root, block_size);
306  d_cache_enabled = true;
307 
308  DBGN(cerr << "exiting" << endl);
309 }
310 
324 {
325  DBG(cerr << "Entering the destructor for " << this << "... ");
326 
327  try {
328  if (startGC())
329  perform_garbage_collection();
330 
331  d_http_cache_table->cache_index_write();
332  }
333  catch (Error &e) {
334  // If the cache index cannot be written, we've got problems. However,
335  // unless we're debugging, still free up the cache table in memory.
336  // How should we let users know they cache index is not being
337  // written?? 10/03/02 jhrg
338  DBG(cerr << e.get_error_message() << endl);
339  }
340 
341  delete d_http_cache_table;
342 
343  release_single_user_lock();
344 
345  DBGN(cerr << "exiting destructor." << endl);
346  DESTROY(&d_cache_mutex);
347 }
348 
349 
353 
357 bool
358 HTTPCache::stopGC() const
359 {
360  return (d_http_cache_table->get_current_size() + d_folder_size < d_total_size - d_gc_buffer);
361 }
362 
369 bool
370 HTTPCache::startGC() const
371 {
372  DBG(cerr << "startGC, current_size: " << d_http_cache_table->get_current_size() << endl);
373  return (d_http_cache_table->get_current_size() + d_folder_size > d_total_size);
374 }
375 
390 void
391 HTTPCache::perform_garbage_collection()
392 {
393  DBG(cerr << "Performing garbage collection" << endl);
394 
395  // Remove all the expired responses.
396  expired_gc();
397 
398  // Remove entries larger than max_entry_size.
399  too_big_gc();
400 
401  // Remove entries starting with zero hits, 1, ..., until stopGC()
402  // returns true.
403  hits_gc();
404 }
405 
411 void
412 HTTPCache::expired_gc()
413 {
414  if (!d_expire_ignored) {
415  d_http_cache_table->delete_expired_entries();
416  }
417 }
418 
435 void
436 HTTPCache::hits_gc()
437 {
438  int hits = 0;
439 
440  if (startGC()) {
441  while (!stopGC()) {
442  d_http_cache_table->delete_by_hits(hits);
443  hits++;
444  }
445  }
446 }
447 
452 void HTTPCache::too_big_gc() {
453  if (startGC())
454  d_http_cache_table->delete_by_size(d_max_entry_size);
455 }
456 
458 
469 bool HTTPCache::get_single_user_lock(bool force)
470 {
471  if (!d_locked_open_file) {
472  FILE * fp = NULL;
473 
474  try {
475  // It's OK to call create_cache_root if the directory already
476  // exists.
477  create_cache_root(d_cache_root);
478  }
479  catch (Error &e) {
480  // We need to catch and return false because this method is
481  // called from a ctor and throwing at this point will result in a
482  // partially constructed object. 01/22/04 jhrg
483  DBG(cerr << "Failure to create the cache root" << endl);
484  return false;
485  }
486 
487  // Try to read the lock file. If we can open for reading, it exists.
488  string lock = d_cache_root + CACHE_LOCK;
489  if ((fp = fopen(lock.c_str(), "r")) != NULL) {
490  int res = fclose(fp);
491  if (res) {
492  DBG(cerr << "Failed to close " << (void *)fp << endl);
493  }
494  if (force)
495  REMOVE(lock.c_str());
496  else
497  return false;
498  }
499 
500  if ((fp = fopen(lock.c_str(), "w")) == NULL) {
501  DBG(cerr << "Could not open for write access" << endl);
502  return false;
503  }
504 
505  d_locked_open_file = fp;
506  return true;
507  }
508 
509  cerr << "locked_open_file is true" << endl;
510  return false;
511 }
512 
515 void
516 HTTPCache::release_single_user_lock()
517 {
518  if (d_locked_open_file) {
519  int res = fclose(d_locked_open_file);
520  if (res) {
521  DBG(cerr << "Failed to close " << (void *)d_locked_open_file << endl) ;
522  }
523  d_locked_open_file = 0;
524  }
525 
526  string lock = d_cache_root + CACHE_LOCK;
527  REMOVE(lock.c_str());
528 }
529 
532 
536 string
538 {
539  return d_cache_root;
540 }
541 
542 
551 void
552 HTTPCache::create_cache_root(const string &cache_root)
553 {
554  struct stat stat_info;
555  string::size_type cur = 0;
556 
557 #ifdef WIN32
558  cur = cache_root[1] == ':' ? 3 : 1;
559  typedef int mode_t;
560 #else
561  cur = 1;
562 #endif
563  while ((cur = cache_root.find(DIR_SEPARATOR_CHAR, cur)) != string::npos) {
564  string dir = cache_root.substr(0, cur);
565  if (stat(dir.c_str(), &stat_info) == -1) {
566  DBG2(cerr << "Cache....... Creating " << dir << endl);
567  mode_t mask = UMASK(0);
568  if (MKDIR(dir.c_str(), 0777) < 0) {
569  DBG2(cerr << "Error: can't create." << endl);
570  UMASK(mask);
571  throw Error(string("Could not create the directory for the cache. Failed when building path at ") + dir + string("."));
572  }
573  UMASK(mask);
574  }
575  else {
576  DBG2(cerr << "Cache....... Found " << dir << endl);
577  }
578  cur++;
579  }
580 }
581 
596 void
597 HTTPCache::set_cache_root(const string &root)
598 {
599  if (root != "") {
600  d_cache_root = root;
601  // cache root should end in /.
602  if (d_cache_root[d_cache_root.size()-1] != DIR_SEPARATOR_CHAR)
603  d_cache_root += DIR_SEPARATOR_CHAR;
604  }
605  else {
606  // If no cache root has been indicated then look for a suitable
607  // location.
608 #ifdef USE_GETENV
609  char * cr = (char *) getenv("DODS_CACHE");
610  if (!cr) cr = (char *) getenv("TMP");
611  if (!cr) cr = (char *) getenv("TEMP");
612  if (!cr) cr = (char*)CACHE_LOCATION;
613  d_cache_root = cr;
614 #else
615  d_cache_root = CACHE_LOCATION;
616 #endif
617 
618  if (d_cache_root[d_cache_root.size()-1] != DIR_SEPARATOR_CHAR)
619  d_cache_root += DIR_SEPARATOR_CHAR;
620 
621  d_cache_root += CACHE_ROOT;
622  }
623 
624  // Test d_hhtp_cache_table because this method can be called before that
625  // instance is created and also can be called later to cahnge the cache
626  // root. jhrg 05.14.08
627  if (d_http_cache_table)
628  d_http_cache_table->set_cache_root(d_cache_root);
629 }
630 
642 void
644 {
646 
647  d_cache_enabled = mode;
648 
650 }
651 
654 bool
656 {
657  DBG2(cerr << "In HTTPCache::is_cache_enabled: (" << d_cache_enabled << ")"
658  << endl);
659  return d_cache_enabled;
660 }
661 
672 void
674 {
676 
677  d_cache_disconnected = mode;
678 
680 }
681 
686 {
687  return d_cache_disconnected;
688 }
689 
698 void
700 {
702 
703  d_expire_ignored = mode;
704 
706 }
707 
708 /* Is the cache ignoring Expires headers returned with responses that have
709  been cached? */
710 
711 bool
713 {
714  return d_expire_ignored;
715 }
716 
732 void
733 HTTPCache::set_max_size(unsigned long size)
734 {
736 
737  try {
738  unsigned long new_size = size < MIN_CACHE_TOTAL_SIZE ?
740  (size > ULONG_MAX ? ULONG_MAX : size * MEGA);
741  unsigned long old_size = d_total_size;
742  d_total_size = new_size;
743  d_folder_size = d_total_size / CACHE_FOLDER_PCT;
744  d_gc_buffer = d_total_size / CACHE_GC_PCT;
745 
746  if (new_size < old_size && startGC()) {
747  perform_garbage_collection();
748  d_http_cache_table->cache_index_write();
749  }
750  }
751  catch (...) {
753  DBGN(cerr << "Unlocking interface." << endl);
754  throw;
755  }
756 
757  DBG2(cerr << "Cache....... Total cache size: " << d_total_size
758  << " with " << d_folder_size
759  << " bytes for meta information and folders and at least "
760  << d_gc_buffer << " bytes free after every gc" << endl);
761 
763 }
764 
767 unsigned long
769 {
770  return d_total_size / MEGA;
771 }
772 
781 void
782 HTTPCache::set_max_entry_size(unsigned long size)
783 {
785 
786  try {
787  unsigned long new_size = size * MEGA;
788  if (new_size > 0 && new_size < d_total_size - d_folder_size) {
789  unsigned long old_size = d_max_entry_size;
790  d_max_entry_size = new_size;
791  if (new_size < old_size && startGC()) {
792  perform_garbage_collection();
793  d_http_cache_table->cache_index_write();
794  }
795  }
796  }
797  catch (...) {
799  throw;
800  }
801 
802  DBG2(cerr << "Cache...... Max entry cache size is "
803  << d_max_entry_size << endl);
804 
806 }
807 
812 unsigned long
814 {
815  return d_max_entry_size / MEGA;
816 }
817 
828 void
830 {
832 
833  d_default_expiration = exp_time;
834 
836 }
837 
840 int
842 {
843  return d_default_expiration;
844 }
845 
850 void
852 {
853  d_always_validate = validate;
854 }
855 
859 bool
861 {
862  return d_always_validate;
863 }
864 
881 void
882 HTTPCache::set_cache_control(const vector<string> &cc)
883 {
885 
886  try {
887  d_cache_control = cc;
888 
889  vector<string>::const_iterator i;
890  for (i = cc.begin(); i != cc.end(); ++i) {
891  string header = (*i).substr(0, (*i).find(':'));
892  string value = (*i).substr((*i).find(": ") + 2);
893  if (header != "Cache-Control") {
894  throw InternalErr(__FILE__, __LINE__, "Expected cache control header not found.");
895  }
896  else {
897  if (value == "no-cache" || value == "no-store")
898  d_cache_enabled = false;
899  else if (value.find("max-age") != string::npos) {
900  string max_age = value.substr(value.find("=" + 1));
901  d_max_age = parse_time(max_age.c_str());
902  }
903  else if (value == "max-stale")
904  d_max_stale = 0; // indicates will take anything;
905  else if (value.find("max-stale") != string::npos) {
906  string max_stale = value.substr(value.find("=" + 1));
907  d_max_stale = parse_time(max_stale.c_str());
908  }
909  else if (value.find("min-fresh") != string::npos) {
910  string min_fresh = value.substr(value.find("=" + 1));
911  d_min_fresh = parse_time(min_fresh.c_str());
912  }
913  }
914  }
915  }
916  catch (...) {
918  throw;
919  }
920 
922 }
923 
924 
929 vector<string>
931 {
932  return d_cache_control;
933 }
934 
936 
945 bool
946 HTTPCache::is_url_in_cache(const string &url)
947 {
948  DBG(cerr << "Is this url in the cache? (" << url << ")" << endl);
949 
950  HTTPCacheTable::CacheEntry *entry = d_http_cache_table->get_locked_entry_from_cache_table(url);
951  bool status = entry != 0;
952  if (entry) {
953  entry->unlock_read_response();
954  }
955  return status;
956 }
957 
/** Decide whether a header line mentions one of the hop-by-hop header
    names. The match is a case-sensitive substring search over the whole
    line, so a name occurring anywhere in \c header counts.
    @param header A complete header line.
    @return True if any hop-by-hop header name occurs in \c header. */
bool
is_hop_by_hop_header(const string &header)
{
    static const char *const hop_by_hop[] = {
        "Connection",
        "Keep-Alive",
        "Proxy-Authenticate",
        "Proxy-Authorization",
        "Transfer-Encoding",
        "Upgrade"
    };
    const size_t count = sizeof(hop_by_hop) / sizeof(hop_by_hop[0]);

    for (size_t k = 0; k < count; ++k) {
        if (header.find(hop_by_hop[k]) != string::npos)
            return true;
    }

    return false;
}
973 
985 void
986 HTTPCache::write_metadata(const string &cachename, const vector<string> &headers)
987 {
988  string fname = cachename + CACHE_META;
989  d_open_files.push_back(fname);
990 
991  FILE *dest = fopen(fname.c_str(), "w");
992  if (!dest) {
993  throw InternalErr(__FILE__, __LINE__,
994  "Could not open named cache entry file.");
995  }
996 
997  vector<string>::const_iterator i;
998  for (i = headers.begin(); i != headers.end(); ++i) {
999  if (!is_hop_by_hop_header(*i)) {
1000  int s = fwrite((*i).c_str(), (*i).size(), 1, dest);
1001  if (s != 1)
1002  throw InternalErr(__FILE__, __LINE__, "could not write header: '" + (*i) + "' " + long_to_string(s));
1003  s = fwrite("\n", 1, 1, dest);
1004  if (s != 1)
1005  throw InternalErr(__FILE__, __LINE__, "could not write header: " + long_to_string(s));
1006  }
1007  }
1008 
1009  int res = fclose(dest);
1010  if (res) {
1011  DBG(cerr << "HTTPCache::write_metadata - Failed to close "
1012  << dest << endl);
1013  }
1014 
1015  d_open_files.pop_back();
1016 }
1017 
1028 void
1029 HTTPCache::read_metadata(const string &cachename, vector<string> &headers)
1030 {
1031  FILE *md = fopen(string(cachename + CACHE_META).c_str(), "r");
1032  if (!md) {
1033  throw InternalErr(__FILE__, __LINE__,
1034  "Could not open named cache entry meta data file.");
1035  }
1036 
1037  char line[1024];
1038  while (!feof(md) && fgets(line, 1024, md)) {
1039  line[min(1024, static_cast<int>(strlen(line)))-1] = '\0'; // erase newline
1040  headers.push_back(string(line));
1041  }
1042 
1043  int res = fclose(md);
1044  if (res) {
1045  DBG(cerr << "HTTPCache::read_metadata - Failed to close "
1046  << md << endl);
1047  }
1048 }
1049 
1071 int
1072 HTTPCache::write_body(const string &cachename, const FILE *src)
1073 {
1074  d_open_files.push_back(cachename);
1075 
1076  FILE *dest = fopen(cachename.c_str(), "wb");
1077  if (!dest) {
1078  throw InternalErr(__FILE__, __LINE__,
1079  "Could not open named cache entry file.");
1080  }
1081 
1082  // Read and write in 1k blocks; an attempt at doing this efficiently.
1083  // 09/30/02 jhrg
1084  char line[1024];
1085  size_t n;
1086  int total = 0;
1087  while ((n = fread(line, 1, 1024, const_cast<FILE *>(src))) > 0) {
1088  total += fwrite(line, 1, n, dest);
1089  DBG2(sleep(3));
1090  }
1091 
1092  if (ferror(const_cast<FILE *>(src)) || ferror(dest)) {
1093  int res = fclose(dest);
1094  res = res & unlink(cachename.c_str());
1095  if (res) {
1096  DBG(cerr << "HTTPCache::write_body - Failed to close/unlink "
1097  << dest << endl);
1098  }
1099  throw InternalErr(__FILE__, __LINE__,
1100  "I/O error transferring data to the cache.");
1101  }
1102 
1103  rewind(const_cast<FILE *>(src));
1104 
1105  int res = fclose(dest);
1106  if (res) {
1107  DBG(cerr << "HTTPCache::write_body - Failed to close "
1108  << dest << endl);
1109  }
1110 
1111  d_open_files.pop_back();
1112 
1113  return total;
1114 }
1115 
1124 FILE *
1125 HTTPCache::open_body(const string &cachename)
1126 {
1127  DBG(cerr << "cachename: " << cachename << endl);
1128 
1129  FILE *src = fopen(cachename.c_str(), "rb"); // Read only
1130  if (!src)
1131  throw InternalErr(__FILE__, __LINE__, "Could not open cache file.");
1132 
1133  return src;
1134 }
1135 
1161 bool
1162 HTTPCache::cache_response(const string &url, time_t request_time,
1163  const vector<string> &headers, const FILE *body)
1164 {
1166 
1167  DBG(cerr << "Caching url: " << url << "." << endl);
1168 
1169  try {
1170  // If this is not an http or https URL, don't cache.
1171  if (url.find("http:") == string::npos &&
1172  url.find("https:") == string::npos) {
1174  return false;
1175  }
1176 
1177  // This does nothing if url is not already in the cache. It's
1178  // more efficient to do this than to first check and see if the entry
1179  // exists. 10/10/02 jhrg
1180  d_http_cache_table->remove_entry_from_cache_table(url);
1181 
1183  entry->lock_write_response();
1184 
1185  try {
1186  d_http_cache_table->parse_headers(entry, d_max_entry_size, headers); // etag, lm, date, age, expires, max_age.
1187  if (entry->is_no_cache()) {
1188  DBG(cerr << "Not cache-able; deleting HTTPCacheTable::CacheEntry: " << entry
1189  << "(" << url << ")" << endl);
1190  entry->unlock_write_response();
1191  delete entry; entry = 0;
1193  return false;
1194  }
1195 
1196  // corrected_initial_age, freshness_lifetime, response_time.
1197  d_http_cache_table->calculate_time(entry, d_default_expiration, request_time);
1198 
1199  d_http_cache_table->create_location(entry); // cachename, cache_body_fd
1200  // move these write function to cache table
1201  entry->set_size(write_body(entry->get_cachename(), body));
1202  write_metadata(entry->get_cachename(), headers);
1203  d_http_cache_table->add_entry_to_cache_table(entry);
1204  entry->unlock_write_response();
1205  }
1206  catch (ResponseTooBigErr &e) {
1207  // Oops. Bummer. Clean up and exit.
1208  DBG(cerr << e.get_error_message() << endl);
1209  REMOVE(entry->get_cachename().c_str());
1210  REMOVE(string(entry->get_cachename() + CACHE_META).c_str());
1211  DBG(cerr << "Too big; deleting HTTPCacheTable::CacheEntry: " << entry << "(" << url
1212  << ")" << endl);
1213  entry->unlock_write_response();
1214  delete entry; entry = 0;
1216  return false;
1217  }
1218 
1219  if (d_http_cache_table->get_new_entries() > DUMP_FREQUENCY) {
1220  if (startGC())
1221  perform_garbage_collection();
1222 
1223  d_http_cache_table->cache_index_write(); // resets new_entries
1224  }
1225  }
1226  catch (...) {
1228  throw;
1229  }
1230 
1232 
1233  return true;
1234 }
1235 
1254 vector<string>
1256 {
1258 
1259  HTTPCacheTable::CacheEntry *entry = 0;
1260  vector<string> headers;
1261 
1262  DBG(cerr << "Getting conditional request headers for " << url << endl);
1263 
1264  try {
1265  entry = d_http_cache_table->get_locked_entry_from_cache_table(url);
1266  if (!entry)
1267  throw Error("There is no cache entry for the URL: " + url);
1268 
1269  if (entry->get_etag() != "")
1270  headers.push_back(string("If-None-Match: ") + entry->get_etag());
1271 
1272  if (entry->get_lm() > 0) {
1273  time_t lm = entry->get_lm();
1274  headers.push_back(string("If-Modified-Since: ")
1275  + date_time_str(&lm));
1276  }
1277  else if (entry->get_max_age() > 0) {
1278  time_t max_age = entry->get_max_age();
1279  headers.push_back(string("If-Modified-Since: ")
1280  + date_time_str(&max_age));
1281  }
1282  else if (entry->get_expires() > 0) {
1283  time_t expires = entry->get_expires();
1284  headers.push_back(string("If-Modified-Since: ")
1285  + date_time_str(&expires));
1286  }
1287  entry->unlock_read_response();
1289  }
1290  catch (...) {
1292  if (entry) {
1293  entry->unlock_read_response();
1294  }
1295  throw;
1296  }
1297 
1298  return headers;
1299 }
1300 
/** Ordering functor for header lines that looks only at the header name,
    i.e. the text before the first ':' (the whole string when there is no
    ':'). Two lines with the same name compare as equivalent whatever their
    values are. */
struct HeaderLess: binary_function<const string&, const string&, bool>
{
    bool operator()(const string &s1, const string &s2) const {
        // compare() clamps the lengths, so npos means "to the end".
        const string::size_type name_len1 = s1.find(':');
        const string::size_type name_len2 = s2.find(':');
        return s1.compare(0, name_len1, s2, 0, name_len2) < 0;
    }
};
1310 
1324 void
1325 HTTPCache::update_response(const string &url, time_t request_time,
1326  const vector<string> &headers)
1327 {
1329 
1330  HTTPCacheTable::CacheEntry *entry = 0;
1331  DBG(cerr << "Updating the response headers for: " << url << endl);
1332 
1333  try {
1334  entry = d_http_cache_table->get_write_locked_entry_from_cache_table(url);
1335  if (!entry)
1336  throw Error("There is no cache entry for the URL: " + url);
1337 
1338  // Merge the new headers with the exiting HTTPCacheTable::CacheEntry object.
1339  d_http_cache_table->parse_headers(entry, d_max_entry_size, headers);
1340 
1341  // Update corrected_initial_age, freshness_lifetime, response_time.
1342  d_http_cache_table->calculate_time(entry, d_default_expiration, request_time);
1343 
1344  // Merge the new headers with those in the persistent store. How:
1345  // Load the new headers into a set, then merge the old headers. Since
1346  // set<> ignores duplicates, old headers with the same name as a new
1347  // header will got into the bit bucket. Define a special compare
1348  // functor to make sure that headers are compared using only their
1349  // name and not their value too.
1350  set<string, HeaderLess> merged_headers;
1351 
1352  // Load in the new headers
1353  copy(headers.begin(), headers.end(),
1354  inserter(merged_headers, merged_headers.begin()));
1355 
1356  // Get the old headers and load them in.
1357  vector<string> old_headers;
1358  read_metadata(entry->get_cachename(), old_headers);
1359  copy(old_headers.begin(), old_headers.end(),
1360  inserter(merged_headers, merged_headers.begin()));
1361 
1362  // Read the values back out. Use reverse iterators with back_inserter
1363  // to preserve header order. NB: vector<> does not support push_front
1364  // so we can't use front_inserter(). 01/09/03 jhrg
1365  vector<string> result;
1366  copy(merged_headers.rbegin(), merged_headers.rend(),
1367  back_inserter(result));
1368 
1369  write_metadata(entry->get_cachename(), result);
1370  entry->unlock_write_response();
1372  }
1373  catch (...) {
1374  if (entry) {
1375  entry->unlock_read_response();
1376  }
1378  throw;
1379  }
1380 }
1381 
1393 bool
1394 HTTPCache::is_url_valid(const string &url)
1395 {
1397 
1398  bool freshness;
1399  HTTPCacheTable::CacheEntry *entry = 0;
1400 
1401  DBG(cerr << "Is this URL valid? (" << url << ")" << endl);
1402 
1403  try {
1404  if (d_always_validate) {
1406  return false; // force re-validation.
1407  }
1408 
1409  entry = d_http_cache_table->get_locked_entry_from_cache_table(url);
1410  if (!entry)
1411  throw Error("There is no cache entry for the URL: " + url);
1412 
1413  // If we supported range requests, we'd need code here to check if
1414  // there was only a partial response in the cache. 10/02/02 jhrg
1415 
1416  // In case this entry is of type "must-revalidate" then we consider it
1417  // invalid.
1418  if (entry->get_must_revalidate()) {
1419  entry->unlock_read_response();
1421  return false;
1422  }
1423 
1424  time_t resident_time = time(NULL) - entry->get_response_time();
1425  time_t current_age = entry->get_corrected_initial_age() + resident_time;
1426 
1427  // Check that the max-age, max-stale, and min-fresh directives
1428  // given in the request cache control header is followed.
1429  if (d_max_age >= 0 && current_age > d_max_age) {
1430  DBG(cerr << "Cache....... Max-age validation" << endl);
1431  entry->unlock_read_response();
1433  return false;
1434  }
1435  if (d_min_fresh >= 0
1436  && entry->get_freshness_lifetime() < current_age + d_min_fresh) {
1437  DBG(cerr << "Cache....... Min-fresh validation" << endl);
1438  entry->unlock_read_response();
1440  return false;
1441  }
1442 
1443  freshness = (entry->get_freshness_lifetime()
1444  + (d_max_stale >= 0 ? d_max_stale : 0) > current_age);
1445  entry->unlock_read_response();
1447  }
1448  catch (...) {
1449  if (entry) {
1450  entry->unlock_read_response();
1451  }
1453  throw;
1454  }
1455 
1456  return freshness;
1457 }
1458 
1486 FILE * HTTPCache::get_cached_response(const string &url,
1487  vector<string> &headers, string &cacheName) {
1489 
1490  FILE *body;
1491  HTTPCacheTable::CacheEntry *entry = 0;
1492 
1493  DBG(cerr << "Getting the cached response for " << url << endl);
1494 
1495  try {
1496  entry = d_http_cache_table->get_locked_entry_from_cache_table(url);
1497  if (!entry) {
1499  return 0;
1500  }
1501 
1502  cacheName = entry->get_cachename();
1503  read_metadata(entry->get_cachename(), headers);
1504 
1505  DBG(cerr << "Headers just read from cache: " << endl);
1506  DBGN(copy(headers.begin(), headers.end(), ostream_iterator<string>(cerr, "\n")));
1507 
1508  body = open_body(entry->get_cachename());
1509 
1510  DBG(cerr << "Returning: " << url << " from the cache." << endl);
1511 
1512  d_http_cache_table->bind_entry_to_data(entry, body);
1513  }
1514  catch (...) {
1515  // Why make this unlock operation conditional on entry?
1516  if (entry)
1518  fclose(body);
1519  throw;
1520  }
1521 
1523 
1524  return body;
1525 }
1526 
1538 FILE *
1539 HTTPCache::get_cached_response(const string &url, vector<string> &headers)
1540 {
1541  string discard_name;
1542  return get_cached_response(url, headers, discard_name);
1543 }
1544 
1555 FILE *
1557 {
1558  string discard_name;
1559  vector<string> discard_headers;
1560  return get_cached_response(url, discard_headers, discard_name);
1561 }
1562 
1575 void
1577 {
1579 
1580  try {
1581  d_http_cache_table->uncouple_entry_from_data(body);
1582  }
1583  catch (...) {
1585  throw;
1586  }
1587 
1589 }
1590 
1603 void
1605 {
1607 
1608  try {
1609  if (d_http_cache_table->is_locked_read_responses())
1610  throw Error("Attempt to purge the cache with entries in use.");
1611 
1612  d_http_cache_table->delete_all_entries();
1613  }
1614  catch (...) {
1616  throw;
1617  }
1618 
1620 }
1621 
1622 } // namespace libdap
vector< string > get_conditional_request_headers(const string &url)
Definition: HTTPCache.cc:1255
bool is_cache_enabled() const
Definition: HTTPCache.cc:655
vector< string > get_cache_control()
Definition: HTTPCache.cc:930
void set_cache_enabled(bool mode)
Definition: HTTPCache.cc:643
time_t parse_time(const char *str, bool expand)
Definition: util_mit.cc:132
bool is_url_valid(const string &url)
Definition: HTTPCache.cc:1394
string get_error_message() const
Definition: Error.cc:279
void create_location(CacheEntry *entry)
virtual ~HTTPCache()
Definition: HTTPCache.cc:323
void set_max_size(unsigned long size)
Definition: HTTPCache.cc:733
void parse_headers(HTTPCacheTable::CacheEntry *entry, unsigned long max_entry_size, const vector< string > &headers)
#define DBGN(x)
Definition: debug.h:59
void add_entry_to_cache_table(CacheEntry *entry)
#define UNLOCK(m)
#define CACHE_ROOT
Definition: HTTPCache.cc:98
FILE * get_cached_response(const string &url, vector< string > &headers, string &cacheName)
Definition: HTTPCache.cc:1486
void calculate_time(HTTPCacheTable::CacheEntry *entry, int default_expiration, time_t request_time)
void delete_by_size(unsigned int size)
#define DESTROY(m)
int get_new_entries() const
#define DBG2(x)
Definition: debug.h:73
int get_default_expiration() const
Definition: HTTPCache.cc:841
A class for software fault reporting.
Definition: InternalErr.h:64
#define DUMP_FREQUENCY
Definition: HTTPCache.cc:107
#define MKDIR(a, b)
Definition: HTTPCache.cc:85
bool is_hop_by_hop_header(const string &header)
Definition: HTTPCache.cc:964
unsigned long get_max_entry_size() const
Definition: HTTPCache.cc:813
#define DBG(x)
Definition: debug.h:58
#define CACHE_GC_PCT
Definition: HTTPCache.cc:112
#define CACHE_TOTAL_SIZE
Definition: HTTPCache.cc:110
bool is_expire_ignored() const
Definition: HTTPCache.cc:712
#define MAX_CACHE_ENTRY_SIZE
Definition: HTTPCache.cc:114
void set_size(unsigned long sz)
#define LOCK(m)
CacheDisconnectedMode get_cache_disconnected() const
Definition: HTTPCache.cc:685
void update_response(const string &url, time_t request_time, const vector< string > &headers)
Definition: HTTPCache.cc:1325
#define MEGA
Definition: HTTPCache.cc:109
bool get_always_validate() const
Definition: HTTPCache.cc:860
#define REMOVE(a)
Definition: HTTPCache.cc:87
#define NO_LM_EXPIRATION
Definition: HTTPCache.cc:105
void bind_entry_to_data(CacheEntry *entry, FILE *body)
string get_cache_root() const
Definition: HTTPCache.cc:537
void set_cache_control(const vector< string > &cc)
Definition: HTTPCache.cc:882
void delete_expired_entries(time_t time=0)
string date_time_str(time_t *calendar, bool local)
Definition: util_mit.cc:284
unsigned long get_current_size() const
bool cache_response(const string &url, time_t request_time, const vector< string > &headers, const FILE *body)
Definition: HTTPCache.cc:1162
#define CACHE_LOCK
Definition: HTTPCache.cc:101
#define MIN_CACHE_TOTAL_SIZE
Definition: HTTPCache.cc:113
string long_to_string(long val, int base)
Definition: util.cc:440
#define INIT(m)
#define CACHE_FOLDER_PCT
Definition: HTTPCache.cc:111
void set_always_validate(bool validate)
Definition: HTTPCache.cc:851
#define DIR_SEPARATOR_CHAR
Definition: HTTPCache.cc:89
void set_default_expiration(int exp_time)
Definition: HTTPCache.cc:829
void remove_entry_from_cache_table(const string &url)
void release_cached_response(FILE *response)
Definition: HTTPCache.cc:1576
#define UMASK(a)
Definition: HTTPCache.cc:86
void unlock_cache_interface()
Definition: HTTPCache.h:246
CacheEntry * get_write_locked_entry_from_cache_table(const string &url)
void lock_cache_interface()
Definition: HTTPCache.h:241
unsigned long get_max_size() const
Definition: HTTPCache.cc:768
void delete_by_hits(int hits)
#define CACHE_META
Definition: HTTPCache.cc:102
A class for error processing.
Definition: Error.h:90
void set_expire_ignored(bool mode)
Definition: HTTPCache.cc:699
void set_max_entry_size(unsigned long size)
Definition: HTTPCache.cc:782
void uncouple_entry_from_data(FILE *body)
void set_cache_root(const string &cr)
#define CACHE_LOCATION
Definition: HTTPCache.cc:97
void set_cache_disconnected(CacheDisconnectedMode mode)
Definition: HTTPCache.cc:673