35 #include <sys/types.h>
// The single shared HTTPCache object. Null until instance() allocates it
// with new; delete_instance() deletes it and resets the pointer to 0.
62 HTTPCache *HTTPCache::_instance = 0;
// Mutex handed to INIT() by the one-time init routine and to LOCK() in
// instance() before _instance is examined or created.
71 static pthread_mutex_t instance_mutex;
// pthread_once() control block; both instance() and the constructor pass it
// together with once_init_routine.
72 static pthread_once_t once_block = PTHREAD_ONCE_INIT;
78 #define MKDIR(a,b) _mkdir((a))
79 #define UMASK(a) _umask((a))
80 #define REMOVE(a) remove((a))
81 #define MKSTEMP(a) _open(_mktemp((a)),_O_CREAT,_S_IREAD|_S_IWRITE)
82 #define DIR_SEPARATOR_CHAR '\\'
83 #define DIR_SEPARATOR_STR "\\"
85 #define MKDIR(a,b) mkdir((a), (b))
86 #define UMASK(a) umask((a))
87 #define REMOVE(a) remove((a))
88 #define MKSTEMP(a) mkstemp((a))
89 #define DIR_SEPARATOR_CHAR '/'
90 #define DIR_SEPARATOR_STR "/"
94 #define CACHE_LOCATION "\\tmp\\"
95 #define CACHE_ROOT "dods-cache\\"
97 #define CACHE_LOCATION "/tmp/"
98 #define CACHE_ROOT "dods-cache/"
// File-name pieces used by the cache. CACHE_META is appended to a cache
// entry's file name to form the name of its metadata file.
#define CACHE_INDEX ".index"
#define CACHE_LOCK ".lock"
#define CACHE_META ".meta"

// Parenthesized so the product stays a single operand wherever the macro is
// expanded (e.g. `x / NO_LM_EXPIRATION` must divide by 86400, not by 24).
#define NO_LM_EXPIRATION (24*3600) // 24 hours

#define DUMP_FREQUENCY 10 // Dump index every x loads

// Bytes per megabyte (0x100000 == 1048576); the size setters/getters multiply
// and divide by this to convert between megabytes and bytes.
#define MEGA 0x100000L
#define CACHE_TOTAL_SIZE 20 // Default cache size is 20M
#define CACHE_FOLDER_PCT 10 // 10% of cache size for metainfo etc.
#define CACHE_GC_PCT 10 // 10% of cache size free after GC
#define MIN_CACHE_TOTAL_SIZE 5 // 5M Min cache size
#define MAX_CACHE_ENTRY_SIZE 3 // 3M Max size of single cached entry
120 status =
INIT(&instance_mutex);
123 throw InternalErr(__FILE__, __LINE__,
"Could not initialize the HTTP Cache mutex. Exiting.");
155 HTTPCache::instance(
const string &cache_root,
bool force)
157 int status = pthread_once(&once_block, once_init_routine);
159 throw InternalErr(__FILE__, __LINE__,
"Could not initialize the HTTP Cache mutex. Exiting.");
161 LOCK(&instance_mutex);
163 DBG(cerr <<
"Entering instance(); (" << hex << _instance << dec <<
")"
168 _instance =
new HTTPCache(cache_root, force);
170 DBG(cerr <<
"New instance: " << _instance <<
", cache root: "
171 << _instance->d_cache_root << endl);
173 atexit(delete_instance);
184 EventHandler *old_eh = SignalHandler::instance()->register_handler
187 SignalHandler::instance()->register_handler(SIGINT, old_eh);
189 "Could not register event handler for SIGINT without superseding an existing one.");
192 old_eh = SignalHandler::instance()->register_handler
195 SignalHandler::instance()->register_handler(SIGPIPE, old_eh);
197 "Could not register event handler for SIGPIPE without superseding an existing one.");
200 old_eh = SignalHandler::instance()->register_handler
203 SignalHandler::instance()->register_handler(SIGTERM, old_eh);
205 "Could not register event handler for SIGTERM without superseding an existing one.");
211 DBG2(cerr <<
"The constructor threw an Error!" << endl);
217 DBGN(cerr <<
"returning " << hex << _instance << dec << endl);
226 HTTPCache::delete_instance()
228 DBG(cerr <<
"Entering delete_instance()..." << endl);
229 if (HTTPCache::_instance) {
230 DBG(cerr <<
"Deleting the cache: " << HTTPCache::_instance << endl);
231 delete HTTPCache::_instance;
232 HTTPCache::_instance = 0;
235 DBG(cerr <<
"Exiting delete_instance()" << endl);
252 HTTPCache::HTTPCache(
string cache_root,
bool force) :
253 d_locked_open_file(0),
254 d_cache_enabled(false),
255 d_cache_protected(false),
256 d_expire_ignored(false),
257 d_always_validate(false),
266 d_http_cache_table(0)
268 DBG(cerr <<
"Entering the constructor for " <<
this <<
"... ");
270 int status = pthread_once(&once_block, once_init_routine);
272 throw InternalErr(__FILE__, __LINE__,
"Could not initialize the HTTP Cache mutex. Exiting.");
274 INIT(&d_cache_mutex);
286 set_cache_root(cache_root);
289 if (!get_single_user_lock(force))
290 throw Error(
"Could not get single user lock for the cache");
300 if (stat(cache_root.c_str(), &s) == 0)
301 block_size = s.st_blksize;
303 throw Error(
"Could not set file system block size.");
305 d_http_cache_table =
new HTTPCacheTable(d_cache_root, block_size);
306 d_cache_enabled =
true;
308 DBGN(cerr <<
"exiting" << endl);
325 DBG(cerr <<
"Entering the destructor for " <<
this <<
"... ");
329 perform_garbage_collection();
341 delete d_http_cache_table;
343 release_single_user_lock();
345 DBGN(cerr <<
"exiting destructor." << endl);
358 HTTPCache::stopGC()
const
360 return (d_http_cache_table->
get_current_size() + d_folder_size < d_total_size - d_gc_buffer);
370 HTTPCache::startGC()
const
373 return (d_http_cache_table->
get_current_size() + d_folder_size > d_total_size);
391 HTTPCache::perform_garbage_collection()
393 DBG(cerr <<
"Performing garbage collection" << endl);
412 HTTPCache::expired_gc()
414 if (!d_expire_ignored) {
452 void HTTPCache::too_big_gc() {
469 bool HTTPCache::get_single_user_lock(
bool force)
471 if (!d_locked_open_file) {
477 create_cache_root(d_cache_root);
483 DBG(cerr <<
"Failure to create the cache root" << endl);
489 if ((fp = fopen(lock.c_str(),
"r")) != NULL) {
490 int res = fclose(fp);
492 DBG(cerr <<
"Failed to close " << (
void *)fp << endl);
500 if ((fp = fopen(lock.c_str(),
"w")) == NULL) {
501 DBG(cerr <<
"Could not open for write access" << endl);
505 d_locked_open_file = fp;
509 cerr <<
"locked_open_file is true" << endl;
516 HTTPCache::release_single_user_lock()
518 if (d_locked_open_file) {
519 int res = fclose(d_locked_open_file);
521 DBG(cerr <<
"Failed to close " << (
void *)d_locked_open_file << endl) ;
523 d_locked_open_file = 0;
552 HTTPCache::create_cache_root(
const string &cache_root)
554 struct stat stat_info;
555 string::size_type cur = 0;
558 cur = cache_root[1] ==
':' ? 3 : 1;
564 string dir = cache_root.substr(0, cur);
565 if (stat(dir.c_str(), &stat_info) == -1) {
566 DBG2(cerr <<
"Cache....... Creating " << dir << endl);
567 mode_t mask =
UMASK(0);
568 if (
MKDIR(dir.c_str(), 0777) < 0) {
569 DBG2(cerr <<
"Error: can't create." << endl);
571 throw Error(
string(
"Could not create the directory for the cache. Failed when building path at ") + dir +
string(
"."));
576 DBG2(cerr <<
"Cache....... Found " << dir << endl);
597 HTTPCache::set_cache_root(
const string &root)
609 char * cr = (
char *) getenv(
"DODS_CACHE");
610 if (!cr) cr = (
char *) getenv(
"TMP");
611 if (!cr) cr = (
char *) getenv(
"TEMP");
627 if (d_http_cache_table)
647 d_cache_enabled = mode;
657 DBG2(cerr <<
"In HTTPCache::is_cache_enabled: (" << d_cache_enabled <<
")"
659 return d_cache_enabled;
677 d_cache_disconnected = mode;
687 return d_cache_disconnected;
703 d_expire_ignored = mode;
714 return d_expire_ignored;
740 (size > ULONG_MAX ? ULONG_MAX : size *
MEGA);
741 unsigned long old_size = d_total_size;
742 d_total_size = new_size;
746 if (new_size < old_size && startGC()) {
747 perform_garbage_collection();
753 DBGN(cerr <<
"Unlocking interface." << endl);
757 DBG2(cerr <<
"Cache....... Total cache size: " << d_total_size
758 <<
" with " << d_folder_size
759 <<
" bytes for meta information and folders and at least "
760 << d_gc_buffer <<
" bytes free after every gc" << endl);
770 return d_total_size /
MEGA;
787 unsigned long new_size = size *
MEGA;
788 if (new_size > 0 && new_size < d_total_size - d_folder_size) {
789 unsigned long old_size = d_max_entry_size;
790 d_max_entry_size = new_size;
791 if (new_size < old_size && startGC()) {
792 perform_garbage_collection();
802 DBG2(cerr <<
"Cache...... Max entry cache size is "
803 << d_max_entry_size << endl);
815 return d_max_entry_size /
MEGA;
833 d_default_expiration = exp_time;
843 return d_default_expiration;
853 d_always_validate = validate;
862 return d_always_validate;
887 d_cache_control = cc;
889 vector<string>::const_iterator i;
890 for (i = cc.begin(); i != cc.end(); ++i) {
891 string header = (*i).substr(0, (*i).find(
':'));
892 string value = (*i).substr((*i).find(
": ") + 2);
893 if (header !=
"Cache-Control") {
894 throw InternalErr(__FILE__, __LINE__,
"Expected cache control header not found.");
897 if (value ==
"no-cache" || value ==
"no-store")
898 d_cache_enabled =
false;
899 else if (value.find(
"max-age") != string::npos) {
900 string max_age = value.substr(value.find(
"=" + 1));
903 else if (value ==
"max-stale")
905 else if (value.find(
"max-stale") != string::npos) {
906 string max_stale = value.substr(value.find(
"=" + 1));
909 else if (value.find(
"min-fresh") != string::npos) {
910 string min_fresh = value.substr(value.find(
"=" + 1));
932 return d_cache_control;
946 HTTPCache::is_url_in_cache(
const string &url)
948 DBG(cerr <<
"Is this url in the cache? (" << url <<
")" << endl);
951 bool status = entry != 0;
966 return header.find(
"Connection") != string::npos
967 || header.find(
"Keep-Alive") != string::npos
968 || header.find(
"Proxy-Authenticate") != string::npos
969 || header.find(
"Proxy-Authorization") != string::npos
970 || header.find(
"Transfer-Encoding") != string::npos
971 || header.find(
"Upgrade") != string::npos;
986 HTTPCache::write_metadata(
const string &cachename,
const vector<string> &headers)
989 d_open_files.push_back(fname);
991 FILE *dest = fopen(fname.c_str(),
"w");
993 throw InternalErr(__FILE__, __LINE__,
994 "Could not open named cache entry file.");
997 vector<string>::const_iterator i;
998 for (i = headers.begin(); i != headers.end(); ++i) {
1000 int s = fwrite((*i).c_str(), (*i).size(), 1, dest);
1002 throw InternalErr(__FILE__, __LINE__,
"could not write header: '" + (*i) +
"' " +
long_to_string(s));
1003 s = fwrite(
"\n", 1, 1, dest);
1005 throw InternalErr(__FILE__, __LINE__,
"could not write header: " +
long_to_string(s));
1009 int res = fclose(dest);
1011 DBG(cerr <<
"HTTPCache::write_metadata - Failed to close "
1015 d_open_files.pop_back();
1029 HTTPCache::read_metadata(
const string &cachename, vector<string> &headers)
1031 FILE *md = fopen(
string(cachename + CACHE_META).c_str(),
"r");
1033 throw InternalErr(__FILE__, __LINE__,
1034 "Could not open named cache entry meta data file.");
1038 while (!feof(md) && fgets(line, 1024, md)) {
1039 line[min(1024, static_cast<int>(strlen(line)))-1] =
'\0';
1040 headers.push_back(
string(line));
1043 int res = fclose(md);
1045 DBG(cerr <<
"HTTPCache::read_metadata - Failed to close "
1072 HTTPCache::write_body(
const string &cachename,
const FILE *src)
1074 d_open_files.push_back(cachename);
1076 FILE *dest = fopen(cachename.c_str(),
"wb");
1078 throw InternalErr(__FILE__, __LINE__,
1079 "Could not open named cache entry file.");
1087 while ((n = fread(line, 1, 1024, const_cast<FILE *>(src))) > 0) {
1088 total += fwrite(line, 1, n, dest);
1092 if (ferror(const_cast<FILE *>(src)) || ferror(dest)) {
1093 int res = fclose(dest);
1094 res = res & unlink(cachename.c_str());
1096 DBG(cerr <<
"HTTPCache::write_body - Failed to close/unlink "
1099 throw InternalErr(__FILE__, __LINE__,
1100 "I/O error transferring data to the cache.");
1103 rewind(const_cast<FILE *>(src));
1105 int res = fclose(dest);
1107 DBG(cerr <<
"HTTPCache::write_body - Failed to close "
1111 d_open_files.pop_back();
1125 HTTPCache::open_body(
const string &cachename)
1127 DBG(cerr <<
"cachename: " << cachename << endl);
1129 FILE *src = fopen(cachename.c_str(),
"rb");
1131 throw InternalErr(__FILE__, __LINE__,
"Could not open cache file.");
1163 const vector<string> &headers,
const FILE *body)
1167 DBG(cerr <<
"Caching url: " << url <<
"." << endl);
1171 if (url.find(
"http:") == string::npos &&
1172 url.find(
"https:") == string::npos) {
1186 d_http_cache_table->
parse_headers(entry, d_max_entry_size, headers);
1188 DBG(cerr <<
"Not cache-able; deleting HTTPCacheTable::CacheEntry: " << entry
1189 <<
"(" << url <<
")" << endl);
1191 delete entry; entry = 0;
1197 d_http_cache_table->
calculate_time(entry, d_default_expiration, request_time);
1211 DBG(cerr <<
"Too big; deleting HTTPCacheTable::CacheEntry: " << entry <<
"(" << url
1214 delete entry; entry = 0;
1221 perform_garbage_collection();
1260 vector<string> headers;
1262 DBG(cerr <<
"Getting conditional request headers for " << url << endl);
1265 entry = d_http_cache_table->get_locked_entry_from_cache_table(url);
1267 throw Error(
"There is no cache entry for the URL: " + url);
1270 headers.push_back(
string(
"If-None-Match: ") + entry->
get_etag());
1272 if (entry->
get_lm() > 0) {
1273 time_t lm = entry->
get_lm();
1274 headers.push_back(
string(
"If-Modified-Since: ")
1279 headers.push_back(
string(
"If-Modified-Since: ")
1284 headers.push_back(
string(
"If-Modified-Since: ")
/** Ordering predicate for header strings that looks only at the header name,
    i.e. the text before the first ':' (the whole string when there is no
    ':'). Two headers with the same name compare equivalent whatever their
    values are, so a std::set using this predicate keeps one header per name.

    The former std::binary_function base class is gone: it was removed from
    the standard library in C++17 and a set comparator does not need it.

    @param s1 First header line, e.g. "ETag: abc".
    @param s2 Second header line.
    @return True when the name of s1 sorts before the name of s2. */
struct HeaderLess
{
    bool operator()(const std::string &s1, const std::string &s2) const
    {
        return s1.substr(0, s1.find(':')) < s2.substr(0, s2.find(':'));
    }
};
1326 const vector<string> &headers)
1331 DBG(cerr <<
"Updating the response headers for: " << url << endl);
1336 throw Error(
"There is no cache entry for the URL: " + url);
1339 d_http_cache_table->
parse_headers(entry, d_max_entry_size, headers);
1342 d_http_cache_table->
calculate_time(entry, d_default_expiration, request_time);
1350 set<string, HeaderLess> merged_headers;
1353 copy(headers.begin(), headers.end(),
1354 inserter(merged_headers, merged_headers.begin()));
1357 vector<string> old_headers;
1359 copy(old_headers.begin(), old_headers.end(),
1360 inserter(merged_headers, merged_headers.begin()));
1365 vector<string> result;
1366 copy(merged_headers.rbegin(), merged_headers.rend(),
1367 back_inserter(result));
1401 DBG(cerr <<
"Is this URL valid? (" << url <<
")" << endl);
1404 if (d_always_validate) {
1409 entry = d_http_cache_table->get_locked_entry_from_cache_table(url);
1411 throw Error(
"There is no cache entry for the URL: " + url);
1429 if (d_max_age >= 0 && current_age > d_max_age) {
1430 DBG(cerr <<
"Cache....... Max-age validation" << endl);
1435 if (d_min_fresh >= 0
1437 DBG(cerr <<
"Cache....... Min-fresh validation" << endl);
1444 + (d_max_stale >= 0 ? d_max_stale : 0) > current_age);
1487 vector<string> &headers,
string &cacheName) {
1493 DBG(cerr <<
"Getting the cached response for " << url << endl);
1496 entry = d_http_cache_table->get_locked_entry_from_cache_table(url);
1505 DBG(cerr <<
"Headers just read from cache: " << endl);
1506 DBGN(copy(headers.begin(), headers.end(), ostream_iterator<string>(cerr,
"\n")));
1510 DBG(cerr <<
"Returning: " << url <<
" from the cache." << endl);
1541 string discard_name;
1558 string discard_name;
1559 vector<string> discard_headers;
1610 throw Error(
"Attempt to purge the cache with entries in use.");
vector< string > get_conditional_request_headers(const string &url)
bool is_cache_enabled() const
vector< string > get_cache_control()
void set_cache_enabled(bool mode)
time_t parse_time(const char *str, bool expand)
bool is_url_valid(const string &url)
string get_error_message() const
void create_location(CacheEntry *entry)
void unlock_read_response()
void set_max_size(unsigned long size)
void parse_headers(HTTPCacheTable::CacheEntry *entry, unsigned long max_entry_size, const vector< string > &headers)
void add_entry_to_cache_table(CacheEntry *entry)
FILE * get_cached_response(const string &url, vector< string > &headers, string &cacheName)
void calculate_time(HTTPCacheTable::CacheEntry *entry, int default_expiration, time_t request_time)
void delete_by_size(unsigned int size)
void lock_write_response()
int get_new_entries() const
int get_default_expiration() const
A class for software fault reporting.
void delete_all_entries()
bool is_hop_by_hop_header(const string &header)
unsigned long get_max_entry_size() const
bool is_expire_ignored() const
#define MAX_CACHE_ENTRY_SIZE
void set_size(unsigned long sz)
CacheDisconnectedMode get_cache_disconnected() const
void update_response(const string &url, time_t request_time, const vector< string > &headers)
bool get_always_validate() const
void bind_entry_to_data(CacheEntry *entry, FILE *body)
string get_cache_root() const
void set_cache_control(const vector< string > &cc)
void delete_expired_entries(time_t time=0)
string date_time_str(time_t *calendar, bool local)
bool get_must_revalidate()
unsigned long get_current_size() const
bool cache_response(const string &url, time_t request_time, const vector< string > &headers, const FILE *body)
time_t get_freshness_lifetime()
#define MIN_CACHE_TOTAL_SIZE
string long_to_string(long val, int base)
void unlock_write_response()
void set_always_validate(bool validate)
#define DIR_SEPARATOR_CHAR
void set_default_expiration(int exp_time)
void remove_entry_from_cache_table(const string &url)
void release_cached_response(FILE *response)
void unlock_cache_interface()
CacheEntry * get_write_locked_entry_from_cache_table(const string &url)
void lock_cache_interface()
time_t get_response_time()
unsigned long get_max_size() const
void delete_by_hits(int hits)
A class for error processing.
void set_expire_ignored(bool mode)
void set_max_entry_size(unsigned long size)
time_t get_corrected_initial_age()
void uncouple_entry_from_data(FILE *body)
bool is_locked_read_responses()
void set_cache_root(const string &cr)
void set_cache_disconnected(CacheDisconnectedMode mode)