The InspIRCd Project
Home | Developers | Wiki | Forums | Bug Tracker | SVN | Download
Main Page | Namespace List | Class Hierarchy | Alphabetical List | Class List | Directories | File List | Namespace Members | Class Members | File Members

hashcomp.cpp

Go to the documentation of this file.
00001 /*       +------------------------------------+
00002  *       | Inspire Internet Relay Chat Daemon |
00003  *       +------------------------------------+
00004  *
00005  *  InspIRCd: (C) 2002-2008 InspIRCd Development Team
00006  * See: http://www.inspircd.org/wiki/index.php/Credits
00007  *
00008  * This program is free but copyrighted software; see
00009  *            the file COPYING for details.
00010  *
00011  * ---------------------------------------------------
00012  */
00013 
00014 /* $Core */
00015 
00016 #include "inspircd.h"
00017 #include "hashcomp.h"
00018 #include "hash_map.h"
00019 
00020 /******************************************************
00021  *
00022  * The hash functions of InspIRCd are the centrepoint
00023  * of the entire system. If these functions are
00024  * inefficient or wasteful, the whole program suffers
00025  * as a result. A lot of C programmers in the ircd
00026  * scene spend a lot of time debating (arguing) about
00027  * the best way to write hash functions to hash irc
00028  * nicknames, channels etc.
00029  * We are lucky as C++ developers as hash_map does
00030  * a lot of this for us. It does intellegent memory
00031  * requests, bucketing, search functions, insertion
00032  * and deletion etc. All we have to do is write some
00033  * overloaded comparison and hash value operators which
00034  * cause it to act in an irc-like way. The features we
00035  * add to the standard hash_map are:
00036  *
00037  * Case insensitivity: The hash_map will be case
00038  * insensitive.
00039  *
00040  * Scandanavian Comparisons: The characters [, ], \ will
00041  * be considered the lowercase of {, } and |.
00042  *
00043  ******************************************************/
00044 
00045 /* convert a string to lowercase. Note following special circumstances
00046  * taken from RFC 1459. Many "official" server branches still hold to this
00047  * rule so i will too;
00048  *
00049  *  Because of IRC's scandanavian origin, the characters {}| are
00050  *  considered to be the lower case equivalents of the characters []\,
00051  *  respectively. This is a critical issue when determining the
00052  *  equivalence of two nicknames.
00053  */
00054 void nspace::strlower(char *n)
00055 {
00056         if (n)
00057         {
00058                 for (char* t = n; *t; t++)
00059                         *t = rfc_case_insensitive_map[(unsigned char)*t];
00060         }
00061 }
00062 
00063 #ifndef WIN32
00064         #ifdef HASHMAP_DEPRECATED
00065                 size_t nspace::insensitive::operator()(const std::string &s) const
00066         #else
00067                 size_t nspace::hash<std::string>::operator()(const std::string &s) const
00068         #endif
00069 #else
00070         size_t nspace::hash_compare<std::string, std::less<std::string> >::operator()(const std::string &s) const
00071 #endif
00072 {
00073         /* XXX: NO DATA COPIES! :)
00074          * The hash function here is practically
00075          * a copy of the one in STL's hash_fun.h,
00076          * only with *x replaced with rfc_case_insensitive_map[*x].
00077          * This avoids a copy to use hash<const char*>
00078          */
00079         register size_t t = 0;
00080         for (std::string::const_iterator x = s.begin(); x != s.end(); ++x) /* ++x not x++, as its faster */
00081                 t = 5 * t + rfc_case_insensitive_map[(unsigned char)*x];
00082         return t;
00083 }
00084 
00085 
00086 #ifndef WIN32
00087 size_t nspace::hash<irc::string>::operator()(const irc::string &s) const
00088 #else
00089 size_t nspace::hash_compare<irc::string, std::less<irc::string> >::operator()(const irc::string &s) const
00090 #endif
00091 {
00092         register size_t t = 0;
00093         for (irc::string::const_iterator x = s.begin(); x != s.end(); ++x) /* ++x not x++, as its faster */
00094                 t = 5 * t + rfc_case_insensitive_map[(unsigned char)*x];
00095         return t;
00096 }
00097 
00098 bool irc::StrHashComp::operator()(const std::string& s1, const std::string& s2) const
00099 {
00100         const unsigned char* n1 = (const unsigned char*)s1.c_str();
00101         const unsigned char* n2 = (const unsigned char*)s2.c_str();
00102         for (; *n1 && *n2; n1++, n2++)
00103                 if (rfc_case_insensitive_map[*n1] != rfc_case_insensitive_map[*n2])
00104                         return false;
00105         return (rfc_case_insensitive_map[*n1] == rfc_case_insensitive_map[*n2]);
00106 }
00107 
00108 /******************************************************
00109  *
00110  * This is the implementation of our special irc::string
00111  * class which is a case-insensitive equivalent to
00112  * std::string which is not only case-insensitive but
00113  * can also do scandanavian comparisons, e.g. { = [, etc.
00114  *
00115  * This class depends on the const array 'rfc_case_insensitive_map'.
00116  *
00117  ******************************************************/
00118 
00119 bool irc::irc_char_traits::eq(char c1st, char c2nd)
00120 {
00121         return rfc_case_insensitive_map[(unsigned char)c1st] == rfc_case_insensitive_map[(unsigned char)c2nd];
00122 }
00123 
00124 bool irc::irc_char_traits::ne(char c1st, char c2nd)
00125 {
00126         return rfc_case_insensitive_map[(unsigned char)c1st] != rfc_case_insensitive_map[(unsigned char)c2nd];
00127 }
00128 
00129 bool irc::irc_char_traits::lt(char c1st, char c2nd)
00130 {
00131         return rfc_case_insensitive_map[(unsigned char)c1st] < rfc_case_insensitive_map[(unsigned char)c2nd];
00132 }
00133 
00134 int irc::irc_char_traits::compare(const char* str1, const char* str2, size_t n)
00135 {
00136         for(unsigned int i = 0; i < n; i++)
00137         {
00138                 if(rfc_case_insensitive_map[(unsigned char)*str1] > rfc_case_insensitive_map[(unsigned char)*str2])
00139                         return 1;
00140 
00141                 if(rfc_case_insensitive_map[(unsigned char)*str1] < rfc_case_insensitive_map[(unsigned char)*str2])
00142                         return -1;
00143 
00144                 if(*str1 == 0 || *str2 == 0)
00145                         return 0;
00146 
00147                 str1++;
00148                 str2++;
00149         }
00150         return 0;
00151 }
00152 
00153 const char* irc::irc_char_traits::find(const char* s1, int  n, char c)
00154 {
00155         while(n-- > 0 && rfc_case_insensitive_map[(unsigned char)*s1] != rfc_case_insensitive_map[(unsigned char)c])
00156                 s1++;
00157         return s1;
00158 }
00159 
00160 irc::tokenstream::tokenstream(const std::string &source) : tokens(source), last_pushed(false)
00161 {
00162         /* Record starting position and current position */
00163         last_starting_position = tokens.begin();
00164         n = tokens.begin();
00165 }
00166 
00167 irc::tokenstream::~tokenstream()
00168 {
00169 }
00170 
00171 bool irc::tokenstream::GetToken(std::string &token)
00172 {
00173         std::string::iterator lsp = last_starting_position;
00174 
00175         while (n != tokens.end())
00176         {
00179                 while ((n+1 != tokens.end()) && (*n == ' ') && (*(n+1) == ' '))
00180                         n++;
00181 
00182                 if ((last_pushed) && (*n == ':'))
00183                 {
00184                         /* If we find a token thats not the first and starts with :,
00185                          * this is the last token on the line
00186                          */
00187                         std::string::iterator curr = ++n;
00188                         n = tokens.end();
00189                         token = std::string(curr, tokens.end());
00190                         return true;
00191                 }
00192 
00193                 last_pushed = false;
00194 
00195                 if ((*n == ' ') || (n+1 == tokens.end()))
00196                 {
00197                         /* If we find a space, or end of string, this is the end of a token.
00198                          */
00199                         last_starting_position = n+1;
00200                         last_pushed = true;
00201 
00202                         std::string strip(lsp, n+1 == tokens.end() ? n+1  : n++);
00203                         while ((strip.length()) && (strip.find_last_of(' ') == strip.length() - 1))
00204                                 strip.erase(strip.end() - 1);
00205 
00206                         token = strip;
00207                         return !token.empty();
00208                 }
00209 
00210                 n++;
00211         }
00212         token.clear();
00213         return false;
00214 }
00215 
00216 bool irc::tokenstream::GetToken(irc::string &token)
00217 {
00218         std::string stdstring;
00219         bool returnval = GetToken(stdstring);
00220         token = assign(stdstring);
00221         return returnval;
00222 }
00223 
00224 bool irc::tokenstream::GetToken(int &token)
00225 {
00226         std::string tok;
00227         bool returnval = GetToken(tok);
00228         token = ConvToInt(tok);
00229         return returnval;
00230 }
00231 
00232 bool irc::tokenstream::GetToken(long &token)
00233 {
00234         std::string tok;
00235         bool returnval = GetToken(tok);
00236         token = ConvToInt(tok);
00237         return returnval;
00238 }
00239 
00240 irc::sepstream::sepstream(const std::string &source, char seperator) : tokens(source), sep(seperator)
00241 {
00242         last_starting_position = tokens.begin();
00243         n = tokens.begin();
00244 }
00245 
00246 bool irc::sepstream::GetToken(std::string &token)
00247 {
00248         std::string::iterator lsp = last_starting_position;
00249 
00250         while (n != tokens.end())
00251         {
00252                 if ((*n == sep) || (n+1 == tokens.end()))
00253                 {
00254                         last_starting_position = n+1;
00255                         token = std::string(lsp, n+1 == tokens.end() ? n+1  : n++);
00256 
00257                         while ((token.length()) && (token.find_last_of(sep) == token.length() - 1))
00258                                 token.erase(token.end() - 1);
00259 
00260                         if (token.empty())
00261                                 n++;
00262 
00263                         return n == tokens.end() ? false : true;
00264                 }
00265 
00266                 n++;
00267         }
00268 
00269         token = "";
00270         return false;
00271 }
00272 
00273 const std::string irc::sepstream::GetRemaining()
00274 {
00275         return std::string(n, tokens.end());
00276 }
00277 
00278 bool irc::sepstream::StreamEnd()
00279 {
00280         return ((n + 1) == tokens.end());
00281 }
00282 
00283 irc::sepstream::~sepstream()
00284 {
00285 }
00286 
00287 std::string irc::hex(const unsigned char *raw, size_t rawsz)
00288 {
00289         if (!rawsz)
00290                 return "";
00291 
00292         /* EWW! This used to be using sprintf, which is WAY inefficient. -Special */
00293         
00294         const char *hex = "0123456789abcdef";
00295         static char hexbuf[MAXBUF];
00296 
00297         size_t i, j;
00298         for (i = 0, j = 0; j < rawsz; ++j)
00299         {
00300                 hexbuf[i++] = hex[raw[j] / 16];
00301                 hexbuf[i++] = hex[raw[j] % 16];
00302         }
00303         hexbuf[i] = 0;
00304 
00305         return hexbuf;
00306 }
00307 
00308 CoreExport const char* irc::Spacify(const char* n)
00309 {
00310         static char x[MAXBUF];
00311         strlcpy(x,n,MAXBUF);
00312         for (char* y = x; *y; y++)
00313                 if (*y == '_')
00314                         *y = ' ';
00315         return x;
00316 }
00317 
00318 
00319 irc::modestacker::modestacker(InspIRCd* Instance, bool add) : ServerInstance(Instance), adding(add)
00320 {
00321         sequence.clear();
00322         sequence.push_back("");
00323 }
00324 
00325 void irc::modestacker::Push(char modeletter, const std::string &parameter)
00326 {
00327         *(sequence.begin()) += modeletter;
00328         sequence.push_back(parameter);
00329 }
00330 
00331 void irc::modestacker::Push(char modeletter)
00332 {
00333         this->Push(modeletter,"");
00334 }
00335 
00336 void irc::modestacker::PushPlus()
00337 {
00338         this->Push('+',"");
00339 }
00340 
00341 void irc::modestacker::PushMinus()
00342 {
00343         this->Push('-',"");
00344 }
00345 
00346 int irc::modestacker::GetStackedLine(std::deque<std::string> &result, int max_line_size)
00347 {
00348         if (sequence.empty())
00349         {
00350                 result.clear();
00351                 return 0;
00352         }
00353 
00354         int n = 0;
00355         int size = 1; /* Account for initial +/- char */
00356         int nextsize = 0;
00357         result.clear();
00358         result.push_back(adding ? "+" : "-");
00359 
00360         if (sequence.size() > 1)
00361                 nextsize = sequence[1].length() + 2;
00362 
00363         while (!sequence[0].empty() && (sequence.size() > 1) && (result.size() < ServerInstance->Config->Limits.MaxModes) && ((size + nextsize) < max_line_size))
00364         {
00365                 result[0] += *(sequence[0].begin());
00366                 if (!sequence[1].empty())
00367                 {
00368                         result.push_back(sequence[1]);
00369                         size += nextsize; /* Account for mode character and whitespace */
00370                 }
00371                 sequence[0].erase(sequence[0].begin());
00372                 sequence.erase(sequence.begin() + 1);
00373 
00374                 if (sequence.size() > 1)
00375                         nextsize = sequence[1].length() + 2;
00376 
00377                 n++;
00378         }
00379 
00380         return n;
00381 }
00382 
00383 irc::stringjoiner::stringjoiner(const std::string &seperator, const std::vector<std::string> &sequence, int begin, int end)
00384 {
00385         if (end < begin)
00386                 throw "stringjoiner logic error, this causes problems.";
00387 
00388         for (int v = begin; v < end; v++)
00389                 joined.append(sequence[v]).append(seperator);
00390         joined.append(sequence[end]);
00391 }
00392 
00393 irc::stringjoiner::stringjoiner(const std::string &seperator, const std::deque<std::string> &sequence, int begin, int end)
00394 {
00395         if (end < begin)
00396                 throw "stringjoiner logic error, this causes problems.";
00397 
00398         for (int v = begin; v < end; v++)
00399                 joined.append(sequence[v]).append(seperator);
00400         joined.append(sequence[end]);
00401 }
00402 
00403 irc::stringjoiner::stringjoiner(const std::string &seperator, const char* const* sequence, int begin, int end)
00404 {
00405         if (end < begin)
00406                 throw "stringjoiner logic error, this causes problems.";
00407 
00408         for (int v = begin; v < end; v++)
00409                 joined.append(sequence[v]).append(seperator);
00410         joined.append(sequence[end]);
00411 }
00412 
00413 std::string& irc::stringjoiner::GetJoined()
00414 {
00415         return joined;
00416 }
00417 
00418 irc::portparser::portparser(const std::string &source, bool allow_overlapped) : in_range(0), range_begin(0), range_end(0), overlapped(allow_overlapped)
00419 {
00420         sep = new irc::commasepstream(source);
00421         overlap_set.clear();
00422 }
00423 
00424 irc::portparser::~portparser()
00425 {
00426         delete sep;
00427 }
00428 
00429 bool irc::portparser::Overlaps(long val)
00430 {
00431         if (!overlapped)
00432                 return false;
00433 
00434         if (overlap_set.find(val) == overlap_set.end())
00435         {
00436                 overlap_set[val] = true;
00437                 return false;
00438         }
00439         else
00440                 return true;
00441 }
00442 
00443 long irc::portparser::GetToken()
00444 {
00445         if (in_range > 0)
00446         {
00447                 in_range++;
00448                 if (in_range <= range_end)
00449                 {
00450                         if (!Overlaps(in_range))
00451                         {
00452                                 return in_range;
00453                         }
00454                         else
00455                         {
00456                                 while (((Overlaps(in_range)) && (in_range <= range_end)))
00457                                         in_range++;
00458                                 
00459                                 if (in_range <= range_end)
00460                                         return in_range;
00461                         }
00462                 }
00463                 else
00464                         in_range = 0;
00465         }
00466 
00467         std::string x;
00468         sep->GetToken(x);
00469 
00470         if (x.empty())
00471                 return 0;
00472 
00473         while (Overlaps(atoi(x.c_str())))
00474         {
00475                 if (!sep->GetToken(x))
00476                         return 0;
00477         }
00478 
00479         std::string::size_type dash = x.rfind('-');
00480         if (dash != std::string::npos)
00481         {
00482                 std::string sbegin = x.substr(0, dash);
00483                 std::string send = x.substr(dash+1, x.length());
00484                 range_begin = atoi(sbegin.c_str());
00485                 range_end = atoi(send.c_str());
00486 
00487                 if ((range_begin > 0) && (range_end > 0) && (range_begin < 65536) && (range_end < 65536) && (range_begin < range_end))
00488                 {
00489                         in_range = range_begin;
00490                         return in_range;
00491                 }
00492                 else
00493                 {
00494                         /* Assume its just the one port */
00495                         return atoi(sbegin.c_str());
00496                 }
00497         }
00498         else
00499         {
00500                 return atoi(x.c_str());
00501         }
00502 }
00503 
00504 irc::dynamicbitmask::dynamicbitmask() : bits_size(4)
00505 {
00506         /* We start with 4 bytes allocated which is room
00507          * for 4 items. Something makes me doubt its worth
00508          * allocating less than 4 bytes.
00509          */
00510         bits = new unsigned char[bits_size];
00511         memset(bits, 0, bits_size);
00512 }
00513 
00514 irc::dynamicbitmask::~dynamicbitmask()
00515 {
00516         /* Tidy up the entire used memory on delete */
00517         delete[] bits;
00518 }
00519 
00520 irc::bitfield irc::dynamicbitmask::Allocate()
00521 {
00522         /* Yeah, this isnt too efficient, however a module or the core
00523          * should only be allocating bitfields on load, the Toggle and
00524          * Get methods are O(1) as these are called much more often.
00525          */
00526         unsigned char* freebits = this->GetFreeBits();
00527         for (unsigned char i = 0; i < bits_size; i++)
00528         {
00529                 /* Yes, this is right. You'll notice we terminate the  loop when !current_pos,
00530                  * this is because we logic shift our bit off the end of unsigned char, and its
00531                  * lost, making the loop counter 0 when we're done.
00532                  */
00533                 for (unsigned char current_pos = 1; current_pos; current_pos = current_pos << 1)
00534                 {
00535                         if (!(freebits[i] & current_pos))
00536                         {
00537                                 freebits[i] |= current_pos;
00538                                 return std::make_pair(i, current_pos);
00539                         }
00540                 }
00541         }
00542         /* We dont have any free space left, increase by one */
00543 
00544         if (bits_size == 255)
00545                 /* Oh dear, cant grow it any further */
00546                 throw std::bad_alloc();
00547 
00548         unsigned char old_bits_size = bits_size;
00549         bits_size++;
00550         /* Allocate new bitfield space */
00551         unsigned char* temp_bits = new unsigned char[bits_size];
00552         unsigned char* temp_freebits = new unsigned char[bits_size];
00553         /* Copy the old data in */
00554         memcpy(temp_bits, bits, old_bits_size);
00555         memcpy(temp_freebits, freebits, old_bits_size);
00556         /* Delete the old data pointers */
00557         delete[] bits;
00558         delete[] freebits;
00559         /* Swap the pointers over so now the new 
00560          * pointers point to our member values
00561          */
00562         bits = temp_bits;
00563         freebits = temp_freebits;
00564         this->SetFreeBits(freebits);
00565         /* Initialize the new byte on the end of
00566          * the bitfields, pre-allocate the one bit
00567          * for this allocation
00568          */
00569         bits[old_bits_size] = 0;
00570         freebits[old_bits_size] = 1;
00571         /* We already know where we just allocated
00572          * the bitfield, so no loop needed
00573          */
00574         return std::make_pair(old_bits_size, 1);
00575 }
00576 
00577 bool irc::dynamicbitmask::Deallocate(irc::bitfield &pos)
00578 {
00579         /* We dont bother to shrink the bitfield
00580          * on deallocation, the most we could do
00581          * is save one byte (!) and this would cost
00582          * us a loop (ugly O(n) stuff) so we just
00583          * clear the bit and leave the memory
00584          * claimed -- nobody will care about one
00585          * byte.
00586          */
00587         if (pos.first < bits_size)
00588         {
00589                 this->GetFreeBits()[pos.first] &= ~pos.second;
00590                 return true;
00591         }
00592         /* They gave a bitfield outside of the
00593          * length of our array. BAD programmer.
00594          */
00595         return false;
00596 }
00597 
00598 void irc::dynamicbitmask::Toggle(irc::bitfield &pos, bool state)
00599 {
00600         /* Range check the value */
00601         if (pos.first < bits_size)
00602         {
00603                 if (state)
00604                         /* Set state, OR the state in */
00605                         bits[pos.first] |= pos.second;
00606                 else
00607                         /* Clear state, AND the !state out */
00608                         bits[pos.first] &= ~pos.second;
00609         }
00610 }
00611 
00612 bool irc::dynamicbitmask::Get(irc::bitfield &pos)
00613 {
00614         /* Range check the value */
00615         if (pos.first < bits_size)
00616                 return (bits[pos.first] & pos.second);
00617         else
00618                 /* We can't return false, otherwise we can't
00619                  * distinguish between failure and a cleared bit!
00620                  * Our only sensible choice is to throw (ew).
00621                  */
00622                 throw ModuleException("irc::dynamicbitmask::Get(): Invalid bitfield, out of range");
00623 }
00624 
00625 unsigned char irc::dynamicbitmask::GetSize()
00626 {
00627         return bits_size;
00628 }
00629