You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

FilterChain.cpp 72KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320
  1. // FilterChain.cpp
  2. //
  3. // (C) 2002-2009 MicroNeil Research Corporation
  4. //
  5. // Main code file for module FilterChain.
  6. // 20041116 _M Added UrlDecode module. The module will repeat a decoded version of
  7. // any anchor tag that it sees which contains decodable %xx bytes. Other anchor
  8. // tags are not repeated.
  9. // 20041116 _M Upgrades to the Defunker module. The module now decodes any HTML
  10. // encoded bytes that could have been normal ascii.
  11. // 20041114 _M Completed basic defunker engine which strips out all HTML and some
  12. // basic encoding.
  13. // 20041113 _M Began heavy upgrades to this module to improve performance and
  14. // provide additional obfuscation removal. This modification will include a move
  15. // from the use of switch(State) mechanisms to the use of function pointers. This
  16. // should save a few cycles on every byte processed.
  17. #include "SNFMulti/FilterChain.hpp"
  18. using namespace std;
  19. namespace SNFMulti {
  20. // FilterChainBase64 Methods.
  21. // GetByte()
  22. // Returns the next byte from this filter module.
  //
  // The method is a three-state engine selected by State:
  //   SCANNING - bytes read from FilterChain::GetByte() are compared with
  //              Base64Start. Bytes that do not begin a match are returned
  //              unchanged. Once the whole start sequence has matched, up to
  //              LineLimit (150) further lines are consumed while looking for
  //              a blank line; if one is found the state becomes DECODING.
  //   DEQUEING - replays the part of Base64Start that matched before a
  //              mismatch (or before the source ran dry), then the buffered
  //              mismatching byte if there is one, and returns to SCANNING.
  //   DECODING - reads four base64 characters at a time into Workspace and
  //              hands out the decoded bytes (at most three per group).
  //
  // Throws Empty when the source is exhausted and nothing is left to replay.
  // Empty thrown by the source while skipping header lines or while decoding
  // is not caught here and propagates to the caller.
  23. unsigned char FilterChainBase64::GetByte() {
  24. switch(State) { // What are we doing?
  25. case SCANNING:{ // We're scanning to turn on...
  26. // In this mode we are hunting for a reason to turn
  27. // ourselves on. If we find our startup sequence then
  28. // we will go into decoding mode. Until then, we try
  29. // to match each incoming character with our startup
  30. // sequence.
  31. while(true) { // Search for our startup string or get out.
  32. try { // Try this...
  33. ValidBuffer = false; // No valid buffer yet.
  34. x=FilterChain::GetByte(); // Get the next byte from source.
  35. } // If we get the empty signal
  36. // here, we've failed to match.
  37. catch(const Empty&) { // If so - and we haven't
  38. if(0==ScanIx) throw Empty("FilterChainBase64: No more data"); // started then just throw Empty.
  39. State=DEQUEING;DequeIx=0; // If we have then we'll dequeue
  40. return GetByte(); // it and throw when that's done
  41. } // because Buffer is not valid.
  42. // It's important that no Empty exceptions get beyond this point unless
  43. // we've got a match started. Otherwise we'll return corruption.
  44. if(x!=Base64Start[ScanIx]){ // If the byte doesn't match,
  45. // and we've started matching
  46. if(0!=ScanIx) { // the sequence then save the
  47. Buffer=x; ValidBuffer=true; // byte for later, change to
  48. State=DEQUEING;DequeIx=0; // DEQUEING mode, and return
  49. return GetByte(); // the first dequeued byte.
  50. }
  51. // If there's no match
  52. else return x; // started then shortcut that:
  53. } // just send back the byte.
  54. // We've handled non matches, now time for the good stuff...
  55. else { // This byte matches :-)
  56. ScanIx++; // Move forward!
  57. if(ScanIx>=sizeof(Base64Start)-1){ // If we've matched it all
  58. // then prep for decoding.
  59. // At this point we've got our trigger - but we need to
  60. // eat up any extra junk before we start decoding. What
  61. // we're looking for is a blank line (two consecutive <LF>) within
  62. // the next couple of lines. While we're at this if we
  63. // get an exception we'll just pass it through.
  64. ScanIx=DequeIx=0; // Let's reset our indexes.
  65. // We're SCANNING now - so if we fail to get to good base64
  66. // stuff then we'll be starting from scratch - and that's ok.
  67. // Here we will allow some number of additional header lines
  68. // to occur before we give up on this being a base64 segment.
  69. // If we give up then we go back to scanning again.
  70. // 20030114 _M Increased limit to 150 - lots of X- headers cause
  71. // the engine to stop decoding base64!! 30 was too small.
  72. const int LineLimit = 150; // We'll allow this many.
  73. for(int LineCount=0; LineCount<LineLimit; LineCount++) {
  74. do{ // Eat up characters through
  75. x=FilterChain::GetByte(); // the end of the line.
  76. } while(x!='\n');
  77. x=FilterChain::GetByte(); // Get the next byte.
  78. if(x=='\n'){ // Next line is blank?
  79. State=DECODING; // Then get ready to DECODE!
  80. break; // NO MORE LOOPING!
  81. }
  82. // If the line is not blank then we'll go around again up
  83. // to the number of lines we allow. Then we're done trying
  84. // and we will fall through.
  85. }
  86. // At this point we are either ready to decode base64 data
  87. // or we're still in SCANNING mode because of too much junk.
  88. if(DECODING==State) { // If we're ready to decode
  89. Workspace = 0x0000000a; // then set up a pair of
  90. DequeIx=3; // <LF> lines so they will
  91. ScanIx=2; // be the first bytes decoded.
  92. } // Here we pump <LF> into the
  93. // workspace. Then we return one <LF>
  94. return x; // (usually).
  95. // The deal is, if we're decoding then we will pump in LF and
  96. // return what must be the last LF. If we're not decoding then we
  97. // end up returning the last byte we read before giving up which should
  98. // be the first byte of the next line.
  99. }
  100. }
  101. }
  102. // The above will be tried repeatedly in the first call to
  103. // this object's GetByte() until we either return a byte or
  104. // throw an exception. The result is that once we start to match
  105. // our startup sequence we will either match all of it or we will
  106. // grab as much of it as we can until we don't match - then we'll
  107. // fail and move into DEQUEING.
  108. // You may be asking yourself, why go through all that complex
  109. // Turing engine stuff when a simple line load and string comparison
  110. // would do nicely. The answer is SPEED. Without getting too deep,
  111. // the above code will identify the startup string in roughly 2
  112. // comparisons per byte. If I were to load the entire line first
  113. // then that alone would be 2 comparisons before I got started. This
  114. // way I cut the number of comparisons down by at least 50%.
  115. break;
  116. }
  117. case DEQUEING:{ // We're recovering from a false start...
  118. // When we get here, ScanIx will be one greater than the last
  119. // matching byte. The last byte read will be stored in our buffer
  120. // so that it can be returned here as the last step. The calling
  121. // program will request each byte one at a time... starting with
  122. // the first byte coming out of this code. For all positions in our
  123. // startup string less than ScanIx, we know we had a matching input.
  124. // We start our output at the first byte. The Scanning engine should
  125. // have set our DequeIx to 0 before we got here - so that part should
  126. // be automatic. Here goes...
  127. if(DequeIx < ScanIx) { // If we're still returning a
  128. unsigned char x = // partial match, grab the next byte
  129. Base64Start[DequeIx]; // from the startup string, increment
  130. DequeIx++; // our Deque index for next time, and
  131. return x; // return the byte that's needed.
  132. } else { // When we're done with that part,
  133. State=SCANNING; // we set our mode back to scanning,
  134. ScanIx=DequeIx=0; // reset our indexes to start again,
  135. // Here we either have a buffered byte to dequeue, or we ran out
  136. // of data while attempting to match our startup sequence. If we
  137. // have a valid byte we return it. If not, we throw No More Data!
  138. if(ValidBuffer) return Buffer;
  139. else throw Empty("FilterChainBase64: No more data");
  140. }
  141. break;
  142. }
  143. case DECODING:{ // We're decoding data...
  144. // DequeIx will be used here to indicate how many decoded
  145. // bytes are ready to be delivered. This is compatible with
  146. // the normal startup for other modes.
  147. // ScanIx will be used here to indicate which byte position
  148. // we should be reading from. This combination helps to handle
  149. // pads and simplifies processing. For example, if we've got two
  150. // pads then we'll have a single byte to read starting at index
  151. // zero.
  152. // If we get an exception from up the chain while we're decoding
  153. // then we'll just pass it along.
  154. if(0==DequeIx) { // If there are no bytes ready then get some!
  155. // First Byte:
  156. // Eat anything up to the first byte that doesn't look like
  157. // a base64 digit. If we hit a '\n-' then we'll assume we've got
  158. // a segment boundary and we'll quit. Everything else will be
  159. // ignored to get us to the next line.
  160. do{ // Empty out any in-between bytes.
  161. y=x;x=FilterChain::GetByte(); // Read one byte at a time.
  162. if('-'==x && '\n'==y) { // If we get to a segment separator
  163. ScanIx=DequeIx=0; // then reset our indexes, set our
  164. State=SCANNING; // state to SCANNING...
  165. do { // Eat up the rest of this line
  166. x=FilterChain::GetByte(); // one byte at a time including
  167. } while('\n'!=x); // the <LF> at the end, then
  168. return '\n'; // return that <LF> byte.
  169. // On the next incoming call, the scanner section "should"
  170. // return the following <LF> byte to complete the end of line.
  171. // This ensures that we put a new line at the end of our
  172. // decoded segment. For message scanning purposes this is
  173. // desirable. If we wanted a clean segment then we'd probably
  174. // eat through the new line rather than the carriage return.
  175. }
  176. } while(XX64==Base64Table[x]); // Eat all invalid bytes.
  177. // At this point x should have the first valid byte for us :-)
  // NOTE(review): in this and the following "reset" branches State is left
  // as DECODING even though the comments say "go on with scanning" - confirm
  // that continuing to decode on the next line is the intended behavior.
  178. if('='==x) { // First byte can't be a pad.
  179. ScanIx=DequeIx=0; // If it is then we reset ourself,
  180. do{ // eat the rest of this line,
  181. y=x;x=FilterChain::GetByte(); // and then go on with scanning.
  182. }while('\n'!=x);
  183. return x;
  184. }
  185. // At this point we have a clean byte, presumably at the start
  186. // of a base64 block which we can decode.
  187. x = Base64Table[x]; // Convert the byte.
  188. // This first one we assign to clear out the register. The rest
  189. // get added to keep things in place.
  190. Workspace = // Add it to the workspace in the
  191. x << base64_seg0_shift; // correct position.
  192. // Byte number 2 of the block...
  193. x=FilterChain::GetByte(); // Grab the byte...
  194. if('='==x) { // This byte can't be a pad.
  195. ScanIx=DequeIx=0; // If it is then we reset ourself,
  196. do{ // eat the rest of this line,
  197. y=x;x=FilterChain::GetByte(); // and then go on with scanning.
  198. }while('\n'!=x);
  199. return x;
  200. }
  201. x=Base64Table[x]; // Convert the byte.
  202. if(XX64==x) { // The byte can't be invalid...
  203. ScanIx=DequeIx=0; // If it is then we reset ourself,
  204. do{ // eat the rest of this line,
  205. y=x;x=FilterChain::GetByte(); // and then go on with scanning.
  206. }while('\n'!=x);
  207. return x;
  208. }
  209. // At this point we have a clean byte...
  210. Workspace += // Add it to the workspace in the
  211. x << base64_seg1_shift; // correct position.
  212. // Byte number 3 of the block...
  213. x=FilterChain::GetByte(); // Grab the byte...
  214. // This one and the next one can be pads. Here's where we start
  215. // deciding how many bytes we have. If we have a pad in this spot
  216. // then our output bytes will only be 1.
  // NOTE(review): a pad is still run through Base64Table and compared with
  // XX64 below, so this presumably relies on the table not mapping '=' to
  // XX64 - verify against the table definition.
  217. if('='==x) DequeIx = 1; // If we've got a pad here we'll only
  218. else DequeIx = 3; // have one valid output byte. Otherwise
  219. // we could have 3.
  220. x=Base64Table[x]; // Convert the byte.
  221. if(XX64==x) { // The byte can't be invalid...
  222. ScanIx=DequeIx=0; // If it is then we reset ourself,
  223. do{ // eat the rest of this line,
  224. y=x;x=FilterChain::GetByte(); // and then go on with scanning.
  225. }while('\n'!=x);
  226. return x;
  227. }
  228. // At this point we have a clean byte...
  229. Workspace += // Add it to the workspace in the
  230. x << base64_seg2_shift; // correct position.
  231. // Byte number 4 of the block...
  232. x=FilterChain::GetByte(); // Grab the byte...
  233. if('='==x && DequeIx > 2) // If we've got a pad here the most
  234. DequeIx=2; // we can have are 2 valid outputs.
  235. x=Base64Table[x]; // Convert the byte.
  236. if(XX64==x) { // The byte can't be invalid...
  237. ScanIx=DequeIx=0; // If it is then we reset ourself,
  238. do{ // eat the rest of this line,
  239. y=x;x=FilterChain::GetByte(); // and then go on with scanning.
  240. }while('\n'!=x);
  241. return x;
  242. }
  243. // At this point we have a clean byte...
  244. Workspace += // Add it to the workspace in the
  245. x << base64_seg3_shift; // correct position.
  246. // At this point we are ready to begin outputting our bytes.
  247. ScanIx=2; // Output always starts at byte index 2 of Workspace.
  248. return GetByte(); // Return our first decoded byte.
  249. } else { // If there are bytes ready then spit them out.
  250. x=(Workspace >> (ScanIx * 8)) & 0xFF; // Grab the byte we want.
  251. ScanIx--; // Decrement our output index.
  252. DequeIx--; // Decrement our output count.
  253. return x; // Send back our byte.
  254. }
  255. break;
  256. }
  257. }
  258. // We should never get to this point.
  259. return 0; // Dummy to make the compiler happy.
  260. }
  261. // FilterChainQuotedPrintable Methods.
  262. // isHexDigit()
  263. // Returns true if i is a valid hex digit.
  264. bool FilterChainQuotedPrintable::isHexDigit(unsigned char i) {
  265. if(
  266. (i >= '0' && i <= '9') || // Hex digits must be 0-9 or
  267. (i >= 'A' && i <= 'F') || // A-F or
  268. (i >= 'a' && i <= 'f') // a-f if somebody used lower case.
  269. ) {
  270. return true; // If i is one of these we are true
  271. } else {
  272. return false; // IF i is not then we are false
  273. }
  274. }
  275. // convertHexDigit()
  276. // Returns an integer value for the hex digit i
  277. int FilterChainQuotedPrintable::convertHexDigit(unsigned char i) {
  278. if(i >= '0' && i <= '9') { // Digit chars convert directly.
  279. return i - '0';
  280. } else if (i >= 'A' && i <= 'F') { // Cap A-F convert to 10 - 15
  281. return i - 'A' + 10;
  282. } else if (i >= 'a' && i <= 'f') { // Small A-F convert to 10 - 15
  283. return i - 'a' + 10;
  284. }
  285. return -1; // Return -1 if i was not a hex digit!
  286. }
  287. // GetByte()
  288. // Returns the next byte from this filter module.
  289. unsigned char FilterChainQuotedPrintable::GetByte() {
  290. switch(State) { // What are we doing?
  291. case SCANNING: // We're scanning to turn on...
  292. Buffer[0]=FilterChain::GetByte();
  293. if('='== Buffer[0]) { // If we've found an = then we're on.
  294. Buffer[1]=FilterChain::GetByte(); // Fill up the decoding buffer with
  295. Buffer[2]=FilterChain::GetByte(); // the next two bytes,
  296. BufferIndex = 0; // Setup the buffer index.
  297. BufferLength = 3; // Setup the buffer length.
  298. State = DECODING; // Set our mode and get the result
  299. return GetByte(); // by calling ourselves!
  300. } else
  301. return Buffer[0]; // Otherwise just pass through.
  302. break;
  303. case DEQUEING: // We're recovering from a false start...
  304. if(BufferIndex < BufferLength) { // If we've got buffered stuff then
  305. return Buffer[BufferIndex++]; // return it and move the pointer.
  306. } else { // If we've run out of stuff then
  307. BufferIndex = 0; // Reset our index and our
  308. BufferLength = 0; // buffer length, then set our
  309. State = SCANNING; // mode to SCANNING and return
  310. return GetByte(); // the next byte from there.
  311. }
  312. break;
  313. case DECODING: // We're decoding data...
  314. // Now we are decoding quoted printable data. First we will handle the case
  315. // where this is a soft line break. In that case we simply eat the encoded bytes
  316. // and set up to dequeue the last byte.
  317. if(Buffer[1] == '\n') { // If this is a soft break the
  318. BufferIndex = 2; // point our dequeue index at the last byte
  319. State = DEQUEING; // establish our DEQUEING state and
  320. return GetByte(); // return by letteing DEQUEING do it!
  321. }
  322. // If it wasn't a soft break then we _may_ need to decode it. We will find
  323. // out by looking for hex digits in the next two locations. If they are there
  324. // we are decoding. If not then we will simply dequeue the entire buffer.
  325. if(
  326. isHexDigit(Buffer[1]) && // If the next two bytes are hex
  327. isHexDigit(Buffer[2]) // digits then we can convert them.
  328. ) {
  329. Workspace= // Set our workspace to convert the
  330. (convertHexDigit(Buffer[1]) << 4) | // two hex digits into a single
  331. (convertHexDigit(Buffer[2])); // byte.
  332. Buffer[2] = Workspace & 0xFF; // Store that byte in our buffer.
  333. BufferIndex = 2; // Set the index and change our
  334. State = DEQUEING; // state to DEQUEING then let that
  335. return GetByte(); // code spit it out!
  336. } else { // If either byte was not a valid
  337. State = DEQUEING; // hex digit DEQUEUE the entire
  338. return GetByte(); // buffer.
  339. }
  340. break;
  341. };
  342. return FilterChain::GetByte(); // Dummy
  343. }
  344. /////////////////////////////////////////////////////////////////////////////////////////
  345. // FilterChainDefunker
  346. /////////////////////////////////////////////////////////////////////////////////////////
  // Text that Preamble() pushes into the queue, one character at a time.
  347. const char* DefunkerPreamble = " ----[DEFUNKER]---- ";
  348. // Patterns to match. The entity matchers in this file lowercase each input
  // byte with tolower() before comparing it with these patterns, so the patterns
  // themselves are written in lower case.
  349. const char* patMatchBR = "<br>"; // Handed to MatchTagPattern() by MatchBR().
  350. const char* patMatchP = "<p>"; // Handed to MatchTagPattern() by MatchP().
  351. const char* patNBSP = "&nbsp;"; // MatchNBSP() returns ' ' on a match.
  352. const char* patAMP = "&amp;";
  353. const char* patAPOS = "&apos;";
  354. const char* patLT = "&lt;"; // MatchLT() returns '<' on a match.
  355. const char* patGT = "&gt;"; // MatchGT() returns '>' on a match.
  356. const char* patQUOT = "&quot;"; // MatchQUOT() returns '"' on a match.
  357. // SkipHeaders() waits for the headers to go by before launching Store().
  358. unsigned char FilterChainDefunker::SkipHeaders() { // While waiting EOH...
  359. unsigned char x = FilterChain::GetByte(); // Get a byte.
  360. if(LastRawByte == '\n' && x == '\n') { // If we're at EOH
  361. Master = &FilterChainDefunker::Store; // Go to store mode.
  362. return x; // and return the byte.
  363. } // If we're not at EOH
  364. LastRawByte = x; // then remember this byte
  365. return x; // and return it.
  366. }
  367. // Store() puts the original data into the buffer for later.
  368. unsigned char FilterChainDefunker::Store() { // While in Store mode,
  369. unsigned char x; // we need a byte.
  370. try {
  371. if(DefunkerSize <= InputPosition)
  372. throw Empty("FilterChainDefunker: No more data"); // Careful about the buffer.
  373. x = FilterChain::GetByte(); // Try getting the next byte
  374. StoreBuffer[InputPosition++] = x; // and storing it.
  375. }
  376. catch(const Empty&) { // When we get the Empty
  377. Master = &FilterChainDefunker::ReadOut; // signal it is time for us
  378. return GetByte(); // to read out our data.
  379. }
  380. return x; // Otherwis pass on the byte.
  381. }
  382. // ReadOut() retrieves the stored data through the state engine.
  383. unsigned char FilterChainDefunker::ReadOut() { // Read out and dedup spaces.
  384. if(LastReadOut == ' ') { // If the last byte was a space
  385. while(LastReadOut == ' ') { // then eat all of the spaces
  386. LastReadOut = SpaceConvChart[GetInternal()]; // that come next with spaces
  387. } // converted.
  388. } else { // If it was not a space then
  389. LastReadOut = SpaceConvChart[GetInternal()]; // simply read the next byte
  390. } // with spaces converted.
  391. return LastReadOut; // Output the byte we found.
  392. }
  393. // GetStore() retrieves the raw store for the state engine.
  394. unsigned char FilterChainDefunker::GetStore() { // Read from the Store.
  395. if(OutputPosition >= InputPosition) {
  396. throw Empty("FilterChainDefunker: No more data"); // If we're out of bytes throw Empty.
  397. }
  398. return LastGetStore = StoreBuffer[OutputPosition++]; // If we have more, trap and send it.
  399. }
  400. //// The following functions make up the state engine with the state maintained
  401. //// as a function pointer in the (*Internal)() handle.
  402. unsigned char FilterChainDefunker::Preamble() { // Emit the preamble.
  403. for(
  404. int p=0; // Load the preamble into
  405. DefunkerPreamble[p]; // the queue.
  406. p++) EnQueue(DefunkerPreamble[p]);
  407. Internal = &FilterChainDefunker::DeQueue; // Set up the DeQueue mode
  408. return GetInternal(); // and return the next byte.
  409. }
  410. unsigned char FilterChainDefunker::DefunkRoot() { // While in DefunkRoot state...
  411. unsigned char x = 0; // One byte at a time via x.
  412. do { // Loop through any emptiness.
  413. ReturnNothing = false; // Be ready to return a byte.
  414. x = GetStore(); // Grab the next byte to process.
  415. if(x == '<') { // If it matches < then
  416. Internal = &FilterChainDefunker::OpenTag; // go to OpenTag state and
  417. x = GetInternal(); // return the converted byte.
  418. } else
  419. if(x == '&') { // If it matches & then
  420. Internal = &FilterChainDefunker::OpenAmp; // go to OpenAnd state and
  421. EnQueue(x); // push in the amphersand.
  422. x = GetInternal(); // return the converted byte.
  423. }
  424. // If x is none of the above then x is just x.
  425. } while (true == ReturnNothing); // Returning nothing? Go again!
  426. return x; // otherwise return a funkless x.
  427. }
  428. unsigned char FilterChainDefunker::OpenTag() { // While in OpenTag state
  429. unsigned char x = GetStore(); // grab the next byte.
  430. switch(tolower(x)) { // Check the lower case of x.
  431. case 'b': // If we have a 'b' then
  432. Internal = &FilterChainDefunker::MatchBR; // our mode is MatchBR.
  433. break;
  434. case 'p': // If we have a 'p' then
  435. Internal = &FilterChainDefunker::MatchP; // our mode is MatchP.
  436. break;
  437. default: // If we did not match then
  438. Internal = &FilterChainDefunker::EatTag; // our mode is EatTag.
  439. break;
  440. }
  441. return GetInternal(); // Return the next byte.
  442. }
  443. unsigned char FilterChainDefunker::OpenAmp() { // While in OpenAmp state
  444. unsigned char x = GetStore(); // grab the next byte.
  445. if(tolower(x) == 'n') { // If it matched n then
  446. EnQueue(x); // push in the n -
  447. Internal = &FilterChainDefunker::MatchNBSP; // we are working on &nbsp;
  448. return GetInternal(); // return the next byte.
  449. } else
  450. if(tolower(x) == 'a') { // If it matched a then
  451. EnQueue(x); // push in the a -
  452. Internal = &FilterChainDefunker::SwitchAMPAPOS; // is it AMP or APOS?
  453. return GetInternal(); // return the next byte.
  454. } else
  455. if(tolower(x) == 'l') { // If it matched l then
  456. EnQueue(x); // push in the l -
  457. Internal = &FilterChainDefunker::MatchLT; // we are working on &lt;
  458. return GetInternal(); // return the next byte.
  459. } else
  460. if(tolower(x) == 'g') { // If it matched g then
  461. EnQueue(x); // push in the g -
  462. Internal = &FilterChainDefunker::MatchGT; // we are working on &gt;
  463. return GetInternal(); // return the next byte.
  464. } else
  465. if(tolower(x) == 'q') { // If it matched q then
  466. EnQueue(x); // push in the q -
  467. Internal = &FilterChainDefunker::MatchQUOT; // we are working on &quot;
  468. return GetInternal(); // return the next byte.
  469. } else
  470. if(x == '#') { // If it matched # then
  471. EnQueue(x); // push in the # -
  472. Internal = &FilterChainDefunker::DecodeNum; // we are working on &#...;
  473. return GetInternal(); // return the next byte.
  474. }
  475. Internal = &FilterChainDefunker::DeQueue; // If nothing matched then
  476. return GetInternal(); // punt and dequeue.
  477. }
  478. unsigned char FilterChainDefunker::MatchBR() { // If our mode is MatchBR
  479. if(MatchTagPattern(patMatchBR)) { // If we matched our pattern
  480. Internal = &FilterChainDefunker::DefunkRoot; // go to DefunkRoot state
  481. return ' '; // and return a space.
  482. } // If we did not match then
  483. Internal = &FilterChainDefunker::EatTag; // go to EatTag state and
  484. return GetInternal(); // return the next byte.
  485. }
  486. unsigned char FilterChainDefunker::MatchP() { // If our mode is MatchP
  487. if(MatchTagPattern(patMatchP)) { // if we matched our pattern
  488. Internal = &FilterChainDefunker::DefunkRoot; // go to DefunkRoot state
  489. return ' '; // and return a space.
  490. } // If we did not match then
  491. Internal = &FilterChainDefunker::EatTag; // go to EatTag state and
  492. return GetInternal(); // return the next byte.
  493. }
  494. unsigned char FilterChainDefunker::MatchNBSP() { // If our mode is MatchNBSP
  495. int pos = 2; // We've seen &n so far.
  496. while(patNBSP[pos]){ // Look through the pattern
  497. unsigned char x = GetStore(); // getting one byte at a time.
  498. EnQueue(x); // Push each into the queue.
  499. if(tolower(x)!=patNBSP[pos]) break; // If we fall off, get out.
  500. pos++; // otherwise keep going.
  501. }
  502. // At this point our pattern[pos] is either 0 (a match) or not.
  503. if(patNBSP[pos]) { // If we did not match then
  504. Internal = &FilterChainDefunker::DeQueue; // set our state to dequeue
  505. return GetInternal(); // and return the next byte.
  506. }
  507. // If we did match the pattern
  508. ClearQueue(); // then clear the queue and
  509. Internal = &FilterChainDefunker::DefunkRoot; // go back to root mode then
  510. return ' '; // return a space.
  511. }
  512. unsigned char FilterChainDefunker::MatchLT() { // If our mode is MatchLT
  513. int pos = 2; // We've seen &l so far.
  514. while(patLT[pos]){ // Look through the pattern
  515. unsigned char x = GetStore(); // getting one byte at a time.
  516. EnQueue(x); // Push each into the queue.
  517. if(tolower(x)!=patLT[pos]) break; // If we fall off, get out.
  518. pos++; // otherwise keep going.
  519. }
  520. // At this point our pattern[pos] is either 0 (a match) or not.
  521. if(patLT[pos]) { // If we did not match then
  522. Internal = &FilterChainDefunker::DeQueue; // set our state to dequeue
  523. return GetInternal(); // and return the next byte.
  524. }
  525. // If we did match the pattern
  526. ClearQueue(); // then clear the queue and
  527. Internal = &FilterChainDefunker::DefunkRoot; // go back to root mode then
  528. return '<'; // return a <.
  529. }
  530. unsigned char FilterChainDefunker::MatchGT() { // If our mode is MatchGT
  531. int pos = 2; // We've seen &g so far.
  532. while(patGT[pos]){ // Look through the pattern
  533. unsigned char x = GetStore(); // getting one byte at a time.
  534. EnQueue(x); // Push each into the queue.
  535. if(tolower(x)!=patGT[pos]) break; // If we fall off, get out.
  536. pos++; // otherwise keep going.
  537. }
  538. // At this point our pattern[pos] is either 0 (a match) or not.
  539. if(patGT[pos]) { // If we did not match then
  540. Internal = &FilterChainDefunker::DeQueue; // set our state to dequeue
  541. return GetInternal(); // and return the next byte.
  542. }
  543. // If we did match the pattern
  544. ClearQueue(); // then clear the queue and
  545. Internal = &FilterChainDefunker::DefunkRoot; // go back to root mode then
  546. return '>'; // return a >.
  547. }
  548. unsigned char FilterChainDefunker::MatchQUOT() { // If our mode is MatchQUOT
  549. int pos = 2; // We've seen &q so far.
  550. while(patQUOT[pos]){ // Look through the pattern
  551. unsigned char x = GetStore(); // getting one byte at a time.
  552. EnQueue(x); // Push each into the queue.
  553. if(tolower(x)!=patQUOT[pos]) break; // If we fall off, get out.
  554. pos++; // otherwise keep going.
  555. }
  556. // At this point our pattern[pos] is either 0 (a match) or not.
  557. if(patQUOT[pos]) { // If we did not match then
  558. Internal = &FilterChainDefunker::DeQueue; // set our state to dequeue
  559. return GetInternal(); // and return the next byte.
  560. }
  561. // If we did match the pattern
  562. ClearQueue(); // then clear the queue and
  563. Internal = &FilterChainDefunker::DefunkRoot; // go back to root mode then
  564. return '\"'; // return a quote.
  565. }
  566. unsigned char FilterChainDefunker::SwitchAMPAPOS() { // We are chosing AMP or APOS.
  567. unsigned char x = GetStore(); // Get the next byte.
  568. EnQueue(x); // Put it into the queue.
  569. if(tolower(x)=='m') { // If we matched m then we
  570. Internal = &FilterChainDefunker::MatchAMP; // are working on MatchAMP.
  571. return GetInternal(); // Go get it.
  572. } else
  573. if(tolower(x)=='p') { // If we matched p then we
  574. Internal = &FilterChainDefunker::MatchAPOS; // are working on MatchAPOS.
  575. return GetInternal(); // Go get it.
  576. }
  577. Internal = &FilterChainDefunker::DeQueue; // If we didn't match either
  578. return GetInternal(); // we punt and DeQueue.
  579. }
  580. unsigned char FilterChainDefunker::MatchAPOS() { // If our mode is MatchAPOS
  581. int pos = 3; // We've seen &ap so far.
  582. while(patAPOS[pos]){ // Look through the pattern
  583. unsigned char x = GetStore(); // getting one byte at a time.
  584. EnQueue(x); // Push each into the queue.
  585. if(tolower(x)!=patAPOS[pos]) break; // If we fall off, get out.
  586. pos++; // otherwise keep going.
  587. }
  588. // At this point our pattern[pos] is either 0 (a match) or not.
  589. if(patAMP[pos]) { // If we did not match then
  590. Internal = &FilterChainDefunker::DeQueue; // set our state to dequeue
  591. return GetInternal(); // and return the next byte.
  592. }
  593. // If we did match the pattern
  594. ClearQueue(); // then clear the queue and
  595. Internal = &FilterChainDefunker::DefunkRoot; // go back to root mode then
  596. return '\''; // return an apostrophie.
  597. }
  598. unsigned char FilterChainDefunker::MatchAMP() { // If our mode is MatchAMP
  599. int pos = 3; // We've seen &am so far.
  600. while(patAMP[pos]){ // Look through the pattern
  601. unsigned char x = GetStore(); // getting one byte at a time.
  602. EnQueue(x); // Push each into the queue.
  603. if(tolower(x)!=patAMP[pos]) break; // If we fall off, get out.
  604. pos++; // otherwise keep going.
  605. }
  606. // At this point our pattern[pos] is either 0 (a match) or not.
  607. if(patAMP[pos]) { // If we did not match then
  608. Internal = &FilterChainDefunker::DeQueue; // set our state to dequeue
  609. return GetInternal(); // and return the next byte.
  610. }
  611. // If we did match the pattern
  612. ClearQueue(); // then clear the queue and
  613. Internal = &FilterChainDefunker::DefunkRoot; // go back to root mode then
  614. return '&'; // return an amphersand.
  615. }
  616. unsigned char FilterChainDefunker::EatTag() { // If our mode is EatTag
  617. if(LastGetStore != '>') { // and our last byte was not
  618. while(GetStore()!='>')continue; // endtag then eat through
  619. } // the end tag. Then set our
  620. ReturnNothing = true; // ReturnNothing flag, set our
  621. Internal = &FilterChainDefunker::DefunkRoot; // mode to DefunkRoot and
  622. return 0; // return 0 (nothing, really).
  623. }
  624. unsigned char FilterChainDefunker::DecodeNum() { // If our mode is DecodeNum
  625. unsigned char NumBfr[5]; // A buffer for digits.
  626. memset(NumBfr,0,sizeof(NumBfr)); // Clear the buffer.
  627. for( // Let's read the number...
  628. unsigned int i=0; // NumBfr position = 0;
  629. i<(sizeof(NumBfr)-1) && // Stay well within the NunBfr.
  630. (EnQueue(NumBfr[i]=GetStore()), // Read and EnQueue each byte.
  631. isdigit(NumBfr[i])); // Keep going if it's a digit.
  632. i++)continue; // Move the buffer pointer.
  633. // Check for a proper finish...
  634. if(LastGetStore != ';') { // If we didn't end properly
  635. Internal = &FilterChainDefunker::DeQueue; // then we will punt and
  636. return GetInternal(); // DeQueue.
  637. }
  638. // At this point, NumBfr contains a c_str of the number to be decoded.
  639. // Also, the Qbfr has each byte we read in case we want to punt.
  640. int Decoded = atol((const char*)NumBfr); // Read the number.
  641. if(Decoded < 32 || Decoded > 255) { // If the number we read is
  642. Internal = &FilterChainDefunker::DeQueue; // out of range then we
  643. return GetInternal(); // punt and DeQueue.
  644. }
  645. // If we decoded a character
  646. ClearQueue(); // that is in range of normal
  647. Internal = &FilterChainDefunker::DefunkRoot; // ascii then clear the queue,
  648. return (unsigned char) Decoded; // go back to DefunkRoot, and
  649. } // return the decoded byte.
  650. /////////////////////////////////////////////////////////////////////////////////////////
  651. // FilterChainUrlDecode
  652. /////////////////////////////////////////////////////////////////////////////////////////
  653. unsigned char FilterChainUrlDecode::Bypass() { // In Bypass mode...
  654. unsigned char c = FilterChain::GetByte(); // Get the raw byte.
  655. if(c == '<') { // If it was '<' we begin.
  656. Internal = &FilterChainUrlDecode::Tag; // Go to Tag mode.
  657. AddToBfr(c); // Write the byte to our buffer.
  658. }
  659. return c; // Always return the byte.
  660. }
  661. unsigned char FilterChainUrlDecode::Tag() { // In Tag mode...
  662. unsigned char c = FilterChain::GetByte(); // Get the raw byte.
  663. if(tolower(c) == 'a') { // If we're in an anchor tag
  664. Internal = &FilterChainUrlDecode::Root; // Go to Decode Root mode.
  665. AddToBfr(c); // Write the byte to our buffer.
  666. } else
  667. if(tolower(c) == 'i') { // If we might be in an img tag
  668. Internal = &FilterChainUrlDecode::Img1; // Go to Img1 mode.
  669. AddToBfr(c); // Write the byte to our buffer.
  670. } else { // If we didn't match
  671. DecodeBfr[0] = 0; // we clear out the Decode
  672. DecodeBfr[1] = 0; // buffer. (Save some bytes by
  673. DecodeLength = 0; // doing it manually) Then we
  674. Internal = &FilterChainUrlDecode::Bypass; // Go to Bypass mode again.
  675. }
  676. return c; // Always return the byte.
  677. }
  678. unsigned char FilterChainUrlDecode::Img1() { // In Img1 mode...
  679. unsigned char c = FilterChain::GetByte(); // Get the raw byte.
  680. if(tolower(c)=='m') { // If we're still in an img tag
  681. Internal = &FilterChainUrlDecode::Img2; // Go to Img2 mode.
  682. AddToBfr(c); // Write the byte to our buffer.
  683. } else { // If we didn't match
  684. DecodeBfr[0] = 0; // we clear out the Decode
  685. DecodeBfr[1] = 0; // buffer and go back to
  686. DecodeBfr[2] = 0; // Bypass mode again.
  687. DecodeLength = 0;
  688. Internal = &FilterChainUrlDecode::Bypass;
  689. }
  690. return c; // Always return the byte.
  691. }
  692. unsigned char FilterChainUrlDecode::Img2() { // In Img2 mode...
  693. unsigned char c = FilterChain::GetByte(); // Get the raw byte.
  694. if(tolower(c)=='g') { // If we're still in an img tag
  695. Internal = &FilterChainUrlDecode::Root; // Go to Decode Root mode.
  696. AddToBfr(c); // Write the byte to our buffer.
  697. } else { // If we didn't match
  698. DecodeBfr[0] = 0; // we clear out the Decode
  699. DecodeBfr[1] = 0; // buffer and go back to
  700. DecodeBfr[2] = 0; // Bypass mode again.
  701. DecodeBfr[3] = 0;
  702. DecodeLength = 0;
  703. Internal = &FilterChainUrlDecode::Bypass;
  704. }
  705. return c; // Always return the byte.
  706. }
  707. unsigned char FilterChainUrlDecode::Root() { // While in Decode Root mode...
  708. unsigned char c = FilterChain::GetByte(); // Get the raw byte.
  709. AddToBfr(c); // Push it into the buffer.
  710. // Now we will switch modes based on the byte we get.
  711. if(c == '%') { // If we have '%' then it is
  712. Internal = &FilterChainUrlDecode::GetD1; // time to start decoding.
  713. } else
  714. if(c == '>') { // If we have '>' and
  715. if(DecodeFlag) { // we did some decoding then
  716. Internal = &FilterChainUrlDecode::Inject; // it is time to inject the result.
  717. } else { // If there was no decoding then
  718. Clear(); // we clear out our buffer and
  719. Internal = &FilterChainUrlDecode::Bypass; // it is time to go to sleep.
  720. }
  721. }
  722. // This next bit protects against malformed HTML by watching for any new tag
  723. // start. If one occurs, then we throw away our current decoding and assume a state
  724. // that starts with the new open "<".
  725. if(c == '<') { // If found a new < then we
  726. Clear(); // clear the buffer,
  727. AddToBfr(c); // Add the '<' back in, and
  728. Internal = &FilterChainUrlDecode::Tag; // go back to Tag mode.
  729. }
  730. return c; // Always return the byte.
  731. }
  732. unsigned char FilterChainUrlDecode::GetD1() { // Get the first digit.
  733. unsigned char c = FilterChain::GetByte(); // Read the raw byte.
  734. AddToBfr(c); // Add it to the buffer.
  735. Internal = &FilterChainUrlDecode::GetD2; // Move to GetD2 mode.
  736. return c; // Always return the byte.
  737. }
  738. // isHexDigit()
  739. // Returns true if i is a valid hex digit.
  740. bool FilterChainUrlDecode::isHexDigit(unsigned char i) {
  741. if(
  742. (i >= '0' && i <= '9') || // Hex digits must be 0-9 or
  743. (i >= 'A' && i <= 'F') || // A-F or
  744. (i >= 'a' && i <= 'f') // a-f if somebody used lower case.
  745. ) {
  746. return true; // If i is one of these we are true
  747. } else {
  748. return false; // IF i is not then we are false
  749. }
  750. }
  751. // convertHexDigit()
  752. // Returns an integer value for the hex digit i
  753. int FilterChainUrlDecode::convertHexDigit(unsigned char i) {
  754. if(i >= '0' && i <= '9') { // Digit chars convert directly.
  755. return i - '0';
  756. } else if (i >= 'A' && i <= 'F') { // Cap A-F convert to 10 - 15
  757. return i - 'A' + 10;
  758. } else if (i >= 'a' && i <= 'f') { // Small A-F convert to 10 - 15
  759. return i - 'a' + 10;
  760. }
  761. return -1; // Return -1 if i was not a hex digit!
  762. }
  763. // convertHexByte()
  764. // Returns an integer value for a hex string representing a byte.
  765. unsigned char FilterChainUrlDecode::convertHexByte(unsigned char* x) {
  766. unsigned char working = convertHexDigit(x[1]); // Convert the low order nybl.
  767. working = working + (16 * convertHexDigit(x[0])); // Convert the high order nybl.
  768. return working; // Return the result.
  769. }
  770. unsigned char FilterChainUrlDecode::GetD2() { // Get the second digit.
  771. unsigned char c = FilterChain::GetByte(); // Read the raw byte.
  772. AddToBfr(c); // Add it to the buffer.
  773. // At this point the end of our DecodeBfr has a c_str of a small hex integer (we hope)
  774. // that we can decode. If we successfully decode it then we will replace %xx in our
  775. // DecodeBfr with the character that is represented by that byte.
  776. // Do we really have an encoded byte to decode?
  777. int codepos = DecodeLength-3; // Grab the position of the hex.
  778. if(
  779. DecodeBfr[codepos]=='%' && // If the first char is %
  780. isHexDigit(DecodeBfr[codepos+1]) && // and the second is a hex digit
  781. isHexDigit(DecodeBfr[codepos+2]) // and the third is a hex digit
  782. ){ // then we can decode the string.
  783. unsigned char q = convertHexByte(DecodeBfr+codepos+1); // Decode the byte.
  784. if(q >= 32) { // If the byte is in range then
  785. DecodeBfr[codepos] = q; // Replace the % with the byte
  786. DecodeBfr[--DecodeLength] = 0; // backup over and erase the hex
  787. DecodeBfr[--DecodeLength] = 0; // digits themselves.
  788. DecodeFlag = true; // Set the decode flag.
  789. }
  790. // If we decided the byte was not decodable for some reason then the original data
  791. // remains in the buffer as it was originally read.
  792. }
  793. Internal = &FilterChainUrlDecode::Root; // Get ready to decode more.
  794. return c; // Always return the byte.
  795. }
  796. unsigned char FilterChainUrlDecode::Inject() { // Inject the decoded result.
  797. if(
  798. DecodeBfr[DecodePosition] && // If we've got more bytes
  799. DecodePosition < sizeof(DecodeBfr)) { // and we're safely in our buffer
  800. return DecodeBfr[DecodePosition++]; // then return the byte and move
  801. } // ahead.
  802. // Once the buffer is empty we
  803. Clear(); // clear out the system, and go
  804. Internal = &FilterChainUrlDecode::Bypass; // back to bypass mode. Then
  805. return GetByte(); // return the next bypassed byte.
  806. }
  807. ////////////////////////////////////////////////////////////////////////////////
  808. // FilterChainHeaderAnalysis
  809. ////////////////////////////////////////////////////////////////////////////////
  810. int FilterChainHeaderAnalysis::FollowPattern(char c) { // Follow the pattern.
  811. c = tolower(c); // Convert c to lower case.
  812. if(c != MatchPattern[MatchIndex]) { // If c doesn't match the pattern
  813. return -1; // then return -1 indicating we fell off.
  814. } else { // If it did match the pattern then
  815. MatchIndex++; // move ahead to the next byte and
  816. if(0 == MatchPattern[MatchIndex]) { // take a look. If that's all there was
  817. return 0; // then we've finished :-)
  818. }
  819. } // If we matched and there's more to do
  820. return 1; // then we return 1.
  821. }
  822. unsigned char FilterChainHeaderAnalysis::doSeekNL() { // Looking for a new line.
  823. unsigned char c = GetCheckedByte(); // Get the next byte (and check for high bits)
  824. if('\n' == c) { // If it was a new line then
  825. Mode = &FilterChainHeaderAnalysis::doSeekDispatch; // move on to the next mode
  826. } // for the next byte and
  827. return c; // return the byte we got.
  828. }
  829. unsigned char FilterChainHeaderAnalysis::doSeekDispatch() { // Looking at the first char after NL.
  830. unsigned char c = GetCheckedByte(); // Get the next byte (and check for high bits)
  831. switch(tolower(c)) { // Switch modes based on what this byte is.
  832. case '\n': { // If it is a New Line then the headers are
  833. Mode = &FilterChainHeaderAnalysis::doEndOfHeaders; // finished - so we set up our EndOfHeaders
  834. return GetByte(); // mode and return the next byte from there.
  835. break; // The extra NL will be emitted at the end.
  836. }
  837. case 'r': { // If it is an R as in (R)eceived:
  838. SetFollowPattern("eceived:"); // establish the follow pattern and
  839. Mode = &FilterChainHeaderAnalysis::doReceived; // switch to doReceived mode.
  840. break;
  841. }
  842. case 'f': { // If it is an F as in (F)rom:
  843. SetFollowPattern("rom:"); // establish the follow pattern and
  844. Mode = &FilterChainHeaderAnalysis::doFrom; // switch to doFrom mode.
  845. break;
  846. }
  847. case 't': { // If it is an T as in (T)o:
  848. SetFollowPattern("o:"); // establish the follow pattern and
  849. Mode = &FilterChainHeaderAnalysis::doTo; // switch to doTo mode.
  850. break;
  851. }
  852. case 'c': { // If it is a C as in (C)C:
  853. SetFollowPattern("c:"); // establish the follow pattern and
  854. Mode = &FilterChainHeaderAnalysis::doCC; // switch to doCC mode.
  855. break;
  856. }
  857. case 'm': { // If it is an M as in (M)essage-id:
  858. SetFollowPattern("essage-id:"); // establish the follow pattern and
  859. Mode = &FilterChainHeaderAnalysis::doMessageID; // switch to doMessageID mode.
  860. break;
  861. }
  862. case 'd': { // If it is a D as in (D)ate:
  863. SetFollowPattern("ate:"); // establish the follow pattern and
  864. Mode = &FilterChainHeaderAnalysis::doDate; // switch to doDate mode.
  865. break;
  866. }
  867. case 's': { // If it is an S as in (S)ubject:
  868. SetFollowPattern("ubject:"); // establish the follow pattern and
  869. Mode = &FilterChainHeaderAnalysis::doSubject; // switch to doSubject mode.
  870. break;
  871. }
  872. default: { // If we don't recognize the byte then
  873. Mode = &FilterChainHeaderAnalysis::doSeekNL; // go back to looking for a new line.
  874. break;
  875. }
  876. } // Once all of our mode switching is handled
  877. return c; // we return the byte we got.
  878. }
  879. unsigned char FilterChainHeaderAnalysis::doReceived() { // Identifying a Received: header.
  880. unsigned char c = FilterChain::GetByte(); // Get the next byte of the header tag.
  881. switch(FollowPattern(c)) { // See if we're still on the path.
  882. case -1: { // If we're not on the right tag then
  883. Mode = &FilterChainHeaderAnalysis::doSeekNL; // go back to looking for the next one.
  884. break;
  885. }
  886. case 0: { // If we've found the end of our tag (match!)
  887. Mode = &FilterChainHeaderAnalysis::doFindIP; // start looking for the IP.
  888. IPToTest = ""; // Clear the IPToTest buffer.
  889. break;
  890. }
  891. default: { // If we're still following along then
  892. break; // keep on keepin' on.
  893. }
  894. } // Once we know what we're doing we
  895. return c; // return the character we got.
  896. }
  897. unsigned char FilterChainHeaderAnalysis::doFindIP() { // Seeking the [IP] in a Received header.
  898. unsigned char c = GetCheckedByte(); // Get a checked byte.
  899. switch(c) {
  900. case '[': { // If we find the [ then
  901. Mode = &FilterChainHeaderAnalysis::doTestIP; // set up to grab and test the IP.
  902. break;
  903. }
  904. case '\n': { // If we come across a newline then
  905. Mode = &FilterChainHeaderAnalysis::doSeekNL; // we must be lost so go back to basics.
  906. break;
  907. }
  908. default: { // For anything else we keep on going.
  909. break;
  910. }
  911. }
  912. return c; // Return the byte.
  913. }
  //// 20070614 _M Improved IP extraction from received headers so that if the
  //// apparent IP contains any unusual bytes (not digits or dots) then the
  //// attempt is abandoned.
  917. unsigned char FilterChainHeaderAnalysis::doTestIP() { // Gets and tests the [IP].
  918. unsigned char c = FilterChain::GetByte(); // Get the next byte.
  919. switch(c) {
  920. case ']': { // If we come to ] we've got it!
  921. IPTester.test(IPToTest, IPTestResult); // Do the test with this IP.
  922. if(0 == IPTestResult.length()) { // If the IP test wants us to truncate
  923. throw Empty("FilterChainHeaderAnalysis: Truncate"); // the message then throw Empty!
  924. } // Otherwise, proceed as per normal...
  925. SetOutputBuffer(IPTestResult); // Put the result in the output buffer.
  926. Mode = &FilterChainHeaderAnalysis::doInjectIPTestResult; // Set the mode to inject the result.
  927. break; // That will start on the next byte.
  928. }
  929. case '0': // IPs are made of digits and dots.
  930. case '1':
  931. case '2':
  932. case '3':
  933. case '4':
  934. case '5':
  935. case '6':
  936. case '7':
  937. case '8':
  938. case '9':
  939. case '.': { // Capture the IP between [ and ]
  940. IPToTest += c; // one byte at a time.
  941. break;
  942. }
  943. default: { // If we find anything else we must be
  944. Mode = &FilterChainHeaderAnalysis::doSeekNL; // lost so we go back to the basics.
  945. break;
  946. }
  947. }
  948. return c;
  949. }
  950. unsigned char FilterChainHeaderAnalysis::doFrom() { // Identifying a From: header.
  951. unsigned char c = FilterChain::GetByte(); // Get the next byte of the header tag.
  952. switch(FollowPattern(c)) { // See if we're still on the path.
  953. case -1: { // If we're not on the right tag then
  954. Mode = &FilterChainHeaderAnalysis::doSeekNL; // go back to looking for the next one.
  955. break;
  956. }
  957. case 0: { // If we've found the end of our tag (match!)
  958. Mode = &FilterChainHeaderAnalysis::doSeekNL; // start looking for the the next tag and
  959. FoundFrom = true; // record that this tag was present.
  960. break;
  961. }
  962. default: { // If we're still following along then
  963. break; // keep on keepin' on.
  964. }
  965. } // Once we know what we're doing we
  966. return c; // return the character we got.
  967. }
  968. unsigned char FilterChainHeaderAnalysis::doTo() { // Identifying a To: header.
  969. unsigned char c = FilterChain::GetByte(); // Get the next byte of the header tag.
  970. switch(FollowPattern(c)) { // See if we're still on the path.
  971. case -1: { // If we're not on the right tag then
  972. Mode = &FilterChainHeaderAnalysis::doSeekNL; // go back to looking for the next one.
  973. break;
  974. }
  975. case 0: { // If we've found the end of our tag (match!)
  976. Mode = &FilterChainHeaderAnalysis::doSeekNL; // start looking for the the next tag and
  977. FoundTo = true; // record that this tag was present.
  978. break;
  979. }
  980. default: { // If we're still following along then
  981. break; // keep on keepin' on.
  982. }
  983. } // Once we know what we're doing we
  984. return c; // return the character we got.
  985. }
  986. unsigned char FilterChainHeaderAnalysis::doCC() { // Identifying a CC: header.
  987. unsigned char c = FilterChain::GetByte(); // Get the next byte of the header tag.
  988. switch(FollowPattern(c)) { // See if we're still on the path.
  989. case -1: { // If we're not on the right tag then
  990. Mode = &FilterChainHeaderAnalysis::doSeekNL; // go back to looking for the next one.
  991. break;
  992. }
  993. case 0: { // If we've found the end of our tag (match!)
  994. Mode = &FilterChainHeaderAnalysis::doSeekNL; // start looking for the the next tag and
  995. FoundCC = true; // record that this tag was present.
  996. break;
  997. }
  998. default: { // If we're still following along then
  999. break; // keep on keepin' on.
  1000. }
  1001. } // Once we know what we're doing we
  1002. return c; // return the character we got.
  1003. }
  1004. unsigned char FilterChainHeaderAnalysis::doMessageID() { // Identifying a MessageID header.
  1005. unsigned char c = FilterChain::GetByte(); // Get the next byte of the header tag.
  1006. switch(FollowPattern(c)) { // See if we're still on the path.
  1007. case -1: { // If we're not on the right tag then
  1008. Mode = &FilterChainHeaderAnalysis::doSeekNL; // go back to looking for the next one.
  1009. break;
  1010. }
  1011. case 0: { // If we've found the end of our tag (match!)
  1012. Mode = &FilterChainHeaderAnalysis::doSeekNL; // start looking for the the next tag and
  1013. FoundMessageID = true; // record that this tag was present.
  1014. break;
  1015. }
  1016. default: { // If we're still following along then
  1017. break; // keep on keepin' on.
  1018. }
  1019. } // Once we know what we're doing we
  1020. return c; // return the character we got.
  1021. }
  1022. unsigned char FilterChainHeaderAnalysis::doDate() { // Identifying a Date: header.
  1023. unsigned char c = FilterChain::GetByte(); // Get the next byte of the header tag.
  1024. switch(FollowPattern(c)) { // See if we're still on the path.
  1025. case -1: { // If we're not on the right tag then
  1026. Mode = &FilterChainHeaderAnalysis::doSeekNL; // go back to looking for the next one.
  1027. break;
  1028. }
  1029. case 0: { // If we've found the end of our tag (match!)
  1030. Mode = &FilterChainHeaderAnalysis::doSeekNL; // start looking for the the next tag and
  1031. FoundDate = true; // record that this tag was present.
  1032. break;
  1033. }
  1034. default: { // If we're still following along then
  1035. break; // keep on keepin' on.
  1036. }
  1037. } // Once we know what we're doing we
  1038. return c; // return the character we got.
  1039. }
  1040. unsigned char FilterChainHeaderAnalysis::doSubject() { // Identifying a Subject: header.
  1041. unsigned char c = FilterChain::GetByte(); // Get the next byte of the header tag.
  1042. switch(FollowPattern(c)) { // See if we're still on the path.
  1043. case -1: { // If we're not on the right tag then
  1044. Mode = &FilterChainHeaderAnalysis::doSeekNL; // go back to looking for the next one.
  1045. break;
  1046. }
  1047. case 0: { // If we've found the end of our tag (match!)
  1048. Mode = &FilterChainHeaderAnalysis::doSeekNL; // start looking for the the next tag and
  1049. FoundSubject = true; // record that this tag was present.
  1050. break;
  1051. }
  1052. default: { // If we're still following along then
  1053. break; // keep on keepin' on.
  1054. }
  1055. } // Once we know what we're doing we
  1056. return c; // return the character we got.
  1057. }
  1058. unsigned char FilterChainHeaderAnalysis::doEndOfHeaders() { // IdentifyEndOfHeaders & Emit Results.
  1059. // We know we've reached the end of the headers so now
  1060. // we have to formulate the results we want to inject and
  1061. // er... inject them.
  1062. EndOfHeaderResults = "X-SNFHDR: "; // Emit an X header (internal only)
  1063. if(MissingCC()) { EndOfHeaderResults.append("-CC "); } // Emit -CC if no CC header.
  1064. if(MissingTo()) { EndOfHeaderResults.append("-TO "); } // Emit -TO if no TO header (together no to)
  1065. if(MissingFrom()) { EndOfHeaderResults.append("-FROM "); } // Emit -FROM if no FROM header.
  1066. if(MissingDate()) { EndOfHeaderResults.append("-DATE "); } // Emit -DATE if no DATE header.
  1067. if(MissingMessageID()) { EndOfHeaderResults.append("-MESSAGEID "); } // Emit -MESSAGEID if no MESSAGE-ID header.
  1068. if(MissingSubject()) { EndOfHeaderResults.append("-SUBJECT "); } // Emit -SUBJECT if no SUBJECT header.
  1069. if(HighBitCharacters()) { EndOfHeaderResults.append("+HIGHBIT"); } // Emit +HIGHBIT if non-ascii chars present.
  1070. EndOfHeaderResults.append("\n\n"); // Emit the double newline - end of headers.
  1071. SetOutputBuffer(EndOfHeaderResults); // Setup the output string.
  1072. Mode = &FilterChainHeaderAnalysis::doInjectAnalysis; // Switch to the output injection mode.
  1073. return GetByte(); // Return the first byte from there :-)
  1074. }
  1075. void FilterChainHeaderAnalysis::SetOutputBuffer(std::string& s) { // Setup the OutputBuffer.
  1076. OutputBuffer = (char*) s.c_str(); OutputIndex = 0; // Capture the c_str and reset the index.
  1077. }
  1078. unsigned char FilterChainHeaderAnalysis::doInjectIPTestResult() { // Inject OutputBuffer and go to doSeekNL.
  1079. unsigned char c = OutputBuffer[OutputIndex++]; // Get the next byte in the output buffer.
  1080. if(0 == c) { // If it is the null terminator then we
  1081. Mode = &FilterChainHeaderAnalysis::doSeekNL; // go back to seeking lines and return that
  1082. return GetByte(); // byte instead.
  1083. } // If we have a normal byte then we
  1084. return c; // return it.
  1085. }
  1086. unsigned char FilterChainHeaderAnalysis::doInjectAnalysis() { // Inject OutputBuffer and go to doOff.
  1087. unsigned char c = OutputBuffer[OutputIndex++]; // Get the next byte in the output buffer.
  1088. if(0 == c) { // If it is the null terminator then we
  1089. Mode = &FilterChainHeaderAnalysis::doOff; // go back to seeking lines and return that
  1090. return GetByte(); // byte instead.
  1091. } // If we have a normal byte then we
  1092. return c; // return it.
  1093. }
  1094. }