12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
7127812791280128112821283128412851286 |
- // FilterChain.cpp
- //
- // (C) 2002-2009 MicroNeil Research Corporation
- //
- // Main code file for module FilterChain.
-
- // 20041116 _M Added UrlDecode module. The module will repeat a decoded version of
- // any anchor tag that it sees which contains decodable %xx bytes. Other anchor
- // tags are not repeated.
-
- // 20041116 _M Upgrades to the Defunker module. The module now decodes any HTML
- // encoded bytes that could have been normal ascii.
-
- // 20041114 _M Completed basic defunker engine which strips out all HTML and some
- // basic encoding.
-
- // 20041113 _M Began heavy upgrades to this module to improve performance and
- // provide additional obfuscation removal. This modification will include a move
- // from the use of switch(State) mechanisms to the use of function pointers. This
- // should save a few cycles on every byte processed.
-
- #include "FilterChain.hpp"
-
- // FilterChainBase64 Methods.
-
- // GetByte()
- // Returns the next byte from this filter module, selected by the current State:
- // SCANNING hunts for Base64Start, DEQUEING replays a failed partial match, DECODING emits decoded bytes.
- unsigned char FilterChainBase64::GetByte() {
- // Exceptions thrown by FilterChain::GetByte() propagate to the caller except inside the try block below.
- switch(State) { // What are we doing?
-
- case SCANNING:{ // We're scanning to turn on...
-
- // In this mode we are hunting for a reason to turn
- // ourselves on. If we find our startup sequence then
- // we will go into decoding mode. Until then, we try
- // to match each incoming character with our startup
- // sequence.
-
- while(true) { // Search for our startup string or get out.
-
- try { // Try this...
- x=FilterChain::GetByte(); // Get the next byte from source.
- } // If we get the Empty signal
- // here, we've failed to match.
- catch(Empty) { // If so - and we haven't
- if(0==ScanIx) throw Empty("FilterChainBase64: No more data"); // started then just throw Empty.
- x=Base64Start[ScanIx]-1; // If we did start then make
- } // sure we won't match below.
-
- // It's important that no Empty signals get beyond this point unless
- // we've got a match started. Otherwise we'll return corruption.
-
- if(x!=Base64Start[ScanIx]){ // If the byte doesn't match,
- // and we've started matching
- if(0!=ScanIx) { // the sequence then save the
- Buffer=x; // byte for later, change to
- State=DEQUEING;DequeIx=0; // DEQUEING mode, and return
- return GetByte(); // the first Dequeued byte.
- }
- // If there's no match
- else return x; // started then shortcut that:
- } // just send back the byte.
-
- // We've handled non matches, now time for the good stuff...
-
- else { // This byte matches :-)
-
- ScanIx++; // Move forward!
- if(ScanIx>=sizeof(Base64Start)-1){ // If we've matched it all
- // then prep for decoding.
-
- // At this point we've got our trigger - but we need to
- // eat up any extra junk before we start decoding. What
- // we're looking for is a blank line (two consecutive <LF> bytes)
- // within the next couple of lines. While we're at this if we
- // get an exception we'll just pass it through.
-
- ScanIx=DequeIx=0; // Let's reset our indexes.
-
- // We're SCANNING now - so if we fail to get to good base64
- // stuff then we'll be starting from scratch - and that's ok.
- // Here we will allow some number of additional header lines
- // to occur before we give up on this being a base64 segment.
- // If we give up then we go back to scanning again.
-
- // 20030114 _M Increased limit to 150 - lots of X- headers cause
- // the engine to stop decoding base64!! 30 was too small.
-
- const int LineLimit = 150; // We'll allow this many.
-
- for(int LineCount=0; LineCount<LineLimit; LineCount++) {
-
- do{ // Eat up characters through
- x=FilterChain::GetByte(); // the end of the line.
- } while(x!='\n');
-
- x=FilterChain::GetByte(); // Get the next byte.
- if(x=='\n'){ // Next line is blank?
- State=DECODING; // Then get ready to DECODE!
- break; // NO MORE LOOPING!
- }
-
- // If the line is not blank then we'll go around again up
- // to the number of lines we allow. Then we're done trying
- // and we will fall through.
- }
-
- // At this point we are either ready to decode base64 data
- // or we're still in SCANNING mode because of too much junk.
-
- if(DECODING==State) { // If we're ready to decode
- Workspace = 0x0000000a; // then set up a pair of
- DequeIx=3; // <LF> lines so they will
- ScanIx=2; // be the first bytes decoded.
- } // Here we pump <LF> into the
- // workspace. Then we return one <LF>
- return x; // (usually).
- // NOTE(review): with DequeIx=3 and ScanIx=2 the DECODING read-out emits workspace bytes 2, 1, 0 - that is 0x00, 0x00, 0x0a - confirm the two zero bytes are intended.
- // The deal is, if we're decoding then we will pump in LF and
- // return what must be the last LF. If we're not decoding then we
- // end up returning the last byte we read before giving up which should
- // be the first byte of the next line.
-
- }
- }
- }
-
- // The above will be tried repeatedly in the first call to
- // this object's GetByte() until we either return a byte or
- // throw an exception. The result is that once we start to match
- // our startup sequence we will either match all of it or we will
- // grab as much of it as we can until we don't match - then we'll
- // fail and move into DEQUEING.
-
- // You may be asking yourself, why go through all that complex
- // Turing engine stuff when a simple line load and string comparison
- // would do nicely. The answer is SPEED. Without getting too deep,
- // the above code will identify the startup string in roughly 2
- // comparisons per byte. If I were to load the entire line first
- // then that alone would be 2 comparisons before I got started. This
- // way I cut the number of comparisons down by at least 50%.
-
- break;
- }
-
- case DEQUEING:{ // We're recovering from a false start...
-
- // When we get here, ScanIx will be one greater than the last
- // matching byte. The last byte read will be stored in our buffer
- // so that it can be returned here as the last step. The calling
- // program will request each byte one at a time... starting with
- // the first byte coming out of this code. For all positions in our
- // startup string less than ScanIx, we know we had a matching input.
- // We start our output at the first byte. The Scanning engine should
- // have set our DequeIx to 0 before we got here - so that part should
- // be automatic. Here goes...
-
- if(DequeIx < ScanIx) { // If we're still returning a
- unsigned char x = // partial match, grab the next byte
- Base64Start[DequeIx]; // from the startup string, increment
- DequeIx++; // our Deque index for next time, and
- return x; // return the byte that's needed.
-
- } else { // When we're done with that part,
- State=SCANNING; // we set our mode back to scanning,
- ScanIx=DequeIx=0; // reset our indexes to start again,
- return Buffer; // and return the unmatching byte that
- } // got us to DEQUEING mode.
-
- break;
- }
-
- case DECODING:{ // We're decoding data...
-
- // DequeIx will be used here to indicate how many decoded
- // bytes are ready to be delivered. This is compatible with
- // the normal startup for other modes.
-
- // ScanIx will be used here to indicate which byte position
- // we should be reading from. This combination helps to handle
- // pads and simplifies processing. For example, if we've got two
- // pads then we'll have a single byte to read starting at index
- // two (see the read-out branch at the bottom of this case).
-
- // If we get an exception from up the chain while we're decoding
- // then we'll just pass it along.
-
- if(0==DequeIx) { // If there are no bytes ready then get some!
-
- // First Byte:
- // Eat anything up to the first byte that does look like
- // a base64 digit. If we hit a '\n-' then we'll assume we've got
- // a segment boundary and we'll quit. Everything else will be
- // ignored to get us to the next line.
-
- do{ // Empty out any in-between bytes.
- y=x;x=FilterChain::GetByte(); // Read one byte at a time (y keeps the previous byte).
-
- if('-'==x && '\n'==y) { // If we get to a segment separator
- ScanIx=DequeIx=0; // then reset our indexes, set our
- State=SCANNING; // state to SCANNING...
- do { // Eat up the rest of this line
- x=FilterChain::GetByte(); // one byte at a time including
- } while('\n'!=x); // the <LF> at the end, then
- return '\n'; // return that <LF> byte.
-
- // On the next incoming call, the scanner section "should"
- // return the following <LF> byte to complete the end of line.
- // This ensures that we put a new line at the end of our
- // decoded segment. For message scanning purposes this is
- // desirable. If we wanted a clean segment then we'd probably
- // eat through the new line rather than the carriage return.
- }
- } while(XX64==Base64Table[x]); // Eat all invalid bytes.
-
- // At this point x should have the first valid byte for us :-)
-
- if('='==x) { // First byte can't be a pad.
- ScanIx=DequeIx=0; // If it is then we reset ourself,
- do{ // eat the rest of this line,
- y=x;x=FilterChain::GetByte(); // and then go on with scanning.
- }while('\n'!=x);
- return x;
- }
- // NOTE(review): the reset paths in this case zero the indexes but leave State as DECODING, so the next call re-enters DECODING rather than SCANNING - confirm that is what "go on with scanning" means.
- // At this point we have a clean byte, presumably at the start
- // of a base64 block which we can decode.
-
- x = Base64Table[x]; // Convert the byte.
-
- // This first one we assign to clear out the register. The rest
- // get added to keep things in place.
-
- Workspace = // Add it to the workspace in the
- x << base64_seg0_shift; // correct position.
-
- // Byte number 2 of the block...
-
- x=FilterChain::GetByte(); // Grab the byte...
-
- if('='==x) { // This byte can't be a pad.
- ScanIx=DequeIx=0; // If it is then we reset ourself,
- do{ // eat the rest of this line,
- y=x;x=FilterChain::GetByte(); // and then go on with scanning.
- }while('\n'!=x);
- return x;
- }
-
- x=Base64Table[x]; // Convert the byte.
- if(XX64==x) { // The byte can't be invalid...
- ScanIx=DequeIx=0; // If it is then we reset ourself,
- do{ // eat the rest of this line,
- y=x;x=FilterChain::GetByte(); // and then go on with scanning.
- }while('\n'!=x);
- return x;
- }
-
- // At this point we have a clean byte...
-
- Workspace += // Add it to the workspace in the
- x << base64_seg1_shift; // correct position.
-
- // Byte number 3 of the block...
-
- x=FilterChain::GetByte(); // Grab the byte...
-
- // This one and the next one can be pads. Here's where we start
- // deciding how many bytes we have. If we have a pad in this spot
- // then our output bytes will only be 1.
-
- if('='==x) DequeIx = 1; // If we've got a pad here we'll only
- else DequeIx = 3; // have one valid output byte. Otherwise
- // we could have 3.
-
- x=Base64Table[x]; // Convert the byte.
- if(XX64==x) { // The byte can't be invalid...
- ScanIx=DequeIx=0; // If it is then we reset ourself,
- do{ // eat the rest of this line,
- y=x;x=FilterChain::GetByte(); // and then go on with scanning.
- }while('\n'!=x);
- return x;
- }
-
- // At this point we have a clean byte...
-
- Workspace += // Add it to the workspace in the
- x << base64_seg2_shift; // correct position.
-
- // Byte number 4 of the block...
-
- x=FilterChain::GetByte(); // Grab the byte...
-
- if('='==x && DequeIx > 2) // If we've got a pad here the most
- DequeIx=2; // we can have are 2 valid outputs.
-
- x=Base64Table[x]; // Convert the byte.
- if(XX64==x) { // The byte can't be invalid...
- ScanIx=DequeIx=0; // If it is then we reset ourself,
- do{ // eat the rest of this line,
- y=x;x=FilterChain::GetByte(); // and then go on with scanning.
- }while('\n'!=x);
- return x;
- }
-
- // At this point we have a clean byte...
-
- Workspace += // Add it to the workspace in the
- x << base64_seg3_shift; // correct position.
-
- // At this point we are ready to begin outputting our bytes.
-
- ScanIx=2; // Output always starts at workspace byte index 2.
- return GetByte(); // Return our first decoded byte.
-
- } else { // If there are bytes ready then spit them out.
-
- x=(Workspace >> (ScanIx * 8)) & 0xFF; // Grab the byte we want.
-
- ScanIx--; // Decrement our output index.
- DequeIx--; // Decrement our output count.
- return x; // Send back our byte.
- }
-
- break;
- }
- }
- // We should never get to this point.
- return 0; // Dummy to make the compiler happy.
- }
-
- // FilterChainQuotedPrintable Methods.
-
- // isHexDigit()
- // Reports whether i is one of the characters 0-9, A-F, or a-f.
-
- bool FilterChainQuotedPrintable::isHexDigit(unsigned char i) {
-
-     const bool isDecimal   = ('0' <= i && i <= '9');                    // 0 through 9,
-     const bool isUpperCase = ('A' <= i && i <= 'F');                    // A through F,
-     const bool isLowerCase = ('a' <= i && i <= 'f');                    // or a through f.
-
-     return isDecimal || isUpperCase || isLowerCase;                     // Any one of them qualifies.
- }
-
- // convertHexDigit()
- // Maps the hex digit character i to its numeric value (0 - 15).
- // Returns -1 when i is not a hex digit.
-
- int FilterChainQuotedPrintable::convertHexDigit(unsigned char i) {
-
-     if('0' <= i && i <= '9') return i - '0';                            // Decimal digits map directly.
-     if('A' <= i && i <= 'F') return (i - 'A') + 10;                     // Upper case A-F map to 10 - 15.
-     if('a' <= i && i <= 'f') return (i - 'a') + 10;                     // Lower case a-f map to 10 - 15.
-
-     return -1;                                                          // Anything else is not a hex digit.
- }
-
- // GetByte()
- // Returns the next byte from this filter module.
- //
- // SCANNING passes bytes through until an '=' arrives, then captures the
- // two bytes that follow it. DECODING decides what that three byte buffer
- // means. DEQUEING hands back whatever part of the buffer should be seen
- // by the caller and then returns to SCANNING.
-
- unsigned char FilterChainQuotedPrintable::GetByte() {
-
-     switch(State) {
-
-         case SCANNING: {                                                // Looking for an '='.
-
-             Buffer[0] = FilterChain::GetByte();
-             if('=' != Buffer[0]) return Buffer[0];                      // Ordinary bytes pass straight through.
-
-             Buffer[1] = FilterChain::GetByte();                         // Capture the two bytes that
-             Buffer[2] = FilterChain::GetByte();                         // follow the '='.
-             BufferIndex = 0;
-             BufferLength = 3;
-             State = DECODING;                                           // Let DECODING work out
-             return GetByte();                                           // what we have.
-         }
-
-         case DEQUEING: {                                                // Draining the buffer.
-
-             if(BufferIndex < BufferLength) return Buffer[BufferIndex++];
-
-             BufferIndex = 0;                                            // Nothing left: reset the buffer
-             BufferLength = 0;                                           // bookkeeping and carry on with
-             State = SCANNING;                                           // whatever SCANNING produces.
-             return GetByte();
-         }
-
-         case DECODING: {                                                // Interpreting '=' plus two bytes.
-
-             // A soft break is '=' followed directly by <LF>; only the
-             // byte after the <LF> is passed on. An encoded byte is '='
-             // followed by two hex digits; the decoded value replaces the
-             // last buffer slot and is the only byte passed on. Anything
-             // else is passed on untouched, all three bytes of it.
-
-             const bool softBreak = ('\n' == Buffer[1]);
-             const bool encodedByte =
-                 !softBreak && isHexDigit(Buffer[1]) && isHexDigit(Buffer[2]);
-
-             if(encodedByte) {
-                 Workspace =
-                     (convertHexDigit(Buffer[1]) << 4) |
-                     (convertHexDigit(Buffer[2]));
-                 Buffer[2] = Workspace & 0xFF;
-             }
-
-             if(softBreak || encodedByte) BufferIndex = 2;               // Only the last slot goes out.
-
-             State = DEQUEING;                                           // DEQUEING produces the output.
-             return GetByte();
-         }
-     }
-
-     return FilterChain::GetByte();                                      // Dummy: State was none of the above.
-
- }
-
-
- /////////////////////////////////////////////////////////////////////////////////////////
- // FilterChainDefunker
- /////////////////////////////////////////////////////////////////////////////////////////
-
- // SkipHeaders() passes header bytes through untouched and switches the
- // Master handler to Store() once two consecutive <LF> bytes are seen.
-
- unsigned char FilterChainDefunker::SkipHeaders() {
-     const unsigned char Current = FilterChain::GetByte();               // Next raw byte.
-     const bool EndOfHeaders =                                           // A blank line ends the headers.
-         ('\n' == LastRawByte) && ('\n' == Current);
-
-     if(EndOfHeaders) Master = &FilterChainDefunker::Store;              // Body bytes go to Store().
-     else LastRawByte = Current;                                         // Otherwise remember this byte.
-
-     return Current;                                                     // Either way the byte goes out.
- }
-
- // Store() copies each incoming byte into StoreBuffer and passes it on.
- // When the source signals Empty, or when StoreBuffer already holds
- // DefunkerSize bytes, the Master handler becomes ReadOut() and the byte
- // returned is whatever GetByte() produces after that switch.
-
- unsigned char FilterChainDefunker::Store() {
-
-     unsigned char x;                                                    // The byte we will pass on.
-
-     try {
-         if(DefunkerSize <= InputPosition)                               // Never write past the buffer:
-             throw Empty("FilterChainDefunker: No more data");           // treat a full store like Empty.
-         x = FilterChain::GetByte();                                     // Get the next byte
-         StoreBuffer[InputPosition++] = x;                               // and keep a copy of it.
-     }
-
-     catch(const Empty&) {                                               // Out of data (or out of room):
-         Master = &FilterChainDefunker::ReadOut;                         // it is time to read out what
-         return GetByte();                                               // we have stored.
-     }
-
-     return x;                                                           // Otherwise pass on the byte.
- }
-
- // ReadOut() pulls bytes from the state engine through SpaceConvChart
- // and collapses runs of spaces: if the previous output was a space,
- // further spaces are skipped until something else turns up.
-
- unsigned char FilterChainDefunker::ReadOut() {
-
-     const bool AfterSpace = (' ' == LastReadOut);                       // Was the previous output a space?
-
-     do {                                                                // Always read at least one byte;
-         LastReadOut = SpaceConvChart[GetInternal()];                    // keep reading only while we are
-     } while(AfterSpace && ' ' == LastReadOut);                          // still swallowing a run of spaces.
-
-     return LastReadOut;                                                 // Output the byte we settled on.
- }
-
- // GetStore() hands the state engine the next stored byte and records it
- // in LastGetStore. Throws Empty once every stored byte has been read.
-
- unsigned char FilterChainDefunker::GetStore() {
-     if(OutputPosition < InputPosition) {                                // Bytes remain in the store:
-         LastGetStore = StoreBuffer[OutputPosition];                     // remember the next one,
-         ++OutputPosition;                                               // step past it,
-         return LastGetStore;                                            // and send it back.
-     }
-     throw Empty("FilterChainDefunker: No more data");                   // Nothing left to read.
- }
-
- //// The following functions make up the state engine with the state maintained
- //// as a function pointer in the (*Internal)() handle.
-
- // Preamble() queues every byte of DefunkerPreamble up to its zero
- // terminator, then lets the DeQueue state emit them.
-
- unsigned char FilterChainDefunker::Preamble() {
-     int Next = 0;
-     while(DefunkerPreamble[Next]) {                                     // Until the terminator,
-         EnQueue(DefunkerPreamble[Next]);                                // queue each preamble byte.
-         ++Next;
-     }
-
-     Internal = &FilterChainDefunker::DeQueue;                           // DeQueue will emit the bytes,
-     return GetInternal();                                               // starting right now.
- }
-
- // DefunkRoot() is the resting state of the engine. A '<' hands control
- // to OpenTag, an '&' is queued and hands control to OpenAmp, and every
- // other byte is returned unchanged.
-
- unsigned char FilterChainDefunker::DefunkRoot() {
-     unsigned char Current = GetStore();                                 // Next stored byte.
-
-     switch(Current) {
-
-         case '<':                                                       // Start of a tag.
-             Internal = &FilterChainDefunker::OpenTag;
-             return GetInternal();
-
-         case '&':                                                       // Start of an entity.
-             Internal = &FilterChainDefunker::OpenAmp;
-             EnQueue(Current);                                           // Keep the ampersand in case we punt.
-             return GetInternal();
-
-         default:                                                        // Nothing special:
-             return Current;                                             // pass the byte along.
-     }
- }
-
- // OpenTag() looks at the first byte after '<' (ignoring case) and picks
- // the next state: MatchBR for 'b', MatchP for 'p', EatTag for the rest.
-
- unsigned char FilterChainDefunker::OpenTag() {
-     const int Lowered = tolower(GetStore());                            // Lower case of the next byte.
-
-     if('b' == Lowered) {
-         Internal = &FilterChainDefunker::MatchBR;                       // Possibly a BR tag.
-     } else if('p' == Lowered) {
-         Internal = &FilterChainDefunker::MatchP;                        // Possibly a P tag.
-     } else {
-         Internal = &FilterChainDefunker::EatTag;                        // Some other tag: swallow it.
-     }
-
-     return GetInternal();                                               // Let the chosen state answer.
- }
-
- // OpenAmp() runs right after DefunkRoot has queued an '&'. It reads the
- // next byte, queues it, and chooses the state that will try to finish
- // the entity: n, a, l, g and q (any case) and '#' each have a matcher.
- //
- // Any other byte means this is not an entity we decode, so we punt to
- // DeQueue. The byte is queued on that path as well: it has already been
- // consumed from the store, so leaving it out of the queue would drop it
- // from the output. SwitchAMPAPOS and the Match* states queue every byte
- // they read for the same reason.
-
- unsigned char FilterChainDefunker::OpenAmp() {
-     unsigned char x = GetStore();                                       // Grab the next byte
-     EnQueue(x);                                                         // and keep it for a possible punt.
-
-     switch(tolower(x)) {                                                // Pick the matcher by first letter.
-
-         case 'n':
-             Internal = &FilterChainDefunker::MatchNBSP;                 // Working on &n...
-             break;
-
-         case 'a':
-             Internal = &FilterChainDefunker::SwitchAMPAPOS;             // Is it AMP or APOS?
-             break;
-
-         case 'l':
-             Internal = &FilterChainDefunker::MatchLT;                   // Working on &l...
-             break;
-
-         case 'g':
-             Internal = &FilterChainDefunker::MatchGT;                   // Working on &g...
-             break;
-
-         case 'q':
-             Internal = &FilterChainDefunker::MatchQUOT;                 // Working on &q...
-             break;
-
-         default:
-             if(x == '#') {
-                 Internal = &FilterChainDefunker::DecodeNum;             // Working on &#...;
-             } else {
-                 Internal = &FilterChainDefunker::DeQueue;               // Nothing matched: punt and
-             }                                                           // replay what we consumed.
-             break;
-     }
-
-     return GetInternal();                                               // Return the next byte.
- }
-
- // MatchBR() tries patMatchBR against the tag being read. A match is
- // replaced by a single space; otherwise the tag is handed to EatTag.
-
- unsigned char FilterChainDefunker::MatchBR() {
-     if(!MatchTagPattern(patMatchBR)) {                                  // Not the tag we wanted:
-         Internal = &FilterChainDefunker::EatTag;                        // let EatTag dispose of it
-         return GetInternal();                                           // and return what follows.
-     }
-     Internal = &FilterChainDefunker::DefunkRoot;                        // Matched: back to the root state
-     return ' ';                                                         // with a space in place of the tag.
- }
-
- // MatchP() tries patMatchP against the tag being read. A match is
- // replaced by a single space; otherwise the tag is handed to EatTag.
-
- unsigned char FilterChainDefunker::MatchP() {
-     if(!MatchTagPattern(patMatchP)) {                                   // Not the tag we wanted:
-         Internal = &FilterChainDefunker::EatTag;                        // let EatTag dispose of it
-         return GetInternal();                                           // and return what follows.
-     }
-     Internal = &FilterChainDefunker::DefunkRoot;                        // Matched: back to the root state
-     return ' ';                                                         // with a space in place of the tag.
- }
-
- // MatchNBSP() compares stored bytes (lower cased) with patNBSP from
- // index 2 onward, the first two pattern positions having been consumed
- // by earlier states. Every byte read is queued. A complete match clears
- // the queue and yields a space; a mismatch punts to DeQueue.
-
- unsigned char FilterChainDefunker::MatchNBSP() {
-     bool Complete = true;                                               // Assume success until we fall off.
-
-     for(int pos = 2; patNBSP[pos]; ++pos) {                             // Walk the rest of the pattern.
-         unsigned char Next = GetStore();                                // One stored byte at a time,
-         EnQueue(Next);                                                  // each kept in case we punt.
-         if(tolower(Next) != patNBSP[pos]) {                             // A mismatch ends the attempt.
-             Complete = false;
-             break;
-         }
-     }
-
-     if(!Complete) {                                                     // No match:
-         Internal = &FilterChainDefunker::DeQueue;                       // replay the queued bytes.
-         return GetInternal();
-     }
-
-     ClearQueue();                                                       // Match: discard the raw bytes,
-     Internal = &FilterChainDefunker::DefunkRoot;                        // go back to the root state,
-     return ' ';                                                         // and return a space.
- }
-
- // MatchLT() compares stored bytes (lower cased) with patLT from index 2
- // onward, the first two pattern positions having been consumed by
- // earlier states. Every byte read is queued. A complete match clears
- // the queue and yields '<'; a mismatch punts to DeQueue.
-
- unsigned char FilterChainDefunker::MatchLT() {
-     bool Complete = true;                                               // Assume success until we fall off.
-
-     for(int pos = 2; patLT[pos]; ++pos) {                               // Walk the rest of the pattern.
-         unsigned char Next = GetStore();                                // One stored byte at a time,
-         EnQueue(Next);                                                  // each kept in case we punt.
-         if(tolower(Next) != patLT[pos]) {                               // A mismatch ends the attempt.
-             Complete = false;
-             break;
-         }
-     }
-
-     if(!Complete) {                                                     // No match:
-         Internal = &FilterChainDefunker::DeQueue;                       // replay the queued bytes.
-         return GetInternal();
-     }
-
-     ClearQueue();                                                       // Match: discard the raw bytes,
-     Internal = &FilterChainDefunker::DefunkRoot;                        // go back to the root state,
-     return '<';                                                         // and return a <.
- }
-
- // MatchGT() compares stored bytes (lower cased) with patGT from index 2
- // onward, the first two pattern positions having been consumed by
- // earlier states. Every byte read is queued. A complete match clears
- // the queue and yields '>'; a mismatch punts to DeQueue.
-
- unsigned char FilterChainDefunker::MatchGT() {
-     bool Complete = true;                                               // Assume success until we fall off.
-
-     for(int pos = 2; patGT[pos]; ++pos) {                               // Walk the rest of the pattern.
-         unsigned char Next = GetStore();                                // One stored byte at a time,
-         EnQueue(Next);                                                  // each kept in case we punt.
-         if(tolower(Next) != patGT[pos]) {                               // A mismatch ends the attempt.
-             Complete = false;
-             break;
-         }
-     }
-
-     if(!Complete) {                                                     // No match:
-         Internal = &FilterChainDefunker::DeQueue;                       // replay the queued bytes.
-         return GetInternal();
-     }
-
-     ClearQueue();                                                       // Match: discard the raw bytes,
-     Internal = &FilterChainDefunker::DefunkRoot;                        // go back to the root state,
-     return '>';                                                         // and return a >.
- }
-
- // MatchQUOT() compares stored bytes (lower cased) with patQUOT from
- // index 2 onward, the first two pattern positions having been consumed
- // by earlier states. Every byte read is queued. A complete match clears
- // the queue and yields a double quote; a mismatch punts to DeQueue.
-
- unsigned char FilterChainDefunker::MatchQUOT() {
-     bool Complete = true;                                               // Assume success until we fall off.
-
-     for(int pos = 2; patQUOT[pos]; ++pos) {                             // Walk the rest of the pattern.
-         unsigned char Next = GetStore();                                // One stored byte at a time,
-         EnQueue(Next);                                                  // each kept in case we punt.
-         if(tolower(Next) != patQUOT[pos]) {                             // A mismatch ends the attempt.
-             Complete = false;
-             break;
-         }
-     }
-
-     if(!Complete) {                                                     // No match:
-         Internal = &FilterChainDefunker::DeQueue;                       // replay the queued bytes.
-         return GetInternal();
-     }
-
-     ClearQueue();                                                       // Match: discard the raw bytes,
-     Internal = &FilterChainDefunker::DefunkRoot;                        // go back to the root state,
-     return '\"';                                                        // and return a quote.
- }
-
- // SwitchAMPAPOS() reads and queues the byte after "&a" and uses it
- // (ignoring case) to choose between MatchAMP ('m') and MatchAPOS ('p').
- // Any other byte punts to DeQueue.
-
- unsigned char FilterChainDefunker::SwitchAMPAPOS() {
-     unsigned char x = GetStore();                                       // Get the next byte
-     EnQueue(x);                                                         // and queue it.
-
-     switch(tolower(x)) {
-
-         case 'm':                                                       // &am... could be AMP.
-             Internal = &FilterChainDefunker::MatchAMP;
-             break;
-
-         case 'p':                                                       // &ap... could be APOS.
-             Internal = &FilterChainDefunker::MatchAPOS;
-             break;
-
-         default:                                                        // Neither one:
-             Internal = &FilterChainDefunker::DeQueue;                   // punt and DeQueue.
-             break;
-     }
-
-     return GetInternal();                                               // Go get it.
- }
-
- // MatchAPOS() compares stored bytes (lower cased) with patAPOS from
- // index 3 onward, the first three pattern positions having been consumed
- // by earlier states. Every byte read is queued. A complete match clears
- // the queue and yields an apostrophe; a mismatch punts to DeQueue.
- //
- // The match test must look at patAPOS, the pattern that was walked.
- // Testing a different pattern at the same index can report a failed
- // match as a success, or read beyond the end of that other pattern.
-
- unsigned char FilterChainDefunker::MatchAPOS() {                        // If our mode is MatchAPOS
-     int pos = 3;                                                        // We've seen &ap so far.
-     while(patAPOS[pos]){                                                // Look through the pattern
-         unsigned char x = GetStore();                                   // getting one byte at a time.
-         EnQueue(x);                                                     // Push each into the queue.
-         if(tolower(x)!=patAPOS[pos]) break;                             // If we fall off, get out.
-         pos++;                                                          // otherwise keep going.
-     }
-
-     // At this point patAPOS[pos] is either 0 (a match) or not.
-
-     if(patAPOS[pos]) {                                                  // If we did not match then
-         Internal = &FilterChainDefunker::DeQueue;                       // set our state to dequeue
-         return GetInternal();                                           // and return the next byte.
-     }
-                                                                         // If we did match the pattern
-     ClearQueue();                                                       // then clear the queue and
-     Internal = &FilterChainDefunker::DefunkRoot;                        // go back to root mode then
-     return '\'';                                                        // return an apostrophe.
- }
-
- // MatchAMP() compares stored bytes (lower cased) with patAMP from index
- // 3 onward, the first three pattern positions having been consumed by
- // earlier states. Every byte read is queued. A complete match clears
- // the queue and yields '&'; a mismatch punts to DeQueue.
-
- unsigned char FilterChainDefunker::MatchAMP() {
-     bool Complete = true;                                               // Assume success until we fall off.
-
-     for(int pos = 3; patAMP[pos]; ++pos) {                              // Walk the rest of the pattern.
-         unsigned char Next = GetStore();                                // One stored byte at a time,
-         EnQueue(Next);                                                  // each kept in case we punt.
-         if(tolower(Next) != patAMP[pos]) {                              // A mismatch ends the attempt.
-             Complete = false;
-             break;
-         }
-     }
-
-     if(!Complete) {                                                     // No match:
-         Internal = &FilterChainDefunker::DeQueue;                       // replay the queued bytes.
-         return GetInternal();
-     }
-
-     ClearQueue();                                                       // Match: discard the raw bytes,
-     Internal = &FilterChainDefunker::DefunkRoot;                        // go back to the root state,
-     return '&';                                                         // and return an ampersand.
- }
-
- // EatTag() discards stored bytes up to and including the next '>',
- // unless the most recent byte read from the store was already '>'.
- // It then returns to DefunkRoot.
-
- unsigned char FilterChainDefunker::EatTag() {
-     if(LastGetStore != '>') {                                           // Tag still open?
-         unsigned char Eaten;
-         do {                                                            // Swallow bytes until the
-             Eaten = GetStore();                                         // closing '>' has gone by.
-         } while(Eaten != '>');
-     }
-     Internal = &FilterChainDefunker::DefunkRoot;                        // Back to the root state
-     return GetInternal();                                               // for the next byte.
- }
-
- // DecodeNum() handles the part of a numeric entity that follows "&#".
- // It reads and queues up to four bytes, stopping after the first byte
- // that is not a decimal digit. The entity is decoded only when the last
- // byte read was ';' and the value lies in 32 - 255; the queue is then
- // cleared and the decoded byte returned. In every other case the queued
- // bytes are replayed through DeQueue.
-
- unsigned char FilterChainDefunker::DecodeNum() {
-     unsigned char NumBfr[5] = {0};                                      // Digits plus a guaranteed terminator.
-
-     for(unsigned int i = 0; i < (sizeof(NumBfr)-1); ++i) {              // Leave the last slot as zero.
-         NumBfr[i] = GetStore();                                         // Read a byte,
-         EnQueue(NumBfr[i]);                                             // queue it in case we punt,
-         if(!isdigit(NumBfr[i])) break;                                  // and stop at the first non digit.
-     }
-
-     // Check for a proper finish...
-
-     if(';' != LastGetStore) {                                           // No terminating semicolon:
-         Internal = &FilterChainDefunker::DeQueue;                       // punt and DeQueue.
-         return GetInternal();
-     }
-
-     // NumBfr now holds the digits followed by the ';' as a c_str, and
-     // the queue holds every byte we read in case we want to punt.
-
-     const int Decoded = atol((const char*)NumBfr);                      // Read the number.
-     const bool InRange = (32 <= Decoded && Decoded <= 255);
-
-     if(!InRange) {                                                      // Out of range:
-         Internal = &FilterChainDefunker::DeQueue;                       // punt and DeQueue.
-         return GetInternal();
-     }
-
-     ClearQueue();                                                       // In range: drop the raw bytes,
-     Internal = &FilterChainDefunker::DefunkRoot;                        // go back to DefunkRoot, and
-     return (unsigned char) Decoded;                                     // return the decoded byte.
- }
-
- /////////////////////////////////////////////////////////////////////////////////////////
- // FilterChainUrlDecode
- /////////////////////////////////////////////////////////////////////////////////////////
-
- unsigned char FilterChainUrlDecode::Bypass() { // In Bypass mode...
- unsigned char c = FilterChain::GetByte(); // Get the raw byte.
- if(c == '<') { // If it was '<' we begin.
- Internal = &FilterChainUrlDecode::Tag; // Go to Tag mode.
- AddToBfr(c); // Write the byte to our buffer.
- }
- return c; // Always return the byte.
- }
-
- unsigned char FilterChainUrlDecode::Tag() { // In Tag mode...
- unsigned char c = FilterChain::GetByte(); // Get the raw byte.
- if(tolower(c) == 'a') { // If we're in an anchor tag
- Internal = &FilterChainUrlDecode::Root; // Go to Decode Root mode.
- AddToBfr(c); // Write the byte to our buffer.
- } else
- if(tolower(c) == 'i') { // If we might be in an img tag
- Internal = &FilterChainUrlDecode::Img1; // Go to Img1 mode.
- AddToBfr(c); // Write the byte to our buffer.
-
- } else { // If we didn't match
- DecodeBfr[0] = 0; // we clear out the Decode
- DecodeBfr[1] = 0; // buffer. (Save some bytes by
- DecodeLength = 0; // doing it manually) Then we
- Internal = &FilterChainUrlDecode::Bypass; // Go to Bypass mode again.
- }
- return c; // Always return the byte.
- }
-
- unsigned char FilterChainUrlDecode::Img1() { // In Img1 mode...
- unsigned char c = FilterChain::GetByte(); // Get the raw byte.
- if(tolower(c)=='m') { // If we're still in an img tag
- Internal = &FilterChainUrlDecode::Img2; // Go to Img2 mode.
- AddToBfr(c); // Write the byte to our buffer.
-
- } else { // If we didn't match
- DecodeBfr[0] = 0; // we clear out the Decode
- DecodeBfr[1] = 0; // buffer and go back to
- DecodeBfr[2] = 0; // Bypass mode again.
- DecodeLength = 0;
- Internal = &FilterChainUrlDecode::Bypass;
- }
- return c; // Always return the byte.
- }
-
- unsigned char FilterChainUrlDecode::Img2() { // In Img2 mode...
- unsigned char c = FilterChain::GetByte(); // Get the raw byte.
- if(tolower(c)=='g') { // If we're still in an img tag
- Internal = &FilterChainUrlDecode::Root; // Go to Decode Root mode.
- AddToBfr(c); // Write the byte to our buffer.
-
- } else { // If we didn't match
- DecodeBfr[0] = 0; // we clear out the Decode
- DecodeBfr[1] = 0; // buffer and go back to
- DecodeBfr[2] = 0; // Bypass mode again.
- DecodeBfr[3] = 0;
- DecodeLength = 0;
- Internal = &FilterChainUrlDecode::Bypass;
- }
- return c; // Always return the byte.
- }
-
- unsigned char FilterChainUrlDecode::Root() { // While in Decode Root mode...
- unsigned char c = FilterChain::GetByte(); // Get the raw byte.
- AddToBfr(c); // Push it into the buffer.
-
- // Now we will switch modes based on the byte we get.
-
- if(c == '%') { // If we have '%' then it is
- Internal = &FilterChainUrlDecode::GetD1; // time to start decoding.
- } else
- if(c == '>') { // If we have '>' and
- if(DecodeFlag) { // we did some decoding then
- Internal = &FilterChainUrlDecode::Inject; // it is time to inject the result.
- } else { // If there was no decoding then
- Clear(); // we clear out our buffer and
- Internal = &FilterChainUrlDecode::Bypass; // it is time to go to sleep.
- }
- }
-
- // This next bit protects against malformed HTML by watching for any new tag
- // start. If one occurs, then we throw away our current decoding and assume a state
- // that starts with the new open "<".
-
- if(c == '<') { // If found a new < then we
- Clear(); // clear the buffer,
- AddToBfr(c); // Add the '<' back in, and
- Internal = &FilterChainUrlDecode::Tag; // go back to Tag mode.
- }
-
- return c; // Always return the byte.
- }
-
- unsigned char FilterChainUrlDecode::GetD1() { // Get the first digit.
- unsigned char c = FilterChain::GetByte(); // Read the raw byte.
- AddToBfr(c); // Add it to the buffer.
- Internal = &FilterChainUrlDecode::GetD2; // Move to GetD2 mode.
- return c; // Always return the byte.
- }
-
- // isHexDigit()
- // Returns true if i is a valid hex digit.
-
- bool FilterChainUrlDecode::isHexDigit(unsigned char i) {
-
- if(
- (i >= '0' && i <= '9') || // Hex digits must be 0-9 or
- (i >= 'A' && i <= 'F') || // A-F or
- (i >= 'a' && i <= 'f') // a-f if somebody used lower case.
- ) {
- return true; // If i is one of these we are true
- } else {
- return false; // IF i is not then we are false
- }
- }
-
- // convertHexDigit()
- // Returns an integer value for the hex digit i
-
- int FilterChainUrlDecode::convertHexDigit(unsigned char i) {
-
- if(i >= '0' && i <= '9') { // Digit chars convert directly.
- return i - '0';
- } else if (i >= 'A' && i <= 'F') { // Cap A-F convert to 10 - 15
- return i - 'A' + 10;
- } else if (i >= 'a' && i <= 'f') { // Small A-F convert to 10 - 15
- return i - 'a' + 10;
- }
-
- return -1; // Return -1 if i was not a hex digit!
- }
-
- // convertHexByte()
- // Returns an integer value for a hex string representing a byte.
-
- unsigned char FilterChainUrlDecode::convertHexByte(unsigned char* x) {
-
- unsigned char working = convertHexDigit(x[1]); // Convert the low order nybl.
- working = working + (16 * convertHexDigit(x[0])); // Convert the high order nybl.
- return working; // Return the result.
- }
-
- unsigned char FilterChainUrlDecode::GetD2() { // Get the second digit.
- unsigned char c = FilterChain::GetByte(); // Read the raw byte.
- AddToBfr(c); // Add it to the buffer.
-
- // At this point the end of our DecodeBfr has a c_str of a small hex integer (we hope)
- // that we can decode. If we successfully decode it then we will replace %xx in our
- // DecodeBfr with the character that is represented by that byte.
-
- // Do we really have an encoded byte to decode?
-
- int codepos = DecodeLength-3; // Grab the position of the hex.
- if(
- DecodeBfr[codepos]=='%' && // If the first char is %
- isHexDigit(DecodeBfr[codepos+1]) && // and the second is a hex digit
- isHexDigit(DecodeBfr[codepos+2]) // and the third is a hex digit
- ){ // then we can decode the string.
-
- unsigned char q = convertHexByte(DecodeBfr+codepos+1); // Decode the byte.
- if(q >= 32) { // If the byte is in range then
- DecodeBfr[codepos] = q; // Replace the % with the byte
- DecodeBfr[--DecodeLength] = 0; // backup over and erase the hex
- DecodeBfr[--DecodeLength] = 0; // digits themselves.
- DecodeFlag = true; // Set the decode flag.
- }
-
- // If we decided the byte was not decodable for some reason then the original data
- // remains in the buffer as it was originally read.
- }
-
- Internal = &FilterChainUrlDecode::Root; // Get ready to decode more.
-
- return c; // Always return the byte.
- }
-
- unsigned char FilterChainUrlDecode::Inject() { // Inject the decoded result.
- if(
- DecodeBfr[DecodePosition] && // If we've got more bytes
- DecodePosition < sizeof(DecodeBfr)) { // and we're safely in our buffer
- return DecodeBfr[DecodePosition++]; // then return the byte and move
- } // ahead.
- // Once the buffer is empty we
- Clear(); // clear out the system, and go
- Internal = &FilterChainUrlDecode::Bypass; // back to bypass mode. Then
- return GetByte(); // return the next bypassed byte.
- }
-
- ////////////////////////////////////////////////////////////////////////////////
- // FilterChainHeaderAnalysis
- ////////////////////////////////////////////////////////////////////////////////
-
- int FilterChainHeaderAnalysis::FollowPattern(char c) { // Follow the pattern.
- c = tolower(c); // Convert c to lower case.
- if(c != MatchPattern[MatchIndex]) { // If c doesn't match the pattern
- return -1; // then return -1 indicating we fell off.
- } else { // If it did match the pattern then
- MatchIndex++; // move ahead to the next byte and
- if(0 == MatchPattern[MatchIndex]) { // take a look. If that's all there was
- return 0; // then we've finished :-)
- }
- } // If we matched and there's more to do
- return 1; // then we return 1.
- }
-
- unsigned char FilterChainHeaderAnalysis::doSeekNL() { // Looking for a new line.
- unsigned char c = GetCheckedByte(); // Get the next byte (and check for high bits)
- if('\n' == c) { // If it was a new line then
- Mode = &FilterChainHeaderAnalysis::doSeekDispatch; // move on to the next mode
- } // for the next byte and
- return c; // return the byte we got.
- }
-
- unsigned char FilterChainHeaderAnalysis::doSeekDispatch() { // Looking at the first char after NL.
- unsigned char c = GetCheckedByte(); // Get the next byte (and check for high bits)
- switch(tolower(c)) { // Switch modes based on what this byte is.
- case '\n': { // If it is a New Line then the headers are
- Mode = &FilterChainHeaderAnalysis::doEndOfHeaders; // finished - so we set up our EndOfHeaders
- return GetByte(); // mode and return the next byte from there.
- break; // The extra NL will be emitted at the end.
- }
- case 'r': { // If it is an R as in (R)eceived:
- SetFollowPattern("eceived:"); // establish the follow pattern and
- Mode = &FilterChainHeaderAnalysis::doReceived; // switch to doReceived mode.
- break;
- }
- case 'f': { // If it is an F as in (F)rom:
- SetFollowPattern("rom:"); // establish the follow pattern and
- Mode = &FilterChainHeaderAnalysis::doFrom; // switch to doFrom mode.
- break;
- }
- case 't': { // If it is an T as in (T)o:
- SetFollowPattern("o:"); // establish the follow pattern and
- Mode = &FilterChainHeaderAnalysis::doTo; // switch to doTo mode.
- break;
- }
- case 'c': { // If it is a C as in (C)C:
- SetFollowPattern("c:"); // establish the follow pattern and
- Mode = &FilterChainHeaderAnalysis::doCC; // switch to doCC mode.
- break;
- }
- case 'm': { // If it is an M as in (M)essage-id:
- SetFollowPattern("essage-id:"); // establish the follow pattern and
- Mode = &FilterChainHeaderAnalysis::doMessageID; // switch to doMessageID mode.
- break;
- }
- case 'd': { // If it is a D as in (D)ate:
- SetFollowPattern("ate:"); // establish the follow pattern and
- Mode = &FilterChainHeaderAnalysis::doDate; // switch to doDate mode.
- break;
- }
- case 's': { // If it is an S as in (S)ubject:
- SetFollowPattern("ubject:"); // establish the follow pattern and
- Mode = &FilterChainHeaderAnalysis::doSubject; // switch to doSubject mode.
- break;
- }
- default: { // If we don't recognize the byte then
- Mode = &FilterChainHeaderAnalysis::doSeekNL; // go back to looking for a new line.
- break;
- }
- } // Once all of our mode switching is handled
- return c; // we return the byte we got.
- }
-
- unsigned char FilterChainHeaderAnalysis::doReceived() { // Identifying a Received: header.
- unsigned char c = FilterChain::GetByte(); // Get the next byte of the header tag.
- switch(FollowPattern(c)) { // See if we're still on the path.
- case -1: { // If we're not on the right tag then
- Mode = &FilterChainHeaderAnalysis::doSeekNL; // go back to looking for the next one.
- break;
- }
- case 0: { // If we've found the end of our tag (match!)
- Mode = &FilterChainHeaderAnalysis::doFindIP; // start looking for the IP.
- IPToTest = ""; // Clear the IPToTest buffer.
- break;
- }
- default: { // If we're still following along then
- break; // keep on keepin' on.
- }
- } // Once we know what we're doing we
- return c; // return the character we got.
- }
-
- unsigned char FilterChainHeaderAnalysis::doFindIP() { // Seeking the [IP] in a Received header.
- unsigned char c = GetCheckedByte(); // Get a checked byte.
- switch(c) {
- case '[': { // If we find the [ then
- Mode = &FilterChainHeaderAnalysis::doTestIP; // set up to grab and test the IP.
- break;
- }
- case '\n': { // If we come across a newline then
- Mode = &FilterChainHeaderAnalysis::doSeekNL; // we must be lost so go back to basics.
- break;
- }
- default: { // For anything else we keep on going.
- break;
- }
- }
- return c; // Return the byte.
- }
-
- //// 20070614 _M Improved IP extraction from Received headers so that if the
- //// apparent IP contains any unusual bytes (anything other than digits or
- //// dots) then the attempt is abandoned.
-
- unsigned char FilterChainHeaderAnalysis::doTestIP() { // Gets and tests the [IP].
- unsigned char c = FilterChain::GetByte(); // Get the next byte.
- switch(c) {
- case ']': { // If we come to ] we've got it!
- IPTester.test(IPToTest, IPTestResult); // Do the test with this IP.
- if(0 == IPTestResult.length()) { // If the IP test wants us to truncate
- throw Empty("FilterChainHeaderAnalysis: Truncate"); // the message then throw Empty!
- } // Otherwise, proceed as per normal...
- SetOutputBuffer(IPTestResult); // Put the result in the output buffer.
- Mode = &FilterChainHeaderAnalysis::doInjectIPTestResult; // Set the mode to inject the result.
- break; // That will start on the next byte.
- }
- case '0': // IPs are made of digits and dots.
- case '1':
- case '2':
- case '3':
- case '4':
- case '5':
- case '6':
- case '7':
- case '8':
- case '9':
- case '.': { // Capture the IP between [ and ]
- IPToTest += c; // one byte at a time.
- break;
- }
- default: { // If we find anything else we must be
- Mode = &FilterChainHeaderAnalysis::doSeekNL; // lost so we go back to the basics.
- break;
- }
- }
- return c;
- }
-
- unsigned char FilterChainHeaderAnalysis::doFrom() { // Identifying a From: header.
- unsigned char c = FilterChain::GetByte(); // Get the next byte of the header tag.
- switch(FollowPattern(c)) { // See if we're still on the path.
- case -1: { // If we're not on the right tag then
- Mode = &FilterChainHeaderAnalysis::doSeekNL; // go back to looking for the next one.
- break;
- }
- case 0: { // If we've found the end of our tag (match!)
- Mode = &FilterChainHeaderAnalysis::doSeekNL; // start looking for the the next tag and
- FoundFrom = true; // record that this tag was present.
- break;
- }
- default: { // If we're still following along then
- break; // keep on keepin' on.
- }
- } // Once we know what we're doing we
- return c; // return the character we got.
- }
-
- unsigned char FilterChainHeaderAnalysis::doTo() { // Identifying a To: header.
- unsigned char c = FilterChain::GetByte(); // Get the next byte of the header tag.
- switch(FollowPattern(c)) { // See if we're still on the path.
- case -1: { // If we're not on the right tag then
- Mode = &FilterChainHeaderAnalysis::doSeekNL; // go back to looking for the next one.
- break;
- }
- case 0: { // If we've found the end of our tag (match!)
- Mode = &FilterChainHeaderAnalysis::doSeekNL; // start looking for the the next tag and
- FoundTo = true; // record that this tag was present.
- break;
- }
- default: { // If we're still following along then
- break; // keep on keepin' on.
- }
- } // Once we know what we're doing we
- return c; // return the character we got.
- }
-
- unsigned char FilterChainHeaderAnalysis::doCC() { // Identifying a CC: header.
- unsigned char c = FilterChain::GetByte(); // Get the next byte of the header tag.
- switch(FollowPattern(c)) { // See if we're still on the path.
- case -1: { // If we're not on the right tag then
- Mode = &FilterChainHeaderAnalysis::doSeekNL; // go back to looking for the next one.
- break;
- }
- case 0: { // If we've found the end of our tag (match!)
- Mode = &FilterChainHeaderAnalysis::doSeekNL; // start looking for the the next tag and
- FoundCC = true; // record that this tag was present.
- break;
- }
- default: { // If we're still following along then
- break; // keep on keepin' on.
- }
- } // Once we know what we're doing we
- return c; // return the character we got.
- }
-
- unsigned char FilterChainHeaderAnalysis::doMessageID() { // Identifying a MessageID header.
- unsigned char c = FilterChain::GetByte(); // Get the next byte of the header tag.
- switch(FollowPattern(c)) { // See if we're still on the path.
- case -1: { // If we're not on the right tag then
- Mode = &FilterChainHeaderAnalysis::doSeekNL; // go back to looking for the next one.
- break;
- }
- case 0: { // If we've found the end of our tag (match!)
- Mode = &FilterChainHeaderAnalysis::doSeekNL; // start looking for the the next tag and
- FoundMessageID = true; // record that this tag was present.
- break;
- }
- default: { // If we're still following along then
- break; // keep on keepin' on.
- }
- } // Once we know what we're doing we
- return c; // return the character we got.
- }
-
- unsigned char FilterChainHeaderAnalysis::doDate() { // Identifying a Date: header.
- unsigned char c = FilterChain::GetByte(); // Get the next byte of the header tag.
- switch(FollowPattern(c)) { // See if we're still on the path.
- case -1: { // If we're not on the right tag then
- Mode = &FilterChainHeaderAnalysis::doSeekNL; // go back to looking for the next one.
- break;
- }
- case 0: { // If we've found the end of our tag (match!)
- Mode = &FilterChainHeaderAnalysis::doSeekNL; // start looking for the the next tag and
- FoundDate = true; // record that this tag was present.
- break;
- }
- default: { // If we're still following along then
- break; // keep on keepin' on.
- }
- } // Once we know what we're doing we
- return c; // return the character we got.
- }
-
- unsigned char FilterChainHeaderAnalysis::doSubject() { // Identifying a Subject: header.
- unsigned char c = FilterChain::GetByte(); // Get the next byte of the header tag.
- switch(FollowPattern(c)) { // See if we're still on the path.
- case -1: { // If we're not on the right tag then
- Mode = &FilterChainHeaderAnalysis::doSeekNL; // go back to looking for the next one.
- break;
- }
- case 0: { // If we've found the end of our tag (match!)
- Mode = &FilterChainHeaderAnalysis::doSeekNL; // start looking for the the next tag and
- FoundSubject = true; // record that this tag was present.
- break;
- }
- default: { // If we're still following along then
- break; // keep on keepin' on.
- }
- } // Once we know what we're doing we
- return c; // return the character we got.
- }
-
- unsigned char FilterChainHeaderAnalysis::doEndOfHeaders() { // IdentifyEndOfHeaders & Emit Results.
- // We know we've reached the end of the headers so now
- // we have to formulate the results we want to inject and
- // er... inject them.
-
- EndOfHeaderResults = "X-SNFHDR: "; // Emit an X header (internal only)
- if(MissingCC()) { EndOfHeaderResults.append("-CC "); } // Emit -CC if no CC header.
- if(MissingTo()) { EndOfHeaderResults.append("-TO "); } // Emit -TO if no TO header (together no to)
- if(MissingFrom()) { EndOfHeaderResults.append("-FROM "); } // Emit -FROM if no FROM header.
- if(MissingDate()) { EndOfHeaderResults.append("-DATE "); } // Emit -DATE if no DATE header.
- if(MissingMessageID()) { EndOfHeaderResults.append("-MESSAGEID "); } // Emit -MESSAGEID if no MESSAGE-ID header.
- if(MissingSubject()) { EndOfHeaderResults.append("-SUBJECT "); } // Emit -SUBJECT if no SUBJECT header.
- if(HighBitCharacters()) { EndOfHeaderResults.append("+HIGHBIT"); } // Emit +HIGHBIT if non-ascii chars present.
- EndOfHeaderResults.append("\n\n"); // Emit the double newline - end of headers.
-
- SetOutputBuffer(EndOfHeaderResults); // Setup the output string.
- Mode = &FilterChainHeaderAnalysis::doInjectAnalysis; // Switch to the output injection mode.
-
- return GetByte(); // Return the first byte from there :-)
- }
-
- void FilterChainHeaderAnalysis::SetOutputBuffer(string& s) { // Setup the OutputBuffer.
- OutputBuffer = (char*) s.c_str(); OutputIndex = 0; // Capture the c_str and reset the index.
- }
-
- unsigned char FilterChainHeaderAnalysis::doInjectIPTestResult() { // Inject OutputBuffer and go to doSeekNL.
- unsigned char c = OutputBuffer[OutputIndex++]; // Get the next byte in the output buffer.
- if(0 == c) { // If it is the null terminator then we
- Mode = &FilterChainHeaderAnalysis::doSeekNL; // go back to seeking lines and return that
- return GetByte(); // byte instead.
- } // If we have a normal byte then we
- return c; // return it.
- }
-
- unsigned char FilterChainHeaderAnalysis::doInjectAnalysis() { // Inject OutputBuffer and go to doOff.
- unsigned char c = OutputBuffer[OutputIndex++]; // Get the next byte in the output buffer.
- if(0 == c) { // If it is the null terminator then we
- Mode = &FilterChainHeaderAnalysis::doOff; // go back to seeking lines and return that
- return GetByte(); // byte instead.
- } // If we have a normal byte then we
- return c; // return it.
- }
|