// roller.hpp // Copyright (C) 2022 MicroNeil Research Corporation. // // This software is released under the MIT license. See LICENSE.TXT. // // Roller is a naive rolling hash system for rapid string searches. // Roller32 accommodates matches up to 4 bytes. // Roller64 accommodates matches up to 8 bytes. #pragma once #include #include #include #include #include #include namespace codedweller { class Roller32 { private: uint_fast32_t roller; public: Roller32() : roller(0) {} uint_fast32_t value() const { return roller; } uint_fast32_t add(uint8_t byte) { roller = roller << 8 | byte; return roller; } }; class Roller64 { private: uint_fast64_t roller; public: Roller64() : roller(0) {} uint_fast64_t value() const { return roller; } uint_fast64_t add(uint8_t byte) { roller = roller << 8 | byte; return roller; } }; namespace roller{ static const unsigned char caseBit = 'A' ^ 'a'; static const unsigned char caseMask = ~caseBit; unsigned char applyCaseSensitivity(unsigned char byte, bool caseSensitive) { if (true==caseSensitive) return byte; if (false==isalpha(byte) && 0xFF != byte) return byte; unsigned char insensitiveByte = byte & caseMask; return insensitiveByte; } unsigned char appropriateBitMask(unsigned char byte, bool caseSensitive) { if (true==caseSensitive) return 0xff; if (false==isalpha(byte)) return 0xff; return caseMask; } template void compute(D data, bool caseSensitive, T& matcher, T& masker, size_t& matchLength) { matchLength = std::min(sizeof(matcher.value()), data.size()); for(size_t count = 0; count < matchLength; count++) { unsigned char theByte = data.at(count); matcher.add(roller::applyCaseSensitivity(theByte, caseSensitive)); masker.add(roller::appropriateBitMask(theByte, caseSensitive)); } } } class RollerMatch32 { private: uint_fast32_t match; uint_fast32_t mask; size_t matchLength; public: RollerMatch32(const std::vector pattern, bool caseSensitive=true) { Roller32 matcher, masker; roller::compute(pattern, caseSensitive, matcher, masker, matchLength); match = matcher.value(); mask = masker.value(); } RollerMatch32(const std::string pattern, bool caseSensitive=true) { Roller32 matcher, masker; roller::compute(pattern, caseSensitive, matcher, masker, matchLength); match = matcher.value(); mask = masker.value(); } bool matches(const Roller32 roller) { return (match == (roller.value() & mask)); } size_t size() { return matchLength; } }; class RollerMatch64 { private: uint_fast64_t match; uint_fast64_t mask; size_t matchLength; public: RollerMatch64(const std::vector pattern, bool caseSensitive=true) { Roller64 matcher, masker; roller::compute(pattern, caseSensitive, matcher, masker, matchLength); match = matcher.value(); mask = masker.value(); } RollerMatch64(const std::string pattern, bool caseSensitive=true) { Roller64 matcher, masker; roller::compute(pattern, caseSensitive, matcher, masker, matchLength); match = matcher.value(); mask = masker.value(); } bool matches(const Roller64 roller) { return (match == (roller.value() & mask)); } size_t size() { return matchLength; } }; }