123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133 |
- // roller.hpp
- // Copyright (C) 2022 MicroNeil Research Corporation.
- //
- // This software is released under the MIT license. See LICENSE.TXT.
- //
- // Roller is a naive rolling hash system for rapid string searches.
- // Roller32 accommodates matches up to 4 bytes.
- // Roller64 accommodates matches up to 8 bytes.
-
- #pragma once
-
- #include <cstdint>
- #include <ctype.h>
- #include <string>
- #include <vector>
-
- #include <iostream>
- #include <bitset>
-
- namespace codedweller {
-
- class Roller32 {
- private:
- uint_fast32_t roller;
-
- public:
- Roller32() : roller(0) {}
- uint_fast32_t value() const { return roller; }
- uint_fast32_t add(uint8_t byte) {
- roller = roller << 8 | byte;
- return roller;
- }
- };
-
- class Roller64 {
- private:
- uint_fast64_t roller;
-
- public:
- Roller64() : roller(0) {}
- uint_fast64_t value() const { return roller; }
- uint_fast64_t add(uint8_t byte) {
- roller = roller << 8 | byte;
- return roller;
- }
- };
-
- namespace roller{
- static const unsigned char caseBit = 'A' ^ 'a';
- static const unsigned char caseMask = ~caseBit;
-
- unsigned char applyCaseSensitivity(unsigned char byte, bool caseSensitive) {
- if (true==caseSensitive) return byte;
- if (false==isalpha(byte) && 0xFF != byte) return byte;
- unsigned char insensitiveByte = byte & caseMask;
- return insensitiveByte;
- }
-
- unsigned char appropriateBitMask(unsigned char byte, bool caseSensitive) {
- if (true==caseSensitive) return 0xff;
- if (false==isalpha(byte)) return 0xff;
- return caseMask;
- }
-
- template <class D, class T>
- void compute(D data, bool caseSensitive, T& matcher, T& masker, size_t& matchLength) {
- matchLength = std::min(sizeof(matcher.value()), data.size());
- for(size_t count = 0; count < matchLength; count++) {
- unsigned char theByte = data.at(count);
- matcher.add(roller::applyCaseSensitivity(theByte, caseSensitive));
- masker.add(roller::appropriateBitMask(theByte, caseSensitive));
- }
- }
- }
-
- class RollerMatch32 {
- private:
- uint_fast32_t match;
- uint_fast32_t mask;
- size_t matchLength;
-
- public:
- RollerMatch32(const std::vector<unsigned char> pattern, bool caseSensitive=true) {
- Roller32 matcher, masker;
- roller::compute(pattern, caseSensitive, matcher, masker, matchLength);
- match = matcher.value();
- mask = masker.value();
- }
-
- RollerMatch32(const std::string pattern, bool caseSensitive=true) {
- Roller32 matcher, masker;
- roller::compute(pattern, caseSensitive, matcher, masker, matchLength);
- match = matcher.value();
- mask = masker.value();
- }
-
- bool matches(const Roller32 roller) {
- return (match == (roller.value() & mask));
- }
-
- size_t size() { return matchLength; }
- };
-
- class RollerMatch64 {
- private:
- uint_fast64_t match;
- uint_fast64_t mask;
- size_t matchLength;
-
- public:
- RollerMatch64(const std::vector<unsigned char> pattern, bool caseSensitive=true) {
- Roller64 matcher, masker;
- roller::compute(pattern, caseSensitive, matcher, masker, matchLength);
- match = matcher.value();
- mask = masker.value();
- }
-
- RollerMatch64(const std::string pattern, bool caseSensitive=true) {
- Roller64 matcher, masker;
- roller::compute(pattern, caseSensitive, matcher, masker, matchLength);
- match = matcher.value();
- mask = masker.value();
- }
-
- bool matches(const Roller64 roller) {
- return (match == (roller.value() & mask));
- }
-
- size_t size() { return matchLength; }
- };
- }
-
|