You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

roller.hpp 4.1KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133
  1. // roller.hpp
  2. // Copyright (C) 2022 MicroNeil Research Corporation.
  3. //
  4. // This software is released under the MIT license. See LICENSE.TXT.
  5. //
  6. // Roller is a naive rolling hash system for rapid string searches.
  7. // Roller32 accommodates matches up to 4 bytes.
  8. // Roller64 accommodates matches up to 8 bytes.
  9. #pragma once
  10. #include <cstdint>
  11. #include <ctype.h>
  12. #include <string>
  13. #include <vector>
  14. #include <iostream>
  15. #include <bitset>
  16. namespace codedweller {
  17. class Roller32 {
  18. private:
  19. uint_fast32_t roller;
  20. public:
  21. Roller32() : roller(0) {}
  22. uint_fast32_t value() const { return roller; }
  23. uint_fast32_t add(uint8_t byte) {
  24. roller = roller << 8 | byte;
  25. return roller;
  26. }
  27. };
  28. class Roller64 {
  29. private:
  30. uint_fast64_t roller;
  31. public:
  32. Roller64() : roller(0) {}
  33. uint_fast64_t value() const { return roller; }
  34. uint_fast64_t add(uint8_t byte) {
  35. roller = roller << 8 | byte;
  36. return roller;
  37. }
  38. };
  39. namespace roller{
  40. static const unsigned char caseBit = 'A' ^ 'a';
  41. static const unsigned char caseMask = ~caseBit;
  42. unsigned char applyCaseSensitivity(unsigned char byte, bool caseSensitive) {
  43. if (true==caseSensitive) return byte;
  44. if (false==isalpha(byte) && 0xFF != byte) return byte;
  45. unsigned char insensitiveByte = byte & caseMask;
  46. return insensitiveByte;
  47. }
  48. unsigned char appropriateBitMask(unsigned char byte, bool caseSensitive) {
  49. if (true==caseSensitive) return 0xff;
  50. if (false==isalpha(byte)) return 0xff;
  51. return caseMask;
  52. }
  53. template <class D, class T>
  54. void compute(D data, bool caseSensitive, T& matcher, T& masker, size_t& matchLength) {
  55. matchLength = std::min(sizeof(matcher.value()), data.size());
  56. for(size_t count = 0; count < matchLength; count++) {
  57. unsigned char theByte = data.at(count);
  58. matcher.add(roller::applyCaseSensitivity(theByte, caseSensitive));
  59. masker.add(roller::appropriateBitMask(theByte, caseSensitive));
  60. }
  61. }
  62. }
  63. class RollerMatch32 {
  64. private:
  65. uint_fast32_t match;
  66. uint_fast32_t mask;
  67. size_t matchLength;
  68. public:
  69. RollerMatch32(const std::vector<unsigned char> pattern, bool caseSensitive=true) {
  70. Roller32 matcher, masker;
  71. roller::compute(pattern, caseSensitive, matcher, masker, matchLength);
  72. match = matcher.value();
  73. mask = masker.value();
  74. }
  75. RollerMatch32(const std::string pattern, bool caseSensitive=true) {
  76. Roller32 matcher, masker;
  77. roller::compute(pattern, caseSensitive, matcher, masker, matchLength);
  78. match = matcher.value();
  79. mask = masker.value();
  80. }
  81. bool matches(const Roller32 roller) {
  82. return (match == (roller.value() & mask));
  83. }
  84. size_t size() { return matchLength; }
  85. };
  86. class RollerMatch64 {
  87. private:
  88. uint_fast64_t match;
  89. uint_fast64_t mask;
  90. size_t matchLength;
  91. public:
  92. RollerMatch64(const std::vector<unsigned char> pattern, bool caseSensitive=true) {
  93. Roller64 matcher, masker;
  94. roller::compute(pattern, caseSensitive, matcher, masker, matchLength);
  95. match = matcher.value();
  96. mask = masker.value();
  97. }
  98. RollerMatch64(const std::string pattern, bool caseSensitive=true) {
  99. Roller64 matcher, masker;
  100. roller::compute(pattern, caseSensitive, matcher, masker, matchLength);
  101. match = matcher.value();
  102. mask = masker.value();
  103. }
  104. bool matches(const Roller64 roller) {
  105. return (match == (roller.value() & mask));
  106. }
  107. size_t size() { return matchLength; }
  108. };
  109. }