// roller.hpp
// Copyright (C) 2022 MicroNeil Research Corporation.
//
// This software is released under the MIT license. See LICENSE.TXT.
//
// Roller is a naive rolling hash system for rapid string searches.
// Roller32 accommodates matches up to 4 bytes.
// Roller64 accommodates matches up to 8 bytes.

#pragma once

#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <string>
#include <vector>

namespace codedweller {

// Roller32 is a byte-wise rolling window stored in one uint_fast32_t.
//
// Each add() shifts the window left by 8 bits and places the new byte in the
// low 8 bits; the oldest byte is discarded by ordinary unsigned overflow.
// The window therefore remembers the most recent sizeof(uint_fast32_t) bytes,
// which is more than 4 on platforms where uint_fast32_t is wider than 32 bits.
class Roller32 {
private:
    std::uint_fast32_t roller;  // Most recent bytes; the newest is in the low 8 bits.

public:
    // Starts with an all-zero window.
    Roller32() : roller(0) {}

    // Returns the current window contents.
    std::uint_fast32_t value() const { return roller; }

    // Rolls one byte into the window and returns the updated contents.
    std::uint_fast32_t add(std::uint8_t byte) {
        roller = (roller << 8) | byte;
        return roller;
    }
};

// Roller64 is the same rolling window as Roller32, stored in a uint_fast64_t,
// so it remembers the most recent sizeof(uint_fast64_t) bytes.
class Roller64 {
private:
    std::uint_fast64_t roller;  // Most recent bytes; the newest is in the low 8 bits.

public:
    // Starts with an all-zero window.
    Roller64() : roller(0) {}

    // Returns the current window contents.
    std::uint_fast64_t value() const { return roller; }

    // Rolls one byte into the window and returns the updated contents.
    std::uint_fast64_t add(std::uint8_t byte) {
        roller = (roller << 8) | byte;
        return roller;
    }
};

// RollerMatch32 is a precomputed pattern that can be tested against a
// Roller32 window with one mask and one compare.
//
// Only the first sizeof(uint_fast32_t) bytes of a pattern are used; longer
// patterns are silently truncated to that prefix. An empty pattern produces
// a zero mask and therefore matches every window.
class RollerMatch32 {
private:
    std::uint_fast32_t match;  // Pattern bytes laid out exactly as a Roller32 window.
    std::uint_fast32_t mask;   // 0xFF in every window byte the pattern covers.

    // Builds match and mask from any indexable byte sequence that has size().
    // Shared by both constructors so the two overloads cannot drift apart.
    template <typename Bytes>
    void compile(const Bytes& pattern) {
        const std::size_t ingest =
            std::min<std::size_t>(sizeof(std::uint_fast32_t), pattern.size());
        Roller32 matcher;
        Roller32 masker;
        for (std::size_t index = 0; index < ingest; ++index) {
            // index < ingest <= pattern.size(), so unchecked access is safe.
            // The cast makes the char -> byte conversion explicit for
            // std::string patterns, where plain char may be signed.
            matcher.add(static_cast<std::uint8_t>(pattern[index]));
            masker.add(0xFF);
        }
        match = matcher.value();
        mask = masker.value();
    }

public:
    // Builds a matcher from raw bytes.
    RollerMatch32(const std::vector<unsigned char>& pattern) : match(0), mask(0) {
        compile(pattern);
    }

    // Builds a matcher from the characters of a string (embedded NULs count).
    RollerMatch32(const std::string& pattern) : match(0), mask(0) {
        compile(pattern);
    }

    // Returns true when the most recent bytes rolled into `roller` equal the
    // (possibly truncated) pattern.
    bool matches(const Roller32 roller) const {
        return match == (roller.value() & mask);
    }
};

// RollerMatch64 is a precomputed pattern that can be tested against a
// Roller64 window with one mask and one compare.
//
// Only the first sizeof(uint_fast64_t) bytes of a pattern are used; longer
// patterns are silently truncated to that prefix. An empty pattern produces
// a zero mask and therefore matches every window.
class RollerMatch64 {
private:
    std::uint_fast64_t match;  // Pattern bytes laid out exactly as a Roller64 window.
    std::uint_fast64_t mask;   // 0xFF in every window byte the pattern covers.

    // Builds match and mask from any indexable byte sequence that has size().
    // Shared by both constructors so the two overloads cannot drift apart.
    template <typename Bytes>
    void compile(const Bytes& pattern) {
        const std::size_t ingest =
            std::min<std::size_t>(sizeof(std::uint_fast64_t), pattern.size());
        Roller64 matcher;
        Roller64 masker;
        for (std::size_t index = 0; index < ingest; ++index) {
            // index < ingest <= pattern.size(), so unchecked access is safe.
            // The cast makes the char -> byte conversion explicit for
            // std::string patterns, where plain char may be signed.
            matcher.add(static_cast<std::uint8_t>(pattern[index]));
            masker.add(0xFF);
        }
        match = matcher.value();
        mask = masker.value();
    }

public:
    // Builds a matcher from raw bytes.
    RollerMatch64(const std::vector<unsigned char>& pattern) : match(0), mask(0) {
        compile(pattern);
    }

    // Builds a matcher from the characters of a string (embedded NULs count).
    RollerMatch64(const std::string& pattern) : match(0), mask(0) {
        compile(pattern);
    }

    // Returns true when the most recent bytes rolled into `roller` equal the
    // (possibly truncated) pattern.
    bool matches(const Roller64 roller) const {
        return match == (roller.value() & mask);
    }
};

}  // namespace codedweller