2022-05-27 18:26:48 +02:00
|
|
|
#include "DataIngestionLayer.h"
|
|
|
|
#include "CommandlineInterface.h"
|
2022-05-31 15:32:17 +02:00
|
|
|
#include "Bases.h"
|
2022-05-27 18:26:48 +02:00
|
|
|
#include <iostream>
|
2022-05-31 14:25:23 +02:00
|
|
|
#include <istream>
|
|
|
|
#include <fstream>
|
|
|
|
#include <sstream>
|
2022-05-27 18:26:48 +02:00
|
|
|
#include <cstring>
|
|
|
|
|
|
|
|
using namespace IO;
|
|
|
|
|
|
|
|
void DataIngestionLayer::Init() {
|
|
|
|
|
|
|
|
// Set our istream
|
|
|
|
switch (Configuration::inputFrom) {
|
|
|
|
|
|
|
|
// Are we reading from stdin?
|
|
|
|
case Configuration::INPUT_FROM::STDIN:
|
|
|
|
|
|
|
|
// Redirect our istream to stdin
|
|
|
|
in = &std::cin;
|
|
|
|
break;
|
|
|
|
|
|
|
|
// Are we reading from a file?
|
|
|
|
case Configuration::INPUT_FROM::FILE:
|
|
|
|
|
|
|
|
// Open the file
|
|
|
|
ifs.open(
|
|
|
|
Configuration::inputFilename,
|
|
|
|
std::ios::in | std::ios::binary
|
|
|
|
);
|
|
|
|
|
|
|
|
// A little bit of error handling
|
|
|
|
if (!ifs.good()) {
|
|
|
|
throw std::runtime_error( "Unable to open infilestream!");
|
|
|
|
}
|
|
|
|
|
|
|
|
// Redirect our istream to this infilestream
|
|
|
|
in = &ifs;
|
|
|
|
break;
|
|
|
|
|
|
|
|
// Are we reading from a parameter?
|
|
|
|
case Configuration::INPUT_FROM::PARAMETER:
|
|
|
|
|
|
|
|
// Create an instringstream with our parameter
|
|
|
|
iss = std::istringstream(
|
|
|
|
CommandlineInterface::Get()["--intext"].GetString()
|
|
|
|
);
|
|
|
|
|
2022-05-31 10:44:05 +02:00
|
|
|
// Redirect our istream to this instringstream
|
2022-05-27 18:26:48 +02:00
|
|
|
in = &iss;
|
|
|
|
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
2022-05-31 15:32:17 +02:00
|
|
|
// Determine which iobase format to read in
|
|
|
|
// If we are decrypting, input is formatted.
|
|
|
|
if (Configuration::activeModule == Configuration::MODULE::DECRYPTION) {
|
|
|
|
inFormat = Configuration::iobaseFormat;
|
|
|
|
}
|
|
|
|
// If we are doing anything else, input is raw bytes.
|
|
|
|
else {
|
|
|
|
inFormat = Configuration::IOBASE_FORMAT::BASE_BYTES;
|
|
|
|
}
|
|
|
|
|
2022-05-31 14:25:23 +02:00
|
|
|
initialized = true;
|
|
|
|
reachedEof = false;
|
|
|
|
|
2022-05-31 15:32:17 +02:00
|
|
|
return;
|
|
|
|
}
|
2022-05-27 18:26:48 +02:00
|
|
|
|
2022-05-31 15:32:17 +02:00
|
|
|
void DataIngestionLayer::Destruct() {
|
2022-05-27 18:26:48 +02:00
|
|
|
|
2022-05-31 15:32:17 +02:00
|
|
|
if (Configuration::inputFrom == Configuration::INPUT_FROM::FILE) {
|
|
|
|
ifs.close();
|
|
|
|
}
|
2022-05-27 18:26:48 +02:00
|
|
|
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2022-05-31 14:25:23 +02:00
|
|
|
void DataIngestionLayer::ReadBlock() {
|
|
|
|
if (!initialized) {
|
|
|
|
throw std::runtime_error("Attempted to read on uninitialized DataIngestionLayer!");
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!reachedEof) {
|
2022-05-31 15:32:17 +02:00
|
|
|
// This should really account for iobase recoding!
|
|
|
|
|
2022-05-31 14:25:23 +02:00
|
|
|
// Create buffer to read into
|
|
|
|
char buf[Block::BLOCK_SIZE];
|
|
|
|
memset(buf, 0, sizeof(buf));
|
|
|
|
|
|
|
|
// Read
|
|
|
|
in->read(buf, sizeof(buf));
|
|
|
|
|
|
|
|
// Fetch how much we've read
|
|
|
|
const std::size_t n_bytes_read = in->gcount();
|
|
|
|
|
|
|
|
// Is this fewer bytes than we requested?
|
|
|
|
if (n_bytes_read < sizeof(buf)) {
|
|
|
|
// Yes: EOF reached.
|
|
|
|
reachedEof = true;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Construct a Block from this buf
|
|
|
|
Block block;
|
2022-05-31 15:32:17 +02:00
|
|
|
block.FromByteString(std::string(buf, sizeof(buf)));
|
2022-05-31 14:25:23 +02:00
|
|
|
|
|
|
|
// Enqueue it
|
|
|
|
blocks.emplace(block);
|
|
|
|
}
|
|
|
|
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
bool DataIngestionLayer::ReachedEOF() {
|
|
|
|
return reachedEof;
|
|
|
|
}
|
|
|
|
|
|
|
|
bool DataIngestionLayer::IsBlockReady() {
|
2022-05-31 15:32:17 +02:00
|
|
|
// We're not ready, if we haven't reached EOF, if we should puffer
|
|
|
|
// the input.
|
|
|
|
if (
|
|
|
|
(CommandlineInterface::Get().HasParam("--puffer-input")) &&
|
|
|
|
(!reachedEof)
|
|
|
|
) {
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
// If we're not puffering, just return whether or not
|
|
|
|
// we have any blocks...
|
2022-05-31 14:25:23 +02:00
|
|
|
return blocks.size() > 0;
|
|
|
|
}
|
|
|
|
|
2022-05-31 15:32:17 +02:00
|
|
|
bool DataIngestionLayer::IsFinished() {
|
|
|
|
return (reachedEof) && (blocks.size() == 0);
|
|
|
|
}
|
|
|
|
|
2022-05-31 14:25:23 +02:00
|
|
|
/// Removes and returns the oldest queued block.
///
/// @return a copy of the block that was at the front of the queue.
/// @throws std::runtime_error if IsBlockReady() reports that no block
///         may be fetched.
Block DataIngestionLayer::GetNextBlock() {
  if (IsBlockReady()) {
    // std::queue::pop() returns nothing, so copy the front element first.
    const Block nextBlock = blocks.front();
    blocks.pop();
    return nextBlock;
  }

  throw std::runtime_error("Attempted to get the next block, but there are none left!");
}
|
2022-05-27 18:26:48 +02:00
|
|
|
|
|
|
|
std::istream* DataIngestionLayer::in;
|
|
|
|
std::ifstream DataIngestionLayer::ifs;
|
|
|
|
std::istringstream DataIngestionLayer::iss;
|
2022-05-31 14:25:23 +02:00
|
|
|
bool DataIngestionLayer::reachedEof = false;
|
|
|
|
bool DataIngestionLayer::initialized = false;
|
2022-05-31 15:32:17 +02:00
|
|
|
Configuration::IOBASE_FORMAT DataIngestionLayer::inFormat;
|
2022-05-31 14:25:23 +02:00
|
|
|
std::queue<Block> DataIngestionLayer::blocks;
|
2022-05-27 18:26:48 +02:00
|
|
|
|