• Include necessary libraries:

    #include <iostream>
    #include <sstream>
    #include <fstream>
    #include <string>
    #include <vector>
    #include <map>
    
  • Alias necessary things:

    using std::cout;
    using std::endl;
    using std::string;
    using std::vector;
    using Data = std::map<char, vector<size_t>>;  // <char, vector of positions>
    
    • Data — ordered map (std::map, a tree-based container rather than a hash table) with characters as keys and lists of positions as values
  • Declare necessary globals:
    // Paths used by main(): the text file to encode and the file that receives
    // the encoded form. They are never reassigned, so they are constants.
    const string filename_original = "input.txt";
    const string filename_encoded = "output.txt";
    
  • Declare necessary functions (they are defined further below):

    // Reads the file `filename` and returns a heap-allocated map from each
    // character to the positions where it occurs; nullptr if the file can't be opened.
    Data* encode(string filename);

    // Writes `data` to the file `filename`, one line per character.
    void dump(const Data& data, string filename);

    // Rebuilds the original text from the encoded file `filename`.
    string decode(string filename);
    
    • encode — encodes file with given filename and returns encoded data as Data*

    • dump — writes given data into file with given filename

    • decode — restores back the original text from encoded file with given filename and returns it as string


  • The main function:

    // Program entry point: encodes the input file, writes the encoded form,
    // then decodes it again and prints the restored text as a round-trip check.
    // Returns 0 on success and 1 if the input could not be encoded or the
    // encoded file could not be read back.
    int main() {
      // encode() hands back a heap-allocated Data (or nullptr on failure),
      // so check it before dereferencing and release it once it is written out.
      Data* encoded_data = encode(filename_original);
      if (encoded_data == nullptr) {
          return 1;
      }
      dump(*encoded_data, filename_encoded);
      delete encoded_data;

      string restored;
      try {
          restored = decode(filename_encoded);
      } catch (const char*) {
          // decode() throws a C string, after printing its own error message,
          // when the encoded file cannot be opened.
          return 1;
      }

      cout << "[+] Restored file:" << endl;
      cout << restored << endl;

      cout << "[@] END." << endl;
    }
    

    Explanation:

    • Simply encode — pass filename to function encode

      Data encoded_data = *encode(filename_original);
      
    • Then dump that data — pass encoded data and filename to function dump

      dump(encoded_data, filename_encoded);
      
    • Check the result by decoding it back — pass the encoded file's name to function decode

      string restored = decode(filename_encoded);
      
    • Finish by printing result

      cout << "[+] Restored file:" << endl;
      cout << restored << endl;
      

  • Encode function:

    // Builds a position index for the file `filename`: every character read is
    // mapped to the list of zero-based offsets at which it appears.
    // Returns a heap-allocated Data owned by the caller, or nullptr when the
    // file cannot be opened. Progress and errors are reported on stdout.
    Data* encode(string filename) {
      cout << "[*] Encoding..." << endl;
      std::ifstream input { filename };

      if (!input) {
          cout << "[-] Couldn't open " << filename << endl;
          return nullptr;
      }

      Data* positions = new Data;
      char symbol;  // Character most recently read

      for (size_t offset = 0; input.get(symbol); ++offset) {
          // operator[] creates an empty list the first time a character is seen
          (*positions)[symbol].push_back(offset);
      }

      input.close();
      cout << "[+] Encoded (" << filename << ")" << endl;
      return positions;
    }
    

    Explanation:

    • Initialize input file stream, Data object and other necessary variables

      std::ifstream fi { filename };  // Input file
      
      Data* data = new Data;
      size_t i = 0;  // Current position
      char c;  // Current char
      
    • This line reads one character per iteration; when no more characters can be read (end of file or a read error), fi.get(c) evaluates to false and the loop ends

      while (fi.get(c)) {
      
    • If character c has already been seen (c is among the keys of data), append the current position to its list (which is a vector<size_t>); otherwise create a new list for that character (= { i } — C++11 list-initialization, which builds a one-element vector). data is a pointer (Data*), so we must use the arrow (->) to access its methods. We must also dereference it (with (*data)) to get a Data and use brackets for key access ([c]).

      if (data->find(c) != data->end()) {
          (*data)[c].push_back(i);
      } else {
          (*data)[c] = { i };
      }
      
    • Do not forget to increase position...

      ++i;
      }
      
    • The job is done, so simply return generated data and do not forget to close the input-file-stream

      fi.close();
      return data;
      

  • Dump function:

    // Writes `data` to the file `filename`, one line per character: the
    // character itself followed by its positions, each preceded by a space.
    // Progress is reported on stdout; a failure to open the file or to write
    // to it is reported there as well (nothing is thrown).
    void dump(const Data& data, string filename) {
      cout << "[*] Writing to file..." << endl;
      std::ofstream fo { filename };  // Output file

      if (!fo) {
          cout << "[-] Couldn't write to " << filename << endl;
          return;
      }

      for (const auto& item : data) {
          fo << item.first;  // Write character
          for (size_t p : item.second) {  // Write positions
              fo << ' ' << p;
          }
          fo << '\n';  // Plain newline: no need to flush the file on every line
      }

      // close() flushes the buffer; a failed write or flush leaves the stream
      // in a failed state, so only report success when it is still good.
      fo.close();
      if (fo) {
          cout << "[+] File written (" << filename << ")" << endl;
      } else {
          cout << "[-] Couldn't write to " << filename << endl;
      }
    }
    

    Explanation:

    • Again — initialization. An output file stream this time

      std::ofstream fo { filename };  // Output file
      
    • Range-based for loop (C++11 syntax). item is a pair (std::pair<const char, vector<size_t>>) of a character and its list of positions

      for (auto&& item : data) {
      
    • Simply write the character, then its list of positions

      fo << item.first;  // Write character
      for (size_t p : item.second) {  // Write positions
          fo << ' ' << p;
      }
      fo << endl;
      
    • Close output-file-stream and we are done

      fo.close();
      

  • Decode function:

    // Restores the original text from the encoded file `filename`.
    // Each line of that file is read as one character followed by the
    // whitespace-separated positions at which the character occurs.
    // Returns the rebuilt text; positions never mentioned in the file are
    // left as '\0'. Throws a C string (const char*) if the file can't be opened.
    string decode(string filename) {
      cout << "[*] Trying to restore original..." << endl;
      std::ifstream fe { filename };  // Open the file we were asked to decode

      if (fe) {
          std::vector<char> v;  // Restored string as vector of chars
          // 1) Read first character
          char c;

          while (fe.get(c)) {
              // 2) Read rest of the line
              string line;
              std::getline(fe, line);
              std::stringstream ss { line };

              // Test the extraction itself so that a failed read never
              // reaches the vector with a stale or uninitialized position.
              size_t p;
              while (ss >> p) {
                  if (p >= v.size()) {  // Resize vector if need
                      v.resize(p+1);
                  }
                  v[p] = c;  // Set according character
              }
          }

          fe.close();
          cout << "[+] Original restored" << endl;
          return { v.begin(), v.end() };
      } else {
          cout << "[-] Couldn't open " << filename << endl;
          throw "Couldn't open file";
      }
    }
    

    Explanation:

    • Initialization. Still surprised?

      std::ifstream fe { filename_encoded };
      std::vector<char> v;  // Restored string as vector of chars
      char c;
      
    • These lines, together with the loop itself, do all the work — read the first character of the line and then read the rest of the line. If there are no more characters to read, the loop ends

      while (fe.get(c)) {
          string line;
          std::getline(fe, line);
      
    • Place the line that we just read into a stringstream. That beast allows us to extract any token we need. And we need integers — the positions of the character. Then we just set the character at the positions we pull. But we also have to resize our vector if a pulled position doesn't fit into its current size

      std::stringstream ss { line };
      
      while (ss) {
          size_t p;
          ss >> p;
          if (p >= v.size()) {  // Resize vector if need
              v.resize(p+1);
          }
          v[p] = c;  // Set according character
      }
      
    • Close the file stream and return the text we just built. The conversion from vector<char> to string is done by the string constructor (implicitly called with the {} syntax)

      fe.close();
      return { v.begin(), v.end() };
      

We are done.