文件压缩系统(未完待续


AMDyesIntelno/c_cpp_code

基于哈夫曼编码进行文件(文件夹)压缩

核心逻辑

int main(int argc, char *argv[]) {
    string name, text, key, plaintext, ciphertext;
    vector<string> path;
    int sum, size = 0;
    int weight[16] = {0};
    H_Tree HT = NULL;
    H_Code HC = NULL;
    if (strcmp(argv[1], "-h") == 0) {//help
        cout << "tar: ./xxxx -t file1 file2 file3 [-p password] -o file_out" << endl;
        cout << "extract: ./xxxx -e file [-p password]" << endl;
        /*
         -t /home/zen/asdf /home/zen/ubuntu_setup_env.sh -o /home/zen/out
         -e /home/zen/out
        */
    }
    else if (strcmp(argv[1], "-t") == 0) {//压缩
        int i = 2;
        while (i < argc && strcmp(argv[i], "-p") != 0 && strcmp(argv[i], "-o") != 0) {
            name = argv[i];
            list_dir(name, path);
            if (size == path.size()) {
                path.push_back(name);
            }
            size = path.size();
            ++i;
        }
        for (int i = 0; i < path.size(); ++i) {
            cout << path[i] << endl;
            tar_convert_file(path[i], text);
        }
        if (i < argc && strcmp(argv[i], "-p") == 0) {
            key = argv[i + 1];
            i += 2;
        }
        if (i < argc && strcmp(argv[i], "-o") == 0) {
            name = argv[i + 1];
        }
        get_weight(text, weight, plaintext);
        Create_Huffman_Tree(HT, weight);
        Create_Huffman_Code(HT, HC);
        encode(plaintext, text, HC);
        if (key.size() > 0) {
            encrypt(key, plaintext, ciphertext);
        }
        else {
            ciphertext.erase();
            ciphertext = plaintext;
        }
        binary_to_hex(ciphertext);
        write_file(name, ciphertext);
    }
    else if (strcmp(argv[1], "-e") == 0) {//解压
        name = argv[2];
        if (argc > 3) {
            if (strcmp(argv[3], "-p") == 0) {
                if (argc > 4) {
                    key = argv[4];
                }
            }
        }
        if (key.size() > 0) {
            extract_convert_file(name, ciphertext);
            decrypt(key, plaintext, ciphertext);
        }
        else {
            extract_convert_file(name, plaintext);
        }
        sum = recover(weight, plaintext);
        Create_Huffman_Tree(HT, weight);
        decode(plaintext, HT, sum);
        spilt_and_extract(plaintext);
    }
    delete HT;
    HT = NULL;
    delete HC;
    HC = NULL;
    return 0;
}

文件操作部分

1.递归地列出文件夹中的所有文件,包括子文件夹中的文件

How can I get the list of files in a directory using C or C++?

bool list_dir(string posi, vector<string> &path) {
    struct dirent *entry;
    DIR *dir = opendir(posi.c_str());

    if (dir == NULL) {
        return 0;
    }
    while ((entry = readdir(dir)) != NULL) {
        if (entry->d_name[0] != '.') {
            string temp = posi + '/' + string(entry->d_name);
            if (list_dir((temp.c_str()), path)) {//当前路径为文件夹,则进入文件夹不向vector中添加文件夹的路径
                continue;
            }
            else {//当前为文件,添加路径
                path.push_back(temp);
            }
        }
    }
    closedir(dir);
    return 1;
}

2.文件内容转换为16进制

void tar_convert_file(const string &name, string &text) {/*读取要压缩的文件并转换为16进制*/
    ifstream fp(name);
    if (!fp) {//当没有该文件时,直接退出
        exit(-1);
    }
    for (int i = 0; i < name.size(); ++i) {//文件名转16进制
        if (name[i] / 16 < 10) {
            text += (char) (name[i] / 16 + '0');
        }
        else {
            text += (char) (name[i] / 16 - 10 + 'A');
        }
        if (name[i] % 16 < 10) {
            text += (char) (name[i] % 16 + '0');
        }
        else {
            text += (char) (name[i] % 16 - 10 + 'A');
        }
    }
    text += "0A";//加一个换行符
    int ch, sum = 0;//注意ch用int类型表示,sum用来记录长度
    string temp;
    ch = fp.get();
    while (ch != EOF) {//文件内容转16进制
        if (ch / 16 < 10) {
            temp += (char) (ch / 16 + '0');
        }
        else {
            temp += (char) (ch / 16 - 10 + 'A');
        }
        if (ch % 16 < 10) {
            temp += (char) (ch % 16 + '0');
        }
        else {
            temp += (char) (ch % 16 - 10 + 'A');
        }
        ++sum;
        ch = fp.get();
    }
    string str_sum;
    str_sum = to_string(sum);
    if (str_sum.size() % 2) {//保证转换出的字符串的长度是偶数
        str_sum = "0" + str_sum;
    }
    text += str_sum;//写入文件大小
    text += "0A";//加一个换行符
    text += temp;//写入文件内容
    text += "0A";//加一个换行符
    fp.close();
    return;
}

void extract_convert_file(const string &name, string &text) {/*读取要解压的文件并转换为16进制*/
    ifstream fp(name);
    if (!fp) {//当没有该文件时,直接退出
        exit(-1);
    }
    int ch;
    char hex[8];
    ch = fp.get();
    while (ch != EOF) {//文件内容转16进制
        for (int j = 7; j >= 0; --j) {
            hex[j] = '0' + ch % 2;
            ch >>= 1;
        }
        for (int j = 0; j < 8; ++j) {
            text += hex[j];
        }
        ch = fp.get();
    }
    fp.close();
    return;
}

3.将哈夫曼编码后的二进制转换为16进制

void binary_to_hex(string &text) {/*二进制(哈夫曼编码后的文件内容)转16进制*/
    string temp, hex_string;
    int num;
    for (int i = 0; i < text.size(); i += 8) {
        for (int j = 0; j < 8; ++j) {
            temp += text[i + j];
        }
        num = stoi(temp, nullptr, 2);
        temp.erase();
        if (num / 16 < 10) {
            hex_string += (char) (num / 16 + '0');
        }
        else {
            hex_string += (char) (num / 16 - 10 + 'A');
        }
        if (num % 16 < 10) {
            hex_string += (char) (num % 16 + '0');
        }
        else {
            hex_string += (char) (num % 16 - 10 + 'A');
        }
    }
    text.erase();
    text = hex_string;
    return;
}

4.将哈夫曼解码后的文件内容分离并准备恢复文件

void spilt_and_extract(string &plaintext) {/*将哈夫曼解码后的文件内容(16进制)分离并重新写入*/
    string name, text;
    int check = 1, num = 0;
    string temp;
    for (int i = 0; i < plaintext.size(); i += 2) {
        if (check == 1) {/*文件名分离*/
            if (plaintext[i] == '0' && plaintext[i + 1] == 'A') {
                check = 2;
            }
            else {
                int temp1 = 0, temp2 = 0;
                if (plaintext[i] >= '0' && plaintext[i] <= '9') {
                    temp1 = plaintext[i] - '0';
                }
                else {
                    temp1 = plaintext[i] - 'A' + 10;
                }
                if (plaintext[i + 1] >= '0' && plaintext[i + 1] <= '9') {
                    temp2 = plaintext[i + 1] - '0';
                }
                else {
                    temp2 = plaintext[i + 1] - 'A' + 10;
                }
                name += (char) (temp1 * 16 + temp2);
            }
        }
        else if (check == 2) {/*文件大小分离*/
            if (plaintext[i] == '0' && plaintext[i + 1] == 'A') {
                check = 3;
                num = stoi(temp, nullptr, 10);
                temp.erase();
            }
            else {
                temp += plaintext[i];
                temp += plaintext[i + 1];
            }
        }
        else {/*文件内容分离*/
            int j;
            for (j = i; j < num * 2 + i; j += 2) {
                text += plaintext[j];
                text += plaintext[j + 1];
            }
            cout << name << endl;
            write_file(name, text);/*文件写入*/
            i = j;
            check = 1;
            name.erase();
            text.erase();
        }
    }
}

5.创建文件并写入

void write_file(const string &name, string &text) {/*将16进制的文件内容写到新的文件中*/
    for (int i = 1; i < name.size(); ++i) {
        if (name[i] == '/') {
            string temp(name, 0, i);
            ifstream fp(temp);
            if (!fp) {
                temp = "mkdir " + temp;
                //cout << temp << endl;
                system(temp.c_str());
                fp.close();
            }
        }
    }
    ofstream fp(name, ios_base::binary | ios_base::out);
    char hex_ch[3];
    hex_ch[2] = 0;
    stringstream input(text);
    input.flags(ios_base::hex);
    for (int i = 0; i < text.size(); i += 2) {
        hex_ch[0] = text[i];
        hex_ch[1] = text[i + 1];
        long data = strtol(hex_ch, nullptr, 16);
        fp << static_cast<unsigned char>(data & 0xff);
    }
    fp.close();
    return;
}

加/解密部分

异或加密/解密

void encrypt(string &key, string &plaintext, string &ciphertext) {/*异或加密*/
    string temp;
    temp = key;
    key.erase();
    char hex[8];
    for (int i = 0; i < temp.size(); ++i) {/*将key转换为二进制*/
        for (int j = 7; j >= 0; --j) {
            hex[j] = '0' + temp[i] % 2;
            temp[i] >>= 1;
        }
        for (int j = 0; j < 8; ++j) {
            key += hex[j];
        }
    }
    for (int i = 0, j = 0; i < plaintext.size(); ++i) {/*加密*/
        ciphertext += (char) ((key[j] - '0') ^ (plaintext[i] - '0') + 48);
        ++j;
        if (j == key.size()) {
            j = 0;
        }
    }
    return;
}

void decrypt(string &key, string &plaintext, string &ciphertext) {/*异或解密*/
    string temp;
    temp = key;
    key.erase();
    char hex[8];
    for (int i = 0; i < temp.size(); ++i) {/*将key转换为二进制*/
        for (int j = 7; j >= 0; --j) {
            hex[j] = '0' + temp[i] % 2;
            temp[i] >>= 1;
        }
        for (int j = 0; j < 8; ++j) {
            key += hex[j];
        }
    }
    for (int i = 0, j = 0; i < ciphertext.size(); ++i) {/*解密*/
        plaintext += (char) ((ciphertext[i] - '0') ^ (key[j] - '0') + 48);
        ++j;
        if (j == key.size()) {
            j = 0;
        }
    }
    return;
}

哈夫曼编/解码部分

1.统计字符权重

void get_weight(string text, int *weight, string &plaintext) {/*统计每个字符的权重*/
    int temp[16] = {0};
    for (int i = 0; i < text.size(); ++i) {
        if (text[i] >= '0' && text[i] <= '9') {
            weight[text[i] - '0']++;
            temp[text[i] - '0']++;
        }
        if (text[i] >= 'A' && text[i] <= 'F') {
            weight[text[i] - 'A' + 10]++;
            temp[text[i] - 'A' + 10]++;
        }
    }
    char hex[32];
    for (int i = 0; i < 16; ++i) {/*将字符权重写入明文*/
        for (int j = 31; j >= 0; --j) {
            hex[j] = '0' + temp[i] % 2;
            temp[i] >>= 1;
        }
        for (int j = 0; j < 32; ++j) {
            plaintext += hex[j];
        }
        memset(hex, 0, 32);
    }
    return;
}

2.在权重表中获取权重最小的两个字符/节点

void min_2(H_Tree HT, int _size, int &posi1, int &posi2) {//获取权重最小的两个结点
    //先获取两个根结点为零的结点,然后进行权重比较
    int i = 1;
    for (i; i <= _size; ++i) {
        if (HT[i].root == 0) {
            posi1 = i;
            ++i;
            break;
        }
    }
    for (i; i <= _size; ++i) {
        if (HT[i].root == 0) {
            posi2 = i;
            ++i;
            break;
        }
    }
    if (HT[posi1].weight >= HT[posi2].weight) {//使posi1的权重小于posi2的权重
        swap(posi1, posi2);
    }
    for (i; i <= _size; ++i) {
        if (HT[i].root == 0) {
            if (HT[i].weight < HT[posi2].weight) {
                if (HT[i].weight < HT[posi1].weight) {
                    posi2 = posi1;
                    posi1 = i;
                }
                else {
                    posi2 = i;
                }
            }
        }
    }
    return;
}

3.利用得到的两个最小权重字符/节点构建哈夫曼树

void Create_Huffman_Tree(H_Tree &HT, int *weight) {//构建哈夫曼树
    int m = 2 * 16 - 1;
    HT = new Huffman[m + 1];//分配空间要+1
    for (int i = 1; i <= 16; ++i) {//为哈夫曼树的叶子结点写入权重和字符
        if (i <= 10) {
            HT[i].ch = (char) ('0' + i - 1);
            HT[i].weight = weight[i - 1];
        }
        else {
            HT[i].ch = (char) ('A' + i - 11);
            HT[i].weight = weight[i - 1];
        }
    }
    for (int i = 17; i <= m; ++i) {//正式开始构建哈夫曼树,叶子结点的左孩子和右孩子结点置为0,便于译码
        int posi1, posi2;
        min_2(HT, i - 1, posi1, posi2);
        HT[posi1].root = i;
        HT[posi2].root = i;
        HT[i].left = posi1;
        HT[i].right = posi2;
        HT[i].weight = HT[posi1].weight + HT[posi2].weight;//叠加权重
    }
    return;
}

4.构建哈夫曼编码表

void Create_Huffman_Code(H_Tree HT, H_Code &HC) {//构建哈夫曼编码表,逆序编码
    HC = new char *[17];
    char *temp;
    temp = new char[16];
    temp[15] = 0;
    for (int i = 1; i <= 16; ++i) {
        int start = 15;
        for (int j = i, root = HT[i].root; root != 0; j = root, root = HT[root].root) {
            //从叶子结点开始,向前回溯,若该结点是前一个结点的左孩子,则记为0,否则,记为1
            //当回溯到哈夫曼树的根结点时,回溯结束,将当前所得字符串复制到哈夫曼编码表中,开始新的一轮编码
            if (HT[root].left == j) {//约定左分支为0,右分支为1,注意此时为逆序编码
                temp[--start] = '0';
            }
            else {
                temp[--start] = '1';
            }
        }
        HC[i] = new char[16 - start];
        strcpy(HC[i], &temp[start]);
    }
    delete[]temp;
    return;
}

5.利用哈夫曼编码表进行编码/利用哈夫曼树进行解码

void encode(string &plaintext, string text, H_Code HC) {//编码
    for (int i = 0; i < text.size(); ++i) {
        if (text[i] >= '0' && text[i] <= '9') {
            plaintext += HC[text[i] - '0' + 1];
        }
        if (text[i] >= 'A' && text[i] <= 'F') {
            plaintext += HC[text[i] - 'A' + 11];
        }
    }
    for (int i = 0; i < plaintext.size() % 8; ++i) {//保证2进制能够顺利转换为16进制,对哈夫曼编码后的文件内容进行补零
        plaintext += '0';
    }
    return;
}

void decode(string &plaintext, H_Tree HT, int sum) {//解码
    int root = 2 * 16 - 1;
    string temp;
    for (int i = 0; i < plaintext.size(); ++i) {
        if (sum <= 0) {//权重部分记录的sum
            break;
        }
        if (plaintext[i] == '0') {//遇到0向左子树搜索
            root = HT[root].left;
        }
        else if (plaintext[i] == '1') {//遇到1向右子树搜索
            root = HT[root].right;
        }
        if (HT[root].right == 0 && HT[root].left == 0) {//左右子树均为0,说明这是叶子结点
            temp += HT[root].ch;
            root = 2 * 16 - 1;//重置根结点
            --sum;
        }
    }
    plaintext.erase();
    plaintext = temp;
    return;
}

6.从文件中读取字符权重

int recover(int *weight, string &plaintext) {/*文件解密后(二进制形式),读取字符权重*/
    fill(weight, weight + 16, 0);
    int sum = 0;//sum用于在进行解码时剔除在保存字符权重时额外添加的0
    string temp;
    for (int i = 0, j = 0; i < 512; ++i) {
        if (i && i % 32 == 0) {
            weight[j] = stoi(temp, nullptr, 2);
            sum += weight[j++];
            temp.erase();
        }
        temp += plaintext[i];
    }
    weight[15] = stoi(temp, nullptr, 2);//最后一个权重在循环中没有读取
    sum += weight[15];
    plaintext.erase(0, 512);//清除明文中的权重记录
    return sum;
}

文章作者: Misaka
版权声明: 本博客所有文章除特別声明外,均采用 CC BY 4.0 许可协议。转载请注明来源 Misaka !
 上一篇
VMware虚拟机桥接网卡的设置 VMware虚拟机桥接网卡的设置
在使用虚拟机的过程中,出现了桥接网络无法获取IP的情况,现记录解决方法 1.在菜单栏中选择Edit→Virtual Network Editor 2.点击Change Settings,使用管理员权限修改 3.在桥接模式的设定中,选
2020-07-26 Misaka
下一篇 
Hyper-V构建CTF环境 Hyper-V构建CTF环境
使用原因在开启WSL2后,VMware的正式版不能正常使用,所以我一开始尝试了VMware的测试版(现在的15.5版本已经可以支持了) VMware的博客和对测试版的介绍 VMware的博客和对15.5版的介绍 总的来说,在使用测试版时没有
2020-07-26 Misaka
  目录