asedeno.scripts.mit.edu Git - linux.git/commitdiff
Btrfs: heuristic: add detection of repeated data patterns
author: Timofey Titovets <nefelim4ag@gmail.com>
Thu, 28 Sep 2017 14:33:39 +0000 (17:33 +0300)
committer: David Sterba <dsterba@suse.com>
Wed, 1 Nov 2017 19:45:36 +0000 (20:45 +0100)
Walk over data sample and use memcmp to detect repeated patterns, like
zeros, but a bit more general.

Signed-off-by: Timofey Titovets <nefelim4ag@gmail.com>
Reviewed-by: David Sterba <dsterba@suse.com>
[ minor coding style fixes ]
Signed-off-by: David Sterba <dsterba@suse.com>
fs/btrfs/compression.c

index 0e1561cc9578b9c395c7c67052ec97c19652bfab..0d445c815ca27019579cac23b0e53ba91b01771e 100644 (file)
@@ -1222,6 +1222,14 @@ int btrfs_decompress_buf2page(const char *buf, unsigned long buf_start,
        return 1;
 }
 
+static bool sample_repeated_patterns(struct heuristic_ws *ws)
+{      /* Returns true when the first half of ws->sample repeats as the second half. */
+       const u32 half_of_sample = ws->sample_size / 2; /* rounds down: with an odd size the final byte is not compared */
+       const u8 *data = ws->sample;

+       return memcmp(&data[0], &data[half_of_sample], half_of_sample) == 0; /* memcmp yields 0 when both ranges match */
+}
+
 static void heuristic_collect_sample(struct inode *inode, u64 start, u64 end,
                                     struct heuristic_ws *ws)
 {
@@ -1301,6 +1309,11 @@ int btrfs_compress_heuristic(struct inode *inode, u64 start, u64 end)
 
        heuristic_collect_sample(inode, start, end, ws);
 
+       if (sample_repeated_patterns(ws)) {
+               ret = 1;
+               goto out;
+       }
+
        memset(ws->bucket, 0, sizeof(*ws->bucket)*BUCKET_SIZE);
 
        for (i = 0; i < ws->sample_size; i++) {
@@ -1308,8 +1321,8 @@ int btrfs_compress_heuristic(struct inode *inode, u64 start, u64 end)
                ws->bucket[byte].count++;
        }
 
+out:
        __free_workspace(0, ws_list, true);
-
        return ret;
 }