Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2023-03-17 11:26:50

0001 #include <stdio.h>
0002 #include <string.h>
0003 #include <sys/types.h>
0004 #include <sys/stat.h>
0005 #include <fcntl.h>
0006 #include <stdlib.h>
0007 
0008 #include "dablooms.h"
0009 
0010 #define CAPACITY 5000
0011 #define ERROR_RATE 0.0002
0012 
/*
 * Strip the line terminator from `word` in place.
 *
 * The string is cut at the earliest carriage return or line feed it
 * contains; everything from that character onwards is discarded. A string
 * without either character is left untouched.
 */
static void chomp_line(char *word)
{
    char *eol = strpbrk(word, "\r\n");

    if (eol != NULL) {
        *eol = '\0';
    }
}
0023 
0024 int generate_bloom_filter(const char *bloom_file, const char *words_file)
0025 {
0026     FILE *fp;
0027     char word[1024];
0028     scaling_bloom_t *bloom;
0029     int i;
0030     
0031     if (!(bloom = new_scaling_bloom(CAPACITY, ERROR_RATE, bloom_file))) {
0032         fprintf(stderr, "ERROR: Could not create bloom filter\n");
0033         return EXIT_FAILURE;
0034     }
0035     
0036     if (!(fp = fopen(words_file, "r"))) {
0037         fprintf(stderr, "ERROR: Could not open words file\n");
0038         return EXIT_FAILURE;
0039     }
0040     
0041     for (i = 0; fgets(word, sizeof(word), fp); i++) {
0042         chomp_line(word);
0043         scaling_bloom_add(bloom, word, strlen(word), i);
0044     }
0045     
0046     int result = bitmap_flush(bloom->bitmap);
0047     
0048     return result;
0049 }
0050 
0051 
/*
 * Command-line entry point.
 *
 * Prints the dablooms version banner, then expects exactly two arguments:
 * the bloom filter file and the words file. With the right argument count
 * the exit status is the value returned by generate_bloom_filter(); otherwise
 * a usage message is written to stderr and EXIT_FAILURE is returned.
 */
int main(int argc, char *argv[])
{
    printf("** dablooms version: %s\n", dablooms_version());

    if (argc == 3) {
        return generate_bloom_filter(argv[1], argv[2]);
    }

    fprintf(stderr, "Usage: %s <bloom_file> <words_file>\n", argv[0]);
    return EXIT_FAILURE;
}