From f73ed48dbf8fe3c76e4e2584f12d4857bbda2285 Mon Sep 17 00:00:00 2001 From: DL6ER Date: Sun, 12 Mar 2017 21:03:05 +0100 Subject: [PATCH] Implement FTL garbage collector --- FTL.h | 19 +++++++------ Makefile | 2 +- args.c | 13 +++++++++ gc.c | 79 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ log.c | 13 +++++++++ main.c | 33 +++++++++++++++-------- parser.c | 71 +++++++++++++++++++----------------------------- request.c | 7 ++++- routines.h | 6 +++++ socket.c | 13 +++++---- threads.c | 27 +++++++++++++++++++ 11 files changed, 212 insertions(+), 71 deletions(-) create mode 100644 gc.c create mode 100644 threads.c diff --git a/FTL.h b/FTL.h index 65c06b6ca..a5c99bc9b 100644 --- a/FTL.h +++ b/FTL.h @@ -55,14 +55,13 @@ // Default: 86400 (24 hours) #define MAXLOGAGE 86400 -// How often do we reparse logs (to ensure we only have data fitting to -// the MAXLOGAGE defined above)? [seconds] +// How often do we garbage collect (to ensure we only have data fitting to the MAXLOGAGE defined above)? [seconds] // Default: 3600 (once per hour) -#define reparsing_interval 3600 +#define GCinterval 3600 -// Delay applied to the reparsing of logs [seconds] -// Can be used -#define reparsing_delay (-60) +// Delay applied to the garbage collecting [seconds] +// Default -60 (one minute before a full hour) +#define GCdelay (-60) // Static structs typedef struct { @@ -83,6 +82,7 @@ typedef struct { typedef struct { int queries; + int invalidqueries; int blocked; int wildcardblocked; int cached; @@ -107,11 +107,14 @@ typedef struct { // Dynamic structs typedef struct { int timestamp; + int timeidx; unsigned char type; unsigned char status; - // 0 = unknown, 1 = gravity.list (blocked), 2 = reply from upstream, 3 = cache + // 0 = unknown, 1 = gravity.list (blocked), 2 = reply from upstream, 3 = cache, 4 = wildcard blocked int domainID; int clientID; + int forwardID; + bool valid; } queriesDataStruct; typedef struct { @@ -172,9 +175,9 @@ bool initialscan; bool debug; bool debugthreads; bool debugclients; +bool debugGC; bool threadlock; char ** wildcarddomains; -bool rescan_logfiles; memoryStruct memory; diff --git a/Makefile b/Makefile index ed0687bc7..4c2cb4055 100644 --- a/Makefile +++ b/Makefile @@ -9,7 +9,7 @@ # Please see LICENSE file for your rights under this license. DEPS = FTL.h routines.h version.h -OBJ = main.o structs.o log.o daemon.o parser.o signals.o socket.o request.o grep.o setupVars.o args.o flush.o +OBJ = main.o structs.o log.o daemon.o parser.o signals.o socket.o request.o grep.o setupVars.o args.o flush.o gc.o threads.o # Get git commit version and date GIT_BRANCH := $(shell git branch | sed -n 's/^\* //p') diff --git a/args.c b/args.c index d4563ed95..77b849c26 100644 --- a/args.c +++ b/args.c @@ -14,34 +14,47 @@ bool debug = false; bool debugthreads = false; bool debugclients = false; +bool debugGC = false; void parse_args(int argc, char* argv[]) { int i; for(i=0; i < argc; i++) { if(strcmp(argv[i], "debug") == 0) debug = true; + if(strcmp(argv[i], "debugthreads") == 0) { debug = true; debugthreads = true; } + if(strcmp(argv[i], "debugclients") == 0) { debug = true; debugclients = true; } + + if(strcmp(argv[i], "debugGC") == 0) + { + debug = true; + debugGC = true; + } + if(strcmp(argv[i], "test") == 0) killed = 1; + if(strcmp(argv[i], "version") == 0) { printf("%s\n",GIT_VERSION); exit(0); } + if(strcmp(argv[i], "tag") == 0) { printf("%s\n",GIT_TAG); exit(0); } + // Other arguments are ignored } } diff --git a/gc.c b/gc.c new file mode 100644 index 000000000..4e06b5577 --- /dev/null +++ b/gc.c @@ -0,0 +1,79 @@ +/* Pi-hole: A black hole for Internet advertisements +* (c) 2017 Pi-hole, LLC (https://pi-hole.net) +* Network-wide ad blocking via your own hardware. +* +* FTL Engine +* Garbage collection routines +* +* This file is copyright under the latest version of the EUPL. +* Please see LICENSE file for your rights under this license. */ + +#include "FTL.h" + +void *GC_thread(void *val) +{ + // Set thread name + prctl(PR_SET_NAME,"GC",0,0,0); + + // Lock FTL's data structure, since it is likely that it will be changed here + // Requests should not be processed/answered when data is about to change + enable_lock("GC_thread"); + + // Get minimum time stamp to keep + int mintime = time(NULL) - GCdelay - MAXLOGAGE; + if(debugGC) + { + time_t timestamp = mintime; + logg_str("GC all queries older than: ", strtok(ctime(×tamp),"\n")); + } + + // Process all queries + int i; + for(i=0; i < counters.queries; i++) + { + if(queries[i].timestamp < mintime && queries[i].valid) + { + // Adjust total counters and total over time data + // We cannot edit counters.queries directly as it is used + // as max ID for the queries[] struct + counters.invalidqueries++; + overTime[queries[i].timeidx].total--; + + // Adjust client and domain counters + clients[queries[i].clientID].count--; + domains[queries[i].domainID].count--; + forwarded[queries[i].forwardID].count--; + + // Change other counters according to status of this query + switch(queries[i].status) + { + case 0: counters.unknown--; break; + case 1: counters.blocked--; overTime[queries[i].timeidx].blocked--; domains[queries[i].domainID].blockedcount--; break; + case 2: break; + case 3: counters.cached--; break; + case 4: counters.wildcardblocked--; overTime[queries[i].timeidx].blocked--; break; + default: /* That cannot happen */ break; + } + + // Mark this query as garbage collected + queries[i].valid = false; + + if(debugGC) + { + time_t timestamp = queries[i].timestamp; + logg_str("GC query with time: ", strtok(ctime(×tamp),"\n")); + } + } + } + + if(debugGC) + { + logg_int("GC queries: ", counters.invalidqueries); + } + + // Release thread lock + disable_lock("GC_thread"); + + + return NULL; +} diff --git a/log.c b/log.c index 929dd146d..9a3186aa4 100644 --- a/log.c +++ b/log.c @@ -60,6 +60,19 @@ void logg_str(const char* str, char* str2) printf("[%d-%02d-%02d %02d:%02d:%02d.%03i] %s%s\n", tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday, tm.tm_hour, tm.tm_min, tm.tm_sec, millisec, str, str2); } +void logg_const_str(const char* str, const char* str2) +{ + time_t t = time(NULL); + struct tm tm = *localtime(&t); + struct timeval tv; + gettimeofday(&tv, NULL); + int millisec = tv.tv_usec/1000; + fprintf(logfile, "[%d-%02d-%02d %02d:%02d:%02d.%03i] %s%s\n", tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday, tm.tm_hour, tm.tm_min, tm.tm_sec, millisec, str, str2); + fflush(logfile); + if(debug) + printf("[%d-%02d-%02d %02d:%02d:%02d.%03i] %s%s\n", tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday, tm.tm_hour, tm.tm_min, tm.tm_sec, millisec, str, str2); +} + void logg_str_str(const char* str, char* str2, char* str3) { time_t t = time(NULL); diff --git a/main.c b/main.c index d312be8c1..21ef0d00a 100644 --- a/main.c +++ b/main.c @@ -43,15 +43,23 @@ int main (int argc, char* argv[]) { log_counter_info(); check_setupVarsconf(); + // We will use the attributes object later to start all threads in detached mode + pthread_attr_t attr; + // Initialize thread attributes object with default attribute values + pthread_attr_init(&attr); + // When a detached thread terminates, its resources are automatically released back to + // the system without the need for another thread to join with the terminated thread + pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED); + pthread_t piholelogthread; - if(pthread_create( &piholelogthread, NULL, pihole_log_thread, NULL ) != 0) + if(pthread_create( &piholelogthread, &attr, pihole_log_thread, NULL ) != 0) { logg("Unable to open Pi-hole log processing thread. Exiting..."); killed = 1; } pthread_t listenthread; - if(pthread_create( &listenthread, NULL, listenting_thread, NULL ) != 0) + if(pthread_create( &listenthread, &attr, listenting_thread, NULL ) != 0) { logg("Unable to open Socket listening thread. Exiting..."); killed = 1; @@ -61,17 +69,20 @@ int main (int argc, char* argv[]) { { sleepms(100); - // Reparse log in regular interval, but don't do it if the threadlock is set to - // prevent locking the whole engine in an endless loop - if(((time(NULL)-reparsing_delay)%reparsing_interval) == 0 && !threadlock) + // Garbadge collect in regular interval, but don't do it if the threadlock is set + if(((time(NULL) - GCdelay)%GCinterval) == 0 && !threadlock) { if(debug) - logg_int("Re-parsing log files due to set update interval of [s]: ",reparsing_interval); - // Flush internal data structure - rescan_logfiles = true; - initialscan = true; - // Reparse logs - while(((time(NULL)-reparsing_delay)%reparsing_interval) == 0) + logg_int("Running GC on data structure due to set interval of [s]: ", GCinterval); + + pthread_t GCthread; + if(pthread_create( &GCthread, &attr, GC_thread, NULL ) != 0) + { + logg("Unable to open GC thread. Exiting..."); + killed = 1; + } + + while(((time(NULL) - GCdelay)%GCinterval) == 0) sleepms(100); } } diff --git a/parser.c b/parser.c index dd8d5cefe..5ee4a4cb6 100644 --- a/parser.c +++ b/parser.c @@ -13,7 +13,6 @@ char *resolveHostname(char *addr); int dnsmasqlogpos = 0; -bool rescan_logfiles = false; int checkLogForChanges(void) { @@ -44,52 +43,36 @@ void *pihole_log_thread(void *val) prctl(PR_SET_NAME,"loganalyzer",0,0,0); while(!killed) { - // Lock FTL data structure, since it is likely that it will be changed here - // Requests should not be processed/answered when data is about to change - while(threadlock) sleepms(5); - if(debugthreads) - logg("Thread lock enabled (pihole_log_thread)"); - threadlock = true; - - // Shall we reprocess all log files? - if(rescan_logfiles) - { - rescan_logfiles = false; - pihole_log_flushed(false); - initialscan = true; - process_pihole_log(1); - process_pihole_log(0); - initialscan = false; - } - else + int newdata = checkLogForChanges(); + + if(newdata != 0) { - int newdata = checkLogForChanges(); + // Lock FTL's data structure, since it is likely that it will be changed here + // Requests should not be processed/answered when data is about to change + enable_lock("pihole_log_thread"); - if(newdata != 0) + if(newdata > 0) { - - if(newdata > 0) - { - // Process new data if found only in main log (file 0) - process_pihole_log(0); - } - else if(newdata < 0) - { - // Process flushed log - // Flush internal datastructure - pihole_log_flushed(true); - // Rescan files 0 (pihole.log) and 1 (pihole.log.1) - initialscan = true; - process_pihole_log(1); - process_pihole_log(0); - initialscan = false; - } + // Process new data if found only in main log (file 0) + process_pihole_log(0); } + else if(newdata < 0) + { + // Process flushed log + // Flush internal datastructure + pihole_log_flushed(true); + // Rescan files 0 (pihole.log) and 1 (pihole.log.1) + initialscan = true; + process_pihole_log(1); + process_pihole_log(0); + initialscan = false; + } + + // Release thread lock + disable_lock("pihole_log_thread"); } - threadlock = false; - if(debugthreads) - logg("Thread lock disabled (pihole_log_thread)"); + // Wait some time before looking again at the log files sleepms(50); } return NULL; @@ -167,8 +150,8 @@ void process_pihole_log(int file) querytime.tm_year = (*timeinfo).tm_year; int querytimestamp = (int)mktime(&querytime); - // Skip parsing of this log entry if too old - if(((time(NULL) - reparsing_delay) - querytimestamp) > MAXLOGAGE) continue; + // Skip parsing of this log entry if it is too old + if(((time(NULL) - GCdelay) - querytimestamp) > MAXLOGAGE) continue; // Now, we modify the minutes (and seconds), but that is fine, since // we don't need the querytime anymore (querytimestamp is already set) @@ -396,6 +379,8 @@ void process_pihole_log(int file) queries[counters.queries].status = status; queries[counters.queries].domainID = domainID; queries[counters.queries].clientID = clientID; + queries[counters.queries].timeidx = timeidx; + queries[counters.queries].valid = true; // Increase DNS queries counter counters.queries++; diff --git a/request.c b/request.c index 9517b9cad..39050f0fb 100644 --- a/request.c +++ b/request.c @@ -32,7 +32,8 @@ void process_request(char *client_message, int *sock) { percentage = 1e2*counters.blocked/counters.queries; } - sprintf(server_message,"domains_being_blocked %i\ndns_queries_today %i\nads_blocked_today %i\nads_percentage_today %f\n",counters.gravity,counters.queries,counters.blocked,percentage); + sprintf(server_message,"domains_being_blocked %i\ndns_queries_today %i\nads_blocked_today %i\nads_percentage_today %f\n", \ + counters.gravity,(counters.queries-counters.invalidqueries),counters.blocked,percentage); swrite(server_message, *sock); if(debugclients) logg_int("Sent stats data to client, ID: ", *sock); @@ -337,6 +338,8 @@ void process_request(char *client_message, int *sock) int i; for(i=ibeg; i < counters.queries; i++) { + // Check if this query has been removed due to garbage collection + if(!queries[i].valid) continue; char type[5]; if(queries[i].type == 1) { @@ -415,6 +418,8 @@ void process_request(char *client_message, int *sock) int found = 0; for(i = counters.queries - 1; i > 0 ; i--) { + // Check if this query has been removed due to garbage collection + if(!queries[i].valid) continue; if(queries[i].status == 1 || queries[i].status == 4) { found++; diff --git a/routines.h b/routines.h index c15dade17..248c8bdca 100644 --- a/routines.h +++ b/routines.h @@ -18,6 +18,7 @@ void open_FTL_log(void); void logg(const char* str); void logg_int(const char* star, int i); void logg_str(const char* str, char* str2); +void logg_const_str(const char* str, const char* str2); void logg_str_str_int(const char* str, char* str2, char* str3, int i); void logg_struct_resize(const char* str, int from, int to); void logg_str_str(const char* str, char* str2, char* str3); @@ -62,3 +63,8 @@ bool getSetupVarsBool(char * input); void parse_args(int argc, char* argv[]); int detectStatus(char *domain); + +void *GC_thread(void *val); + +void enable_lock(const char *message); +void disable_lock(const char *message); diff --git a/socket.c b/socket.c index 88886f986..8e7cdfc1b 100644 --- a/socket.c +++ b/socket.c @@ -177,15 +177,14 @@ void *connection_handler_thread(void *socket_desc) strcpy(message, client_message); // Lock FTL data structure, since it is likely that it will be changed here // Requests should not be processed/answered when data is about to change - while(threadlock) sleepms(5); - threadlock = true; - if(debugthreads) - logg("Thread lock enabled (process_request)"); + enable_lock("connection_handler_thread"); + process_request(message, &sock); - threadlock = false; - if(debugthreads) - logg("Thread lock disabled (process_request)"); free(message); + + // Release thread lock + disable_lock("connection_handler_thread"); + if(sock == 0) { // Client disconnected by seding EOT or ">quit" diff --git a/threads.c b/threads.c new file mode 100644 index 000000000..e08fcc73c --- /dev/null +++ b/threads.c @@ -0,0 +1,27 @@ +/* Pi-hole: A black hole for Internet advertisements +* (c) 2017 Pi-hole, LLC (https://pi-hole.net) +* Network-wide ad blocking via your own hardware. +* +* FTL Engine +* Thread routines +* +* This file is copyright under the latest version of the EUPL. +* Please see LICENSE file for your rights under this license. */ + +#include "FTL.h" + +void enable_lock(const char *message) +{ + while(threadlock) sleepms(5); + + if(debugthreads) + logg_const_str("Thread lock enabled: ", message); + threadlock = true; +} + +void disable_lock(const char *message) +{ + threadlock = false; + if(debugthreads) + logg_const_str("Thread lock disabled: ", message); +}