From: Antoine Jacquet Date: Fri, 24 Mar 2006 23:00:00 +0000 (+0100) Subject: version 0.72 X-Git-Tag: v0.72 X-Git-Url: http://royale.zerezo.com/git/?p=irssistats;a=commitdiff_plain;h=refs%2Ftags%2Fv0.72 version 0.72 * now supports statistics of russian words (patch by Pavel N.Kovalenko) * parsing more relaxed for modified log formats (patch by Micha Nelissen) * check day changes with multiple log files (patch by Micha Nelissen) --- diff --git a/CHANGELOG b/CHANGELOG index 1267bfb..d8652c5 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,5 +1,10 @@ Change log file for irssistats +version 0.72 (2006-03-25) + * now supports statistics of russian words (patch by Pavel N.Kovalenko) + * parsing more relaxed for modified log formats (patch by Micha Nelissen) + * check day changes with multiple log files (patch by Micha Nelissen) + version 0.71 (2005-02-07) * back to a basic Makefile for clean packinging purposes diff --git a/README b/README index 0b38498..b584f69 100644 --- a/README +++ b/README @@ -1,4 +1,4 @@ -irssistats 0.71 +irssistats 0.72 site: http://royale.zerezo.com/irssistats/ mail: royale@zerezo.com diff --git a/irssistats.c b/irssistats.c index 6547005..66522e8 100644 --- a/irssistats.c +++ b/irssistats.c @@ -1,5 +1,5 @@ /* - * irssistats version 0.71 + * irssistats version 0.72 * * This tool generates IRC stats based on irssi logs. * Usage: irssistats [/path/to/file.conf] @@ -28,6 +28,7 @@ #include #include #include +#include #ifdef __WIN32__ #define GLOBALCONF "irssistats.conf" #else @@ -48,7 +49,7 @@ #define MINWORDLENGTH 5 /* irssistats */ -#define VERSION "0.71" +#define VERSION "0.72" #define URL "http://royale.zerezo.com/irssistats/" /* Counters */ @@ -611,6 +612,7 @@ struct int hours[4]; } lastdays[31]; int days=0; +char currday[16]; int hours[24*4]; int lines=0; @@ -627,8 +629,89 @@ struct char word[MAXLINELENGTH]; } topwords[NBWORDS]; +struct rusletter +{ + int nb; + struct rusletter *next[33]; +} ruswords; + #define isletter(c) (((c>='a')&&(c<='z'))||((c>='A')&&(c<='Z'))) #define lowercase(c) (((c>='A')&&(c<='Z'))?c-'A'+'a':c) +//#define lowercase(c) (c | 0x20) +#define isrusletter(c) (memchr(koi,c,66)==NULL?0:1) + +const char koi[] = { +193,194,215,199,196,197,163,214,218,201,202,203,204,205,206,207,208,210, +211,212,213,198,200,195,222,219,221,216,223,217,220,192,209, +225,226,247,231,228,229,179,246,250,233,234,235,236,237,238,239,240,242, +243,244,245,230,232,227,254,251,253,248,255,249,252,224,241 +}; +int lowruscase(char c) +{ + char *ctmp_p; + int ch=0; + if (memchr(koi,c,66)==NULL) return koi[0]; + ch=strlen(koi)-strlen(memchr(koi,c,66)); + if (ch>33) return ch-33; else return ch; +} +/* cp1251 for encoding into koi8-r +const char win[] = { +224,225,226,227,228,229,184,230,231,232,233,234,235,236,237,238,239,240, +241,242,243,244,245,246,247,248,249,252,250,251,253,254,255, +192,193,194,195,196,197,168,198,199,200,201,202,203,204,205,206,207,208, +209,210,211,212,213,214,215,216,217,220,218,219,221,222,223 +};*/ + + + +int findruswords(char *message) +{ + int i,c,n=0; + //char *tmp_p; + struct rusletter *pos,*tmp; + for (;;) + { + while (!isrusletter(*message)) if (*message=='\0') return n; else message++; + pos=&ruswords; + while (isrusletter(*message)) + { + c=lowruscase(*message); + /*tmp_p=memchr(koi,message[0],33); + if (tmp_p==NULL) return n; + c=strlen(koi)-strlen(tmp_p);*/ + if (pos->next[(int)c]==NULL) + { + tmp=malloc(sizeof(struct rusletter)); + if (tmp==NULL) + { + fprintf(stderr, "findruswords(): malloc failure\n"); + exit(1); + } + tmp->nb=0; + for (i=0;i<33;i++) tmp->next[i]=NULL; + pos->next[(int)c]=tmp; + } + pos=pos->next[(int)c]; + message++; + } + pos->nb++; + n++; + } + return n; +} + + +void freeruswords(struct rusletter *pos) +{ + int i; + for (i=0;i<33;i++) if (pos->next[i]!=NULL) + { + freeruswords(pos->next[i]); + free(pos->next[i]); + (*pos).next[i]=NULL; + } +} + int findwords(char *message) { int i,c,n=0; @@ -684,6 +767,28 @@ void bestwords(struct letter pos,int cur) tempword[cur]='\0'; } +void bestruswords(struct rusletter pos,int cur) +{ + int i,j; + if ((cur>=MINWORDLENGTH)&&(pos.nb>topwords[NBWORDS-1].nb)) + { + for (i=0;pos.nbi;j--) + { + topwords[j].nb=topwords[j-1].nb; + strcpy(topwords[j].word,topwords[j-1].word); + } + topwords[i].nb=pos.nb; + strcpy(topwords[i].word,tempword); + } + for (i=0;i<33;i++) if (pos.next[i]!=NULL) + { + tempword[cur]=koi[i]; + bestruswords(*(pos.next[i]),cur+1); + } + tempword[cur]='\0'; +} + void freewords(struct letter *pos) { int i; @@ -753,6 +858,40 @@ int dichotomic(char *nick) return(start); } +void day_changed(char* date) +{ + int i,j; + char newday[16]; + + memcpy(newday, date, 11); + if (date[13]==':') + memcpy(newday+11, date+20, 4); + else + memcpy(newday+11, date+11, 4); + newday[15]=0; + if (memcmp(currday, newday, 15)!=0) + { + /* we do not have a "current" day yet? */ + if (currday[0]!=0) + { + for (i=30;i>0;i--) + { + lastdays[i].lines=lastdays[i-1].lines; + for (j=0;j<4;j++) lastdays[i].hours[j]=lastdays[i-1].hours[j]; + } + lastdays[0].lines=0; + for (j=0;j<4;j++) lastdays[0].hours[j]=0; + days++; + } + memcpy(currday, newday, 15); + if (debug==2) + fprintf(stderr, "day %d changed to: %s\n", days, currday); + } else { + if (debug==2) + fprintf(stderr, "but day did not change\n"); + } +} + void parse_log(char *logfile) { FILE *fic; @@ -761,8 +900,9 @@ void parse_log(char *logfile) int i,j; char *nick,*message; int nickstart; - int mononick,monolines; + int mononick=-1,monolines=0; int temp,hour; + int timelen; if ((fic=fopen(logfile,"rt"))==NULL) { fprintf(stderr,"can't open log file \"%s\"\n",logfile); exit(1); } if (debug) printf("working on %s : ",channel); @@ -786,170 +926,196 @@ void parse_log(char *logfile) pos=0; totallines++; if (totallines%10000==0 && debug) { printf("."); fflush(stdout); } - if (strncmp("--- Day changed",line,15)==0) /* --- Day changed Wed May 01 2002 */ + if (strncmp("--- Log opened",line,14)==0) /* --- Log opened Wed May 01 00:00 2002 */ { - for (i=30;i>0;i--) - { - lastdays[i].lines=lastdays[i-1].lines; - for (j=0;j<4;j++) lastdays[i].hours[j]=lastdays[i-1].hours[j]; - } - lastdays[0].lines=0; - for (j=0;j<4;j++) lastdays[0].hours[j]=0; - days++; + if (debug==2) + fprintf(stderr, "log %s opened, ", logfile); + day_changed(line+15); } - else if (strncmp("-!- mode/",&line[6],9)==0) /* 00:00 -!- mode/#channel [...] by (Nick, Nick2, )Nick3 */ + if (strncmp("--- Day changed",line,15)==0) /* --- Day changed Wed May 01 2002 */ { - for (i=strlen(line);line[i]!=' ';i--); - nick=&line[i+1]; - users[dichotomic(nick)].counters[D_MODE]++; + if (debug==2) + fprintf(stderr, "within log file, "); + day_changed(line+16); } - else if (strncmp("-!-",&line[6],3)==0) /* 00:00 -!- Nick something... */ + else { - for (i=10;line[i]!=' ' && i <= 10 + MAXNICKLENGTH;i++); - if(i > 10 + MAXNICKLENGTH) { - if(debug) { - fprintf(stderr,"nick on line %d is too long, skipping line\n",totallines); - } - continue; - } - line[i]='\0'; - nick=&line[10]; - message=&line[i+1]; - if (strncmp("changed the topic of",message,20)==0) /* 00:00 -!- Nick changed the topic of #channel to: new topic */ - { - users[dichotomic(nick)].counters[D_TOPIC]++; - for (i=21;message[i]!=':';i++); - message=&message[i+2]; - nbtopics++; - if ((nbtopics<=NBTOPICS) || (rand()%(nbtopics/NBTOPICS)==0)) - { - temp=nbtopics<=NBTOPICS?nbtopics-1:rand()%NBTOPICS; - strcpy(topics[temp].nick,nick); - strncpy(topics[temp].topic,message,MAXQUOTELENGTH); - } - } - else if (strncmp("was kicked from",message,15)==0) /* 00:00 -!- Nick was kicked from #channel by Nick [Reason] */ + /* timelen is number of characters occupied by time 00:00.. plus any space */ + timelen = 5; + if (line[timelen] == ':' && isdigit(line[timelen+1]) && isdigit(line[timelen+2])) + timelen += 3; + if (line[timelen] == ' ') + timelen++; + if (strncmp("-!- mode/",&line[timelen],9)==0) /* 00:00 -!- mode/#channel [...] by (Nick, Nick2, )Nick3 */ { - users[dichotomic(nick)].counters[D_KICKED]++; - for (i=16;message[i]!=' ';i++); - message=&message[i+4]; - for (i=0;message[i]!=' ';i++); - message[i]='\0'; - users[dichotomic(message)].counters[D_KICK]++; + for (i=strlen(line);line[i]!=' ';i--); + nick=&line[i+1]; + users[dichotomic(nick)].counters[D_MODE]++; } - else if (strncmp("is now known as",message,15)==0) /* 00:00 -!- Nick is now known as Nick */ - users[dichotomic(nick)].counters[D_NICK]++; - else if (message[0]=='[') /* 00:00 -!- Nick [user@host] something... */ - { - for (i=0;message[i]!=']';i++); - message=&message[i+2]; - if (strncmp("has joined",message,10)==0) /* 00:00 -!- Nick [user@host] has joined #channel */ - users[dichotomic(nick)].counters[D_JOIN]++; - else if (strncmp("has quit",message,8)==0); /* 00:00 -!- Nick [user@host] has quit [Reason] */ - else if (strncmp("has left",message,8)==0); /* 00:00 -!- Nick [user@host] has left #channel [Reason] */ - else; - } - } - else if ((line[6]=='<') || (line[7]=='*')) - { - line[2]='\0'; - hour=atoi(line); - if (line[7]=='*') /* 00:00 * Nick the message */ + else if (strncmp("-!-",&line[timelen],3)==0) /* 00:00 -!- Nick something... */ { - for (i=9;line[i]!=' ' && i <= 9 + MAXNICKLENGTH;i++); - if(i > 9 + MAXNICKLENGTH) { + for (i=10;line[i]!=' ' && i <= 10 + MAXNICKLENGTH;i++); + if(i > 10 + MAXNICKLENGTH) { if(debug) { fprintf(stderr,"nick on line %d is too long, skipping line\n",totallines); } - continue; + continue; } - nick=&line[9]; + line[i]='\0'; + nick=&line[timelen+4]; message=&line[i+1]; - } - else if (line[7]=='>') /* 00:00 <>>>?Nick<<<> the personal message */ - /* 00:00 <>>?Nick<<> the personal message */ - { - for (i=10;line[i]!='<' && i <= 10 + MAXNICKLENGTH;i++); - if(i > 10 + MAXNICKLENGTH) { - if(debug) { - fprintf(stderr,"nick on line %d is too long, skipping line\n",totallines); + if (strncmp("changed the topic of",message,20)==0) /* 00:00 -!- Nick changed the topic of #channel to: new topic */ + { + users[dichotomic(nick)].counters[D_TOPIC]++; + for (i=21;message[i]!=':';i++); + message=&message[i+2]; + nbtopics++; + if ((nbtopics<=NBTOPICS) || (rand()%(nbtopics/NBTOPICS)==0)) + { + temp=nbtopics<=NBTOPICS?nbtopics-1:rand()%NBTOPICS; + strcpy(topics[temp].nick,nick); + strncpy(topics[temp].topic,message,MAXQUOTELENGTH); } - continue; } - nick=&line[10]; - if (line[9]=='>') nick++; - message=&line[i+5]; + else if (strncmp("was kicked from",message,15)==0) /* 00:00 -!- Nick was kicked from #channel by Nick [Reason] */ + { + users[dichotomic(nick)].counters[D_KICKED]++; + for (i=16;message[i]!=' ';i++); + message=&message[i+4]; + for (i=0;message[i]!=' ';i++); + message[i]='\0'; + users[dichotomic(message)].counters[D_KICK]++; + } + else if (strncmp("is now known as",message,15)==0) /* 00:00 -!- Nick is now known as Nick */ + users[dichotomic(nick)].counters[D_NICK]++; + else if (message[0]=='[') /* 00:00 -!- Nick [user@host] something... */ + { + for (i=0;message[i]!=']';i++); + message=&message[i+2]; + if (strncmp("has joined",message,10)==0) /* 00:00 -!- Nick [user@host] has joined #channel */ + users[dichotomic(nick)].counters[D_JOIN]++; + else if (strncmp("has quit",message,8)==0); /* 00:00 -!- Nick [user@host] has quit [Reason] */ + else if (strncmp("has left",message,8)==0); /* 00:00 -!- Nick [user@host] has left #channel [Reason] */ + else; + } } - else /* 00:00 the message */ + else if ((line[timelen]=='<') || (line[timelen+1]=='*')) { - - /* - * Irssi doesn't log channel mode with show_nickmode = OFF - * the following covers op, half-op, voice and show_nickmode_empty - */ - if (line[7]=='@' || line[7]=='%' || line[7]=='+' || line[7]==' ') { - nickstart = 8; - } else { - nickstart = 7; + line[2]='\0'; + hour=atoi(line); + if (line[timelen+1]=='*') /* 00:00 * Nick the message */ + { + for (i=timelen+3;line[i]!=' ' && i <= timelen+3+MAXNICKLENGTH;i++); + if(i > timelen+3+MAXNICKLENGTH) { + if(debug) { + fprintf(stderr,"nick on line %d is too long, skipping line\n",totallines); + } + continue; + } + nick=&line[timelen+3]; + message=&line[i+1]; } - - for (i=nickstart;line[i]!='>' && i <= nickstart + MAXNICKLENGTH;i++); - if(i > nickstart + MAXNICKLENGTH) { - if(debug) { - fprintf(stderr,"nick on line %d is too long, skipping line\n",totallines); + else if (line[timelen+1]=='>') /* 00:00 <>>>?Nick<<<> the personal message */ + /* 00:00 <>>?Nick<<> the personal message */ + { + for (i=timelen+4;line[i]!='<' && i <= timelen+4+MAXNICKLENGTH;i++); + if(i > timelen+4+MAXNICKLENGTH) { + if(debug) { + fprintf(stderr,"nick on line %d is too long, skipping line\n",totallines); + } + continue; } - continue; + nick=&line[timelen+4]; + if (line[timelen+3]=='>') nick++; + message=&line[i+5]; } - nick=&line[nickstart]; - message=&line[i+2]; - } - line[i]='\0'; - i=dichotomic(nick); - if (line[7]=='*') users[i].counters[D_ME]++; - if (i==mononick) - { - monolines++; - if (monolines==5) users[i].counters[D_MONOLOGUE]++; - } - else - { - mononick=i; - monolines=1; - } - j=strlen(message); - users[i].lines++; - if (top_words || ranking==1) users[i].words+=findwords(message); - users[i].letters+=j; - users[i].hours[hour/6]++; - lastdays[0].lines++; - lastdays[0].hours[hour/6]++; - lines++; - if (quarter) - { - line[5]='\0'; - hour=hour*4+atoi(&line[3])/15; - } - hours[hour]++; - if (message[j-1]=='?') users[i].counters[D_QUESTION]++; - else if (message[j-1]=='!') users[i].counters[D_EXCLAM]++; - else if ((message[j-3]==' ')&&(message[j-2]==':')) - { - if (message[j-1]==')') users[i].counters[D_SMILE]++; - else if (message[j-1]=='(') users[i].counters[D_FROWN]++; - } - if (rand()%users[i].lines==0) strncpy(users[i].quote,message,MAXQUOTELENGTH); - if (strncmp("http://",message,7)==0) - { - users[i].counters[D_URL]++; - for (i=0;(message[i]!=' ') && (i the message */ { - temp=nburls<=NBURLS?nburls-1:rand()%NBURLS; - strcpy(urls[temp].nick,nick); - strcpy(urls[temp].url,message); - strncpy(urls[temp].shorturl,message,MAXQUOTELENGTH); + + /* + * Irssi doesn't log channel mode with show_nickmode = OFF + * the following covers op, half-op, voice and show_nickmode_empty + */ + switch (line[timelen+1]) + { + case '@': + case '%': + case '+': + case ' ': + nickstart = timelen+2; + break; + default: + nickstart = timelen+1; + break; + } + + for (i=nickstart;line[i]!='>' && i <= nickstart + MAXNICKLENGTH;i++); + if(i > nickstart + MAXNICKLENGTH) { + if(debug) { + fprintf(stderr,"nick on line %d is too long, skipping line\n",totallines); + } + continue; + } + nick=&line[nickstart]; + message=&line[i+2]; + } + /* remove identified character from nick (invalid nick character anyway) */ + if (nick[0] == '+') + fprintf(stderr, "nick starts with +! %s\n", nick); + if (line[i-1] == '+' || line[i-1] == '*') + i--; + line[i]='\0'; + i=dichotomic(nick); + if (line[timelen+1]=='*') users[i].counters[D_ME]++; + if (i==mononick) + { + monolines++; + if (monolines==5) users[i].counters[D_MONOLOGUE]++; + } + else + { + mononick=i; + monolines=1; + } + j=strlen(message); + users[i].lines++; + if (top_words || ranking==1) + { + if (L("CHARSET")=="KOI8-R") users[i].words+=findwords(message)+findruswords(message); + else users[i].words+=findwords(message); + } + users[i].letters+=j; + users[i].hours[hour/6]++; + lastdays[0].lines++; + lastdays[0].hours[hour/6]++; + lines++; + if (quarter) + { + line[5]='\0'; + hour=hour*4+atoi(&line[3])/15; + } + hours[hour]++; + if (message[j-1]=='?') users[i].counters[D_QUESTION]++; + else if (message[j-1]=='!') users[i].counters[D_EXCLAM]++; + else if ((message[j-3]==' ')&&(message[j-2]==':')) + { + if (message[j-1]==')') users[i].counters[D_SMILE]++; + else if (message[j-1]=='(') users[i].counters[D_FROWN]++; + } + if (rand()%users[i].lines==0) strncpy(users[i].quote,message,MAXQUOTELENGTH); + if (strncmp("http://",message,7)==0) + { + users[i].counters[D_URL]++; + for (i=0;(message[i]!=' ') && (i\"\"",users[user].photo,users[user].photo,photo_size,photo_size); else - fprintf(fic,"\"\"",users[user].photo,users[user].photo); + fprintf(fic,"\"\"",users[user].photo); } fprintf(fic,"\n"); users[user].lines=-1; @@ -1453,6 +1619,7 @@ void parse_config(char *configfile) expand(value); if (debug==2) fprintf(stderr,"generating xhtml file \"%s\"\n",value); bestwords(words,0); + if (L("CHARSET")=="KOI8-R") bestruswords(ruswords,0); gen_xhtml(value); /* reset variables */ @@ -1463,6 +1630,7 @@ void parse_config(char *configfile) for (i=0;i<24*4;i++) hours[i]=0; lines=0; freewords(&words); + freeruswords(&ruswords); for (i=0;i