Making Graphs of LJ Stats
From No LJ Ads Wiki
Do you like graphs? Of COURSE you do! NLJA keeps automatic downloads of stats for the LJ and IJ services (at http://noljads.com/ljstats/ and http://noljads.com/ijstats/ respectively).
Those stats can be made into graphs.
The Python script (stattables.py) for parsing all that data into a single table:
#!/usr/bin/python
# Meant to work with the data from
# here: http://noljads.com/ljstats/
# or here: http://noljads.com/ijstats/
from optparse import OptionParser
import sys
import os
import re
# Parse command line options. Both -d/--dir (input directory) and
# -o/--out (output table filename) are required.
parser = OptionParser()
parser.add_option("-d", "--dir", dest="dir",
                  help="The directory with the input data files",
                  metavar="DIR")
parser.add_option("-o", "--out", dest="out",
                  help="The output table filename", metavar="OUT")
(options, args) = parser.parse_args()

if not options.dir:
    sys.stderr.write("Error: no input directory given.\n")
    parser.print_help()
    # A missing required option is a failure: exit non-zero so that shells
    # and calling scripts can detect it.
    sys.exit(1)
Directory = options.dir

if not options.out:
    sys.stderr.write("Error: no output filename given.\n")
    parser.print_help()
    sys.exit(1)
Outfile = options.out
# Row names (second column of each stats file) that should be kept.
# The output table's columns follow this order.
datapoints = [
    "accounts_active_1",
    "accounts_active_7",
    "accounts_active_30",
    "updated_last1",
    "updated_last7",
    "updated_last30",
]

# Eligible input files: every entry in the input directory whose name ends
# in ".txt", in sorted filename order.
textfilename_match = re.compile(r"\.txt$")
infiles = os.listdir(Directory)
textfiles = sorted(f for f in infiles if textfilename_match.search(f))

# One dict per parsed input file is accumulated here.
data_values = []

print("Parsing files in %s" % Directory)
# Parse every eligible file in the input directory and append one record
# per file (a dict holding the date plus the wanted datapoint values) to
# data_values.

# The date pattern does not change between files, so compile it once.
filedate_match = re.compile(r"(\d{4})-(\d{2})-(\d{2})")

for filename in textfiles:
    # The date is taken from the filename (first YYYY-MM-DD occurrence).
    date_match = filedate_match.search(filename)
    if date_match is None:
        # Without a date the row cannot be labelled; skip the file instead
        # of crashing on the missing match.
        sys.stderr.write(
            "Warning: no date found in filename %s, skipping.\n" % filename)
        continue

    file_values = dict()
    # group(0) is the whole match, i.e. already in YYYY-MM-DD form.
    file_values["date"] = date_match.group(0)

    # The with-block closes the file even if a line fails to parse.
    with open(os.path.join(Directory, filename), 'r') as TABLE:
        for line in TABLE:
            values = line.split("\t")
            # Each line needs at least three tab-separated fields; the name
            # is read from the second and the value from the third. Blank
            # or truncated lines are ignored.
            if len(values) < 3:
                continue
            datapoint_name = values[1]
            datapoint_value = values[2].rstrip()
            # Keep only the datapoints we are interested in.
            if datapoint_name in datapoints:
                file_values[datapoint_name] = datapoint_value

    data_values.append(file_values)
print("Writing table to %s" % Outfile)

# The with-block guarantees the output file is flushed and closed even if a
# write fails part-way through.
with open(Outfile, 'w') as OUT:
    # Header row: "date" followed by the datapoint names, tab-separated.
    OUT.write("date\t%s\n" % "\t".join(datapoints))

    # One row per input file; a datapoint missing from a file is written
    # as "N/A".
    for values in data_values:
        value_list = [values["date"]]
        for datapoint_name in datapoints:
            value_list.append(values.get(datapoint_name, "N/A"))
        OUT.write("%s\n" % "\t".join(value_list))
Running the script:
python stattables.py -d ljstats/text/ -o ljstattable.txt
python stattables.py -d ijstats/text/ -o ijstattable.txt
The R code for LJ:
# Plot the LiveJournal activity table (ljstattable.txt) as one chart and
# save it to LJStatsGraph.pdf.
library(zoo)

# One line colour and one legend label per data column.
linecolors <- c("navyblue", "blue", "skyblue2",
                "red4", "firebrick3", "rosybrown1")
linelabels <- c("Active in past day", "Active in past week",
                "Active in past 30 days",
                "Updated in past day", "Updated in past week",
                "Updated in past 30 days")

# Read the table; "N/A" cells become NA, and rows containing any NA are
# dropped.
ljstats <- na.omit(read.table(file = "ljstattable.txt",
                              header = TRUE,
                              na.strings = "N/A"))

# Column 1 holds the dates (used as the series index); columns 2-7 hold the
# six datapoints.
zooljstats <- zoo(ljstats[, 2:7],
                  order.by = as.Date(as.character(ljstats[, 1])))

# onefile only selects the output filename here.
onefile <- TRUE
pdf(file = ifelse(onefile, "LJStatsGraph.pdf", "none"),
    width = 12, height = 8,
    title = "LiveJournal Stats")

# plot.type = "single" draws all six series on one set of axes.
plot(zooljstats,
     main = "Activity Statistics for LiveJournal",
     plot.type = "single",
     col = linecolors,
     xlab = "Date",
     ylab = "Number of accounts")
legend(x = "topleft", legend = linelabels, xjust = 0.5, fill = linecolors,
       ncol = 2, inset = 0.2, title = "Key")

# Close the PDF device so the file is written out.
dev.off()
Additionally make a graph for IJ, if you so choose:
# Plot the InsaneJournal activity table (ijstattable.txt) and save it to
# IJStatsGraph.pdf. Run this after the LJ code: it reuses library(zoo),
# linecolors and linelabels defined there.

# Read the table; "N/A" cells become NA, and rows containing any NA are
# dropped.
ijstats <- read.table(file="ijstattable.txt",header=TRUE,na.strings = "N/A")
ijstats <- na.omit(ijstats)

# Column 1 holds the dates (used as the series index); columns 2-7 hold the
# six datapoints.
zooijstats <- zoo(ijstats[,2:7], as.Date(as.character(ijstats[,1])))

# onefile only selects the output filename here.
onefile=TRUE
pdf(file=ifelse(onefile, "IJStatsGraph.pdf", "none"), width=12, height=8, title="InsaneJournal Stats")

# plot.type="single" draws all six series on one set of axes.
plot(zooijstats, main="Activity Statistics for InsaneJournal", plot.type="single",
     col=linecolors, xlab="Date", ylab="Number of accounts")
legend(x="topright", linelabels, xjust=0.5, fill=linecolors, ncol=2, inset=0.1, title="Key")

# Close the PDF device so the file is written out.
dev.off()

