Making Graphs of LJ Stats

From No LJ Ads Wiki

Jump to: navigation, search

Do you like graphs? Of COURSE you do! NLJA keeps automatic downloads of stats for the LJ and IJ services, at http://noljads.com/ljstats/ and http://noljads.com/ijstats/ respectively.

And those stats can be made into graphs.

The Python script (saved as stattables.py) for parsing all that data into a single tab-separated table:

#!/usr/bin/python

# Meant to work with the data from 
# here: http://noljads.com/ljstats/
# or here: http://noljads.com/ijstats/

from optparse import OptionParser
import sys
import os
import re

# Parse command line options. Both the input directory (-d) and the
# output filename (-o) are required.
parser = OptionParser()

parser.add_option("-d", "--dir", dest="dir",
	help="The directory with the input data files", metavar="DIR")
parser.add_option("-o", "--out", dest="out",
	help="The output table filename", metavar="OUT")

(options, args) = parser.parse_args()

# A missing required option is a usage error: report it on stderr, show the
# help text, and exit with a non-zero status (2, the status optparse itself
# uses for usage errors) so that calling shells and scripts can detect it.
if not options.dir:
	sys.stderr.write("Error: no input directory given.\n")
	parser.print_help()
	sys.exit(2)
Directory = options.dir

if not options.out:
	sys.stderr.write("Error: no output filename given.\n")
	parser.print_help()
	sys.exit(2)
Outfile = options.out

# Names of the statistics to extract. They are compared against the second
# tab-separated column of each input line, and they also define the column
# order of the output table.
datapoints = ["accounts_active_1", "accounts_active_7", "accounts_active_30",
	"updated_last1", "updated_last7", "updated_last30"]

# Get a sorted list of eligible input files (names ending in ".txt").
# The pattern is a raw string so that "\." is not treated as a (invalid)
# string escape sequence.
infiles = os.listdir(Directory)
textfilename_match = re.compile(r"\.txt$")
textfiles = sorted(f for f in infiles if textfilename_match.search(f))

# One dict per input file; filled in while the files are parsed.
data_values = []

# Parenthesised single-argument form prints the same text on Python 2 and 3.
print("Parsing files in %s" % Directory)

# Parse every file in the input directory and collect, per file, a dict
# holding the date taken from the filename plus the value of every datapoint
# we are interested in. The dicts are appended to data_values.

# Compiled once, outside the loop; raw string so the "\d" escapes reach the
# regex engine untouched.
filedate_match = re.compile(r"(\d{4})-(\d{2})-(\d{2})")

for filename in textfiles:

	#print "Parsing file %s" % filename

	# Get the date from the filename. A file without a YYYY-MM-DD date in
	# its name cannot be given a row date, so skip it with a warning
	# instead of crashing on a failed match.
	date_match = filedate_match.search(filename)
	if date_match is None:
		sys.stderr.write("Warning: no date found in filename %s, skipping.\n" % filename)
		continue

	# group(0) is the whole matched "YYYY-MM-DD" text
	file_values = {"date": date_match.group(0)}

	# "with" closes the file even if reading it raises
	with open(os.path.join(Directory, filename), 'r') as TABLE:

		# get the values for the datapoints we are interested in
		for line in TABLE:
			values = line.split("\t")

			# Lines with fewer than three tab-separated fields (for
			# example blank lines) carry no name/value pair; skip them.
			if len(values) < 3:
				continue

			datapoint_name = values[1]
			if datapoint_name in datapoints:
				file_values[datapoint_name] = values[2].rstrip()

	data_values.append(file_values)

# Parenthesised single-argument form prints the same text on Python 2 and 3.
print("Writing table to %s" % Outfile)

# Write a tab-separated table: one header row, then one row per parsed
# input file. "with" closes the output file even if a write fails.
with open(Outfile, 'w') as OUT:

	# print headers: "date" followed by the datapoint names
	OUT.write("%s\n" % "\t".join(["date"] + datapoints))

	# print values; a datapoint that was not found in a file is written
	# as "N/A" so every row has the same number of columns
	for values in data_values:
		value_list = [values["date"]]
		value_list.extend(values.get(name, "N/A") for name in datapoints)
		OUT.write("%s\n" % "\t".join(value_list))

Running the script:

python stattables.py -d ljstats/text/ -o ljstattable.txt
python stattables.py -d ijstats/text/ -o ijstattable.txt

The R code for LJ:

# Plot the six LiveJournal activity series from ljstattable.txt as lines on
# a single panel and save the result to a PDF file.
library(zoo)

# One colour and one legend label per data column, in column order.
linecolors <- c("navyblue", "blue", "skyblue2", "red4", "firebrick3", "rosybrown1")
linelabels <- c("Active in past day", "Active in past week", "Active in past 30 days",
                "Updated in past day", "Updated in past week", "Updated in past 30 days")

# Read the table ("N/A" entries become NA) and drop every row that has a
# missing value.
ljstats <- na.omit(read.table(file = "ljstattable.txt", header = TRUE, na.strings = "N/A"))

# Columns 2-7 hold the series; column 1 holds the dates that index them.
zooljstats <- zoo(ljstats[, 2:7], order.by = as.Date(as.character(ljstats[, 1])))

# Open the PDF device; everything drawn until dev.off() goes into the file.
onefile <- TRUE
pdf(file = if (onefile) "LJStatsGraph.pdf" else "none",
    width = 12, height = 8, title = "LiveJournal Stats")

plot(zooljstats, plot.type = "single", col = linecolors,
     main = "Activity Statistics for LiveJournal",
     xlab = "Date", ylab = "Number of accounts")
legend(x = "topleft", legend = linelabels, fill = linecolors,
       xjust = 0.5, ncol = 2, inset = 0.2, title = "Key")

# Close the device so the PDF is written out.
dev.off()

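Additionally, make a graph for IJ if you so choose. Run this in the same R session as the LJ code above, since it reuses library(zoo), linecolors and linelabels from it: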

# Plot the six InsaneJournal activity series from ijstattable.txt as lines on
# a single panel and save the result to a PDF file.
# Uses the zoo package plus linecolors and linelabels, which must already be
# loaded/defined in the session.

# Read the table ("N/A" entries become NA) and drop every row that has a
# missing value.
ijstats <- na.omit(read.table(file = "ijstattable.txt", header = TRUE, na.strings = "N/A"))

# Columns 2-7 hold the series; column 1 holds the dates that index them.
zooijstats <- zoo(ijstats[, 2:7], order.by = as.Date(as.character(ijstats[, 1])))

# Open the PDF device; everything drawn until dev.off() goes into the file.
onefile <- TRUE
pdf(file = if (onefile) "IJStatsGraph.pdf" else "none",
    width = 12, height = 8, title = "InsaneJournal Stats")

plot(zooijstats, plot.type = "single", col = linecolors,
     main = "Activity Statistics for InsaneJournal",
     xlab = "Date", ylab = "Number of accounts")
legend(x = "topright", legend = linelabels, fill = linecolors,
       xjust = 0.5, ncol = 2, inset = 0.1, title = "Key")

# Close the device so the PDF is written out.
dev.off()