#!/usr/bin/python
"""Leave-one-out cross validation driver for an external classifier script.

The training file is tab separated.  Its first three lines are headers:
attribute names, attribute types, and a line in which the target column is
marked with the word ``class``.  Every following line is one example.

For each example the script writes a training file holding every other
example and a one-row sample file, runs the classifier script on them,
captures its standard output, and compares the ``prediction`` column of
that output with the value of the target column.
"""

import argparse
import csv
import os
import subprocess
import sys
import tempfile


def parse_arguments(argv=None):
    """Parse the command line and return the options as a dict.

    ``argv`` defaults to ``sys.argv[1:]`` when it is None.
    """
    parser = argparse.ArgumentParser(description='Cross validation script.')
    parser.add_argument('--training', required=True,
                        help='File in CSV format containing training examples.')
    parser.add_argument('--script', required=True,
                        help='The script that performs classification.')
    parser.add_argument('--options', required=False, default="",
                        help='Other options to pass to the classifier script.')
    return vars(parser.parse_args(argv))


def find_target_class(attribute_names, class_line):
    """Return the name of the attribute whose column is marked ``class``.

    Both arguments are raw tab-separated header lines.  When several columns
    are marked, the last one wins.

    Raises:
        ValueError: if no column is marked ``class`` or the marked column
            has no matching attribute name.
    """
    names = attribute_names.split('\t')
    target = None
    for index, marker in enumerate(class_line.split('\t')):
        if marker.rstrip() != 'class':
            continue
        if index >= len(names):
            raise ValueError(
                "column %d is marked 'class' but has no attribute name" % index)
        target = names[index].rstrip()
    if target is None:
        raise ValueError("no column is marked 'class' in the training file")
    return target


def load_training_data(path):
    """Read the training file.

    Returns a tuple ``(attribute_names, attribute_types, class_line, data)``
    where the first three items are the raw header lines and ``data`` is the
    list of remaining lines, each still carrying its line terminator.
    """
    with open(path) as source:
        attribute_names = source.readline()
        attribute_types = source.readline()
        class_line = source.readline()
        data = source.readlines()
    return attribute_names, attribute_types, class_line, data


def make_temp_file():
    """Create an empty temporary file and return its path.

    mkstemp creates the file atomically, unlike the deprecated mktemp which
    only returns a name that another process could claim first.
    """
    descriptor, path = tempfile.mkstemp()
    os.close(descriptor)
    return path


def build_command(script, training_path, sample_path, options):
    """Return the shell command line that runs the classifier."""
    return (script + ' --training ' + training_path +
            ' --sample ' + sample_path + ' ' + options)


def format_result_line(number, target, prediction):
    """Return the report line for one evaluated test."""
    outcome = 'success' if target == prediction else 'failure'
    return 'test  {} target: {} , prediction:  {} {}'.format(
        number, target, prediction, outcome)


def format_summary(tests, errors):
    """Return the final summary line.

    The error rate is reported as ``n/a`` when no test was evaluated, which
    avoids dividing by zero.
    """
    if tests == 0:
        return 'Tests: 0 , Errors: {} , Error rate: n/a'.format(errors)
    rate = round(float(errors) / tests * 100, 1)
    return 'Tests: {} , Errors: {} , Error rate: {} %'.format(
        tests, errors, str(rate))


def cross_validate(training, script, options=''):
    """Run leave-one-out cross validation and return ``(tests, errors)``.

    A report line is printed for every row found in the classifier output.

    Raises:
        ValueError: if the training file does not mark a target column.
    """
    attribute_names, attribute_types, class_line, data = \
        load_training_data(training)
    target_class = find_target_class(attribute_names, class_line)

    training_path = make_temp_file()
    sample_path = make_temp_file()
    output_path = make_temp_file()
    # --script and --options are free-form and may themselves be shell
    # commands (e.g. "python classify.py"), so the command goes through
    # the shell.
    command = build_command(script, training_path, sample_path, options)

    tests = 0
    errors = 0
    try:
        for held_out, sample in enumerate(data):
            # Training set: the three header lines plus every other example.
            with open(training_path, 'w') as training_file:
                training_file.write(
                    attribute_names + attribute_types + class_line)
                training_file.writelines(data[:held_out])
                training_file.writelines(data[held_out + 1:])
            # Sample set: the attribute names plus the held-out example.
            with open(sample_path, 'w') as sample_file:
                sample_file.write(attribute_names)
                sample_file.write(sample)

            with open(output_path, 'w') as output_file:
                status = subprocess.call(
                    command, shell=True, stdout=output_file)
            if status != 0:
                # Keep going: whatever the classifier did print is still
                # evaluated, but the failure should not pass unnoticed.
                sys.stderr.write(
                    'warning: classifier exited with status %d\n' % status)

            with open(output_path) as output_file:
                for row in csv.DictReader(output_file, delimiter='\t'):
                    tests += 1
                    target = row[target_class]
                    prediction = row['prediction']
                    if target != prediction:
                        errors += 1
                    print(format_result_line(tests, target, prediction))
    finally:
        # Remove the temporary files even when a fold fails.
        for path in (training_path, sample_path, output_path):
            try:
                os.remove(path)
            except OSError:
                pass
    return tests, errors


def main(argv=None):
    """Script entry point: parse options, cross validate, print a summary."""
    opt = parse_arguments(argv)
    try:
        tests, errors = cross_validate(
            opt['training'], opt['script'], opt['options'])
    except ValueError as exc:
        sys.exit('error: %s' % exc)
    print(format_summary(tests, errors))


if __name__ == '__main__':
    main()