#!/usr/bin/python
"""Leave-one-out cross validation driver for an external classifier script.

For every data row of the training CSV, the row is written to a one-sample
"test" file and all remaining rows to a "training" file (both prefixed with
the original header line).  The classifier given by ``--script`` is then run
as::

    <script> --training <training file> --sample <test file> --target <name>

Its standard output is parsed as ';'-delimited CSV that must contain the
target column and a ``prediction`` column.  Rows where the two differ are
counted as errors, and a one-line summary is printed at the end.
"""
import argparse
import csv
import os
import subprocess
import sys
import tempfile

try:
    from shlex import quote as _shell_quote  # Python 3
except ImportError:
    from pipes import quote as _shell_quote  # Python 2


def leave_one_out_splits(rows):
    """Yield ``(training_rows, sample_row)`` once for every row in *rows*.

    ``training_rows`` is a new list holding every row except the held-out
    ``sample_row``, in their original order.
    """
    for held_out, sample_row in enumerate(rows):
        yield rows[:held_out] + rows[held_out + 1:], sample_row


def build_command(script, training_path, sample_path, target):
    """Return the shell command line that runs the classifier for one fold.

    *script* is inserted verbatim so that it may itself be a shell command
    (e.g. ``"python classify.py"``).  The file names and the target column
    name are shell-quoted so that spaces or metacharacters in them cannot
    break the command.
    """
    return '%s --training %s --sample %s --target %s' % (
        script,
        _shell_quote(training_path),
        _shell_quote(sample_path),
        _shell_quote(target),
    )


def score_predictions(rows, target):
    """Count tests and misclassifications in the classifier output.

    *rows* is an iterable of mappings (one per output row) that contain the
    *target* key and a ``'prediction'`` key.  Returns ``(tests, errors)``.
    Raises ``KeyError`` if a row lacks either key.
    """
    tests = 0
    errors = 0
    for row in rows:
        tests += 1
        if row[target] != row['prediction']:
            errors += 1
    return tests, errors


def format_summary(tests, errors):
    """Return the summary line for the given test and error counts.

    The error rate is a percentage rounded to one decimal place.  When no
    test was scored the rate is reported as ``n/a`` instead of dividing by
    zero.
    """
    if tests:
        parts = ('Tests:', tests, ', Errors:', errors, ', Error rate:',
                 round(float(errors) / tests * 100, 1), '%')
    else:
        parts = ('Tests:', tests, ', Errors:', errors, ', Error rate:', 'n/a')
    return ' '.join(str(part) for part in parts)


def _remove_quietly(path):
    """Delete *path*, ignoring the error if it cannot be removed."""
    try:
        os.remove(path)
    except OSError:
        pass


def cross_validate(script, target, header, rows):
    """Run the classifier once per row of *rows* and return ``(tests, errors)``.

    *header* is the CSV header line (including its line terminator) and
    *rows* the raw data lines.  Three temporary files (training set, test
    sample, classifier output) are reused across folds and always removed
    afterwards, even if a fold fails.
    """
    tests = 0
    errors = 0
    paths = []
    try:
        # mkstemp creates the files atomically, unlike the race-prone mktemp.
        for _ in range(3):
            handle, path = tempfile.mkstemp()
            os.close(handle)
            paths.append(path)
        training_path, sample_path, output_path = paths

        for training_rows, sample_row in leave_one_out_splits(rows):
            with open(training_path, 'w') as training_file:
                training_file.write(header)
                training_file.writelines(training_rows)
            with open(sample_path, 'w') as sample_file:
                sample_file.write(header)
                sample_file.write(sample_row)

            command = build_command(script, training_path, sample_path, target)
            # Opening with 'w' truncates the previous fold's output, matching
            # the effect of a shell '>' redirection.
            with open(output_path, 'w') as output_file:
                status = subprocess.call(command, shell=True,
                                         stdout=output_file)
            if status != 0:
                # Keep going (best effort), but do not hide the failure.
                sys.stderr.write(
                    'Warning: classifier exited with status %d\n' % status)

            with open(output_path) as output_file:
                reader = csv.DictReader(output_file, delimiter=';')
                fold_tests, fold_errors = score_predictions(reader, target)
            tests += fold_tests
            errors += fold_errors
    finally:
        for path in paths:
            _remove_quietly(path)
    return tests, errors


def main(argv=None):
    """Parse the command line, run the cross validation, print the summary.

    *argv* defaults to ``sys.argv[1:]``.  Returns the process exit status.
    """
    parser = argparse.ArgumentParser(description='Cross validation script.')
    parser.add_argument('--training', required=True,
                        help='File in CSV format containing training examples.')
    parser.add_argument('--target', required=True,
                        help='Name of the column containing the class to predict.')
    parser.add_argument('--script', required=True,
                        help='The script that performs classification.')
    opt = vars(parser.parse_args(argv))

    with open(opt['training']) as training_source:
        lines = training_source.readlines()
    if not lines:
        parser.error('training file %s is empty' % opt['training'])

    header = lines[0]
    # Blank lines are not samples; holding one out would waste a fold.
    rows = [line for line in lines[1:] if line.strip()]

    tests, errors = cross_validate(opt['script'], opt['target'], header, rows)
    print(format_summary(tests, errors))
    return 0


if __name__ == '__main__':
    sys.exit(main())