This script lets you trim one or more folders full of files down to a fixed number of files, chosen by random sampling. My use case was selecting a subsample of training images for CV research, but it could be used for whatever you like!
#!/usr/bin/python
# Programmed by Alex Knaust 2013-03-12

import os
import sys
import random


def sampleFiles(k, dirs):
    '''Pick the files to delete so that k files remain in each directory.

    Nothing is deleted here; the caller removes the returned paths.

    Files are paired across directories by their position in the sorted
    listing of each directory, so the directories are expected to hold
    corresponding files (same names, same counts).

    k    -- number of files to keep in every directory
    dirs -- sequence of directory paths to sample from

    Returns a list of tuples. Each tuple holds one path per directory (in
    the order of dirs) and the list has (file count - k) entries.

    Prints a message and exits with status 1 when k is not smaller than the
    number of files in the first directory. Raises ValueError when k is
    negative or when the directories do not all hold the same number of
    files.
    '''
    listings = [sorted(os.listdir(d)) for d in dirs]
    totalFiles = len(listings[0])

    if k >= totalFiles:
        print('Not enough files to sample from')
        sys.exit(1)
    if k < 0:
        raise ValueError('Number of files to keep must not be negative, '
                         'got %d' % k)

    # zip() below stops at the shortest listing, which would silently pair
    # up the wrong files; refuse to continue instead.
    for d, names in zip(dirs, listings):
        if len(names) != totalFiles:
            raise ValueError(
                'Directory %s holds %d files but %s holds %d'
                % (d, len(names), dirs[0], totalFiles))

    stuff = [[os.path.join(d, p) for p in names]
             for d, names in zip(dirs, listings)]
    # random.sample needs a real sequence; zip() is lazy on Python 3.
    groups = list(zip(*stuff))
    return random.sample(groups, totalFiles - k)


def _main(argv):
    '''Command line driver: sample, ask for confirmation, then delete.'''
    if len(argv) < 3:
        print('Pass number of files to sample, and directories to sample from')
        sys.exit(1)

    try:
        k = int(argv[1])
    except ValueError:
        sys.stderr.write('Number of files must be an integer, got %r\n'
                         % argv[1])
        sys.exit(1)
    dirs = argv[2:]

    try:
        destroy = sampleFiles(k, dirs)
    except (ValueError, OSError) as err:
        sys.stderr.write('%s\n' % err)
        sys.exit(1)

    # raw_input only exists on Python 2; input is its Python 3 equivalent.
    try:
        ask = raw_input
    except NameError:
        ask = input

    try:
        ans = ask('Will delete %d files, OK? y/n : '
                  % (len(destroy) * len(dirs)))
    except EOFError:
        ans = ''

    # Deleting is irreversible, so only an explicit "y..." counts as consent;
    # an empty or unrecognised answer aborts.
    if not ans.strip().lower().startswith('y'):
        print('Goodbye')
        sys.exit(1)

    # do the actual deletion
    for files in destroy:
        for f in files:
            os.remove(f)


if __name__ == '__main__':
    _main(sys.argv)