|
|
- #! /usr/bin/env python
-
import math
import random
import sys
from optparse import OptionParser
-
# Minimum unit of transfer to the RAID, in bytes. Chunk sizes and request
# sizes must be multiples of this value and are divided by it to get
# block counts.
BLOCKSIZE = 4096
-
def convert(size):
    """Turn a size string such as '4k', '16M' or '1g' into an integer.

    A trailing k/m/g (either case) multiplies the leading integer by
    1024, 1024**2 or 1024**3 respectively; a string without one of those
    suffixes is parsed as a plain integer.
    """
    multipliers = {
        'k': 1024, 'K': 1024,
        'm': 1024 * 1024, 'M': 1024 * 1024,
        'g': 1024 * 1024 * 1024, 'G': 1024 * 1024 * 1024,
    }
    suffix = size[-1]
    if suffix in multipliers:
        return int(size[:-1]) * multipliers[suffix]
    return int(size)
-
class disk:
    """Very simple single-disk timing model.

    Each request is classified as sequential, nearly sequential or random
    relative to the previously requested block, and a service time is
    added to the disk's running utilization total.
    """

    def __init__(self, seekTime=10, xferTime=0.1, queueLen=8):
        """Create an idle disk.

        seekTime -- time charged for a seek, in milliseconds (constant)
        xferTime -- time to transfer one block, in milliseconds
        queueLen -- length of the scheduling queue (stored, not used here)
        """
        self.seekTime = seekTime
        self.xferTime = xferTime

        # length of scheduling queue
        self.queueLen = queueLen

        # current location: make it negative so that whatever
        # the first read is, it causes a seek
        self.currAddr = -10000

        # queue
        self.queue = []

        # disk geometry
        self.numTracks = 100
        self.blocksPerTrack = 100
        self.blocksPerDisk = self.numTracks * self.blocksPerTrack

        # stats
        self.countIO = 0
        self.countSeq = 0
        self.countNseq = 0
        self.countRand = 0
        self.utilTime = 0

    def stats(self):
        """Return (I/O count, sequential, nearly sequential, random, busy time)."""
        return (self.countIO, self.countSeq, self.countNseq, self.countRand, self.utilTime)

    def enqueue(self, addr):
        """Account for one block request at block address `addr`."""
        assert(addr < self.blocksPerDisk)
        self.countIO += 1

        # A track number is the block address divided by the blocks held
        # on one track; floor division keeps it an integer on Python 2
        # and Python 3 alike.
        currTrack = self.currAddr // self.blocksPerTrack
        newTrack = addr // self.blocksPerTrack

        # Signed step decides "sequential"; its magnitude decides distance
        # and cost, so a backward access can never be charged negative time.
        delta = addr - self.currAddr
        distance = abs(delta)

        # if on the same track, or within a track's worth of blocks...
        if currTrack == newTrack or distance < self.blocksPerTrack:
            if delta == 1:
                self.countSeq += 1
            else:
                self.countNseq += 1
            self.utilTime += (distance * self.xferTime)
        else:
            self.countRand += 1
            self.utilTime += (self.seekTime + self.xferTime)
        self.currAddr = addr

    def go(self):
        """Return the total busy time accumulated so far."""
        return self.utilTime
-
class raid:
    """Simulated RAID array (levels 0, 1, 4 and 5) layered over `disk` objects.

    Logical block requests are mapped to (disk, offset) pairs and passed to
    the member disks' `enqueue`. Outside timing mode the logical and/or
    physical operations are also printed.

    All output uses single-argument print(...) calls or sys.stdout.write so
    the class produces the same text under Python 2 and Python 3.
    """

    def __init__(self, chunkSize='4k', numDisks=4, level=0, timing=False, reverse=False, solve=False, raid5type='LS'):
        """Build the array and its member disks.

        chunkSize -- chunk size as a string understood by convert()
        numDisks  -- number of member disks
        level     -- RAID level (0, 1, 4 or 5)
        timing    -- when true, no per-request text is printed
        reverse   -- when true, print physical operations instead of logical
        solve     -- when true, print both logical and physical operations
        raid5type -- RAID-5 layout: 'LS' or 'LA'

        Prints a message and exits with status 1 if the chunk size is not a
        multiple of BLOCKSIZE, or if RAID-1 is given an odd number of disks.
        """
        chunkSize = int(convert(chunkSize))
        # chunk size is kept in blocks, not bytes
        self.chunkSize = chunkSize // BLOCKSIZE
        self.numDisks = numDisks
        self.raidLevel = level
        self.timing = timing
        self.reverse = reverse
        self.solve = solve
        self.raid5type = raid5type

        # recomputed by enqueue() for every request; initialised here so
        # the helper methods can be called before the first enqueue()
        self.printPhysical = False

        if (chunkSize % BLOCKSIZE) != 0:
            # last field is the byte remainder that makes the size invalid
            print('chunksize (%d) must be multiple of blocksize (%d): %d' % (chunkSize, BLOCKSIZE, chunkSize % BLOCKSIZE))
            exit(1)
        if self.raidLevel == 1 and numDisks % 2 != 0:
            print('raid1: disks (%d) must be a multiple of two' % numDisks)
            exit(1)

        if self.raidLevel == 4:
            self.blocksInStripe = (self.numDisks - 1) * self.chunkSize
            self.pdisk = self.numDisks - 1
        if self.raidLevel == 5:
            self.blocksInStripe = (self.numDisks - 1) * self.chunkSize
            self.pdisk = -1

        self.disks = [disk() for _ in range(self.numDisks)]

    # print per-disk stats
    def stats(self, totalTime):
        """Print one utilization line per disk, relative to `totalTime`."""
        for d in range(self.numDisks):
            s = self.disks[d].stats()
            # with no elapsed time there is nothing to be busy for;
            # report 0% instead of dividing by zero
            if totalTime == 0:
                busy = 0.0
            else:
                busy = 100.0 * float(s[4]) / totalTime
            print('disk:%d busy: %.2f I/Os: %5d (sequential:%d nearly:%d random:%d)' % (d, busy, s[0], s[1], s[2], s[3]))

    # global enqueue function
    def enqueue(self, addr, size, isWrite):
        """Issue one logical request of `size` blocks starting at block `addr`."""
        # should we print out the logical operation?
        if not self.timing:
            if self.solve or not self.reverse:
                if isWrite:
                    print('LOGICAL WRITE to addr:%d size:%d' % (addr, size * BLOCKSIZE))
                else:
                    print('LOGICAL READ from addr:%d size:%d' % (addr, size * BLOCKSIZE))
                if not self.solve:
                    print(' Physical reads/writes?\n')
            else:
                print('LOGICAL OPERATION is ?')

        # should we print out the physical operations?
        self.printPhysical = bool((not self.timing) and (self.solve or self.reverse))

        if self.raidLevel == 0:
            self.enqueue0(addr, size, isWrite)
        elif self.raidLevel == 1:
            self.enqueue1(addr, size, isWrite)
        elif self.raidLevel == 4 or self.raidLevel == 5:
            self.enqueue45(addr, size, isWrite)

    # process disk workloads one at a time, returning final completion time
    def go(self):
        """Return the largest busy time over all disks (0 if none is positive)."""
        return max([0] + [d.go() for d in self.disks])

    # helper functions
    def _emitPhysical(self, text, doNewline):
        """Write one physical-operation fragment, optionally ending the line.

        Stands in for the Python 2 statement `print text,`: that form
        separates the fragment from whatever is printed next with one
        space. Every fragment here is followed by another print, so the
        space is written immediately.
        """
        sys.stdout.write(text + ' ')
        if doNewline:
            sys.stdout.write('\n')

    def _finishRequest(self):
        """Print the blank line that ends a request's physical output."""
        if not self.timing and self.printPhysical:
            print('')

    def doSingleRead(self, disk, off, doNewline=False):
        """Read block `off` on disk index `disk`, printing it if enabled."""
        if self.printPhysical:
            self._emitPhysical(' read [disk %d, offset %d] ' % (disk, off), doNewline)
        self.disks[disk].enqueue(off)

    def doSingleWrite(self, disk, off, doNewline=False):
        """Write block `off` on disk index `disk`, printing it if enabled."""
        if self.printPhysical:
            self._emitPhysical(' write [disk %d, offset %d] ' % (disk, off), doNewline)
        self.disks[disk].enqueue(off)

    #
    # mapping for RAID 0 (striping)
    #
    def bmap0(self, bnum):
        """Map logical block `bnum` to (disk, offset) for striping."""
        cnum = bnum // self.chunkSize
        coff = bnum % self.chunkSize
        return (cnum % self.numDisks, (cnum // self.numDisks) * self.chunkSize + coff)

    def enqueue0(self, addr, size, isWrite):
        """Issue a RAID-0 request: one physical operation per logical block."""
        for b in range(addr, addr + size):
            (disk, off) = self.bmap0(b)
            if isWrite:
                self.doSingleWrite(disk, off, True)
            else:
                self.doSingleRead(disk, off, True)
        self._finishRequest()

    #
    # mapping for RAID 1 (mirroring)
    #
    def bmap1(self, bnum):
        """Map logical block `bnum` to (disk, mirror disk, offset)."""
        pairs = self.numDisks // 2
        cnum = bnum // self.chunkSize
        coff = bnum % self.chunkSize
        disk = 2 * (cnum % pairs)
        return (disk, disk + 1, (cnum // pairs) * self.chunkSize + coff)

    def enqueue1(self, addr, size, isWrite):
        """Issue a RAID-1 request: writes go to both copies, reads to one."""
        for b in range(addr, addr + size):
            (disk1, disk2, off) = self.bmap1(b)
            if isWrite:
                self.doSingleWrite(disk1, off, False)
                self.doSingleWrite(disk2, off, True)
            else:
                # the raid-1 read balancing algorithm is here;
                # could be something more intelligent --
                # instead, it is just based on the disk offset
                # to produce something easily reproducible
                if off % 2 == 0:
                    self.doSingleRead(disk1, off, True)
                else:
                    self.doSingleRead(disk2, off, True)
        self._finishRequest()

    #
    # mapping for RAID 4 (parity disk)
    #
    # assumes (for now) that there is just one parity disk
    #
    def bmap4(self, bnum):
        """Map logical block `bnum` to (disk, offset) over the data disks."""
        dataDisks = self.numDisks - 1
        cnum = bnum // self.chunkSize
        coff = bnum % self.chunkSize
        return (cnum % dataDisks, (cnum // dataDisks) * self.chunkSize + coff)

    def pmap4(self, snum):
        """Return the parity disk for stripe `snum` (the same for every stripe)."""
        return self.pdisk

    #
    # mapping for RAID 5 (rotated parity)
    #
    def __bmap5(self, bnum):
        """Map logical block `bnum` to (data disk, parity disk, offset)."""
        cnum = bnum // self.chunkSize
        coff = bnum % self.chunkSize
        ddsk = cnum // (self.numDisks - 1)
        doff = (ddsk * self.chunkSize) + coff
        disk = cnum % (self.numDisks - 1)
        col = (ddsk % self.numDisks)
        # parity starts on the last disk and moves one disk left per row
        pdsk = (self.numDisks - 1) - col

        # supports left-asymmetric and left-symmetric layouts
        if self.raid5type == 'LA':
            # data keeps its natural order and skips over the parity disk
            if disk >= pdsk:
                disk += 1
        elif self.raid5type == 'LS':
            disk = (disk - col) % (self.numDisks)
        else:
            print('error: no such RAID scheme')
            exit(1)
        assert(disk != pdsk)
        return (disk, pdsk, doff)

    def bmap5(self, bnum):
        """Return (disk, offset) for logical block `bnum`."""
        (disk, pdisk, off) = self.__bmap5(bnum)
        return (disk, off)

    def pmap5(self, snum):
        """Return the parity disk for stripe `snum`."""
        (disk, pdisk, off) = self.__bmap5(snum * self.blocksInStripe)
        return pdisk

    # RAID 4/5 helper routine to write out some blocks in a stripe
    def doPartialWrite(self, stripe, begin, end, bmap, pmap):
        """Write logical blocks [begin, end) of `stripe`, updating parity.

        Picks subtractive parity (read the old data being overwritten and
        the old parity) or additive parity (read the blocks of the stripe
        that are not being written) by comparing how many reads each
        needs, then writes the data blocks followed by the parity blocks.
        """
        numWrites = end - begin
        pdisk = pmap(stripe)

        if (numWrites + 1) <= (self.blocksInStripe - numWrites):
            # SUBTRACTIVE PARITY
            offList = []
            for voff in range(begin, end):
                (disk, off) = bmap(voff)
                self.doSingleRead(disk, off)
                if off not in offList:
                    offList.append(off)
            lastIndex = len(offList) - 1
            for i, poff in enumerate(offList):
                self.doSingleRead(pdisk, poff, i == lastIndex)
        else:
            # ADDITIVE PARITY
            stripeBegin = stripe * self.blocksInStripe
            stripeEnd = stripeBegin + self.blocksInStripe
            for voff in range(stripeBegin, begin):
                (disk, off) = bmap(voff)
                # end the line here only if no trailing reads will follow
                self.doSingleRead(disk, off, (voff == (begin - 1)) and (end == stripeEnd))
            for voff in range(end, stripeEnd):
                (disk, off) = bmap(voff)
                self.doSingleRead(disk, off, voff == (stripeEnd - 1))

        # WRITES: same for additive or subtractive parity
        offList = []
        for voff in range(begin, end):
            (disk, off) = bmap(voff)
            self.doSingleWrite(disk, off)
            if off not in offList:
                offList.append(off)
        lastIndex = len(offList) - 1
        for i, poff in enumerate(offList):
            self.doSingleWrite(pdisk, poff, i == lastIndex)

    # RAID 4/5 enqueue routine
    def enqueue45(self, addr, size, isWrite):
        """Issue a RAID-4/5 request; writes are handled one stripe at a time."""
        if self.raidLevel == 4:
            (bmap, pmap) = (self.bmap4, self.pmap4)
        elif self.raidLevel == 5:
            (bmap, pmap) = (self.bmap5, self.pmap5)

        if not isWrite:
            for b in range(addr, addr + size):
                (disk, off) = bmap(b)
                self.doSingleRead(disk, off)
        else:
            # process the write request, one stripe at a time
            initStripe = addr // self.blocksInStripe
            finalStripe = (addr + size - 1) // self.blocksInStripe

            left = size
            begin = addr
            for stripe in range(initStripe, finalStripe + 1):
                endOfStripe = (stripe + 1) * self.blocksInStripe

                # take what is left of the request, capped at one stripe's
                # worth of blocks and at the end of the current stripe
                end = min(begin + min(left, self.blocksInStripe), endOfStripe)

                self.doPartialWrite(stripe, begin, end, bmap, pmap)

                left -= (end - begin)
                begin = end

        # for all cases, print this for pretty-ness in mapping mode
        self._finishRequest()
-
#
# main program
#
# Parses the command line, echoes the arguments, builds the RAID, feeds it
# a random or sequential workload, and then prints either the computed
# results or the questions to answer.
#
# Output uses single-argument print(...) calls and sizes use floor
# division so the script runs the same under Python 2 and Python 3.
#
parser = OptionParser()

parser.add_option('-s', '--seed', default=0, help='the random seed', action='store', type='int', dest='seed')
parser.add_option('-D', '--numDisks', default=4, help='number of disks in RAID', action='store', type='int', dest='numDisks')
parser.add_option('-C', '--chunkSize', default='4k', help='chunk size of the RAID', action='store', type='string', dest='chunkSize')
parser.add_option('-n', '--numRequests', default=10, help='number of requests to simulate', action='store', type='int', dest='numRequests')
parser.add_option('-S', '--reqSize', default='4k', help='size of requests', action='store', type='string', dest='size')
parser.add_option('-W', '--workload', default='rand', help='either "rand" or "seq" workloads', action='store', type='string', dest='workload')
parser.add_option('-w', '--writeFrac', default=0, help='write fraction (100->all writes, 0->all reads)', action='store', type='int', dest='writeFrac')
parser.add_option('-R', '--randRange', default=10000, help='range of requests (when using "rand" workload)', action='store', type='int', dest='range')
parser.add_option('-L', '--level', default=0, help='RAID level (0, 1, 4, 5)', action='store', type='int', dest='level')
parser.add_option('-5', '--raid5', default='LS', help='RAID-5 left-symmetric "LS" or left-asym "LA"', action='store', type='string', dest='raid5type')
parser.add_option('-r', '--reverse', default=False, help='instead of showing logical ops, show physical', action='store_true', dest='reverse')
parser.add_option('-t', '--timing', default=False, help='use timing mode, instead of mapping mode', action='store_true', dest='timing')
parser.add_option('-c', '--compute', default=False, help='compute answers for me', action='store_true', dest='solve')

(options, args) = parser.parse_args()

print('ARG blockSize %s' % BLOCKSIZE)
print('ARG seed %s' % options.seed)
print('ARG numDisks %s' % options.numDisks)
print('ARG chunkSize %s' % options.chunkSize)
print('ARG numRequests %s' % options.numRequests)
print('ARG reqSize %s' % options.size)
print('ARG workload %s' % options.workload)
print('ARG writeFrac %s' % options.writeFrac)
print('ARG randRange %s' % options.range)
print('ARG level %s' % options.level)
print('ARG raid5 %s' % options.raid5type)
print('ARG reverse %s' % options.reverse)
print('ARG timing %s' % options.timing)

print('')

# user input is checked explicitly rather than with assert, which is
# skipped entirely when Python runs with -O
if options.writeFrac < 0 or options.writeFrac > 100:
    print('error: write fraction (%d) must be between 0 and 100' % options.writeFrac)
    exit(1)
writeFrac = float(options.writeFrac) / 100.0

random.seed(options.seed)

size = convert(options.size)
if size % BLOCKSIZE != 0:
    print('error: request size (%d) must be a multiple of BLOCKSIZE (%d)' % (size, BLOCKSIZE))
    exit(1)
# request size in blocks
size = size // BLOCKSIZE

if options.workload in ('seq', 's', 'sequential'):
    workloadIsSequential = True
elif options.workload in ('rand', 'r', 'random'):
    workloadIsSequential = False
else:
    print('error: workload must be either r/rand/random or s/seq/sequential')
    exit(1)

if options.level not in (0, 1, 4, 5):
    print('error: RAID level (%d) must be 0, 1, 4, or 5' % options.level)
    exit(1)
if options.level != 0 and options.numDisks < 2:
    # applies to every redundant level, so name the one that was requested
    print('RAID-%d needs more than 1 disk' % options.level)
    exit(1)

if options.level == 5 and options.raid5type != 'LA' and options.raid5type != 'LS':
    print('Only two types of RAID-5 supported: left-asymmetric (LA) and left-symmetric (LS) (%s is not)' % options.raid5type)
    exit(1)

# instantiate RAID
r = raid(chunkSize=options.chunkSize, numDisks=options.numDisks, level=options.level, timing=options.timing,
         reverse=options.reverse, solve=options.solve, raid5type=options.raid5type)

# generate requests
off = 0
for i in range(options.numRequests):
    if workloadIsSequential:
        blk = off
        off += size
    else:
        blk = int(random.random() * options.range)
    # one random draw per request decides read vs. write, even when
    # writeFrac is 0, so the random sequence stays the same
    if random.random() < writeFrac:
        r.enqueue(blk, size, True)
    else:
        r.enqueue(blk, size, False)

# process requests
t = r.go()

# print out some final info, if needed
if not options.timing:
    print('')
    exit(0)

print('')
if options.solve:
    r.stats(t)
    print('')
    print('STAT totalTime %s' % t)
    print('')
else:
    print('Estimate how long the workload should take to complete.')
    print('- Roughly how many requests should each disk receive?')
    print('- How many requests are random, how many sequential?')
    print('')
|