def parse_model(d, ch):
    """Build the layer stack described by model dict `d`.

    Args:
        d: Model description. Reads the keys "anchors", "nc", "depth_multiple",
            "width_multiple", "backbone" and "head", plus the optional keys
            "activation" and "channel_multiple" (treated as 8 when missing or falsy).
            Each backbone/head entry is a `(from, number, module, args)` row.
            The `args` lists inside `d` are modified in place while parsing.
        ch: Sequence of channel counts; `ch[-1]` seeds the running output channel
            count and `ch[f]` is read for the first layer.

    Returns:
        A tuple `(model, save)` where `model` is an `nn.Sequential` of the built
        layers and `save` is the sorted list of layer indices whose outputs are
        consumed by later layers.

    Side effects:
        Overrides `Conv.default_act` when `d` provides an "activation" entry, and
        logs one table row per layer through `LOGGER`.
    """
    LOGGER.info(f"\n{'':>3}{'from':>18}{'n':>3}{'params':>10}{'module':<40}{'arguments':<30}")
    anchors, nc, gd, gw, act, ch_mul = (
        d["anchors"],
        d["nc"],
        d["depth_multiple"],
        d["width_multiple"],
        d.get("activation"),
        d.get("channel_multiple"),
    )
    if act:
        # NOTE(security): eval() executes arbitrary expressions taken from the config;
        # only load model descriptions from trusted sources.
        Conv.default_act = eval(act)  # redefine default activation, i.e. Conv.default_act = nn.SiLU()
        LOGGER.info(f"{colorstr('activation:')}{act}")  # print
    if not ch_mul:
        ch_mul = 8
    na = (len(anchors[0]) // 2) if isinstance(anchors, list) else anchors  # number of anchors
    no = na * (nc + 5)  # number of outputs = anchors * (classes + 5)

    # Loop-invariant module groups, built once instead of on every iteration.
    channel_scaled = {
        Conv,
        GhostConv,
        Bottleneck,
        GhostBottleneck,
        SPP,
        SPPF,
        DWConv,
        MixConv2d,
        Focus,
        CrossConv,
        BottleneckCSP,
        C3,
        C3TR,
        C3SPP,
        C3Ghost,
        nn.ConvTranspose2d,
        DWConvTranspose2d,
        C3x,
    }
    repeats_as_arg = {BottleneckCSP, C3, C3TR, C3Ghost, C3x}
    detection_heads = {Detect, Segment}

    layers, save, c2 = [], [], ch[-1]  # layers, savelist, ch out
    for i, (f, n, m, args) in enumerate(d["backbone"] + d["head"]):  # from, number, module, args
        # NOTE(security): module names and string args are eval()'d as well (see above).
        m = eval(m) if isinstance(m, str) else m  # eval strings
        for j, a in enumerate(args):
            # Strings that do not resolve to a known name are kept as plain strings.
            with contextlib.suppress(NameError):
                args[j] = eval(a) if isinstance(a, str) else a  # eval strings

        # n_ keeps the scaled repeat count for logging; n may be reset to 1 below.
        n = n_ = max(round(n * gd), 1) if n > 1 else n  # depth gain
        if m in channel_scaled:
            c1, c2 = ch[f], args[0]
            if c2 != no:  # if not output: keep the output channel count a multiple of ch_mul
                c2 = make_divisible(c2 * gw, ch_mul)

            args = [c1, c2, *args[1:]]
            if m in repeats_as_arg:
                args.insert(2, n)  # number of repeats is handled inside the module
                n = 1
        elif m is nn.BatchNorm2d:
            args = [ch[f]]
        elif m is Concat:
            c2 = sum(ch[x] for x in f)
        # TODO: channel, gw, gd
        elif m in detection_heads:
            args.append([ch[x] for x in f])
            if isinstance(args[1], int):  # number of anchors
                # Expand an anchor count into one placeholder anchor list per input layer.
                args[1] = [list(range(args[1] * 2))] * len(f)
            if m is Segment:
                args[3] = make_divisible(args[3] * gw, ch_mul)
        elif m is Contract:
            c2 = ch[f] * args[0] ** 2
        elif m is Expand:
            c2 = ch[f] // args[0] ** 2
        else:
            c2 = ch[f]

        m_ = nn.Sequential(*(m(*args) for _ in range(n))) if n > 1 else m(*args)  # module
        t = str(m)[8:-2].replace("__main__.", "")  # module type: strip "<class '" and "'>"
        n_params = sum(x.numel() for x in m_.parameters())  # number params
        m_.i, m_.f, m_.type, m_.np = i, f, t, n_params  # attach index, 'from' index, type, number params
        LOGGER.info(f"{i:>3}{str(f):>18}{n_:>3}{n_params:10.0f}{t:<40}{str(args):<30}")  # print
        # x % i turns negative (relative) source indices into absolute layer indices.
        save.extend(x % i for x in ([f] if isinstance(f, int) else f) if x != -1)  # append to savelist
        layers.append(m_)
        if i == 0:
            ch = []  # drop the input channels; from here on ch[k] is the output of layer k
        ch.append(c2)
    return nn.Sequential(*layers), sorted(save)
def _forward_once(self, x, profile=False, visualize=False):
    """Run `x` through every layer of `self.model` in order and return the final output.

    Args:
        x: Input handed to the first layer.
        profile: When truthy, `self._profile_one_layer` is called for each layer
            before it runs.
        visualize: When truthy, `feature_visualization` is called after each layer
            with this value passed as `save_dir`.

    Returns:
        The output of the last layer, or `x` unchanged when `self.model` is empty.
    """
    y, dt = [], []  # per-layer outputs; dt is handed to _profile_one_layer
    for m in self.model:
        if m.f != -1:  # if not from previous layer
            # From earlier layers: an int picks one saved output, otherwise a list of
            # inputs is gathered (-1 meaning the current x). A layer receiving such a
            # list has to combine the inputs itself, e.g. by concatenating them.
            x = y[m.f] if isinstance(m.f, int) else [x if j == -1 else y[j] for j in m.f]
        if profile:
            self._profile_one_layer(m, x, dt)
        x = m(x)  # run
        y.append(x if m.i in self.save else None)  # keep only outputs listed in self.save
        if visualize:
            feature_visualization(x, m.type, m.i, save_dir=visualize)
    return x
缓存
YOLO 提供了 cache 这个选项, 有三种取值:
ram 在dataset里面一次将图片全部加载到内存中, 这种在使用的时候是最快的
disk 会在图片路径下创建同名的 .npy 文件, 需要时直接加载该文件; 该文件保存的是图片处理后的 numpy 数组, 因此加载更快