In [ ]:
%install-location $cwd/swift-install
%install '.package(path: "$cwd/FastaiNotebook_05b_early_stopping")' FastaiNotebook_05b_early_stopping
In [ ]:
//export
import Path
import TensorFlow
import Python
In [ ]:
import FastaiNotebook_05b_early_stopping
In [ ]:
%include "EnableIPythonDisplay.swift"
IPythonDisplay.shell.enable_matplotlib("inline")
Out[ ]:
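The plotting calls later in the notebook (`plt.plot`, `plt.legend`, `plt.show`) assume a `plt` binding to matplotlib, which these notebooks normally re-export from an earlier module. If it is not in scope, a one-line Python-interop binding (not one of the original cells) restores it:
In [ ]:
// Only needed if `plt` is not already provided by an imported module.
let plt = Python.import("matplotlib.pyplot")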
In [ ]:
let data = mnistDataBunch(flat: false, bs: 512)
In [ ]:
let firstBatch = data.train.ds.first(where: { _ in true })!
let batchShape = firstBatch.xb.shape
let batchSize = batchShape.dimensions[0]
let exampleSideSize = batchShape.dimensions[1]
assert(exampleSideSize == batchShape.dimensions[2])
print("Batch size: \(batchSize)")
print("Example side size: \(exampleSideSize)")
// yb holds integer class labels with shape [batchSize], so its first
// dimension is the batch size, not the number of classes. Derive the
// class count from the labels instead (this assumes every class appears
// in the batch, which holds for MNIST at bs: 512).
let classCount = Int(firstBatch.yb.max().scalarized()) + 1
print("Class count: \(classCount)")
In [ ]:
firstBatch.xb.shape
Out[ ]:
In [ ]:
// export
extension Learner {
    /// Adds a trailing channel axis to each batch, turning flat
    /// [batch, height, width] MNIST images into the
    /// [batch, height, width, channels] layout that Conv2D expects.
    public class AddChannel: Delegate {
        public override func batchWillStart(learner: Learner) {
            learner.currentInput = learner.currentInput!.expandingShape(at: -1)
        }
    }
    public func makeAddChannel() -> AddChannel { return AddChannel() }
}
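The same hook can double as a quick diagnostic. As a minimal sketch (a hypothetical delegate, not part of the exported code), logging each batch's input shape looks like this:
In [ ]:
// Hypothetical diagnostic delegate: print the input shape of every batch.
extension Learner {
    public class ShapeLogger: Delegate {
        public override func batchWillStart(learner: Learner) {
            print("batch input shape: \(learner.currentInput!.shape)")
        }
    }
    public func makeShapeLogger() -> ShapeLogger { return ShapeLogger() }
}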
In [ ]:
//export
public struct CnnModel: Layer {
    public var convs: [FAConv2D<Float>]
    public var pool = FAGlobalAvgPool2D<Float>()
    public var linear: FADense<Float>

    public init(channelIn: Int, nOut: Int, filters: [Int]) {
        // Each stride-2 conv halves the spatial size while mapping
        // allFilters[i] channels to allFilters[i+1].
        let allFilters = [channelIn] + filters
        convs = Array(0..<filters.count).map { i in
            return FAConv2D(allFilters[i], allFilters[i+1], ks: 3, stride: 2)
        }
        linear = FADense<Float>(filters.last!, nOut)
    }

    @differentiable
    public func callAsFunction(_ input: TF) -> TF {
        // Conv stack -> global average pool -> linear classifier head.
        return linear(pool(convs(input)))
    }
}
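Calling `convs(input)` relies on an array of layers being callable as a layer itself, a conformance provided by the earlier notebook modules. A rough sketch of what that conformance looks like (for context only; the imported module already supplies an equivalent, so don't redeclare it):
In [ ]:
// Sketch only: thread the input through each layer in order, differentiably.
extension Array: Layer where Element: Layer, Element.Input == Element.Output {
    @differentiable
    public func callAsFunction(_ input: Element.Input) -> Element.Output {
        return differentiableReduce(input) { $1($0) }
    }
}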
In [ ]:
let model = CnnModel(channelIn: 1, nOut: 10, filters: [8, 16, 32, 32])
In [ ]:
// Test that data goes through the model as expected.
let predictions = model(firstBatch.xb.expandingShape(at: -1))
print(predictions.shape)
print(predictions[0])
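A further smoke test (an added cell, not from the original notebook): with random weights and 10 classes, the loss should sit near -log(1/10) ≈ 2.3. This assumes the two-argument softmaxCrossEntropy helper that the Learner uses below:
In [ ]:
// Untrained 10-class model: expect a loss close to ln(10) ≈ 2.3.
let initialLoss = softmaxCrossEntropy(predictions, firstBatch.yb)
print(initialLoss)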
In [ ]:
func optFunc(_ model: CnnModel) -> SGD<CnnModel> { return SGD(for: model, learningRate: 0.4) }
func modelInit() -> CnnModel { return CnnModel(channelIn: 1, nOut: 10, filters: [8, 16, 32, 32]) }
let learner = Learner(data: data, lossFunc: softmaxCrossEntropy, optFunc: optFunc, modelInit: modelInit)
let recorder = learner.makeDefaultDelegates(metrics: [accuracy])
learner.addDelegates([learner.makeNormalize(mean: mnistStats.mean, std: mnistStats.std),
                      learner.makeAddChannel()])
In [ ]:
// This happens on the GPU (if you have one and it's configured correctly).
// I tried this on a GCE 8vCPU 30GB + Tesla P100:
// - time: ~4.3s
// - nvidia-smi shows ~10% GPU-Util while this is running
time { try! learner.fit(1) }
In [ ]:
// This happens on the CPU.
// I tried this on a GCE 8vCPU 30GB + Tesla P100:
// - time: ~6.3s
// - nvidia-smi shows 0% GPU-Util while this is running
time {
    withDevice(.cpu) { try! learner.fit(1) }
}
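withDevice can also pin individual ops outside the Learner, which helps isolate placement effects from the training loop. An illustrative timing, added here as a sketch (numbers vary with hardware):
In [ ]:
// Compare the same matmul on the CPU vs. the default device.
let x = TF(randomNormal: [1024, 1024])
time { withDevice(.cpu) { _ = matmul(x, x) } }
time { _ = matmul(x, x) }  // default device (the GPU, if configured)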
In [ ]:
class ActivationStatsHook {
    // Records the mean and standard deviation of a layer's activations
    // each time the layer runs.
    var means: [Float] = []
    var stds: [Float] = []
    func update(_ act: TF) {
        means.append(act.mean().scalarized())
        stds.append(act.std().scalarized())
    }
}
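Before wiring hooks into the model, it's worth checking what update records on a known input (an illustrative cell, not from the original notebook):
In [ ]:
// A standard-normal tensor should record a mean near 0 and a std near 1.
let hook = ActivationStatsHook()
hook.update(TF(randomNormal: [16, 10]))
print(hook.means, hook.stds)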
In [ ]:
let learner = Learner(data: data, lossFunc: softmaxCrossEntropy, optFunc: optFunc, modelInit: modelInit)
let recorder = learner.makeDefaultDelegates(metrics: [accuracy])
learner.addDelegates([learner.makeNormalize(mean: mnistStats.mean, std: mnistStats.std),
                      learner.makeAddChannel()])
In [ ]:
let statHooks: [ActivationStatsHook] = (0..<learner.model.convs.count).map { i in
    let stat = ActivationStatsHook()
    learner.model.convs[i].addDelegate(stat.update)
    return stat
}
In [ ]:
// This LayerDelegate stuff slows it down to ~6s/epoch.
time { try! learner.fit(2) }
In [ ]:
for stat in statHooks {
    plt.plot(stat.means)
}
plt.legend(Array(1...statHooks.count))
plt.show()
Out[ ]:
In [ ]:
for stat in statHooks {
    plt.plot(stat.stds)
}
plt.legend(Array(1...statHooks.count))
plt.show()
Out[ ]:
In [ ]:
import NotebookExport
let exporter = NotebookExport(Path.cwd/"06_cuda.ipynb")
print(exporter.export(usingPrefix: "FastaiNotebook_"))