In [1]:
%pylab inline
from fatiando import gridder, utils
from fatiando import gravmag as gm
from fatiando.mesher import Prism, PrismMesh, vremove
from fatiando.vis import mpl, myv


Welcome to pylab, a matplotlib-based Python environment [backend: module://IPython.zmq.pylab.backend_inline].
For more information, type 'help(pylab)'.

In [2]:
model = [Prism(250, 750, 250, 750, 200, 700, {'density':1000})]
shape = (50, 50)
bounds = [0, 1000, 0, 1000, 0, 1000]
area = bounds[0:4]
xp, yp, zp = gridder.regular(area, shape, z=-1)
noise = 0.1 # 0.1 mGal noise
gz = utils.contaminate(gm.prism.gz(xp, yp, zp, model), 0.1)
gzz = utils.contaminate(gm.prism.gzz(xp, yp, zp, model), 0.01, percent=True)
mesh = PrismMesh(bounds, (10, 10, 10))
data = [gm.harvester.Gz(xp, yp, zp, gz), gm.harvester.Gzz(xp, yp, zp, gzz)]
seeds = gm.harvester.sow([[500, 500, 450, {'density':1000}]], mesh)

In [17]:
estimate, predicted = gm.harvester.harvest(data, seeds, mesh, 100, 0.001)
mesh.addprop('density', estimate['density'])

mpl.figure()
mpl.title("True: color | Inversion: contour")
mpl.axis('scaled')
levels = mpl.contourf(yp, xp, gz, shape, 12)
mpl.colorbar()
mpl.contour(yp, xp, predicted[0], shape, levels, color='k')
mpl.xlabel('Horizontal coordinate y (km)')
mpl.ylabel('Horizontal coordinate x (km)')
mpl.m2km()
residuals = gz - predicted[0]
mpl.figure()
mpl.title('Residuals: mean=%g stddev=%g' % (residuals.mean(), residuals.std()))
mpl.hist(residuals, bins=8)
mpl.xlabel('Residuals (mGal)')
mpl.ylabel('# of')

myv.figure()
myv.prisms(model, 'density', style='wireframe')
myv.prisms(vremove(0, 'density', mesh), 'density')
myv.axes(myv.outline(bounds), ranges=[i*0.001 for i in bounds], fmt='%.1f',
    nlabels=6)
myv.wall_bottom(bounds)
myv.wall_north(bounds)
myv.show()



In [4]:
%timeit -n 5 gm.harvester.harvest(data, seeds, mesh, 100, 0.001)


5 loops, best of 3: 2.87 s per loop

In [5]:
%prun -T gravmag_harvester.profile gm.harvester.harvest(data, seeds, mesh, 100, 0.001)
!cat gravmag_harvester.profile


 
*** Profile printout saved to text file u'gravmag_harvester.profile'. 
         431749 function calls in 3.071 seconds

   Ordered by: internal time

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
      259    0.880    0.003    0.882    0.003 _prism.py:210(gz)
      259    0.428    0.002    0.430    0.002 _prism.py:568(gzz)
    10078    0.425    0.000    0.897    0.000 harvester.py:402(_shapefunc)
    41680    0.379    0.000    0.670    0.000 linalg.py:1840(norm)
      132    0.268    0.002    1.727    0.013 harvester.py:379(_grow)
    41680    0.176    0.000    0.176    0.000 {method 'reduce' of 'numpy.ufunc' objects}
    32286    0.150    0.000    0.499    0.000 harvester.py:418(<genexpr>)
    20156    0.112    0.000    0.112    0.000 {method 'sum' of 'numpy.ndarray' objects}
    41680    0.044    0.000    0.044    0.000 {numpy.core.multiarray.array}
    10763    0.036    0.000    0.535    0.000 {sum}
    41680    0.031    0.000    0.074    0.000 numeric.py:167(asarray)
    41680    0.029    0.000    0.029    0.000 {method 'ravel' of 'numpy.ndarray' objects}
    20156    0.020    0.000    0.144    0.000 fromnumeric.py:1379(sum)
    31732    0.018    0.000    0.018    0.000 {zip}
    10762    0.013    0.000    0.552    0.000 harvester.py:413(_misfitfunc)
    20156    0.012    0.000    0.012    0.000 {isinstance}
    41680    0.012    0.000    0.012    0.000 {method 'conj' of 'numpy.ndarray' objects}
     1052    0.006    0.000    0.011    0.000 mesher.py:863(__getitem__)
      792    0.005    0.000    0.005    0.000 harvester.py:473(_is_neighbor)
        1    0.005    0.005    3.071    3.071 harvester.py:218(harvest)
     1052    0.003    0.000    0.004    0.000 mesher.py:418(__init__)
      518    0.002    0.000    1.314    0.003 harvester.py:584(effect)
      132    0.002    0.000    0.010    0.000 harvester.py:485(_neighbor_indexes)
    10094    0.002    0.000    0.002    0.000 {abs}
     3626    0.002    0.000    0.002    0.000 {range}
      132    0.001    0.000    1.332    0.010 harvester.py:421(_get_neighbors)
      518    0.001    0.000    0.001    0.000 {method 'fill' of 'numpy.ndarray' objects}
      518    0.001    0.000    0.001    0.000 {numpy.core.multiarray.empty_like}
      390    0.001    0.000    1.315    0.003 harvester.py:431(<genexpr>)
      258    0.001    0.000    1.313    0.005 harvester.py:437(_calc_effect)
      518    0.001    0.000    0.003    0.000 numeric.py:65(zeros_like)
     1052    0.001    0.000    0.001    0.000 mesher.py:47(__init__)
      258    0.001    0.000    0.001    0.000 harvester.py:445(_distance)
        1    0.000    0.000    3.071    3.071 <string>:1(<module>)
      516    0.000    0.000    0.000    0.000 harvester.py:453(_index2ijk)
      262    0.000    0.000    0.000    0.000 {method 'update' of 'dict' objects}
      560    0.000    0.000    0.000    0.000 harvester.py:463(_in_estimate)
     1052    0.000    0.000    0.000    0.000 mesher.py:881(<genexpr>)
      258    0.000    0.000    0.000    0.000 harvester.py:531(__init__)
      258    0.000    0.000    0.000    0.000 {math.sqrt}
      795    0.000    0.000    0.000    0.000 {method 'append' of 'list' objects}
        1    0.000    0.000    0.000    0.000 harvester.py:366(_fmt_estimate)
      131    0.000    0.000    0.000    0.000 {method 'pop' of 'dict' objects}
        1    0.000    0.000    0.006    0.006 harvester.py:354(_init_predicted)
      132    0.000    0.000    0.000    0.000 utils.py:315(__setitem__)
       13    0.000    0.000    0.000    0.000 __init__.py:1130(info)
       13    0.000    0.000    0.000    0.000 __init__.py:1332(isEnabledFor)
       13    0.000    0.000    0.000    0.000 __init__.py:1318(getEffectiveLevel)
        2    0.000    0.000    0.000    0.000 {numpy.core.multiarray.zeros}
        1    0.000    0.000    0.000    0.000 utils.py:327(sec2hms)
        1    0.000    0.000    0.000    0.000 utils.py:288(__init__)
        2    0.000    0.000    0.000    0.000 harvester.py:305(<genexpr>)
        2    0.000    0.000    0.000    0.000 {time.time}
        4    0.000    0.000    0.000    0.000 {len}
        1    0.000    0.000    0.000    0.000 {method 'disable' of '_lsprof.Profiler' objects}

In [6]:
from line_profiler import LineProfiler

In [7]:
lp = LineProfiler(gm.harvester.harvest)
lp.runcall(gm.harvester.harvest, data, seeds, mesh, 100, 0.001)
lp.print_stats()


Timer unit: 1e-06 s

File: /home/leo/src/fatiando/fatiando/gravmag/harvester.py
Function: harvest at line 218
Total time: 3.37109 s

Line #      Hits         Time  Per Hit   % Time  Line Contents
==============================================================
   218                                           def harvest(data, seeds, mesh, compactness, threshold):
   219                                               """
   220                                               Run the inversion algorithm and produce an estimate physical property
   221                                               distribution (density and/or magnetization).
   222                                           
   223                                               Paramters:
   224                                           
   225                                               * data : list of data (e.g., :class:`~fatiando.gravmag.harvester.Gz`)
   226                                                   The data that will be inverted. Data used must match the physical
   227                                                   properties given to the seeds (e.g., gravity data requires seeds to have
   228                                                   ``'density'`` prop)
   229                                           
   230                                               * seeds : list of :class:`~fatiando.gravmag.harvester.Seed`
   231                                                   Lits of seeds used to start the growth process of the inversion. Use
   232                                                   :func:`~fatiando.gravmag.harvester.sow` to generate seeds.
   233                                           
   234                                               * mesh : :class:`fatiando.mesher.PrismMesh`
   235                                                   The mesh used in the inversion. Will estimate the physical property
   236                                                   distribution on this mesh
   237                                           
   238                                               * compactness : float
   239                                                   The compactness regularing parameter (i.e., how much should the estimate
   240                                                   be consentrated around the seeds). Must be positive. To find a good
   241                                                   value for this, start with a small value (like 0.001), run the inversion
   242                                                   and increase the value until desired compactness is achieved.
   243                                           
   244                                               * threshold : float
   245                                                   Control how much the solution can grow (usually downward). In order for
   246                                                   estimate to grow by the accretion of 1 prism, this prism must decrease
   247                                                   the data misfit measure by *threshold* decimal percent. Depends on the
   248                                                   size of the cells in the *mesh* and the distance from a cell to the
   249                                                   observations. Use values between 0.001 and 0.000001.
   250                                                   If cells are small and *threshold* is large (0.001), the seeds won't
   251                                                   grow. If cells are large and *threshold* is small (0.000001), the seeds
   252                                                   will grow too much.
   253                                           
   254                                               Returns:
   255                                           
   256                                               * estimate, predicted_data : a dict and a list
   257                                                   *estimate* is a dict like::
   258                                           
   259                                                       {'physical_property':array, ...}
   260                                           
   261                                                   *estimate* contains the estimates physical properties. The properties
   262                                                   present in *estimate* are the ones given to the seeds. Include the
   263                                                   properties in the *mesh* using::
   264                                           
   265                                                       mesh.addprop('density', estimate['density'])
   266                                           
   267                                                   This way you can plot the estimate using :mod:`fatiando.vis.myv`.
   268                                           
   269                                                   *predicted_data* is a list of numpy arrays with the predicted (model)
   270                                                   data. The list is in the same order as *data*. To plot a map of the fit
   271                                                   for visual inspection and a histogram of the residuals::
   272                                           
   273                                                       from fatiando.vis import mpl
   274                                                       mpl.figure()
   275                                                       # Plot the observed and predicted data as contours for visual
   276                                                       # inspection
   277                                                       mpl.subplot(1, 2, 1)
   278                                                       mpl.axis('scaled')
   279                                                       mpl.title('Observed and predicted data')
   280                                                       levels = mpl.contourf(x, y, gz, (ny, nx), 10)
   281                                                       mpl.colorbar()
   282                                                       # Assuming gz is the only data used
   283                                                       mpl.contour(x, y, predicted[0], (ny, nx), levels)
   284                                                       # Plot a histogram of the residuals
   285                                                       residuals = gz - predicted[0]
   286                                                       mpl.subplot(1, 2, 2)
   287                                                       mpl.title('Residuals')
   288                                                       mpl.hist(residuals, bins=10)
   289                                                       mpl.show()
   290                                                       # It's also good to see the mean and standard deviation of the
   291                                                       # residuals
   292                                                       print "Residuals mean:", residuals.mean()
   293                                                       print "Residuals stddev:", residuals.std()
   294                                           
   295                                           
   296                                               """
   297         1           48     48.0      0.0      log.info('Harvesting inversion results:')
   298         1            5      5.0      0.0      nseeds = len(seeds)
   299         1           23     23.0      0.0      log.info('  compactness: %g' % (compactness))
   300         1           17     17.0      0.0      log.info('  threshold: %g' % (threshold))
   301         1           24     24.0      0.0      log.info('  # of seeds: %d' % (nseeds))
   302         1           18     18.0      0.0      log.info('  # of data types: %d' % (len(data)))
   303         1            5      5.0      0.0      tstart = time.time()
   304                                               # Initialize the estimate with the seeds
   305         1           20     20.0      0.0      estimate = dict((s.i, s.props) for s in seeds)
   306                                               # Initialize the neighbors list
   307         1            5      5.0      0.0      neighbors = []
   308         2            6      3.0      0.0      for seed in seeds:
   309         1        34445  34445.0      1.0          neighbors.append(_get_neighbors(seed, neighbors, estimate, mesh, data))
   310                                               # Initialize the predicted data
   311         1         5655   5655.0      0.2      predicted = _init_predicted(data, seeds, mesh)
   312                                               # Start the goal function, data-misfit function and regularizing function
   313         1          218    218.0      0.0      totalgoal = _shapefunc(data, predicted)
   314         1           70     70.0      0.0      totalmisfit = _misfitfunc(data, predicted)
   315         1            2      2.0      0.0      regularizer = 0.
   316         1           17     17.0      0.0      log.info('  initial goal function: %g' % (totalgoal))
   317         1            7      7.0      0.0      log.info('  initial data misfit: %g' % (totalmisfit))
   318                                               # Weight the regularizing function by the mean extent of the mesh
   319         1            4      4.0      0.0      mu = compactness*1./(sum(mesh.shape)/3.)
   320                                               # Begin the growth process
   321         1            5      5.0      0.0      log.info('  Running...')
   322         1            1      1.0      0.0      accretions = 0
   323       132          223      1.7      0.0      for iteration in xrange(mesh.size - nseeds):
   324       132          213      1.6      0.0          grew = False # To check if at least one seed grew (stopping criterion)
   325       264          645      2.4      0.0          for s in xrange(nseeds):
   326       132          227      1.7      0.0              best, bestgoal, bestmisfit, bestregularizer = _grow(neighbors[s],
   327       132      1916007  14515.2     56.8                  data, predicted, totalmisfit, mu, regularizer, threshold)
   328                                                       # If there was a best, add to estimate, remove it, and add its
   329                                                       # neighbors
   330       132          364      2.8      0.0              if best is not None:
   331       131          274      2.1      0.0                  if best.i not in estimate:
   332       131          273      2.1      0.0                      estimate[best.i] = {}
   333       131          437      3.3      0.0                  estimate[best.i].update(best.props)
   334       131          254      1.9      0.0                  totalgoal = bestgoal
   335       131          219      1.7      0.0                  totalmisfit = bestmisfit
   336       131          214      1.6      0.0                  regularizer = bestregularizer
   337       393          944      2.4      0.0                  for p, e in zip(predicted, best.effect):
   338       262         4410     16.8      0.1                      p += e
   339       131          348      2.7      0.0                  neighbors[s].pop(best.i)
   340       131          226      1.7      0.0                  neighbors[s].update(
   341       131      1403638  10714.8     41.6                      _get_neighbors(best, neighbors, estimate, mesh, data))
   342       131          441      3.4      0.0                  del best
   343       131          241      1.8      0.0                  grew = True
   344       131          236      1.8      0.0                  accretions += 1
   345       132          209      1.6      0.0          if not grew:
   346         1            2      2.0      0.0              break
   347         1           20     20.0      0.0      log.info('  # of accretions: %d' % (accretions))
   348         1           10     10.0      0.0      log.info('  final goal function: %g' % (totalgoal))
   349         1            6      6.0      0.0      log.info('  final compactness regularizing function: %g' % (regularizer))
   350         1            7      7.0      0.0      log.info('  final data misfit: %g' % (totalmisfit))
   351         1           21     21.0      0.0      log.info('  time it took: %s' % (utils.sec2hms(time.time() - tstart)))
   352         1          384    384.0      0.0      return _fmt_estimate(estimate, mesh.size), predicted


In [8]:
lp = LineProfiler(gm.harvester._get_neighbors)
lp.runcall(gm.harvester.harvest, data, seeds, mesh, 100, 0.001)
lp.print_stats()


Timer unit: 1e-06 s

File: /home/leo/src/fatiando/fatiando/gravmag/harvester.py
Function: _get_neighbors at line 421
Total time: 1.40001 s

Line #      Hits         Time  Per Hit   % Time  Line Contents
==============================================================
   421                                           def _get_neighbors(cell, neighborhood, estimate, mesh, data):
   422                                               """
   423                                               Return a dict with the new neighbors of cell.
   424                                               keys are the index of the neighbors in the mesh. values are the Neighbor
   425                                               objects.
   426                                               """
   427       924        19939     21.6      1.4      indexes = [n for n in _neighbor_indexes(cell.i, mesh)
   428       792        26406     33.3      1.9                 if not _is_neighbor(n, cell.props, neighborhood)
   429       560         1322      2.4      0.1                    and not _in_estimate(n, cell.props, estimate)]
   430       132          122      0.9      0.0      neighbors = dict(
   431       132          151      1.1      0.0          (i, Neighbor(
   432                                                       i, cell.props, cell.seed, _distance(i, cell.seed, mesh),
   433                                                       _calc_effect(i, cell.props, mesh, data)))
   434       132      1351877  10241.5     96.6          for i in indexes)
   435       132          190      1.4      0.0      return neighbors


In [9]:
lp = LineProfiler(gm.harvester._grow)
lp.runcall(gm.harvester.harvest, data, seeds, mesh, 100, 0.001)
lp.print_stats()


Timer unit: 1e-06 s

File: /home/leo/src/fatiando/fatiando/gravmag/harvester.py
Function: _grow at line 379
Total time: 1.92835 s

Line #      Hits         Time  Per Hit   % Time  Line Contents
==============================================================
   379                                           def _grow(neighbors, data, predicted, totalmisfit, mu, regularizer, threshold):
   380                                               """
   381                                               Find the neighbor with smallest goal function that also decreases the
   382                                               misfit
   383                                               """
   384       132          164      1.2      0.0      best = None
   385       132          111      0.8      0.0      bestgoal = None
   386       132           95      0.7      0.0      bestmisfit = None
   387       132           87      0.7      0.0      bestregularizer = None
   388     10893         9262      0.9      0.5      for n in neighbors:
   389     32283       237948      7.4     12.3          pred = [p + e for p, e in zip(predicted, neighbors[n].effect)]
   390     10761       600750     55.8     31.2          misfit = _misfitfunc(data, pred)
   391     10761        13938      1.3      0.7          if (misfit < totalmisfit and
   392     10094        41899      4.2      2.2              float(abs(misfit - totalmisfit))/totalmisfit >= threshold):
   393     10077        11647      1.2      0.6              reg = regularizer + neighbors[n].distance
   394     10077       994925     98.7     51.6              goal = _shapefunc(data, pred) + mu*reg
   395     10077        15187      1.5      0.8              if bestgoal is None or goal < bestgoal:
   396       692          554      0.8      0.0                  bestgoal = goal
   397       692          586      0.8      0.0                  best = neighbors[n]
   398       692          530      0.8      0.0                  bestmisfit = misfit
   399       692          564      0.8      0.0                  bestregularizer = reg
   400       132          107      0.8      0.0      return best, bestgoal, bestmisfit, bestregularizer


In [ ]: