Examining SQL for Optimization


In [1]:
posts = Post.objects.all()

In [2]:
print(posts.query)


SELECT "blog_post"."id", "blog_post"."title", "blog_post"."slug", "blog_post"."author_id", "blog_post"."text", "blog_post"."pub_date" FROM "blog_post" ORDER BY "blog_post"."pub_date" DESC, "blog_post"."title" ASC

In [3]:
print(
    Post.objects
    .select_related('author')
    .query)


SELECT "blog_post"."id", "blog_post"."title", "blog_post"."slug", "blog_post"."author_id", "blog_post"."text", "blog_post"."pub_date", "user_user"."id", "user_user"."password", "user_user"."last_login", "user_user"."is_superuser", "user_user"."email", "user_user"."is_staff", "user_user"."is_active" FROM "blog_post" INNER JOIN "user_user" ON ( "blog_post"."author_id" = "user_user"."id" ) ORDER BY "blog_post"."pub_date" DESC, "blog_post"."title" ASC

Prefetch SQL


In [4]:
posts_startups = (
    Post.objects.prefetch_related(
        'startups',
    )
)

In [5]:
list(posts_startups) == list(posts)


Out[5]:
True

In [6]:
str(posts.query) == str(posts_startups.query)


Out[6]:
True

In [7]:
print(posts_startups.query)


SELECT "blog_post"."id", "blog_post"."title", "blog_post"."slug", "blog_post"."author_id", "blog_post"."text", "blog_post"."pub_date" FROM "blog_post" ORDER BY "blog_post"."pub_date" DESC, "blog_post"."title" ASC

In [8]:
from django.db import connection
from pprint import pprint

In [9]:
print(connection.queries[-1]['sql'])


QUERY = 'SELECT "blog_post"."id", "blog_post"."title", "blog_post"."slug", "blog_post"."author_id", "blog_post"."text", "blog_post"."pub_date" FROM "blog_post" ORDER BY "blog_post"."pub_date" DESC, "blog_post"."title" ASC' - PARAMS = ()

In [10]:
print(connection.queries[-2]['sql'])
# a prefetch causes a new query


QUERY = 'SELECT ("blog_post_startups"."post_id") AS "_prefetch_related_val_post_id", "organizer_startup"."id", "organizer_startup"."name", "organizer_startup"."slug", "organizer_startup"."description", "organizer_startup"."founded_date", "organizer_startup"."contact", "organizer_startup"."website" FROM "organizer_startup" INNER JOIN "blog_post_startups" ON ( "organizer_startup"."id" = "blog_post_startups"."startup_id" ) WHERE "blog_post_startups"."post_id" IN (%s, %s, %s, %s, %s, %s) ORDER BY "organizer_startup"."name" ASC' - PARAMS = (6, 5, 4, 3, 2, 1)

In [11]:
from django.db import reset_queries
reset_queries()

In [12]:
pprint(connection.queries)


[]

In [13]:
posts = Post.objects.prefetch_related('startups')

In [14]:
# the queryset never evaluated!
pprint(connection.queries)


[]

In [15]:
posts = list(posts)  # force evaluation

In [16]:
# two queries:
#     the first for Post,
#     the second for Startups related to the Post
pprint(connection.queries)


[{'sql': 'QUERY = \'SELECT "blog_post"."id", "blog_post"."title", '
         '"blog_post"."slug", "blog_post"."author_id", "blog_post"."text", '
         '"blog_post"."pub_date" FROM "blog_post" ORDER BY '
         '"blog_post"."pub_date" DESC, "blog_post"."title" ASC\' - PARAMS = '
         '()',
  'time': '0.000'},
 {'sql': 'QUERY = \'SELECT ("blog_post_startups"."post_id") AS '
         '"_prefetch_related_val_post_id", "organizer_startup"."id", '
         '"organizer_startup"."name", "organizer_startup"."slug", '
         '"organizer_startup"."description", '
         '"organizer_startup"."founded_date", '
         '"organizer_startup"."contact", "organizer_startup"."website" FROM '
         '"organizer_startup" INNER JOIN "blog_post_startups" ON ( '
         '"organizer_startup"."id" = "blog_post_startups"."startup_id" ) '
         'WHERE "blog_post_startups"."post_id" IN (%s, %s, %s, %s, %s, %s) '
         'ORDER BY "organizer_startup"."name" ASC\' - PARAMS = (6, 5, 4, 3, '
         '2, 1)',
  'time': '0.000'}]

In [17]:
reset_queries()
# three queries:
#     first for Post,
#     the second for Startups associated with those Posts
#     and then for the Tags associated with the Startups
posts = list(
    Post.objects.prefetch_related(
        'startups__tags'
    ),
)
posts_conn = connection.queries
pprint(posts_conn)


[{'sql': 'QUERY = \'SELECT "blog_post"."id", "blog_post"."title", '
         '"blog_post"."slug", "blog_post"."author_id", "blog_post"."text", '
         '"blog_post"."pub_date" FROM "blog_post" ORDER BY '
         '"blog_post"."pub_date" DESC, "blog_post"."title" ASC\' - PARAMS = '
         '()',
  'time': '0.000'},
 {'sql': 'QUERY = \'SELECT ("blog_post_startups"."post_id") AS '
         '"_prefetch_related_val_post_id", "organizer_startup"."id", '
         '"organizer_startup"."name", "organizer_startup"."slug", '
         '"organizer_startup"."description", '
         '"organizer_startup"."founded_date", '
         '"organizer_startup"."contact", "organizer_startup"."website" FROM '
         '"organizer_startup" INNER JOIN "blog_post_startups" ON ( '
         '"organizer_startup"."id" = "blog_post_startups"."startup_id" ) '
         'WHERE "blog_post_startups"."post_id" IN (%s, %s, %s, %s, %s, %s) '
         'ORDER BY "organizer_startup"."name" ASC\' - PARAMS = (6, 5, 4, 3, '
         '2, 1)',
  'time': '0.000'},
 {'sql': 'QUERY = \'SELECT ("organizer_startup_tags"."startup_id") AS '
         '"_prefetch_related_val_startup_id", "organizer_tag"."id", '
         '"organizer_tag"."name", "organizer_tag"."slug" FROM '
         '"organizer_tag" INNER JOIN "organizer_startup_tags" ON ( '
         '"organizer_tag"."id" = "organizer_startup_tags"."tag_id" ) WHERE '
         '"organizer_startup_tags"."startup_id" IN (%s, %s, %s) ORDER BY '
         '"organizer_tag"."name" ASC\' - PARAMS = (4, 4, 7)',
  'time': '0.000'}]

In [18]:
reset_queries()
# the following query is redundant
posts = list(
    Post.objects.prefetch_related(
        'startups',
        'startups__tags'
    ),
)
posts_conn == connection.queries


Out[18]:
True

Prefetch Objects


In [19]:
reset_queries()
posts = list(
    Post.objects.prefetch_related(
        Prefetch(
            'startups__tags',
        ),
    )
)
posts_conn == connection.queries


Out[19]:
True

In [20]:
posts = list(
    Post.objects.prefetch_related(
        Prefetch(
            'startups__tags',
            queryset=Tag.objects.all(),
            to_attr='cached_tags',
        ),
    )
)

In [21]:
# make sure none of the code calls the database!
reset_queries()

In [22]:
posts


Out[22]:
[<Post: New Django Version on 2020-05-15>,
 <Post: More Django Info on 2015-04-08>,
 <Post: Django 1.8 Release on 2015-04-01>,
 <Post: Django Training on 2013-01-18>,
 <Post: Simple Robots for Sale on 2011-02-21>,
 <Post: Django 1.0 Release on 2008-09-03>]

In [23]:
django_training = posts[3]

In [24]:
django_training.startups.all()


Out[24]:
[<Startup: JamBon Software>]

In [25]:
type(django_training.startups.all())


Out[25]:
django.db.models.query.QuerySet

In [26]:
jambon_software = django_training.startups.all()[0]
jambon_software.cached_tags


Out[26]:
[<Tag: Django>]

In [27]:
type(jambon_software.cached_tags)


Out[27]:
list

In [28]:
# no database calls!
pprint(connection.queries)


[]

In [29]:
jambon_software.tags.all()


Out[29]:
[<Tag: Django>]

In [30]:
pprint(connection.queries)


[{'sql': 'QUERY = \'SELECT "organizer_tag"."id", "organizer_tag"."name", '
         '"organizer_tag"."slug" FROM "organizer_tag" INNER JOIN '
         '"organizer_startup_tags" ON ( "organizer_tag"."id" = '
         '"organizer_startup_tags"."tag_id" ) WHERE '
         '"organizer_startup_tags"."startup_id" = %s ORDER BY '
         '"organizer_tag"."name" ASC LIMIT 21\' - PARAMS = (4,)',
  'time': '0.000'}]

In [31]:
reset_queries()
startups = list(
    Startup.objects.prefetch_related(
        Prefetch(
            'blog_posts',
            queryset=(
                Post.objects
                .select_related(
                    'author__profile')),
            to_attr='cached_posts',
        ),
        Prefetch(
            'cached_posts__tags',
            to_attr='cached_post_tags',
        ),
        Prefetch(
            'tags',
            to_attr='cached_tags',
        ),
        Prefetch(
            'cached_tags__startup_set',
        ),
    )
)
len(connection.queries)


Out[31]:
5

In [32]:
reset_queries()
startups


Out[32]:
[<Startup: Arachnobots>,
 <Startup: Boundless Software>,
 <Startup: Game Congress>,
 <Startup: JamBon Software>,
 <Startup: Lightning Rod Consulting>,
 <Startup: Monkey Software>,
 <Startup: Simple Robots>,
 <Startup: Thingies>]

In [33]:
jambon_software = startups[3]

In [34]:
jambon_software.cached_posts


Out[34]:
[<Post: More Django Info on 2015-04-08>, <Post: Django Training on 2013-01-18>]

In [35]:
django_training = jambon_software.cached_posts[0]
django_training.author


Out[35]:
<User: django@jambonsw.com>

In [36]:
django_training.author.profile.name


Out[36]:
'Andrew'

In [37]:
django_training.cached_post_tags


Out[37]:
[<Tag: Django>, <Tag: Web>]

In [38]:
jambon_software.cached_tags


Out[38]:
[<Tag: Django>]

In [39]:
django = jambon_software.cached_tags[0]
django.startup_set.all()


Out[39]:
[<Startup: JamBon Software>]

In [40]:
for startup in startups:
    print(
        '{} has the following competitors:'
        .format(startup))
    for tag in startup.cached_tags:
        for competitor in tag.startup_set.all():
            if competitor.pk != startup.pk:
                print(
                    '    {}'
                    .format(competitor))


Arachnobots has the following competitors:
    Simple Robots
Boundless Software has the following competitors:
    Lightning Rod Consulting
Game Congress has the following competitors:
    Monkey Software
JamBon Software has the following competitors:
Lightning Rod Consulting has the following competitors:
    Boundless Software
Monkey Software has the following competitors:
    Game Congress
Simple Robots has the following competitors:
    Arachnobots
    Thingies
Thingies has the following competitors:
    Simple Robots

In [41]:
pprint(connection.queries)


[]

Limiting Fields


In [42]:
list(Tag.objects.values('name', 'slug'))


Out[42]:
[{'name': 'augmented reality', 'slug': 'augmented-reality'},
 {'name': 'big data', 'slug': 'big-data'},
 {'name': 'django', 'slug': 'django'},
 {'name': 'education', 'slug': 'education'},
 {'name': 'ipython', 'slug': 'ipython'},
 {'name': 'javascript', 'slug': 'javascript'},
 {'name': 'jupyter', 'slug': 'jupyter'},
 {'name': 'mobile', 'slug': 'mobile'},
 {'name': 'node.js', 'slug': 'node-js'},
 {'name': 'php', 'slug': 'php'},
 {'name': 'python', 'slug': 'python'},
 {'name': 'ruby', 'slug': 'ruby'},
 {'name': 'ruby on rails', 'slug': 'ruby-on-rails'},
 {'name': 'video games', 'slug': 'video-games'},
 {'name': 'web', 'slug': 'web'},
 {'name': 'zend', 'slug': 'zend'}]

In [43]:
print(Tag.objects.values('name', 'slug').query)


SELECT "organizer_tag"."name", "organizer_tag"."slug" FROM "organizer_tag" ORDER BY "organizer_tag"."name" ASC

In [44]:
list(Tag.objects.values_list('name', flat=True))


Out[44]:
['augmented reality',
 'big data',
 'django',
 'education',
 'ipython',
 'javascript',
 'jupyter',
 'mobile',
 'node.js',
 'php',
 'python',
 'ruby',
 'ruby on rails',
 'video games',
 'web',
 'zend']

In [45]:
print(Tag.objects.values_list('name', flat=True).query)


SELECT "organizer_tag"."name" FROM "organizer_tag" ORDER BY "organizer_tag"."name" ASC

In [46]:
print(Tag.objects.all().query)


SELECT "organizer_tag"."id", "organizer_tag"."name", "organizer_tag"."slug" FROM "organizer_tag" ORDER BY "organizer_tag"."name" ASC

In [47]:
print(Tag.objects.defer('slug').query)


SELECT "organizer_tag"."id", "organizer_tag"."name" FROM "organizer_tag" ORDER BY "organizer_tag"."name" ASC

In [48]:
print(Tag.objects.defer('id').query)


SELECT "organizer_tag"."id", "organizer_tag"."name", "organizer_tag"."slug" FROM "organizer_tag" ORDER BY "organizer_tag"."name" ASC

In [49]:
from django.core.exceptions import FieldDoesNotExist
try:
    tags = list(Tag.objects.defer('pk'))
except FieldDoesNotExist as e:
    print(e)


Tag has no field named 'pk'

In [50]:
print(Tag.objects.only('name').query)


SELECT "organizer_tag"."id", "organizer_tag"."name" FROM "organizer_tag" ORDER BY "organizer_tag"."name" ASC

In [51]:
str(Tag.objects.defer('slug').query) == str(Tag.objects.only('name').query)


Out[51]:
True

In [52]:
print(Tag.objects.only('name').values().query)


SELECT "organizer_tag"."id", "organizer_tag"."name", "organizer_tag"."slug" FROM "organizer_tag" ORDER BY "organizer_tag"."name" ASC

In [53]:
try:
    tags = Tag.objects.values().only('name')
except NotImplementedError as e:
    print(e)


ValuesQuerySet does not implement only()