CG_StringBasics

``````

In [1]:

'A' # a string literal holding a single character

``````
``````

Out[1]:

'A'

``````
``````

In [2]:

'ACGT' # a string literal holding four characters

``````
``````

Out[2]:

'ACGT'

``````
``````

In [3]:

st = 'ACGT' # bind the string to the name st so that it can be reused

``````
``````

In [4]:

len(st) # getting the length of a string, i.e. its number of characters

``````
``````

Out[4]:

4

``````
``````

In [5]:

'' # the empty string (epsilon): a string with no characters

``````
``````

Out[5]:

''

``````
``````

In [6]:

len('') # the empty string has length 0

``````
``````

Out[6]:

0

``````
``````

In [7]:

import random # standard-library module for pseudo-random choices
random.choice('ACGT') # generating a random nucleotide: picks one character of the string at random

``````
``````

Out[7]:

'C'

``````
``````

In [8]:

random.choice('ACGT') # each invocation picks anew, so repeated invocations might yield different nucleotides (or the same one again)

``````
``````

Out[8]:

'C'

``````
``````

In [9]:

random.choice('ACGT') # each invocation picks anew, so repeated invocations might yield different nucleotides (or the same one again)

``````
``````

Out[9]:

'T'

``````
``````

In [10]:

random.choice('ACGT') # each invocation picks anew, so repeated invocations might yield different nucleotides (or the same one again)

``````
``````

Out[10]:

'A'

``````
``````

In [11]:

random.choice('ACGT') # each invocation picks anew, so repeated invocations might yield different nucleotides (or the same one again)

``````
``````

Out[11]:

'G'

``````
``````

In [12]:

# build a random nucleotide string of length 40: draw one random nucleotide
# per position and join the draws together with no separator
st = ''.join(random.choice('ACGT') for _ in range(40))
st

``````
``````

Out[12]:

'GTCACATAATGCTGTAGATTAAGCGAGAAGACCTTAGGTC'

``````
``````

In [13]:

st[1:3] # substring, starting at position 1 and extending up to but not including position 3
# note that the first position is numbered 0, so this selects the 2nd and 3rd characters

``````
``````

Out[13]:

'TC'

``````
``````

In [14]:

st[0:3] # prefix of length 3: positions 0, 1 and 2

``````
``````

Out[14]:

'GTC'

``````
``````

In [15]:

st[:3] # another way of getting the prefix of length 3: an omitted start defaults to 0

``````
``````

Out[15]:

'GTC'

``````
``````

In [16]:

st[len(st)-3:len(st)] # suffix of length 3: the last 3 positions, from len(st)-3 up to the end

``````
``````

Out[16]:

'GTC'

``````
``````

In [17]:

st[-3:] # another way of getting the suffix of length 3: negative positions count from the end

``````
``````

Out[17]:

'GTC'

``````
``````

In [18]:

# two short strings used to demonstrate concatenation
st1 = 'CAT'
st2 = 'ATAC'

``````
``````

In [19]:

st1 # display the first string

``````
``````

Out[19]:

'CAT'

``````
``````

In [20]:

st2 # display the second string

``````
``````

Out[20]:

'ATAC'

``````
``````

In [21]:

st1 + st2 # concatenation of 2 strings: the characters of st1 followed by those of st2

``````
``````

Out[21]:

'CATATAC'

``````