In [3]:
# Sample input: plain text interleaved with an <a> element and a <ref> element,
# each wrapping a <p> that contains self-closing <br /> and <img /> tags.
# Adjacent literals are concatenated by the parser into one string.
string = (
    'some text'
    '<a href="http://www.somesite.com/a/page">'
    ' <p> The red color. <br /> <img src="some/url/to/image" /> </p>'
    '</a>'
    'some final text'
    '<ref href="http://www.somesite.com/a/page">'
    ' <p> The blue color. <br /> <img src="some/url/to/image" /> </p>'
    '</ref>'
)

In [35]:
import re

# Paired element: "<name ...> ... </name>", including everything in between.
#  - The word boundary after the captured name stops the engine from
#    backtracking to a *prefix* of the tag name (e.g. treating "<br>" as "<b").
#  - The closing part must be a real closing tag ("</name>"), not merely the
#    characters "/name>" somewhere in text or an attribute value.
#  - DOTALL lets an element span several lines.
TAG_REGEX = re.compile(r"<(\w+)\b.+?</\1\s*>", flags=re.DOTALL)

# Self-closing tag: from a "<" up to the first following "/>", with no other
# "<" in between. A "/>" that has no opening "<" is therefore left alone.
SELF_CLOSING_TAG_REGEX = re.compile(r"<[^<]*?/>")


def remove_html_tags(text):
    """Strip HTML-like elements from ``text``, replacing each with one space.

    Two passes are made:

    1. Every self-closing tag (``<br />``, ``<img ... />``) is replaced by a
       single space.
    2. Every paired element (``<a ...> ... </a>``) is replaced by a single
       space, *together with its content*.

    Args:
        text: The string to clean.

    Returns:
        A new string with the matched markup replaced by spaces. Text outside
        any matched element is returned unchanged.

    Limitations:
        * Opening or closing tags without a partner (for example a lone
          ``<br>``) are not removed.
        * Matching is lazy, so for nested elements of the same name the match
          ends at the first closing tag and the outer closing tag is left
          behind.
    """
    # Pass 1: self-closing tags. Doing this first keeps tags such as "<a />"
    # from being mistaken for the opening half of a paired element in pass 2.
    without_self_closing = SELF_CLOSING_TAG_REGEX.sub(" ", text)

    # Pass 2: paired elements and everything they enclose.
    return TAG_REGEX.sub(" ", without_self_closing)

In [36]:
# Run the tag stripper over the sample `string` defined above.
result = remove_html_tags(string)

In [37]:
# Function-call form works on both Python 2 and Python 3 for a single argument;
# the bare `print result` statement is a SyntaxError on Python 3.
print(result)


some text some final text 

In [ ]: