In [4]:
import re

In [3]:
# Sample title strings, each ending in a parenthesised four-digit year.
# Title followed directly by the year:
s1 = "The Lazarus Project (2008)"
# Title, then a parenthesised alternate title, then the year:
s2 = "The Cry (Il Grido) (1957)"
s3 = "A Hell of a Day (Reines d'un jour) (2001)"

In [79]:
def format(original_title_with_leading_article):
    """Move a leading article ("The", "A", "An") behind the main title.

    Two input shapes are recognised:

    * ``"Main Title (YYYY)"``
      e.g. ``"The Lazarus Project (2008)"`` -> ``'"Lazarus Project, The (2008)"'``
    * ``"Main Title (Subtitle) (YYYY)"``
      e.g. ``"The Cry (Il Grido) (1957)"`` -> ``'"Cry, The (Il Grido) (1957)"'``

    Parameters
    ----------
    original_title_with_leading_article : str
        The title to reformat.

    Returns
    -------
    str
        The rewritten title wrapped in literal double quotes when the input
        has one of the shapes above and its first word is exactly one of the
        articles. In every other case the input is returned unchanged and
        unquoted.

    Notes
    -----
    The article must be a whole word: ``"Theory of Everything (2014)"`` and
    ``"American Beauty (1999)"`` are returned untouched. Matching is
    case-sensitive. The function name shadows the built-in ``format``; it is
    kept so existing calls continue to work.
    """
    articles = ("The", "A", "An")

    # e.g.: "The Lazarus Project (2008)"
    pat_no_subtitle = r'^([^\(]+) (\(\d{4}\))$'

    # e.g. "A Hell of a Day (Reines d'un jour) (2001)"
    pat_with_subtitle = r'^([^\(]+) (\([^\(]+\)) (\(\d{4}\))$'

    # First word, a single space, then the remainder of the main title.
    pat_first_word_and_rest = r'^(\w+) (.+)$'

    # The two shapes are mutually exclusive (one vs. two parenthesised
    # groups), so the order in which they are tried does not matter.
    match = re.match(pat_no_subtitle, original_title_with_leading_article)
    if match:
        main_title, year = match.groups()
        subtitle_part = ""
    else:
        match = re.match(pat_with_subtitle, original_title_with_leading_article)
        if not match:
            # Unrecognised shape: return the original.
            return original_title_with_leading_article
        main_title, subtitle, year = match.groups()
        subtitle_part = subtitle + " "

    split = re.match(pat_first_word_and_rest, main_title)
    if split is None or split.group(1) not in articles:
        # Either the main title is a single word (nothing to move the
        # article behind) or its first word is not an article.
        return original_title_with_leading_article

    article, rest_of_title = split.groups()
    formatted_title = rest_of_title + ", " + article + " " + subtitle_part + year

    return '"' + formatted_title + '"'

In [81]:
# Spot-check on a title that has a parenthesised alternate title before the year.
format(s2)


Out[81]:
'"Cry, The (Il Grido) (1957)"'

In [ ]:


In [ ]: