In [1]:
    
# fairing:include-cell
import sys
sys.path.append("/py")
from label_microservice.repo_config import RepoConfig
from code_intelligence.embeddings import pass_through
from code_intelligence.embeddings import load_model_artifact
from code_intelligence.embeddings import get_all_issue_text
import dill as dpickle
import os
import yaml
import logging
from google.cloud import storage
    
In [2]:
    
# fairing:include-cell
class IssuesLoader(object):
    """Compute issue embeddings for one repository and publish them to GCS.

    The repository and storage settings are read from a ``RepoConfig`` built
    from ``owner`` and ``repo``; see ``load_yaml`` for the attributes set.
    """

    # Location of the pickled language model used when no override is given.
    DEFAULT_MODEL_URL = 'https://storage.googleapis.com/issue_label_bot/model/lang_model/models_22zkdqlr/trained_model_22zkdqlr.pkl'

    def __init__(self, owner=None, repo=None):
        """Create a loader and populate its settings.

        Args:
            owner: Repository owner, forwarded unchanged to ``RepoConfig``.
            repo: Repository name, forwarded unchanged to ``RepoConfig``.
        """
        self.load_yaml(owner, repo)

    def load_yaml(self, owner, repo):
        """Copy repository and embedding settings from ``RepoConfig``.

        Despite the name, this method does no YAML parsing itself; it only
        reads attributes of ``RepoConfig(owner, repo)``.
        NOTE(review): presumably ``RepoConfig`` resolves ``None`` values to a
        default repository -- confirm against ``repo_config.py``.

        Sets ``repo_owner``, ``repo_name``, ``bucket_name``,
        ``embeddings_file`` (local path) and ``embeddings_dest`` (blob name
        inside the bucket).
        """
        config = RepoConfig(owner, repo)
        self.repo_owner = config.repo_owner
        self.repo_name = config.repo_name
        self.bucket_name = config.embeddings_bucket_name
        self.embeddings_file = config.embeddings_local_path
        self.embeddings_dest = config.embeddings_gcs_path

    def load_lang_model(self, model_url=None):
        """Load the language-model artifact.

        Args:
            model_url: Optional URL of the model artifact. When omitted,
                ``DEFAULT_MODEL_URL`` is used, which matches the previously
                hard-coded location.

        Returns:
            Whatever ``load_model_artifact`` returns for the URL.
        """
        if model_url is None:
            model_url = self.DEFAULT_MODEL_URL
        return load_model_artifact(model_url)

    def save_issue_embeddings(self):
        """Generate embeddings and upload them unless they already exist.

        If the destination blob is already present in the bucket nothing is
        recomputed. Otherwise the result of ``get_all_issue_text`` is pickled
        to ``self.embeddings_file`` and that file is uploaded.
        """
        # Skip the expensive model load and inference when already published.
        if self.check_embeddings_in_gcs():
            logging.info('Embeddings already exist at gs://%s/%s; skipping.',
                         self.bucket_name, self.embeddings_dest)
            return
        inference_wrapper = self.load_lang_model()
        data = get_all_issue_text(owner=self.repo_owner, repo=self.repo_name,
                                  inf_wrapper=inference_wrapper)
        with open(self.embeddings_file, 'wb') as f:
            dpickle.dump(data, f)
        self.upload_embeddings_to_gcs()
        logging.info('Uploaded embeddings to gs://%s/%s.',
                     self.bucket_name, self.embeddings_dest)

    def _get_bucket(self):
        """Return a ``(client, bucket)`` pair for ``self.bucket_name``."""
        storage_client = storage.Client()
        return storage_client, storage_client.get_bucket(self.bucket_name)

    def check_embeddings_in_gcs(self):
        """Return whether the blob ``self.embeddings_dest`` exists in the bucket."""
        storage_client, bucket = self._get_bucket()
        blob = storage.Blob(bucket=bucket, name=self.embeddings_dest)
        return blob.exists(storage_client)

    def upload_embeddings_to_gcs(self):
        """Upload the local embeddings file to ``self.embeddings_dest``."""
        _, bucket = self._get_bucket()
        blob = bucket.blob(self.embeddings_dest)
        blob.upload_from_filename(self.embeddings_file)
    
In [3]:
    
# Build a loader with the default owner/repo (both None); the constructor
# hands them to RepoConfig, which supplies the repo and bucket settings.
ldr = IssuesLoader()
    
In [4]:
    
# Generate the embeddings and upload them to the bucket; if the destination
# blob already exists, nothing is recomputed.
ldr.save_issue_embeddings()
    
    
In [5]:
    
from fairing.preprocessors.converted_notebook import ConvertNotebookPreprocessorWithFire
    
In [6]:
    
# Convert this notebook into a module exposing IssuesLoader.
preprocessor = ConvertNotebookPreprocessorWithFire('IssuesLoader')
# Extra source files to attach to the preprocessor alongside the notebook.
input_files = ['embeddings.py', 'inference.py', 'repo_config.py']
# The assignment replaces any previous value outright, so the former
# "initialise to an empty set if falsy" guard had no effect and was removed.
preprocessor.input_files = {os.path.normpath(f) for f in input_files}
preprocessor.preprocess()
    
    Out[6]:
In [ ]: