Source code for github_search_engine.github_search_engine

  1import inspect
  2import logging
  3import sys
  4from typing import List
  5from typing import Optional
  6
  7import chevron
  8from githubkit.versions.v2022_11_28.models import Issue
  9from qdrant_client import QdrantClient
 10from qdrant_client.http.models import QueryResponse
 11
 12from github_search_engine.clients.github_client_manager import (
 13  GithubClientManager,
 14)
 15from github_search_engine.clients.ollama_client_manager import (
 16  OllamaClientManager,
 17)
 18
 19
class GithubSearchEngine:
  """Index a GitHub repository's issues in Qdrant, then search and summarise them.

  Issues are fetched through ``GithubClientManager``, stored in a Qdrant
  collection named ``"<owner>/<repository_name>"`` and summarised through
  ``OllamaClientManager``.
  """

  # Mustache template for the per-issue summarisation prompt. Triple braces
  # are deliberate: plain ``{{name}}`` tags are HTML-escaped by Mustache, which
  # would turn ``<``, ``>``, ``&`` and quotes in issue text into entities
  # before the prompt reaches the model.
  _PROMPT_TEMPLATE = inspect.cleandoc(
    """
    Please briefly summarise the content and discussion of the following github issues.
    Keep it short, concise and to the point and explain how it relates to '{{{originalQuery}}}'
    Do not write headings or titles, simply summarize into a single 2-3 sentence paragraph.

    # {{{issue.title}}}
    {{{issue.body}}}

    Comments:
    {{#comments}}
    * {{{body}}}
    {{/comments}}
    """
  )

  def __init__(
    self,
    github_access_token: str,
    qdrant_location: Optional[str] = None,
    qdrant_path: Optional[str] = None,
  ):
    """Create the GitHub, Qdrant and Ollama clients used by the engine.

    Args:
      github_access_token: Token handed to ``GithubClientManager``.
      qdrant_location: Forwarded to ``QdrantClient`` as ``location``.
      qdrant_path: Forwarded to ``QdrantClient`` as ``path``.
    """
    # NOTE(review): this configures the root logger at WARNING, so the
    # info-level progress messages emitted below stay hidden unless the
    # application set up logging itself before constructing the engine.
    logging.basicConfig(level=logging.WARNING)

    self._github_client = GithubClientManager(access_token=github_access_token)
    self._database_client = QdrantClient(
      location=qdrant_location,
      path=qdrant_path,
    )
    self._ollama_client = OllamaClientManager()

    # Embedding model used by the client's add/query calls. The CUDA provider
    # is listed ahead of the CPU one -- presumably so the CPU acts as the
    # fallback; confirm against the qdrant-client/fastembed documentation.
    self._database_client.set_model(
      "snowflake/snowflake-arctic-embed-m",
      providers=["CUDAExecutionProvider", "CPUExecutionProvider"],
    )

  @staticmethod
  def _collection_name(owner: str, repository_name: str) -> str:
    """Return the Qdrant collection name used for ``owner/repository_name``."""
    return f"{owner}/{repository_name}"

  @staticmethod
  def _format_summary(number, url, summary) -> str:
    """Return one Markdown section: a linked issue heading plus its summary.

    The text is assembled without source indentation, so a multi-line
    ``summary`` cannot leave the heading indented (which Markdown would render
    as a code block).
    """
    heading = f"# Issue [#{number}]({url})"
    text = f"{summary}".strip()
    return f"{heading}\n\n{text}" if text else heading

  @staticmethod
  def summarise_issue(issue: Issue) -> str:
    """Return the text that represents ``issue`` in the vector index.

    The result is the title, followed by a blank line and the body when the
    issue has one. An issue without a body yields the title alone rather than
    the literal string ``"None"``.
    """
    title = str(issue.title)
    if not issue.body:
      return title
    return f"{title}\n\n{issue.body}"

  def summarise_results(
    self,
    results: List[QueryResponse],
    owner: str,
    repository_name: str,
    query: str,
  ) -> str:
    """Summarise each search hit with the Ollama client.

    For every hit the issue's comments are fetched from GitHub, rendered into
    the prompt together with the issue metadata and the original query, and
    sent to the chat client.

    Args:
      results: Search hits whose ``metadata`` holds the stored issue fields
        (``number``, ``html_url``, ``title`` and ``body`` are read here).
      owner: Repository owner, used to fetch issue comments.
      repository_name: Repository name, used to fetch issue comments.
      query: The user's original search text, quoted in the prompt.

    Returns:
      One Markdown section per hit, separated by blank lines. Empty string
      when ``results`` is empty.
    """
    logging.info("Summarising issues")
    summaries = []
    for issue in results:
      number = issue.metadata["number"]
      comments = [
        {"body": comment.body}
        for comment in self._github_client.get_issue_comments(
          owner=owner,
          repository_name=repository_name,
          issue_number=number,
        )
      ]
      prompt = chevron.render(
        template=self._PROMPT_TEMPLATE,
        data={
          "issue": issue.metadata,
          "comments": comments,
          "originalQuery": query,
        },
      )
      summary = self._ollama_client.chat(prompt)
      summaries.append(
        self._format_summary(number, issue.metadata["html_url"], summary)
      )
    logging.info("Done")
    return "\n\n".join(summaries)

  async def index_repository(self, owner: str, repository_name: str) -> None:
    """Fetch the repository's issues and add them to its Qdrant collection.

    Each issue is stored as the text from ``summarise_issue`` with the issue's
    ``model_dump()`` as metadata.
    """
    logging.info("Fetching Issues from %s/%s", owner, repository_name)
    issues = await self._github_client.get_repository_issues(
      owner, repository_name
    )

    logging.info("Adding to Vector DB")
    self._database_client.add(
      collection_name=self._collection_name(owner, repository_name),
      documents=[self.summarise_issue(issue) for issue in issues],
      metadata=[issue.model_dump() for issue in issues],
    )
    logging.info("Done")

  def search(
    self,
    owner: str,
    repository_name: str,
    text: str,
    *,
    score_threshold: float = 0.8,
    limit: int = 5,
  ) -> List[QueryResponse]:
    """Query the repository's collection for issues matching ``text``.

    Args:
      owner: Repository owner.
      repository_name: Repository name.
      text: Free-text query.
      score_threshold: Passed to the Qdrant query as ``score_threshold``.
      limit: Passed to the Qdrant query as ``limit``.

    Returns:
      The hits whose stored issue body is non-empty. Filtering happens after
      the query, so fewer than ``limit`` hits may come back.

    Raises:
      SystemExit: With code 1 when the repository has no collection yet.
    """
    collection = self._collection_name(owner, repository_name)
    if not self._database_client.collection_exists(collection_name=collection):
      logging.error(
        "DB Collection not found. Try indexing the repository first."
      )
      # Kept as a process exit for backward compatibility with existing callers.
      sys.exit(1)

    hits = self._database_client.query(
      collection_name=collection,
      query_text=text,
      score_threshold=score_threshold,
      limit=limit,
    )

    # Filter empty issues; .get() tolerates payloads stored without a body key.
    return [hit for hit in hits if hit.metadata.get("body")]