This repository was archived by the owner on Aug 3, 2025. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathgithub_lang_analyzer.py
More file actions
153 lines (121 loc) · 4.78 KB
/
github_lang_analyzer.py
File metadata and controls
153 lines (121 loc) · 4.78 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
import os
import sys
import colorlog
import matplotlib.pyplot as plt
import requests
# Base URL of the GitHub REST API; endpoint paths are appended to it when
# building request URLs.
GITHUB_API_URL = "https://api.github.com"
def setup_logger():
    """Configure colorized console logging and return the configured logger.

    A ``colorlog.StreamHandler`` with a level-colored ``[LEVEL] message``
    format is attached to the logger returned by ``colorlog.getLogger()``
    (called without a name). The level is set to INFO, so ``debug`` messages
    are not emitted.

    Returns:
        The logger the handler was attached to.
    """
    level_colors = {
        "DEBUG": "cyan",
        "INFO": "green",
        "WARNING": "yellow",
        "ERROR": "red",
        "CRITICAL": "red,bg_white",
    }

    stream_handler = colorlog.StreamHandler()
    stream_handler.setFormatter(
        colorlog.ColoredFormatter(
            "%(log_color)s[%(levelname)s]%(reset)s %(message)s",
            log_colors=level_colors,
        )
    )

    configured = colorlog.getLogger()
    configured.setLevel(colorlog.INFO)
    configured.addHandler(stream_handler)
    return configured
def get_github_token(logger):
    """Return a GitHub personal access token, or ``""`` to run unauthenticated.

    The ``GITHUB_TOKEN`` environment variable is used when it holds a
    non-blank value. Otherwise the user is prompted. On an interactive
    terminal the prompt goes through ``getpass`` so the secret is not echoed;
    when stdin is redirected the value is read with ``input`` as before.
    A closed/exhausted stdin is treated like pressing Enter.

    Args:
        logger: Object providing ``debug`` and ``warning`` methods.

    Returns:
        The token with surrounding whitespace removed; an empty string when
        no token was supplied.
    """
    import getpass  # local import: only the interactive path needs it

    token = os.environ.get("GITHUB_TOKEN", "").strip()
    if token:
        logger.debug("Using GitHub token from environment variable.")
        return token

    logger.warning("GitHub token not found in environment variable 'GITHUB_TOKEN'.")
    prompt = "Enter your GitHub personal access token (press Enter to continue unauthenticated): "
    try:
        if sys.stdin is not None and sys.stdin.isatty():
            # Do not echo a credential onto the screen / into scrollback.
            token = getpass.getpass(prompt)
        else:
            # Redirected stdin: keep reading the token from the stream so
            # piped input continues to work.
            token = input(prompt)
    except EOFError:
        # No more input available: fall back to unauthenticated mode
        # instead of crashing with a traceback.
        token = ""
    token = token.strip()

    if not token:
        logger.warning("Running unauthenticated. Limited to 60 API requests per hour.")
    else:
        logger.debug("Using GitHub token from user input.")
    return token
def get_repos(session, username, timeout=30):
    """Fetch every repository the API lists for a GitHub user.

    Requests ``/users/{username}/repos`` page by page (100 entries per page)
    until an empty page is returned.

    Args:
        session: Object with a requests-style
            ``get(url, params=..., timeout=...)`` method.
        username: GitHub login. It is percent-encoded before being placed in
            the URL path.
        timeout: Value forwarded to ``session.get`` as ``timeout`` so a
            stalled connection cannot hang the program. Defaults to 30.

    Returns:
        A list containing the decoded JSON items of all pages, in the order
        they were received.

    Raises:
        Exception: If the API answers 404 (user not found) or with any other
            non-200 status code.
    """
    from urllib.parse import quote  # local import keeps this block self-contained

    # Encode the login so characters such as "/", "?" or "#" cannot change
    # which endpoint is requested.
    safe_username = quote(username, safe="")
    url = f"{GITHUB_API_URL}/users/{safe_username}/repos"

    repos = []
    page = 1
    while True:
        params = {"per_page": 100, "page": page}
        response = session.get(url, params=params, timeout=timeout)
        if response.status_code == 404:
            raise Exception(f"User '{username}' not found.")
        elif response.status_code != 200:
            raise Exception(f"Failed to fetch repos: {response.status_code} {response.text}")
        data = response.json()
        if not data:
            # An empty page marks the end of the listing.
            break
        repos.extend(data)
        page += 1
    return repos
def get_languages(session, owner, repo_name, logger, timeout=30):
    """Fetch the language breakdown of a single repository.

    Requests ``/repos/{owner}/{repo_name}/languages`` and returns the decoded
    JSON body.

    Args:
        session: Object with a requests-style ``get(url, timeout=...)`` method.
        owner: Login of the repository owner; percent-encoded for the URL path.
        repo_name: Repository name; percent-encoded for the URL path.
        logger: Object providing a ``warning`` method.
        timeout: Value forwarded to ``session.get`` as ``timeout`` so a
            stalled connection cannot hang the program. Defaults to 30.

    Returns:
        The decoded JSON response (``main`` consumes it as a mapping of
        language name to byte count), or an empty dict when the API
        answers 404.

    Raises:
        Exception: If the API answers with a status other than 200 or 404.
    """
    from urllib.parse import quote  # local import keeps this block self-contained

    # Encode both path segments so unusual characters cannot alter the
    # requested endpoint.
    safe_owner = quote(owner, safe="")
    safe_repo = quote(repo_name, safe="")
    url = f"{GITHUB_API_URL}/repos/{safe_owner}/{safe_repo}/languages"

    response = session.get(url, timeout=timeout)
    if response.status_code == 404:
        # Best effort: a missing/inaccessible repo is skipped, not fatal.
        logger.warning(f"Repo '{repo_name}' not found or inaccessible.")
        return {}
    elif response.status_code != 200:
        raise Exception(f"Failed to fetch languages for repo '{repo_name}': {response.status_code} {response.text}")
    return response.json()
def save_language_data_to_file(language_totals, username, logger):
    """Write a plain-text language report to ``{username}_languages.txt``.

    The report lists each language with its byte count, largest first,
    followed by the grand total. Any exception raised while producing the
    file is logged through ``logger.error`` rather than propagated.

    Args:
        language_totals: Mapping of language name to byte count.
        username: GitHub login; used in the file name and the report header.
        logger: Object providing ``info`` and ``error`` methods.
    """
    filename = f"{username}_languages.txt"
    try:
        total_bytes = sum(language_totals.values())
        with open(filename, "w", encoding="utf-8") as report:
            separator = "=" * 40 + "\n"
            report.write(f"Language usage for GitHub user: {username}\n")
            report.write(separator)
            ranked = sorted(
                language_totals.items(),
                key=lambda entry: entry[1],
                reverse=True,
            )
            report.writelines(f"{lang}: {count} bytes\n" for lang, count in ranked)
            report.write(separator)
            report.write(f"Total bytes: {total_bytes}\n")
        logger.info(f"Language data saved to '{filename}'.")
    except Exception as e:
        logger.error(f"Failed to write language data to file: {e}")
def plot_language_usage(language_totals, username):
    """Display a bar chart of bytes of code per language, largest first.

    Does nothing when ``language_totals`` is empty.

    Args:
        language_totals: Mapping of language name to byte count.
        username: GitHub login shown in the chart title.
    """
    if not language_totals:
        return

    ranked = sorted(
        language_totals.items(),
        key=lambda entry: entry[1],
        reverse=True,
    )
    names = tuple(name for name, _ in ranked)
    sizes = tuple(size for _, size in ranked)

    plt.figure(figsize=(10, 6))
    plt.bar(names, sizes, color="skyblue")
    # Slanted, right-aligned tick labels keep long language names readable.
    plt.xticks(rotation=45, ha="right")
    plt.ylabel("Bytes of Code")
    plt.title(f"Languages used by GitHub user: {username}")
    plt.tight_layout()
    plt.show()
def main():
    """Run the interactive analyzer: prompt, fetch, aggregate, save and plot.

    Steps:
      1. Configure logging and obtain an optional GitHub token.
      2. Prompt for a username; exit with status 1 when it is empty.
      3. Fetch the user's repositories and sum language byte counts across
         all of them.
      4. Write the totals to a text file and show a bar chart.

    Any exception raised during steps 3-4 is logged and the process exits
    with status 1. The HTTP session is closed as soon as the API calls are
    finished.
    """
    logger = setup_logger()
    token = get_github_token(logger)

    username = input("Enter GitHub username: ").strip()
    if not username:
        logger.error("Username cannot be empty.")
        sys.exit(1)

    try:
        # The context manager releases the session's pooled connections on
        # every exit path, and before the plot window blocks.
        with requests.Session() as session:
            if token:
                session.headers.update({"Authorization": f"token {token}"})

            logger.info(f"Fetching repositories for user '{username}'...")
            repos = get_repos(session, username)
            logger.info(f"Found {len(repos)} repositories.")
            if not repos:
                logger.info("User has no public repositories.")
                return

            language_totals = {}
            for repo in repos:
                repo_name = repo['name']
                logger.info(f"Fetching languages for repo: {repo_name}...")
                langs = get_languages(session, username, repo_name, logger)
                for lang, bytes_count in langs.items():
                    language_totals[lang] = language_totals.get(lang, 0) + bytes_count

        if not language_totals:
            logger.info("No language data found in user's repositories.")
            return

        save_language_data_to_file(language_totals, username, logger)
        plot_language_usage(language_totals, username)
    except Exception as e:
        # Top-level boundary: report the failure and signal it via exit code.
        logger.error(f"Error: {e}")
        sys.exit(1)
# Run the analyzer only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()