From dc055d6f81251a2dd6a54ddd19036dc9cc2ee733 Mon Sep 17 00:00:00 2001
From: Mickey Gousset
Date: Sun, 6 Apr 2025 20:16:45 -0500
Subject: [PATCH 1/2] checks commit sizes for entire repo and specific hash

---
 git/git-commit-analyzer.sh      | 115 +++++++++++++++++++
 git/git-repo-commit-analyzer.sh | 188 ++++++++++++++++++++++++++++++++
 2 files changed, 303 insertions(+)
 create mode 100755 git/git-commit-analyzer.sh
 create mode 100755 git/git-repo-commit-analyzer.sh

diff --git a/git/git-commit-analyzer.sh b/git/git-commit-analyzer.sh
new file mode 100755
--- /dev/null
+++ b/git/git-commit-analyzer.sh
@@ -0,0 +1,115 @@
+#!/bin/bash
+
+# -----------------------------------------------------------------------------
+# Script Name: git-commit-analyzer.sh
+# Description: This script analyzes the size of a specific Git commit.
+#              It calculates the total size of all files in the commit, the
+#              number of files modified, and provides a detailed breakdown of
+#              file sizes. The results are displayed in a human-readable format.
+#
+# Usage: ./git-commit-analyzer.sh <commit-hash>
+#
+# Features:
+# - Verifies if the script is run inside a valid Git repository.
+# - Checks if the specified commit hash exists in the repository.
+# - Calculates the total size of all files in the commit.
+# - Displays the size of each file in the commit in a sorted, descending order.
+# - Formats file sizes for readability (bytes, KB, MB).
+# - Handles the initial commit by comparing it to an empty tree.
+#
+# Requirements:
+# - Must be run from within a valid Git repository.
+# - Requires a valid commit hash to be passed as an argument.
+#
+# Output:
+# - A detailed breakdown of file sizes in the specified commit.
+# - Total number of files and the total size of the commit.
+#
+# Example:
+#   ./git-commit-analyzer.sh abc1234
+#   This will analyze the commit with hash `abc1234` and display the size of
+#   each file in the commit, along with the total size and file count.
+#
+# Author: Mickey Gousset (@mickeygousset)
+# Date: 2025-04-05
+# -----------------------------------------------------------------------------
+
+set -e
+
+# Print a byte count in a human-readable unit (bytes, KB, MB).
+format_size() {
+    local bytes=$1
+
+    if [ "$bytes" -ge 1048576 ]; then
+        awk -v b="$bytes" 'BEGIN { printf "%.2f MB\n", b / 1048576 }'
+    elif [ "$bytes" -ge 1024 ]; then
+        awk -v b="$bytes" 'BEGIN { printf "%.2f KB\n", b / 1024 }'
+    else
+        echo "$bytes bytes"
+    fi
+}
+
+if [ $# -ne 1 ]; then
+    echo "Usage: $0 <commit-hash>" >&2
+    exit 1
+fi
+
+COMMIT=$1
+
+# Verify this is a valid git repository
+if ! git rev-parse --is-inside-work-tree &>/dev/null; then
+    echo "Error: Not in a git repository" >&2
+    exit 1
+fi
+
+# Verify the commit exists
+if ! git cat-file -e "$COMMIT^{commit}" 2>/dev/null; then
+    echo "Error: Commit $COMMIT does not exist" >&2
+    exit 1
+fi
+
+echo "Analyzing commit: $COMMIT"
+echo "------------------------"
+
+# Detect a root commit. --verify --quiet is required here: a plain
+# `git rev-parse "$COMMIT^"` echoes its argument back when it cannot resolve
+# it, so the captured value would never be empty.
+if ! git rev-parse --verify --quiet "$COMMIT^" >/dev/null; then
+    echo "This is the initial commit. Comparing with empty tree."
+fi
+
+# Create a temporary file to store file sizes for sorting, and remove it on
+# every exit path
+TEMP_FILE=$(mktemp)
+trap 'rm -f -- "$TEMP_FILE"' EXIT
+
+# Process each file changed in this commit. --root makes diff-tree compare a
+# root commit against the empty tree; -z keeps unusual file names unquoted.
+while IFS= read -r -d '' file; do
+    # Paths without a readable blob in this commit (e.g. deleted files) are
+    # skipped
+    if SIZE=$(git cat-file -s "$COMMIT:$file" 2>/dev/null); then
+        printf '%s\t%s\n' "$SIZE" "$file" >> "$TEMP_FILE"
+    fi
+done < <(git diff-tree --root --no-commit-id --name-only -r -z "$COMMIT")
+
+# Calculate total size and count
+TOTAL_SIZE=0
+FILE_COUNT=0
+
+# Print header
+printf "%-60s %15s\n" "FILE" "SIZE"
+printf "%-60s %15s\n" "----" "----"
+
+# Sort by size (numerically, descending) and display
+while IFS=$'\t' read -r SIZE file; do
+    TOTAL_SIZE=$((TOTAL_SIZE + SIZE))
+    FILE_COUNT=$((FILE_COUNT + 1))
+
+    # Print file with human-readable size
+    printf "%-60s %15s\n" "$file" "$(format_size "$SIZE")"
+done < <(sort -nr "$TEMP_FILE")
+
+echo "------------------------"
+echo "Total files: $FILE_COUNT"
+echo "Total size: $(format_size "$TOTAL_SIZE")"
diff --git a/git/git-repo-commit-analyzer.sh b/git/git-repo-commit-analyzer.sh
new file mode 100755
--- /dev/null
+++ b/git/git-repo-commit-analyzer.sh
@@ -0,0 +1,188 @@
+#!/bin/bash
+
+# -----------------------------------------------------------------------------
+# Script Name: git-repo-commit-analyzer.sh
+# Description: This script analyzes the size of all commits in a Git repository.
+#              It generates detailed logs, CSV reports, and exception logs for
+#              commits that meet or exceed a specified size threshold.
+#
+# Usage: ./git-repo-commit-analyzer.sh <size-threshold-in-bytes>
+#
+# Features:
+# - Analyzes all commits in the current Git repository.
+# - Calculates the total size of each commit and the number of files it modifies.
+# - Generates the following output files:
+#   1. A log file with detailed analysis of all commits.
+#   2. A CSV file summarizing commit hash, size, and file count.
+#   3. An exceptions log file for commits exceeding the size threshold,
+#      including detailed file sizes for each commit.
+# - Identifies and logs the largest commit in the repository.
+#
+# Requirements:
+# - Must be run from within a valid Git repository.
+# - Requires a size threshold (in bytes) to be passed as an argument.
+#
+# Output Files:
+# - <repo-name>-analyzer-<timestamp>.log
+# - <repo-name>-commits-size-<timestamp>.csv
+# - <repo-name>-commit-size-exceptions-<timestamp>.log
+#
+# Example:
+#   ./git-repo-commit-analyzer.sh 100000
+#   This will analyze all commits in the repository and log details for commits
+#   with a total size of 100,000 bytes or more.
+#
+# Author: Mickey Gousset (@mickeygousset)
+# Date: 2025-04-05
+# -----------------------------------------------------------------------------
+
+set -e
+
+# Verify this is a valid git repository
+if ! git rev-parse --is-inside-work-tree &>/dev/null; then
+    echo "Error: Not in a git repository" >&2
+    exit 1
+fi
+
+# Get the repository name
+REPO_NAME=$(basename "$(git rev-parse --show-toplevel)")
+
+# Check if a size threshold is provided
+if [ $# -ne 1 ]; then
+    echo "Usage: $0 <size-threshold-in-bytes>" >&2
+    exit 1
+fi
+
+SIZE_THRESHOLD=$1
+
+# The threshold is used in numeric comparisons; reject anything that is not a
+# non-negative integer up front instead of failing in the middle of the run
+if ! [[ $SIZE_THRESHOLD =~ ^[0-9]+$ ]]; then
+    echo "Error: Size threshold must be a non-negative integer (bytes)" >&2
+    exit 1
+fi
+
+# Generate timestamp for log and CSV filenames
+TIMESTAMP=$(date +"%Y%m%d-%H%M%S")
+LOG_FILE="${REPO_NAME}-analyzer-$TIMESTAMP.log"
+CSV_FILE="${REPO_NAME}-commits-size-$TIMESTAMP.csv"
+EXCEPTIONS_FILE="${REPO_NAME}-commit-size-exceptions-$TIMESTAMP.log"
+
+# Initialize variables to track the largest commit
+LARGEST_COMMIT=""
+LARGEST_SIZE=0
+
+# Create the CSV file and add the header
+echo "Commit Hash,Commit Size (bytes),Number of Files" > "$CSV_FILE"
+
+# Create the exceptions file
+echo "Commits meeting or exceeding the size threshold ($SIZE_THRESHOLD bytes) in repository '$REPO_NAME':" > "$EXCEPTIONS_FILE"
+
+# Function to log output to both the screen and the log file
+log() {
+    printf '%s\n' "$1" | tee -a "$LOG_FILE"
+}
+
+# Function to log exceptions to both the screen and the exceptions file
+log_exception() {
+    printf '%s\n' "$1" | tee -a "$EXCEPTIONS_FILE"
+}
+
+# Array to store commits that meet the size threshold
+LARGE_COMMITS=()
+
+# Temporary file holding one "<size><TAB><path>" record per file of the commit
+# being analyzed; removed on every exit path
+TEMP_FILE=$(mktemp)
+trap 'rm -f -- "$TEMP_FILE"' EXIT
+
+# Loop through all commits in the repository. The commit list is read on file
+# descriptor 3 so that commands inside the loop cannot consume it via stdin.
+while read -r COMMIT <&3; do
+    log "Analyzing commit: $COMMIT in repository '$REPO_NAME'"
+    log "------------------------"
+
+    # Detect a root commit. --verify --quiet is required here: a plain
+    # `git rev-parse "$COMMIT^"` echoes its argument back when it cannot
+    # resolve it, so the captured value would never be empty.
+    if ! git rev-parse --verify --quiet "$COMMIT^" >/dev/null; then
+        log "This is the initial commit. Comparing with empty tree."
+    fi
+
+    # Record the size of every file changed in this commit. --root makes
+    # diff-tree compare a root commit against the empty tree; -z keeps unusual
+    # file names unquoted.
+    : > "$TEMP_FILE"
+    CHANGED_COUNT=0
+    while IFS= read -r -d '' file; do
+        CHANGED_COUNT=$((CHANGED_COUNT + 1))
+
+        # Paths without a readable blob in this commit (e.g. deleted files)
+        # are skipped
+        if SIZE=$(git cat-file -s "$COMMIT:$file" 2>/dev/null); then
+            printf '%s\t%s\n' "$SIZE" "$file" >> "$TEMP_FILE"
+        fi
+    done < <(git diff-tree --root --no-commit-id --name-only -r -z "$COMMIT" 2>/dev/null)
+
+    if [ "$CHANGED_COUNT" -eq 0 ]; then
+        log "No files changed in this commit."
+        continue
+    fi
+
+    # Calculate total size and count
+    TOTAL_SIZE=0
+    FILE_COUNT=0
+    while IFS=$'\t' read -r SIZE file; do
+        TOTAL_SIZE=$((TOTAL_SIZE + SIZE))
+        FILE_COUNT=$((FILE_COUNT + 1))
+    done < "$TEMP_FILE"
+
+    # Log total for this commit
+    log "Total files: $FILE_COUNT"
+    log "Total size: $TOTAL_SIZE bytes"
+    log "------------------------"
+
+    # Append commit details to the CSV file
+    echo "$COMMIT,$TOTAL_SIZE,$FILE_COUNT" >> "$CSV_FILE"
+
+    # Check if this is the largest commit
+    if [ "$TOTAL_SIZE" -gt "$LARGEST_SIZE" ]; then
+        LARGEST_SIZE=$TOTAL_SIZE
+        LARGEST_COMMIT=$COMMIT
+    fi
+
+    # Check if the commit meets the size threshold
+    if [ "$TOTAL_SIZE" -ge "$SIZE_THRESHOLD" ]; then
+        LARGE_COMMITS+=("$COMMIT ($TOTAL_SIZE bytes)")
+
+        # Log details to the exceptions file
+        log_exception "Commit: $COMMIT"
+        log_exception "Total Size: $TOTAL_SIZE bytes"
+        log_exception "Files:"
+
+        # Log each file and its size, reusing the sizes collected above
+        while IFS=$'\t' read -r SIZE file; do
+            log_exception "  $file: $SIZE bytes"
+        done < "$TEMP_FILE"
+
+        log_exception "------------------------"
+    fi
+done 3< <(git rev-list --all)
+
+# Output the largest commit
+log "Largest commit: $LARGEST_COMMIT"
+log "Largest size: $LARGEST_SIZE bytes"
+
+# Output commits that meet the size threshold
+if [ ${#LARGE_COMMITS[@]} -gt 0 ]; then
+    log "Commits meeting or exceeding the size threshold ($SIZE_THRESHOLD bytes):"
+    for COMMIT_INFO in "${LARGE_COMMITS[@]}"; do
+        log "$COMMIT_INFO"
+    done
+else
+    log "No commits meet or exceed the size threshold ($SIZE_THRESHOLD bytes)."
+fi
+
+log "Log file created: $LOG_FILE"
+log "CSV file created: $CSV_FILE"
+log "Exceptions file created: $EXCEPTIONS_FILE"

From ff2010d69d2090ed2f4ef2edd5a14165315be93b Mon Sep 17 00:00:00 2001
From: Mickey Gousset
Date: Sun, 6 Apr 2025 20:26:51 -0500
Subject: [PATCH 2/2] feat: add descriptions for git-commit-analyzer.sh and git-repo-commit-analyzer.sh scripts

---
 git/README.md | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/git/README.md b/git/README.md
index df75bdf..321a930 100644
--- a/git/README.md
+++ b/git/README.md
@@ -56,6 +56,10 @@ Amends the most recent commit by adding newly staged changes
 
 Amends the most recent commit's message
 
+## git-commit-analyzer.sh
+
+This script analyzes the size of a specific Git commit
+
 ## git-commit-empty-commit.sh
 
 Commits empty commit (useful for triggering CI builds)
@@ -72,6 +76,10 @@ Finds the best common ancestor(s) between two commits (you can use it to compare
 
 Reorders the last 2 commits
 
+## git-repo-commit-analyzer.sh
+
+This script analyzes the size of all commits in a Git repository
+
 ## git-show-branch.sh
 
 Shows commits that are common for branches being compared