{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Commit list exploration\n",
    "\n",
    "Sanity checks on `results/classifier/commitlist.csv`: commits that are still `Uncategorized`, a lookup of one specific commit hash, and a comparison of the categories found in the file against `common.categories`."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "from pprint import pprint\n",
    "from collections import Counter\n",
    "import common\n",
    "import math"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Filter shared by every cell that looks at commits still marked\n",
    "# 'Uncategorized' whose topic is anything other than 'not user facing'.\n",
    "UNCATEGORIZED_QUERY = \"category == 'Uncategorized' & topic != 'not user facing'\"\n",
    "\n",
    "commit_list_df = pd.read_csv(\"results/classifier/commitlist.csv\")\n",
    "\n",
    "# Per-author tally of the commits selected by the filter above.\n",
    "mean_authors = commit_list_df.query(UNCATEGORIZED_QUERY).author.to_list()\n",
    "counts = Counter(mean_authors)\n",
    "\n",
    "commit_list_df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Summary statistics of the category column.\n",
    "commit_list_df.category.describe()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Number of commits that are still 'Uncategorized' and whose topic is not\n",
    "# 'not user facing'.\n",
    "no_category = commit_list_df.query(UNCATEGORIZED_QUERY)\n",
    "len(no_category)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Check for cherry-picked commits: is this commit present in the commit list?\n",
    "# Only the first SHORT_SHA_LEN characters of the full SHA are compared, which\n",
    "# assumes commit_hash stores hashes abbreviated to that length -- TODO confirm.\n",
    "SHORT_SHA_LEN = 11\n",
    "example_sha = '55c76baf579cb6593f87d1a23e9a49afeb55f15a'\n",
    "commit_hashes = set(commit_list_df.commit_hash.to_list())\n",
    "\n",
    "example_sha[:SHORT_SHA_LEN] in commit_hashes"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Categories that appear in the commit list but are not in common.categories.\n",
    "diff_categories = set(commit_list_df.category.to_list()) - set(common.categories)\n",
    "print(len(diff_categories))\n",
    "pprint(diff_categories)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Counts of categories\n",
    "commit_list_df.category.value_counts()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3"
  },
  "vscode": {
   "interpreter": {
    "hash": "a867c59af434d7534e61ccb37014830daefd5fcd3816cab68d595dde5e446f52"
   }
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}