From 4c59501a165cfa33c0abae1d6cc5cb1e2c80f518 Mon Sep 17 00:00:00 2001
From: Ian Lee
Date: Sun, 4 Mar 2018 16:48:54 -0800
Subject: [PATCH 01/15] Added logic to convert from SLOC -> Person Hours

Uses the COCOMO II model available at: http://csse.usc.edu/tools/cocomoii.php
---
 scraper/code_gov/__init__.py | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

diff --git a/scraper/code_gov/__init__.py b/scraper/code_gov/__init__.py
index f88220b..5f92489 100644
--- a/scraper/code_gov/__init__.py
+++ b/scraper/code_gov/__init__.py
@@ -3,9 +3,11 @@
 import json
 import logging
+import re
 
 import github3
 import gitlab
+import requests
 # import stashy
 
 logger = logging.getLogger(__name__)
 
@@ -113,6 +115,28 @@ def _prune_dict_null_str(dictionary):
     return dictionary
 
 
+def compute_labor_hours(sloc):
+    """
+    Compute the labor hours, given a count of source lines of code
+
+    The intention is to use the COCOMO II model to compute this value.
+
+    References:
+    - http://csse.usc.edu/tools/cocomoii.php
+    - http://docs.python-guide.org/en/latest/scenarios/scrape/
+    """
+    # (40 Hours / week) * (52 weeks / year) / (12 months / year) ~= 173.33
+    HOURS_PER_PERSON_MONTH = 40.0 * 52 / 12
+
+    cocomo_url = 'http://csse.usc.edu/tools/cocomoii.php'
+    page = requests.post(cocomo_url, data={'new_size': sloc})
+
+    EFFORT_REGEX = re.compile(r'Effort = ([\d\.]+) Person-months')
+    person_months = float(EFFORT_REGEX.search(page.text).group(1))
+
+    return person_months * HOURS_PER_PERSON_MONTH
+
+
 class CodeGovMetadata(dict):
     """
     Defines the entire contents of a Code.gov code.json file
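Aside: a quick sanity check of the conversion factor used above. The 11.2 person-month figure below is hypothetical, standing in for whatever "Effort" value the COCOMO II form returns:

    # 40 hours/week * 52 weeks/year / 12 months/year ~= 173.33 hours/person-month
    HOURS_PER_PERSON_MONTH = 40.0 * 52 / 12
    person_months = 11.2  # hypothetical COCOMO II 'Effort' result
    print(round(person_months * HOURS_PER_PERSON_MONTH))  # 1941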
From 0baf11034a63476bc1e93a6b1a7b046f287618dc Mon Sep 17 00:00:00 2001
From: Ian Lee
Date: Wed, 7 Mar 2018 17:51:51 -0800
Subject: [PATCH 02/15] Added function for cloning repo and running cloc against it

---
 scraper/code_gov/__init__.py | 77 +++++++++++++++++++++++++++++++++++-
 1 file changed, 76 insertions(+), 1 deletion(-)

diff --git a/scraper/code_gov/__init__.py b/scraper/code_gov/__init__.py
index 5f92489..0a955ce 100644
--- a/scraper/code_gov/__init__.py
+++ b/scraper/code_gov/__init__.py
@@ -8,7 +8,8 @@
 import github3
 import gitlab
 import requests
-# import stashy
+
+from scraper.util import execute
 
 logger = logging.getLogger(__name__)
 
@@ -115,6 +116,80 @@ def _prune_dict_null_str(dictionary):
     return dictionary
 
 
+def git_repo_to_sloc(url):
+    """
+    Given a Git repository URL, returns number of lines of code based on cloc
+
+    Reference:
+    - cloc: https://github.com/AlDanial/cloc
+
+    Sample cloc output:
+        {
+          "header": {
+            "cloc_url": "github.com/AlDanial/cloc",
+            "cloc_version": "1.74",
+            "elapsed_seconds": 0.195950984954834,
+            "n_files": 27,
+            "n_lines": 2435,
+            "files_per_second": 137.78956000769,
+            "lines_per_second": 12426.5769858787
+          },
+          "C++": {
+            "nFiles": 7,
+            "blank": 121,
+            "comment": 314,
+            "code": 371
+          },
+          "C/C++ Header": {
+            "nFiles": 8,
+            "blank": 107,
+            "comment": 604,
+            "code": 191
+          },
+          "CMake": {
+            "nFiles": 11,
+            "blank": 49,
+            "comment": 465,
+            "code": 165
+          },
+          "Markdown": {
+            "nFiles": 1,
+            "blank": 18,
+            "comment": 0,
+            "code": 30
+          },
+          "SUM": {
+            "blank": 295,
+            "comment": 1383,
+            "code": 757,
+            "nFiles": 27
+          }
+        }
+    """
+    tmp_dir = 'tmp-clone'
+
+    cmd = ['rm', '-rf', tmp_dir]
+    out, err = execute(cmd)
+    # print(out, err)
+
+    cmd = ['git', 'clone', '--depth=1', url, tmp_dir]
+    out, err = execute(cmd)
+    # print(out, err)
+
+    cmd = ['cloc', '--json', tmp_dir]
+    out, err = execute(cmd)
+    # print(out, err)
+
+    cloc_json = json.loads(out[1:].replace('\\n', '').replace('\'', ''))
+    sloc = cloc_json['SUM']['code']
+
+    cmd = ['rm', '-rf', tmp_dir]
+    out, err = execute(cmd)
+    # print(out, err)
+
+    return sloc
+
+
 def compute_labor_hours(sloc):
     """
     Compute the labor hours, given a count of source lines of code
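Aside: the character-stripping before json.loads() above compensates for execute() (added in PATCH 06) returning str() of a bytes object, so cloc's JSON arrives wrapped as b'...'. A minimal illustration of what the cleanup undoes:

    import json

    raw = str(b'{"SUM": {"code": 757}}\n')     # mimics execute()'s return value
    print(raw)                                 # prints: b'{"SUM": {"code": 757}}\n'
    cleaned = raw[1:].replace('\\n', '').replace('\'', '')
    print(json.loads(cleaned)['SUM']['code'])  # 757

This only works because cloc's JSON uses double quotes throughout; decoding the bytes to text would be the more robust fix.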
From 8c7228f051c4633b9a053ee3e1f9eaf015b78dd5 Mon Sep 17 00:00:00 2001
From: Ian Lee
Date: Fri, 9 Mar 2018 14:42:42 -0800
Subject: [PATCH 03/15] Implemented better tempdir handling for git clones

---
 scraper/code_gov/__init__.py | 40 ++++++++++++++++++++----------------
 1 file changed, 24 insertions(+), 16 deletions(-)

diff --git a/scraper/code_gov/__init__.py b/scraper/code_gov/__init__.py
index 0a955ce..5f96f30 100644
--- a/scraper/code_gov/__init__.py
+++ b/scraper/code_gov/__init__.py
@@ -3,7 +3,9 @@
 import json
 import logging
+import os
 import re
+import tempfile
 
 import github3
 import gitlab
@@ -166,26 +168,21 @@ def git_repo_to_sloc(url):
         }
     }
     """
-    tmp_dir = 'tmp-clone'
-
-    cmd = ['rm', '-rf', tmp_dir]
-    out, err = execute(cmd)
-    # print(out, err)
+    with tempfile.TemporaryDirectory() as tmp_dir:
+        logger.debug('Cloning: url=%s tmp_dir=%s', url, tmp_dir)
 
-    cmd = ['git', 'clone', '--depth=1', url, tmp_dir]
-    out, err = execute(cmd)
-    # print(out, err)
+        tmp_clone = os.path.join(tmp_dir, 'clone-dir')
 
-    cmd = ['cloc', '--json', tmp_dir]
-    out, err = execute(cmd)
-    # print(out, err)
+        cmd = ['git', 'clone', '--depth=1', url, tmp_clone]
+        execute(cmd)
 
-    cloc_json = json.loads(out[1:].replace('\\n', '').replace('\'', ''))
-    sloc = cloc_json['SUM']['code']
+        cmd = ['cloc', '--json', tmp_clone]
+        out, _ = execute(cmd)
 
-    cmd = ['rm', '-rf', tmp_dir]
-    out, err = execute(cmd)
-    # print(out, err)
+        cloc_json = json.loads(out[1:].replace('\\n', '').replace('\'', ''))
+        sloc = cloc_json['SUM']['code']
+        logger.debug('SLOC: url=%s, sloc=%d', url, sloc)
 
     return sloc
 
@@ -207,7 +204,11 @@ def compute_labor_hours(sloc):
     page = requests.post(cocomo_url, data={'new_size': sloc})
 
     EFFORT_REGEX = re.compile(r'Effort = ([\d\.]+) Person-months')
-    person_months = float(EFFORT_REGEX.search(page.text).group(1))
+    try:
+        person_months = float(EFFORT_REGEX.search(page.text).group(1))
+    except AttributeError:
+        # If there is no match, .search(..) returns None
+        person_months = 0
 
     return person_months * HOURS_PER_PERSON_MONTH
 
@@ -445,6 +446,13 @@ def from_github3(klass, repository, organization=None):
 
         _prune_dict_null_str(project)
 
+        sum_sloc = git_repo_to_sloc(project['repositoryURL'])
+        laborHours = compute_labor_hours(sum_sloc)
+        print('GitHub3: sum_sloc=%d' % sum_sloc)
+        print('GitHub3: laborHours=%d' % laborHours)
+        logger.info('GitHub3: sum_sloc=%d', sum_sloc)
+        logger.info('GitHub3: laborHours=%d', laborHours)
+
         return project
 
     @classmethod

From 1f6318643dbe403d12e69d4694b9aad585be3ecb Mon Sep 17 00:00:00 2001
From: Ian Lee
Date: Fri, 9 Mar 2018 16:14:16 -0800
Subject: [PATCH 04/15] Added error handling to JSON parsing

---
 scraper/code_gov/__init__.py | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/scraper/code_gov/__init__.py b/scraper/code_gov/__init__.py
index 5f96f30..d50401d 100644
--- a/scraper/code_gov/__init__.py
+++ b/scraper/code_gov/__init__.py
@@ -180,9 +180,13 @@ def git_repo_to_sloc(url):
         cmd = ['cloc', '--json', tmp_clone]
         out, _ = execute(cmd)
 
-        cloc_json = json.loads(out[1:].replace('\\n', '').replace('\'', ''))
-        sloc = cloc_json['SUM']['code']
-        logger.debug('SLOC: url=%s, sloc=%d', url, sloc)
+        try:
+            cloc_json = json.loads(out[1:].replace('\\n', '').replace('\'', ''))
+            sloc = cloc_json['SUM']['code']
+            logger.debug('SLOC: url=%s, sloc=%d', url, sloc)
+        except json.decoder.JSONDecodeError:
+            logger.debug('Error Decoding: url=%s, out=%s', url, out)
+            sloc = 0
 
     return sloc
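Aside: tempfile.TemporaryDirectory() removes the clone automatically when the with-block exits, even if cloc or the JSON parsing raises, replacing the manual 'rm -rf' bookkeeping from PATCH 02. A small sketch of that behavior:

    import os
    import tempfile

    with tempfile.TemporaryDirectory() as tmp_dir:
        tmp_clone = os.path.join(tmp_dir, 'clone-dir')
        os.mkdir(tmp_clone)
        print(os.path.isdir(tmp_clone))  # True
    print(os.path.isdir(tmp_clone))      # False -- cleaned up on context exit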
From 77b3677207663c4275b735f9031c73b0d084c859 Mon Sep 17 00:00:00 2001
From: Ian Lee
Date: Fri, 9 Mar 2018 16:37:13 -0800
Subject: [PATCH 05/15] Added prototype script to calculate laborHours from code.json file

---
 scripts/codegov_compute_hours.py | 29 +++++++++++++++++++++++++++++
 setup.py                         |  1 +
 2 files changed, 30 insertions(+)
 create mode 100755 scripts/codegov_compute_hours.py

diff --git a/scripts/codegov_compute_hours.py b/scripts/codegov_compute_hours.py
new file mode 100755
index 0000000..e7e4eef
--- /dev/null
+++ b/scripts/codegov_compute_hours.py
@@ -0,0 +1,29 @@
+#! /usr/bin/env python3
+
+import argparse
+import json
+
+from scraper import code_gov
+
+
+parser = argparse.ArgumentParser(description='Scrape code repositories for Code.gov / DOECode')
+parser.add_argument('filename', type=str, help='Path to locally stored `code.json` file')
+args = parser.parse_args()
+
+code_gov_json = json.load(open(args.filename))
+releases = code_gov_json['releases']
+
+repo_urls = {
+    release['repositoryURL'].rstrip('/')
+    for release in releases
+    if release.get('vcs', '') == 'git'
+}
+
+for url in repo_urls:
+    # print(url)
+
+    sloc = code_gov.git_repo_to_sloc(url)
+    # print(sloc)
+
+    hours = code_gov.compute_labor_hours(sloc)
+    print('-- url=%s, sloc=%d, hours=%d' % (url, sloc, hours))
diff --git a/setup.py b/setup.py
index b2adfed..b1e75df 100644
--- a/setup.py
+++ b/setup.py
@@ -22,6 +22,7 @@
             'scraper = scraper.gen_code_gov_json:main',
         ]
     },
+    scripts=['scripts/codegov_computer_hours.py'],
     classifiers=[
         'Development Status :: 3 - Alpha',
         'Intended Audience :: Developers',

From ad6e68a2fd1f6779c7aaf880ee789920cea0540d Mon Sep 17 00:00:00 2001
From: Ian Lee
Date: Mon, 12 Mar 2018 09:53:47 -0700
Subject: [PATCH 06/15] Added missing utility file

---
 scraper/util.py | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)
 create mode 100644 scraper/util.py

diff --git a/scraper/util.py b/scraper/util.py
new file mode 100644
index 0000000..e04ea52
--- /dev/null
+++ b/scraper/util.py
@@ -0,0 +1,19 @@
+import logging
+import os
+
+from subprocess import Popen, PIPE, STDOUT
+
+logger = logging.getLogger(__name__)
+
+
+def execute(command, cwd=None):
+    logger.debug('Forking command: %s', command)
+
+    if cwd is None:
+        cwd = os.getcwd()
+    elif not os.path.isdir(cwd):
+        raise ValueError('path does not exist: %s' % cwd)
+
+    process = Popen(command, cwd=cwd, stdout=PIPE, stderr=STDOUT)
+    out, err = process.communicate()
+    return str(out), str(err)
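Aside: execute() returns str() of the raw bytes from communicate(), not decoded text, which is what forces the unwrapping seen in git_repo_to_sloc(). A hypothetical call:

    from scraper.util import execute

    out, err = execute(['echo', 'hello'])
    print(out)  # b'hello\n' -- the str() of a bytes object
    print(err)  # 'None' -- stderr is merged into stdout via stderr=STDOUT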
From e4af298ae2f7ba379b6cc8f589cbccd6328fd07d Mon Sep 17 00:00:00 2001
From: Ian Lee
Date: Mon, 12 Mar 2018 10:00:14 -0700
Subject: [PATCH 07/15] Fixed typo in utility script reference in setup.py

---
 setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index b1e75df..e5f197a 100644
--- a/setup.py
+++ b/setup.py
@@ -22,7 +22,7 @@
             'scraper = scraper.gen_code_gov_json:main',
         ]
     },
-    scripts=['scripts/codegov_computer_hours.py'],
+    scripts=['scripts/codegov_compute_hours.py'],
     classifiers=[
         'Development Status :: 3 - Alpha',
         'Intended Audience :: Developers',

From 6e0a4130df56f37fd7352210f138bf3b09d47657 Mon Sep 17 00:00:00 2001
From: Ian Lee
Date: Mon, 12 Mar 2018 13:14:44 -0700
Subject: [PATCH 08/15] Added #nosec mark for Popen

---
 scraper/util.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/scraper/util.py b/scraper/util.py
index e04ea52..1cda29e 100644
--- a/scraper/util.py
+++ b/scraper/util.py
@@ -1,7 +1,7 @@
 import logging
 import os
 
-from subprocess import Popen, PIPE, STDOUT
+from subprocess import Popen, PIPE, STDOUT  # nosec
 
 logger = logging.getLogger(__name__)
 
@@ -14,6 +14,6 @@ def execute(command, cwd=None):
     elif not os.path.isdir(cwd):
         raise ValueError('path does not exist: %s' % cwd)
 
-    process = Popen(command, cwd=cwd, stdout=PIPE, stderr=STDOUT)
+    process = Popen(command, cwd=cwd, stdout=PIPE, stderr=STDOUT)  # nosec
     out, err = process.communicate()
     return str(out), str(err)

From c5c5e2cf52d8e7574ebbfa9b02bca3d5e8ff26ca Mon Sep 17 00:00:00 2001
From: Ian Lee
Date: Mon, 12 Mar 2018 13:15:47 -0700
Subject: [PATCH 09/15] Explicitly disable shell on Popen

---
 scraper/util.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/scraper/util.py b/scraper/util.py
index 1cda29e..e1901b8 100644
--- a/scraper/util.py
+++ b/scraper/util.py
@@ -14,6 +14,11 @@ def execute(command, cwd=None):
     elif not os.path.isdir(cwd):
         raise ValueError('path does not exist: %s' % cwd)
 
-    process = Popen(command, cwd=cwd, stdout=PIPE, stderr=STDOUT)  # nosec
+    process = Popen(
+        command,
+        cwd=cwd,
+        stdout=PIPE,
+        stderr=STDOUT,
+        shell=False)  # nosec
     out, err = process.communicate()
     return str(out), str(err)

From 9fbdd41bccba6783918e2528e99635ff8a328a75 Mon Sep 17 00:00:00 2001
From: Ian Lee
Date: Fri, 16 Mar 2018 11:56:32 -0700
Subject: [PATCH 10/15] Updated logging level of a couple messages

---
 scraper/gen_code_gov_json.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/scraper/gen_code_gov_json.py b/scraper/gen_code_gov_json.py
index 7d5e9e0..442fd9c 100755
--- a/scraper/gen_code_gov_json.py
+++ b/scraper/gen_code_gov_json.py
@@ -56,14 +56,14 @@ def _check_api_limits(min_requests_remaining=250, sleep_time=15):
     api_remaining = api_rates['rate']['remaining']
     api_reset = api_rates['rate']['reset']
 
-    logger.info('Rate Limit - %d requests remaining', api_remaining)
+    logger.debug('Rate Limit - %d requests remaining', api_remaining)
 
     if api_remaining > min_requests_remaining:
         return
 
     now_time = time.time()
     time_to_reset = int(api_reset - now_time)
-    logger.info('Rate Limit - Need to sleep for %d seconds', time_to_reset)
+    logger.warning('Rate Limit Depleted - Sleeping for %d seconds', time_to_reset)
 
     while now_time < api_reset:
         time.sleep(10)
From e7d877ecbea5d0264615af16500f07de0aca007d Mon Sep 17 00:00:00 2001
From: Ian Lee
Date: Fri, 16 Mar 2018 11:57:08 -0700
Subject: [PATCH 11/15] Enabled actual calculation and storage of laborHours for public GitHub repos

---
 scraper/code_gov/__init__.py | 16 +++++++---------
 1 file changed, 7 insertions(+), 9 deletions(-)

diff --git a/scraper/code_gov/__init__.py b/scraper/code_gov/__init__.py
index d50401d..7fd4687 100644
--- a/scraper/code_gov/__init__.py
+++ b/scraper/code_gov/__init__.py
@@ -401,8 +401,13 @@ def from_github3(klass, repository, organization=None):
         project['permissions']['licenses'] = None
         project['permissions']['usageType'] = 'openSource'
 
-        # TODO: Compute from git repo
-        project['laborHours'] = 0
+        sum_sloc = git_repo_to_sloc(project['repositoryURL'])
+        logger.debug('GitHub3: sum_sloc=%d', sum_sloc)
+
+        laborHours = compute_labor_hours(sum_sloc)
+        logger.debug('GitHub3: laborHours=%d', laborHours)
+
+        project['laborHours'] = laborHours
 
         # TODO: Compute from GitHub
         project['tags'] = ['github']
@@ -450,13 +455,6 @@ def from_github3(klass, repository, organization=None):
 
         _prune_dict_null_str(project)
 
-        sum_sloc = git_repo_to_sloc(project['repositoryURL'])
-        laborHours = compute_labor_hours(sum_sloc)
-        print('GitHub3: sum_sloc=%d' % sum_sloc)
-        print('GitHub3: laborHours=%d' % laborHours)
-        logger.info('GitHub3: sum_sloc=%d', sum_sloc)
-        logger.info('GitHub3: laborHours=%d', laborHours)
-
         return project
 
     @classmethod

From febbc1e501067eff63d3a65927d738e5687489f8 Mon Sep 17 00:00:00 2001
From: Ian Lee
Date: Sat, 17 Mar 2018 07:37:00 -0700
Subject: [PATCH 12/15] Refactored logging messages into relevant functions

---
 scraper/code_gov/__init__.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/scraper/code_gov/__init__.py b/scraper/code_gov/__init__.py
index 7fd4687..fb86468 100644
--- a/scraper/code_gov/__init__.py
+++ b/scraper/code_gov/__init__.py
@@ -183,11 +183,12 @@ def git_repo_to_sloc(url):
         try:
             cloc_json = json.loads(out[1:].replace('\\n', '').replace('\'', ''))
             sloc = cloc_json['SUM']['code']
-            logger.debug('SLOC: url=%s, sloc=%d', url, sloc)
         except json.decoder.JSONDecodeError:
             logger.debug('Error Decoding: url=%s, out=%s', url, out)
             sloc = 0
 
+    logger.debug('SLOC: url=%s, sloc=%d', url, sloc)
+
     return sloc
 
@@ -214,7 +215,10 @@ def compute_labor_hours(sloc):
         # If there is no match, .search(..) returns None
         person_months = 0
 
-    return person_months * HOURS_PER_PERSON_MONTH
+    labor_hours = person_months * HOURS_PER_PERSON_MONTH
+    logger.debug('sloc=%d labor_hours=%d', sloc, labor_hours)
+
+    return labor_hours
@@ -402,11 +406,7 @@ def from_github3(klass, repository, organization=None):
         project['permissions']['usageType'] = 'openSource'
 
         sum_sloc = git_repo_to_sloc(project['repositoryURL'])
-        logger.debug('GitHub3: sum_sloc=%d', sum_sloc)
-
         laborHours = compute_labor_hours(sum_sloc)
-        logger.debug('GitHub3: laborHours=%d', laborHours)
-
         project['laborHours'] = laborHours
 
         # TODO: Compute from GitHub

From c5be59fa333cd2d615635bdebeafd2c9655ac157 Mon Sep 17 00:00:00 2001
From: Ian Lee
Date: Sat, 17 Mar 2018 07:37:48 -0700
Subject: [PATCH 13/15] Moved Effort regex compilation to global scope.

---
 scraper/code_gov/__init__.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/scraper/code_gov/__init__.py b/scraper/code_gov/__init__.py
index fb86468..4a83f4e 100644
--- a/scraper/code_gov/__init__.py
+++ b/scraper/code_gov/__init__.py
@@ -15,6 +15,8 @@
 
 logger = logging.getLogger(__name__)
 
+EFFORT_REGEX = re.compile(r'Effort = ([\d\.]+) Person-months')
+
 DOE_LAB_MAPPING = {
     'AMES': 'Ames Laboratory (AMES)',
     'ANL': 'Argonne National Laboratory (ANL)',
@@ -210,7 +212,6 @@ def compute_labor_hours(sloc):
     cocomo_url = 'http://csse.usc.edu/tools/cocomoii.php'
     page = requests.post(cocomo_url, data={'new_size': sloc})
 
-    EFFORT_REGEX = re.compile(r'Effort = ([\d\.]+) Person-months')
     try:
         person_months = float(EFFORT_REGEX.search(page.text).group(1))
     except AttributeError:
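Aside: with EFFORT_REGEX compiled once at module scope, each call to compute_labor_hours() reuses it. A small illustration against a made-up fragment of the COCOMO II results page:

    import re

    EFFORT_REGEX = re.compile(r'Effort = ([\d\.]+) Person-months')

    print(float(EFFORT_REGEX.search('Effort = 11.2 Person-months').group(1)))  # 11.2

    # A page with no match returns None from search(), which is what the
    # AttributeError fallback added in PATCH 03 catches:
    print(EFFORT_REGEX.search('no effort figure here'))  # None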
From 3bf00ac088c4fc2bde273e17797203ff9b5d0a2b Mon Sep 17 00:00:00 2001
From: Ian Lee
Date: Sat, 17 Mar 2018 07:45:28 -0700
Subject: [PATCH 14/15] Moved US government org calculation to separate module

---
 scraper/gen_code_gov_json.py | 21 ++-------------------
 scraper/github.py            | 21 +++++++++++++++++++++
 2 files changed, 23 insertions(+), 19 deletions(-)
 create mode 100644 scraper/github.py

diff --git a/scraper/gen_code_gov_json.py b/scraper/gen_code_gov_json.py
index 442fd9c..5d2bf98 100755
--- a/scraper/gen_code_gov_json.py
+++ b/scraper/gen_code_gov_json.py
@@ -10,10 +10,10 @@
 
 import github3
 import stashy
-import requests
 
 from scraper.code_gov import CodeGovMetadata, CodeGovProject
 from scraper.code_gov.doe import to_doe_csv
+from scraper.github import gov_orgs
 
 logger = logging.getLogger(__name__)
@@ -136,23 +136,6 @@ def process_doecode(doecode_json_filename):
     return projects
 
 
-def government_at_github():
-    """
-    Returns a list of US Government GitHub orgs
-
-    Based on: https://government.github.com/community/
-    """
-    us_gov_github_orgs = set()
-
-    gov_orgs = requests.get('https://government.github.com/organizations.json').json()
-
-    us_gov_github_orgs.update(gov_orgs['governments']['U.S. Federal'])
-    us_gov_github_orgs.update(gov_orgs['governments']['U.S. Military and Intelligence'])
-    us_gov_github_orgs.update(gov_orgs['research']['U.S. Research Labs'])
-
-    return list(us_gov_github_orgs)
-
-
 def main():
     parser = argparse.ArgumentParser(description='Scrape code repositories for Code.gov / DOECode')
 
@@ -205,7 +188,7 @@ def main():
     logger.debug('GitHub.com Organizations: %s', github_orgs)
 
     if args.github_gov_orgs:
-        github_orgs.extend(government_at_github())
+        github_orgs.extend(gov_orgs())
 
     github_repos = config_json.get('github_repos', [])
     github_repos.extend(args.github_repos)
diff --git a/scraper/github.py b/scraper/github.py
new file mode 100644
index 0000000..5051f4a
--- /dev/null
+++ b/scraper/github.py
@@ -0,0 +1,21 @@
+#! /usr/bin/env python
+# -*- coding: UTF-8 -*-
+
+import requests
+
+
+def gov_orgs():
+    """
+    Returns a list of US Government GitHub orgs
+
+    Based on: https://government.github.com/community/
+    """
+    us_gov_github_orgs = set()
+
+    gov_orgs = requests.get('https://government.github.com/organizations.json').json()
+
+    us_gov_github_orgs.update(gov_orgs['governments']['U.S. Federal'])
+    us_gov_github_orgs.update(gov_orgs['governments']['U.S. Military and Intelligence'])
+    us_gov_github_orgs.update(gov_orgs['research']['U.S. Research Labs'])
+
+    return list(us_gov_github_orgs)

From 1797a9190091afd00c8ac5ad97e06b9abe75ccc3 Mon Sep 17 00:00:00 2001
From: Ian Lee
Date: Sat, 17 Mar 2018 07:48:11 -0700
Subject: [PATCH 15/15] Update API limit check to take session as input

---
 scraper/gen_code_gov_json.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/scraper/gen_code_gov_json.py b/scraper/gen_code_gov_json.py
index 5d2bf98..fcbe532 100755
--- a/scraper/gen_code_gov_json.py
+++ b/scraper/gen_code_gov_json.py
@@ -42,7 +42,7 @@ def _configure_logging(verbose=False):
     logger.addHandler(handler)
 
 
-def _check_api_limits(min_requests_remaining=250, sleep_time=15):
+def _check_api_limits(gh_session, min_requests_remaining=250, sleep_time=15):
     """
     Simplified check for API limits
 
@@ -52,7 +52,7 @@ def _check_api_limits(min_requests_remaining=250, sleep_time=15):
 
     See: https://developer.github.com/v3/#rate-limiting
     """
-    api_rates = gh.rate_limit()
+    api_rates = gh_session.rate_limit()
 
     api_remaining = api_rates['rate']['remaining']
     api_reset = api_rates['rate']['reset']
@@ -83,7 +83,7 @@ def process_organization(org_name):
     WIGGLE_ROOM = 100
     num_requests_needed = 2 * num_repos + WIGGLE_ROOM
 
-    _check_api_limits(min_requests_remaining=num_requests_needed)
+    _check_api_limits(gh, min_requests_remaining=num_requests_needed)
 
     logger.info('Processing GitHub Org: %s (%d public repos)', org_name, num_repos)
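Aside: taken together, the series wires up a clone -> count -> estimate pipeline. A rough end-to-end sketch, assuming git, cloc, and network access are available (the URL is an illustrative target, not one taken from the patches):

    from scraper.code_gov import compute_labor_hours, git_repo_to_sloc

    url = 'https://github.com/llnl/scraper'  # hypothetical repository
    sloc = git_repo_to_sloc(url)             # shallow clone + cloc
    hours = compute_labor_hours(sloc)        # COCOMO II form scrape
    print('-- url=%s, sloc=%d, hours=%d' % (url, sloc, hours))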