Adds --short

cslarsen · cslarsen · commit 0f94726ea27c · 2019-02-17T17:00:09.000+01:00
diff --git a/wpm/commandline.py b/wpm/commandline.py
@@ -13,7 +13,9 @@
 
 import argparse
 import codecs
+import math
 import os
+import random
 import sys
 
 from wpm.convert import wpm_to_cpm
@@ -60,6 +62,9 @@ def parse_args():
     argp.add_argument("--search", default=None, type=str,
                       help="Put quotes/authors/titles matching case-insensitive text query first")
 
+    argp.add_argument("--short", default=False, action="store_true",
+                      help="Starts wpm with short texts")
+
     opts = argp.parse_args()
 
     if opts.version:
@@ -196,6 +201,36 @@ def search(quotes, query):
         if (query in text) or (query in author) or (query in title):
             yield quote.text_id
 
+
+def short_quotes_first(quotes, cutoff=0.2):
+    """Returns a shuffled list of text IDs for the shorter quotes only.
+
+    A quote is short when its word count is below ceil(average word count
+    * cutoff / 0.5). Returns an empty list when there are no quotes."""
+
+    # Parse each quote once, remembering its text ID and word count
+    # (words are split on single spaces)
+    lengths = []
+    for quote in iter(quotes):
+        quote = wpm.quotes.Quote.from_tuple(quote)
+        lengths.append((quote.text_id, len(quote.text.split(" "))))
+
+    if not lengths:
+        return []  # nothing to average; avoids division by zero
+
+    # Divide by a float so integer division cannot truncate the average
+    avg = sum(count for _, count in lengths) / float(len(lengths))
+
+    # cutoff is scaled by 1 / 0.5, so the default 0.2 gives 40% of average
+    threshold = int(math.ceil(avg * (cutoff / 0.5)))
+
+    # Put short quotes in a randomized, starting bucket
+    short = [text_id for text_id, count in lengths if count < threshold]
+
+    random.shuffle(short)
+    return short
+
+
 def main():
     """Main entry point for command line invocation."""
     try:
@@ -219,12 +254,16 @@ def main():
             print_stats(stats, opts.cpm)
             return
 
+        text_ids = None
+
         if opts.search:
             text_ids = list(search(quotes, opts.search.lower()))
 
             if not text_ids:
                 print("No quotes matching %r" % opts.search)
                 sys.exit(1)
+        elif opts.short:
+            text_ids = short_quotes_first(quotes)
         elif opts.id is not None:
             text_ids = [opts.id]
         else:
diff --git a/wpm/quotes.py b/wpm/quotes.py
@@ -82,13 +82,17 @@ def put_to_front(self, text_ids):
         front = []
         back = []
 
+        # Calculate indices: Maps quote's text_id to internal index
+        tid = {}
         for index in range(len(self.quotes)):
             quote = self[index]
+            tid[quote.text_id] = index
 
-            if quote.text_id in text_ids:
-                front.append(index)
-            else:
-                back.append(index)
+        # Matched quotes go first; the shuffle below drops text_ids order
+        front = [tid[i] for i in text_ids]
+        random.shuffle(front)
+        back = [tid[i] for i in (set(tid.keys()) - set(text_ids))]
+        # The whole quote indexes stuff badly needs refactoring
 
         random.shuffle(back)
         self.indices = front + back