"""
|
||
An inefficient monolithic piece of code that'll generate jupyter notebook
|
||
from the projects main README.
|
||
|
||
PS: If you are a recruiter, please don't judge me by this piece of code. I wrote it
|
||
in hurry. I know this is messy and can be simplified, but I don't want to change it
|
||
much because it just works.
|
||
|
||
Simplifictions and improvements through patches are more than welcome however :)
|
||
|
||
|
||
#TODOs
|
||
|
||
- CLI arguments for running this thing
|
||
- Add it to prepush hook
|
||
- Add support for skip comments, to skip examples that are not meant for notebook environment.
|
||
- Use templates?
|
||
"""
|
||
|
||
import json
import os
import pprint

fpath = os.path.join(os.path.dirname(__file__), '..', 'README.md')
examples = []

# The globals
current_example = 1
sequence_num = 1
current_section_name = ""

STATEMENT_PREFIXES = ["...", ">>> ", "$ "]

HOSTED_NOTEBOOK_INSTRUCTIONS = """

## Hosted notebook instructions

This is just an experimental attempt at browsing wtfpython through Jupyter notebooks. Some examples are read-only because:
- they either require a Python version that isn't supported in the hosted runtime,
- or they can't be reproduced in this environment.

The expected outputs are already present in the collapsed cells following the code cells. Google Colab provides Python2 (2.7) and Python3 (3.6, default) runtimes. You can switch between them for the Python2-specific examples. For examples specific to other minor versions, you can simply refer to the collapsed outputs (it is currently not possible to control the minor version in hosted notebooks). You can check the active version with

```py
>>> import sys
>>> sys.version
# Prints out Python version here.
```

That being said, most of the examples work as expected. If you face any trouble, feel free to consult the original content on wtfpython and create an issue in the repo. Good luck!

---
"""


def generate_code_block(statements, output):
    """
    Generates a code block that executes the given statements.

    :param statements: The list of statements to execute.
    :type statements: list(str)
    :param output: The expected output lines for those statements.
    :type output: list(str)
    """
    global sequence_num
    result = {
        "type": "code",
        "sequence_num": sequence_num,
        "statements": statements,
        "output": output
    }
    sequence_num += 1
    return result


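# A minimal usage sketch for generate_code_block (hypothetical values, not taken
# from the README):
#
#   generate_code_block([">>> a = 1\n", ">>> a\n"], ["1\n"])
#   # -> {"type": "code", "sequence_num": <current sequence_num>,
#   #     "statements": [">>> a = 1\n", ">>> a\n"], "output": ["1\n"]}
#
# Each call also advances the module-level sequence_num by one.

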
def generate_markdown_block(lines):
    """
    Generates a markdown block from a list of lines.
    """
    global sequence_num
    result = {
        "type": "markdown",
        "sequence_num": sequence_num,
        "value": lines
    }
    sequence_num += 1
    return result


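# Likewise, a sketch for generate_markdown_block (hypothetical input):
#
#   generate_markdown_block(["### Some example title\n", "Some prose.\n"])
#   # -> {"type": "markdown", "sequence_num": <next>, "value": [...the same lines...]}

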
def is_interactive_statement(line):
    """
    Returns True if the line starts with an interactive prompt or shell prefix.
    """
    for prefix in STATEMENT_PREFIXES:
        if line.lstrip().startswith(prefix):
            return True
    return False


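# For reference, lines that count as interactive statements (hypothetical samples):
#
#   is_interactive_statement(">>> x = 1\n")        # True  (Python REPL prompt)
#   is_interactive_statement("...     pass\n")     # True  (REPL continuation)
#   is_interactive_statement("$ python foo.py\n")  # True  (shell command)
#   is_interactive_statement("x = 1\n")            # False (plain code or output)

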
def parse_example_parts(lines, title, current_line):
    """
    Parse the given lines into an example and return a dictionary with two keys:
    build_up, which contains all the text before an H4 (explanation) is encountered,
    and
    explanation, which contains all the text after build_up until --- or another H3 is encountered.

    :param lines: An iterator over the remaining lines of the README.
    :param title: The title line (H3 heading) of the example.
    :param current_line: The line at which parsing of this example starts.
    """
    parts = {
        "build_up": [],
        "explanation": []
    }
    content = [title]
    statements_so_far = []
    output_so_far = []
    next_line = current_line
    # store build_up till an H4 (explanation) is encountered
    while not (next_line.startswith("#### ") or next_line.startswith('---')):
        # Watching out for the snippets
        if next_line.startswith("```py"):
            # It's a snippet, whatever found until now is text
            is_interactive = False
            output_encountered = False
            if content:
                parts["build_up"].append(generate_markdown_block(content))
                content = []

            next_line = next(lines)

            while not next_line.startswith("```"):
                if is_interactive_statement(next_line):
                    is_interactive = True
                    if output_so_far:
                        parts["build_up"].append(generate_code_block(statements_so_far, output_so_far))
                        statements_so_far, output_so_far = [], []
                    statements_so_far.append(next_line)
                else:
                    # can be either output or normal code
                    if is_interactive:
                        output_so_far.append(next_line)
                    elif output_encountered:
                        output_so_far.append(next_line)
                    else:
                        statements_so_far.append(next_line)
                next_line = next(lines)

            # Snippet is over
            parts["build_up"].append(generate_code_block(statements_so_far, output_so_far))
            statements_so_far, output_so_far = [], []
            next_line = next(lines)
        else:
            # It's a text, go on.
            content.append(next_line)
            next_line = next(lines)

    # Explanation encountered, save any content till now (if any)
    if content:
        parts["build_up"].append(generate_markdown_block(content))

    # Reset stuff
    content = []
    statements_so_far, output_so_far = [], []

    # store lines again until --- or another H3 is encountered
    while not (next_line.startswith("---") or
               next_line.startswith("### ")):
        if next_line.lstrip().startswith("```py"):
            # It's a snippet, whatever found until now is text
            is_interactive = False
            if content:
                parts["explanation"].append(generate_markdown_block(content))
                content = []

            next_line = next(lines)

            while not next_line.lstrip().startswith("```"):
                if is_interactive_statement(next_line):
                    is_interactive = True
                    if output_so_far:
                        parts["explanation"].append(generate_code_block(statements_so_far, output_so_far))
                        statements_so_far, output_so_far = [], []
                    statements_so_far.append(next_line)
                else:
                    # can be either output or normal code
                    if is_interactive:
                        output_so_far.append(next_line)
                    else:
                        statements_so_far.append(next_line)
                next_line = next(lines)

            # Snippet is over
            parts["explanation"].append(generate_code_block(statements_so_far, output_so_far))
            statements_so_far, output_so_far = [], []
            next_line = next(lines)
        else:
            # It's a text, go on.
            content.append(next_line)
            next_line = next(lines)

    # All done
    if content:
        parts["explanation"].append(generate_markdown_block(content))

    return next_line, parts


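# The README shape this parser assumes (a hedged sketch with made-up content, not
# the actual README):
#
#   ## Some section name              <- H2, handled by the driver at the bottom
#   ### Some example title           <- H3, one example
#   ```py
#   >>> some_statement
#   some_output
#   ```
#   #### Explanation                 <- H4, switches from build_up to explanation
#   Prose and more ```py fenced snippets...
#   ---                              <- end of the example
#
# parse_example_parts returns the line that terminated the example together with
# {"build_up": [...blocks...], "explanation": [...blocks...]}.

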
def remove_from_beginning(tokens, line):
    """
    Strip the first matching token (an interactive prompt or shell prefix) from the line.
    """
    for token in tokens:
        if line.lstrip().startswith(token):
            # Only remove the first occurrence, so prompt-like text later in the
            # line is left untouched.
            line = line.replace(token, "", 1)
    return line


def inspect_and_sanitize_code_lines(lines):
    """
    Strip the interactive prompt and shell prefixes (">>> ", "...", "$ ") from the
    given code lines and detect whether a print statement is present.

    :param lines: A list of strings, each representing a line in the code block.
    :returns is_print_present, sanitized_lines: A boolean indicating whether a print
        statement was present in the original code, and the list of sanitized lines.
    """
    tokens_to_remove = STATEMENT_PREFIXES
    result = []
    is_print_present = False
    for line in lines:
        line = remove_from_beginning(tokens_to_remove, line)
        if line.startswith("print ") or line.startswith("print("):
            is_print_present = True
        result.append(line)
    return is_print_present, result


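# A small sketch of the sanitization step (hypothetical lines):
#
#   inspect_and_sanitize_code_lines([">>> print(\"wtf\")\n", ">>> x = 1\n"])
#   # -> (True, ['print("wtf")\n', 'x = 1\n'])
#
# The boolean drives whether the cell's expected output is attached as a "stream"
# output or as an "execute_result" in convert_to_cells below.

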
def convert_to_cells(cell_contents, read_only):
    """
    Converts a list of dictionaries describing markdown and code blocks into Jupyter
    notebook cells.

    :param cell_contents: A list of dictionaries, each representing either a markdown or
        a code block. Each dictionary has a "type" key that is either "markdown" or "code".
        A markdown block carries its lines under "value", whereas a code block has two keys,
        "statements" and "output": statements holds the lines found between the opening
        ```py fence and the closing ``` fence, and output holds the expected output lines
        collected by parse_example_parts.
    :type cell_contents: list(dict)

    :param read_only: If True, code blocks are rendered as read-only markdown cells with
        the expected output inlined, instead of executable code cells.
    :type read_only: bool

    :returns: A list of Jupyter notebook cells built from `cell_contents`. Code cells have
        their "collapsed" metadata attribute set to True; markdown cells get empty metadata.
    """
    cells = []
    for stuff in cell_contents:
        if stuff["type"] == "markdown":
            # todo add metadata later
            cells.append(
                {
                    "cell_type": "markdown",
                    "metadata": {},
                    "source": stuff["value"]
                }
            )
        elif stuff["type"] == "code":
            if read_only:
                # Render read-only examples as markdown: the code block followed by
                # its expected output.
                # TODO: Fix
                cells.append(
                    {
                        "cell_type": "markdown",
                        "metadata": {},
                        "source": ["```py\n"] + stuff["statements"] + ["```\n"] + ["```py\n"] + stuff['output'] + ["```\n"]
                    }
                )
                continue

            is_print_present, sanitized_code = inspect_and_sanitize_code_lines(stuff["statements"])
            if is_print_present:
                cells.append(
                    {
                        "cell_type": "code",
                        "metadata": {
                            "collapsed": True,
                        },
                        "execution_count": None,
                        "outputs": [{
                            "name": "stdout",
                            "output_type": "stream",
                            "text": stuff["output"]
                        }],
                        "source": sanitized_code
                    }
                )
            else:
                cells.append(
                    {
                        "cell_type": "code",
                        "execution_count": None,
                        "metadata": {
                            "collapsed": True
                        },
                        "outputs": [{
                            "data": {
                                "text/plain": stuff["output"]
                            },
                            "output_type": "execute_result",
                            "metadata": {},
                            "execution_count": None
                        }],
                        "source": sanitized_code
                    }
                )

    return cells


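# A rough sketch of the cell produced for a non-read-only code block containing a
# print call (hypothetical input, abbreviated output):
#
#   convert_to_cells([generate_code_block([">>> print(\"wtf\")\n"], ["wtf\n"])], False)
#   # -> [{"cell_type": "code",
#   #      "metadata": {"collapsed": True},
#   #      "execution_count": None,
#   #      "outputs": [{"name": "stdout", "output_type": "stream", "text": ["wtf\n"]}],
#   #      "source": ['print("wtf")\n']}]

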
def convert_to_notebook(pre_examples_content, parsed_json, post_examples_content):
    """
    Convert the parsed examples (plus the leading and trailing README content) to a
    Jupyter notebook and write it to wtf.ipynb.
    """
    result = {
        "cells": [],
        "metadata": {},
        "nbformat": 4,
        "nbformat_minor": 2
    }

    notebook_path = "wtf.ipynb"

    result["cells"] += convert_to_cells([generate_markdown_block(pre_examples_content)], False)

    for example in parsed_json:
        parts = example["parts"]
        build_up = parts.get("build_up")
        explanation = parts.get("explanation")
        read_only = example.get("read_only")

        if build_up:
            result["cells"] += convert_to_cells(build_up, read_only)

        if explanation:
            result["cells"] += convert_to_cells(explanation, read_only)

    result["cells"] += convert_to_cells([generate_markdown_block(post_examples_content)], False)

    # pprint.pprint(result, indent=2)
    with open(notebook_path, "w") as f:
        json.dump(result, f, indent=2)


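# Driver: walk the README at fpath, collecting the markdown before the first H2
# section into pre_stuff, every "### " example into result, and the remaining
# top-level markdown into post_stuff; once the file is exhausted, the hosted
# notebook instructions are appended and everything is written out as wtf.ipynb.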
with open(fpath, 'r+', encoding="utf-8") as f:
    lines = iter(f.readlines())
    line = next(lines)
    result = []
    pre_examples_phase = True
    pre_stuff = []
    post_stuff = []
    try:
        while True:
            if line.startswith("## "):
                pre_examples_phase = False
                # A section is encountered
                current_section_name = line.replace("## ", "").strip()
                section_text = []
                line = next(lines)
                # Until a new section is encountered
                while not (line.startswith("## ") or line.startswith("# ")):
                    # check if it's a H3
                    if line.startswith("### "):
                        # An example is encountered
                        title_line = line
                        line = next(lines)
                        read_only = False
                        while line.strip() == "" or line.startswith('<!--'):
                            # TODO: Capture example ID here using regex.
                            if '<!-- read-only -->' in line:
                                read_only = True
                            line = next(lines)

                        example_details = {
                            "id": current_example,
                            "title": title_line.replace("### ", ""),
                            "section": current_section_name,
                            "read_only": read_only
                        }
                        line, example_details["parts"] = parse_example_parts(lines, title_line, line)
                        result.append(example_details)
                        current_example += 1
                    else:
                        section_text.append(line)
                        line = next(lines)
            else:
                if pre_examples_phase:
                    pre_stuff.append(line)
                else:
                    post_stuff.append(line)
                line = next(lines)

    except StopIteration:
        # pprint.pprint(result, indent=2)
        pre_stuff.append(HOSTED_NOTEBOOK_INSTRUCTIONS)
        result.sort(key=lambda x: x["read_only"])
        convert_to_notebook(pre_stuff, result, post_stuff)