import os
from pathlib import Path
import pandas as pd
import numpy as np
from nbformat import read, NO_CONVERT
import re
import glob
from github import Github
import git
import warnings
import webbrowser
def _find_assignment(directory=None):
    """Find an assignment dynamically.

    A helper that locates lab files by their extension (``.ipynb`` or
    ``.Rmd``). The search covers ``directory`` and all of its
    sub-directories. If exactly one candidate is found it is returned
    directly; if multiple candidates are found they are listed and the user
    is prompted to select one by number.

    Args:
        directory (str, optional): A directory path to be searched
            recursively. A leading ``~`` is expanded. If None given, will
            use the working directory.

    Returns:
        str: A path to the file selected.

    Raises:
        FileNotFoundError: If no ``.ipynb`` or ``.Rmd`` file exists below
            ``directory``.
        ValueError: If the answer typed at the prompt is not an integer.
        IndexError: If the number typed at the prompt is not in the list.
    """
    if directory is None:
        directory = os.getcwd()
    root = str(Path(directory).expanduser())

    files = []
    for pattern in ("*.ipynb", "*.Rmd"):
        files += glob.glob(os.path.join(root, "**", pattern), recursive=True)

    # Fail early with a clear message instead of prompting for a choice
    # from an empty list.
    if not files:
        raise FileNotFoundError(
            f"No .ipynb or .Rmd files were found in {root}"
        )

    # Nothing to choose from: only prompt when there are several candidates.
    if len(files) == 1:
        return files[0]

    print("The existing files are:")
    for number, file in enumerate(files, start=1):
        print(str(number) + "." + os.path.basename(file))

    choice = int(input("Enter your file number from the above list:"))
    # Reject 0 and negative numbers explicitly; plain indexing would wrap
    # around and silently return a file the user did not ask for.
    if not 1 <= choice <= len(files):
        raise IndexError(
            f"File number must be between 1 and {len(files)}, got {choice}"
        )
    return files[choice - 1]
def create_github_token(host="https://github.ubc.ca"):
    """Open a browser tab on the "new personal access token" page.

    The page is opened with the scopes ``repo``, ``user``, ``gist`` and
    ``workflow`` and the description ``LABZEN`` passed in the query string.

    Args:
        host (str):
            The URL of the upstream host. Defaults to UBC Github Enterprise.

    Returns:
        None

    Examples:
        >>> from labzen import labzen as lz
        >>> # Open a web browser
        >>> lz.create_github_token()
    """
    query = "scopes=repo,user,gist,workflow&description=LABZEN"
    token_page = "{}/settings/tokens/new?{}".format(host, query)
    # new=2 asks the browser for a new tab where possible
    webbrowser.open(token_page, new=2)
def parse_lab(notebook=None):
    """Parse an MDS lab file and return its markdown content.

    Args:
        notebook (str, optional):
            A path to a single MDS lab file (either .ipynb or .Rmd). A
            leading ``~`` is expanded. If left blank, the working directory
            is searched for lab files with ``_find_assignment()``.

    Returns:
        list: Each element is the content of one markdown cell (.ipynb) or
        of one stretch of text between fenced code chunks (.Rmd).

    Raises:
        TypeError: If ``notebook`` is not a string.
        ValueError: If the file extension is neither ``.Rmd`` nor
            ``.ipynb``.

    Example:
        >>> # Download the demo files into the working directory
        >>> import urllib.request
        >>> from labzen import labzen as lz
        >>>
        >>> baseurl = (
        >>>     "https://raw.githubusercontent.com"
        >>>     + "/UBC-MDS/labzen/main/data-raw"
        >>> )
        >>> labs = {
        >>>     "dummylab.Rmd": f"{baseurl}/dummylab.Rmd",
        >>>     "dummylab.ipynb": f"{baseurl}/dummylab.ipynb",
        >>> }
        >>>
        >>> for name, url in labs.items():
        >>>     urllib.request.urlretrieve(url, name)
        >>>
        >>> # parse the labs
        >>> lz.parse_lab("dummylab.ipynb")
        >>> lz.parse_lab("dummylab.Rmd")
        >>>
        >>> # Alternatively, navigate to a student assignment repo and
        >>> # run the following code.
        >>> lz.parse_lab()
    """
    # If the user did not define the specific file, recursively
    # search for rmd and ipynb files in the working directory
    if notebook is None:
        notebook = _find_assignment()

    # defensive tests -- the type is validated before any path handling so
    # that the caller sees this message rather than an unrelated error
    if not isinstance(notebook, str):
        raise TypeError("The file path should be string")
    _, extension = os.path.splitext(notebook)
    if extension not in (".Rmd", ".ipynb"):
        raise ValueError(
            "Sorry, you have not provided Rmarkdown or jupyter notebook file"
        )

    # Both formats are read from the same expanded path and as UTF-8
    path = Path(notebook).expanduser()

    if extension == ".Rmd":
        # Splitting on the fence marker alternates between text and chunks;
        # executable chunks are recognised by their "{r" / "{python" header
        pieces = path.read_text(encoding="utf8").split("```")
        return [
            piece
            for piece in pieces
            if not piece.startswith(("{r", "{python"))
        ]

    with open(path, encoding="utf8") as file:
        parsed = read(file, NO_CONVERT)
    return [
        cell["source"]
        for cell in parsed["cells"]
        if cell["cell_type"] == "markdown"
    ]
def count_points(file_name: str = None, margins: bool = True):
    """Tally the points available in a lab.

    Every markdown line of the form ``rubric={...}`` is treated as a rubric
    tag. The line directly above it (ignoring blank lines and ``<hr>``) must
    be a markdown header; a header containing "optional" or "bonus" (case
    insensitive) marks the question as optional.

    Args:
        file_name (str): A path to an MDS lab file (either .ipynb or .Rmd).
            If left blank, the lab is located by ``parse_lab()``.
        margins (bool): A boolean indicating whether to add a row for the
            total number of points (optional + required). Defaults to True.

    Returns:
        (pandas.core.frame.DataFrame, pandas.core.frame.DataFrame):
        A tuple of DataFrames. The first has one row per rubric tag with the
        columns ``block``, ``header``, ``rubric``, ``points``, ``total``,
        ``prop`` and ``type``. The second is a cross table with the columns
        ``type``, ``total`` and ``prop``. ``prop`` is scaled so that the sum
        of all non-optional points equals 0.95; it is NaN when the lab has
        no non-optional points.

    Raises:
        ValueError: If the lab has no markdown content or no rubric tag.
        Exception: If a rubric tag is not directly below a markdown header.

    Example:
        >>> from labzen import labzen as lz
        >>> import urllib.request
        >>>
        >>> # Download the demo files into the working directory
        >>> baseurl = (
        >>>     "https://raw.githubusercontent.com"
        >>>     + "/UBC-MDS/labzen/main/data-raw"
        >>> )
        >>> labs = {
        >>>     "dummylab.Rmd": f"{baseurl}/dummylab.Rmd",
        >>>     "dummylab.ipynb": f"{baseurl}/dummylab.ipynb",
        >>> }
        >>> for name, url in labs.items():
        >>>     urllib.request.urlretrieve(url, name)
        >>>
        >>> # for Jupyter notebooks:
        >>> df, tab = lz.count_points("dummylab.ipynb")
        >>> print(df[["rubric", "points", "type"]])
                         rubric  points          type
        0           [mechanics]     [5]  Non-Optional
        1           [reasoning]     [4]  Non-Optional
        2 [accuracy, reasoning]  [3, 2]  Non-Optional
        3 [accuracy, reasoning]  [6, 4]      Optional
        4 [accuracy, reasoning]  [7, 3]      Optional
        5                 [viz]     [5]  Non-Optional
        >>> print(tab)
                   type  total  prop
        0  Non-Optional     19  0.95
        1      Optional     20  1.00
        2           All     39  1.95
        >>>
        >>> # for Rmarkdown:
        >>> df, tab = lz.count_points("dummylab.Rmd")
        >>>
        >>> # Alternatively, navigate to a student assignment repo and run the
        >>> # following code.
        >>> df, tab = lz.count_points()
    """
    # Parse a lab file into its markdown blocks
    res = parse_lab(file_name)
    if not res:
        raise ValueError("No markdown content was found in this lab.")
    df = pd.DataFrame({"block": np.arange(1, len(res) + 1), "txt": res})

    # Tidy breaks, new lines, extra spaces, and make each line a row.
    # Stripping happens before the blank-line filter so that lines made of
    # whitespace only are discarded too.
    df["txt"] = df["txt"].str.replace("<br>", "\n", regex=False)
    df["txt"] = df["txt"].str.split("\n")
    df = df.explode("txt")
    df["txt"] = df["txt"].str.strip()
    df["txt"] = df["txt"].replace(["", "<hr>"], np.nan)
    df = df.dropna()

    # Add variable transformations; "header" is the preceding kept line
    df["header"] = df["txt"].shift(1)
    df["rubric"] = df["txt"].str.contains(r"^rubric\=\{")
    df["below_header"] = df["header"].str.contains(r"^[#]{1,6}\s")
    df["optional"] = df["header"].str.contains("optional|bonus", case=False)

    # Subset to lines containing rubrics only (the very first line has no
    # header and is removed by dropna)
    df = df.dropna().query("rubric").copy()
    if df.empty:
        raise ValueError("No rubric tags were found in this lab.")

    # defensive check
    if not all(df["below_header"]):
        raise Exception(
            "There is a problem parsing this lab. Expecting a rubric tag to "
            + "below a markdown header."
        )

    # Extract and sum points row by row. The index still carries the block
    # position and repeats for lines of the same block, so grouping on it
    # would merge the points of different rubric lines.
    df["points"] = (
        df["txt"]
        .str.findall(r"(\d+)")
        .apply(lambda found: [int(point) for point in found])
    )
    df["total"] = df["points"].apply(sum)

    # Tidy and make the result more human-readable
    booldict = {True: "Optional", False: "Non-Optional"}
    df["type"] = df["optional"].map(booldict)
    df = df.drop(columns=["rubric", "below_header", "optional"])
    df = df.reset_index(drop=True)
    df["header"] = df["header"].str.replace(r"(^[#]+\s+)", "", regex=True)

    # Generate crosstab
    tab = df.pivot_table(
        values="total", index="type", aggfunc="sum", margins=margins
    )
    tab = tab.reset_index()

    # Required points are worth 0.95 in total; select the row by position
    # rather than by label and tolerate labs without required points
    required = tab.loc[tab["type"] == "Non-Optional", "total"]
    if required.empty or required.iloc[0] == 0:
        one_pt_worth = np.nan
    else:
        one_pt_worth = 0.95 / required.iloc[0]
    tab["prop"] = tab["total"] * one_pt_worth

    # add percent to full table
    df["prop"] = df["total"] * one_pt_worth

    # simplify rubric names
    df["rubric"] = df["txt"].str.findall(r"([a-z]+)(?=\:\d)")

    # re-order columns
    df = df[["block", "header", "rubric", "points", "total", "prop", "type"]]
    return df, tab
def _check_repo_link(file_name: str = None):
    """Check whether the lab file contains a link to its Github repository.

    A link is any occurrence of
    ``github.ubc.ca/MDS-<4 digits>-<2 digits>/DSCI_<3 digits>_lab<digit>_<letters>``
    in a markdown block of the lab. The result is also printed as
    "Check 3".

    Args:
        file_name (str):
            A path to an MDS lab file (either .ipynb or .Rmd). If left
            blank, the lab is located by ``parse_lab()``.

    Returns:
        bool: True if at least one markdown block contains a repo link.

    Example:
        >>> # Navigate to the root of labzen repo and run the following code
        >>> # using the dummy files:
        >>>
        >>> # for jupyter notebook:
        >>> _check_repo_link("data-raw/dummylab.ipynb")
        >>>
        >>> # for Rmarkdown:
        >>> _check_repo_link("data-raw/dummylab.Rmd")
        >>>
        >>> # Alternatively, navigate to a student assignment repo and run the
        >>> # following code.
        >>> _check_repo_link()
    """
    # Parse a lab file into its markdown blocks
    res = parse_lab(file_name)

    # Non-capturing groups: only the presence of a match matters. Searching
    # with ``re`` directly also avoids touching the global warning filters.
    rex = re.compile(
        r"(?:https://)?(?:www\.)?github\.ubc\.ca"  # base url for GH Enterprise
        r"/MDS-\d{4}-\d{2}"  # organization
        r"/DSCI_\d{3}_lab\d_[a-z]+"  # lab repo with CWL username
    )
    repo_link = any(
        isinstance(block, str) and rex.search(block) is not None
        for block in res
    )

    # displaying the result in boolean
    if repo_link:
        print("Check 3: Repository link is included in the file")
    else:
        print("Check 3: Repository link is not included in the file")
    print("Check 3:", repo_link)
    return repo_link
def _check_lat_version(path: str, token: str):
    """Check whether the latest local commit has been pushed to the remote.

    The SHA of the local ``HEAD`` commit is compared with the SHA of the
    first commit returned by the Github Enterprise API for the repository.
    The result is also printed as "Check 2".

    Args:
        path (str): A local file path to either a lab directory or to a
            lab file inside a local git directory.
        token (str): A token for https://github.ubc.ca

    Returns:
        bool: True if both SHAs are identical.

    Example:
        >>> from labzen import labzen as lz
        >>>
        >>> # navigate to a student repo and run:
        >>> token = "<your personal access token>"
        >>> lz._check_lat_version(".", token=token)
    """
    # locate the repo root
    local_repo = git.Repo(path, search_parent_directories=True)

    # locate the Github Enterprise repo name
    ghe = Github(token, base_url="https://github.ubc.ca/api/v3")
    ghe_repo = ghe.get_repo(__find_ghe_repo(local_repo))

    # find latest commit on GHE
    ghe_commit = ghe_repo.get_commits()[0].sha

    # find the latest local commit
    local_commit = str(local_repo.head.commit)

    # comparing the both SHAs
    is_latest = ghe_commit == local_commit
    if is_latest:
        print("Check 2: Remote has the latest version of the repository")
    else:
        # one string (not two print arguments) so that no extra space is
        # inserted in the middle of the sentence
        print(
            "Check 2: Remote does not have the latest version of the "
            "repository"
        )
    print("Check 2:", is_latest)
    return is_latest
def __find_ghe_repo(local_repo, org="MDS-2020-21"):
    """Build the ``org/name`` path of a repository on Github Enterprise.

    The repository name is the last path segment of the first URL of each
    remote, with an optional ``.git`` suffix or trailing slash removed. If
    the remotes yield different names, the name of the first remote wins.

    Args:
        local_repo ([git.repo.base.Repo]): A local git repository. Only its
            ``remotes`` (and their ``urls``) are read.
        org (str): The name of the organization on Github Enterprise to
            search. Defaults to 'MDS-2020-21'.

    Returns:
        str: The repository path in the form ``"<org>/<repo name>"``.

    Raises:
        ValueError: If no repository name can be derived from the remotes.
    """
    # find the name of the repo on Github Enterprise
    rex = re.compile(r"([A-Za-z0-9_-]+)(?:\.git)?/?$")
    names = []
    for remote in local_repo.remotes:
        url = next(iter(remote.urls), None)
        found = rex.search(url) if url else None
        # keep first-seen order so the result is deterministic
        if found is not None and found.group(1) not in names:
            names.append(found.group(1))

    if not names:
        raise ValueError(
            "Could not determine the repository name from the git remotes"
        )
    return f"{org}/{names[0]}"
def _check_commits(path: str, token: str, verbose=False):
    """Check whether the user has at least three commits on the remote.

    A commit counts when the profile name of the token's owner equals the
    name of the commit's author or committer. The result is also printed
    as "Check 1".

    Args:
        path (str): A local repo path or local file path to a lab.
        token (str): A token for Github Enterprise.
        verbose (bool): Whether to print commit details to screen

    Returns:
        bool: True if at least three commits match the token's owner.

    Raises:
        Exception: If the repository cannot be retrieved from
            github.ubc.ca.

    Example:
        >>> from labzen import labzen as lz
        >>>
        >>> path = "/Users/jene/MDS/Block5/lab/DSCI_599_lab1_jene3456"
        >>> token = "<your personal access token>"
        >>> lz._check_commits(path, token)
    """
    # locate the repo root
    local_repo = git.Repo(path, search_parent_directories=True)

    # locate the Github Enterprise repo name (org/repo_name); resolved
    # outside the try block so the name is always bound for the message
    ghe = Github(token, base_url="https://github.ubc.ca/api/v3")
    ghe_name = __find_ghe_repo(local_repo)
    try:
        ghe_repo = ghe.get_repo(ghe_name)
    except Exception as err:
        raise Exception(f"{ghe_name} not found on github.ubc.ca") from err

    # Bound before branching: the report below needs both values even when
    # the commit history is too short to be inspected
    student_name = ghe.get_user().name
    student_commits_n = 0

    # count the total number of commits on the remote
    ghe_commits = ghe_repo.get_commits()
    ghe_commit_n = ghe_commits.totalCount

    # with fewer than three commits overall the check cannot pass
    if ghe_commit_n >= 3:
        for commit in ghe_commits:
            valid_names = set()
            # author/committer may be missing, hence the hasattr guard
            for person in (commit.author, commit.committer):
                if hasattr(person, "name"):
                    valid_names.add(person.name)
            if student_name in valid_names:
                student_commits_n += 1
            if verbose:
                print(
                    commit,
                    commit.commit.committer,
                    commit.commit.author.email,
                    commit.commit.author.date,
                    getattr(commit.author, "name", None),
                )
    check_result = student_commits_n >= 3

    # print check result to screen
    if check_result:
        print(
            "Check 1: Repository has at least 3 commits with the "
            f"student username {student_name}"
        )
    elif ghe_commit_n < 3:
        print(
            f"Check 1: Repo {ghe_repo.name} has fewer than 3 commits "
            f"with the student username {student_name}"
        )
    else:
        print(
            "Check 1: Repository does not have 3 commits with the "
            f"student username {student_name}"
        )
    print("Check 1:", check_result)
    return check_result
def check_mechanics(path: str = None, token=None):
    """Run the mechanics checks on an MDS lab.

    The checks are executed and reported in this order:

    1. at least three commits in the remote history carry your name;
    2. your latest local commit has been pushed; and
    3. the lab file contains a link to its Github repo.

    Args:
        path (str): A local path to a Github directory or an MDS lab file
            (.ipynb or .Rmd) within such a directory. Defaults to the
            current working directory.
        token (str) : A personal access token for https://github.ubc.ca. See
            ``create_github_token()`` for details.

    Returns:
        bool : Whether all checks passed. Informative messages are printed
        as a side effect.

    Example:
        >>> from labzen import labzen as lz
        >>>
        >>> # Step 1: get a token
        >>> lz.create_github_token()
        >>>
        >>> # Step 2: check mechanics
        >>> file = "~/MDS/Block5/lab1/DSCI_599_lab1_jene3456"
        >>> token = "<your personal access token>"
        >>> lz.check_mechanics(file, token)
        Check 1: Repository has at least 3 commits with the student
        username JENE SMITH
        Check 1: True
        Check 2: Remote has the latest version of the repository
        Check 2: True
        Check 3: Repository link is included in the file
        Check 3: True
        >>>
        >>> # Alternatively, just run the following from an MDS lab directory:
        >>> lz.check_mechanics(token = token)
    """
    # fall back to the working directory when no path is supplied
    target = os.getcwd() if path is None else path

    # the git directory of the enclosing repository
    git_dir = git.Repo(target, search_parent_directories=True).git_dir

    # a path that already names a lab file is used as is; anything else is
    # treated as a directory to search
    suffix = os.path.splitext(target)[1]
    if suffix in (".Rmd", ".ipynb"):
        lab_file = target
    else:
        lab_file = _find_assignment(target)

    # every check runs (and prints) regardless of the others' outcome
    commits_ok = _check_commits(git_dir, token=token)
    latest_ok = _check_lat_version(git_dir, token=token)
    link_ok = _check_repo_link(lab_file)
    return all((commits_ok, latest_ok, link_ok))