# Copyright 2023, Dr John A.R. Williams
# SPDX-License-Identifier: GPL-3.0-only
"""Classes to provide access to cohort and student information.
representing a specific cohort
representing a student in a cohort
Gets a cohort by name (or default cohort)
returns a generated default cohort name by current (UK style) academic year
import subprocess
import logging
import json
import shutil
import hashlib
import re
import glob
from os import walk
from typing import Union, Dict, List
from datetime import date, datetime
from pathlib import Path
import pyam.config_manager as config
from pyam.config import CONFIG
from pyam.files import read_csv
from pyam.run_pytest import run_pytest
def current_academic_year(today: Union[date, None] = None) -> str:
    """Return the academic year (UK style) to use as a default cohort name.

    The academic year is taken to start in September and finish in June,
    with referred assessments held in July and August:

    | From September to December: returns the current year
    | From January to June: returns the previous year
    | From July to August: returns ``<previous year>-referred``

    Args:
      today: The date to evaluate. Defaults to the current date.

    Returns:
      The cohort name, e.g. ``"2023"`` or ``"2023-referred"``.
    """
    if today is None:
        today = date.today()
    if today.month >= 9:
        # September onwards belongs to the academic year starting this year.
        return str(today.year)
    start_year = today.year - 1
    if today.month > 6:
        # July/August: referral period of the academic year just finished.
        return f"{start_year}-referred"
    return str(start_year)
class Cohort(config.ConfigManager):
    """A cohort of students with its associated tests and reports.

    Attributes:
      name (str): The cohort name (subdirectory name) (e.g. academic year of study)
      path (Path): The Path to this cohort
      test_path (Path): The Path to the tests for this cohort
      report_path (Path): The Path to report directory for this cohort
      log (logging.Logger): The Logger for cohort.
    """

    def __init__(self, name):
        """Load the cohort called *name* and the students listed in its students.csv.

        Args:
          name: The cohort name, used as the subdirectory name under the
            configured cohorts, tests and reports paths.

        Raises:
          FileNotFoundError: If the cohort directory does not exist.
        """
        self.name: str = name
        self.path: Path = CONFIG.cohorts_path / name
        self.test_path: Path = CONFIG.tests_path / name
        self.report_path: Path = CONFIG.reports_path / name
        super().__init__(self.path / "manifest.json", "cohort")
        if not self.path.exists():
            raise FileNotFoundError(self.path)
        # The directories must exist before the log file handler is opened
        # inside report_path, so this runs ahead of the logging setup.
        # NOTE(review): the loop body was lost in extraction; creating the
        # directories is presumed - confirm against the original.
        for path in (self.test_path, self.report_path):
            path.mkdir(parents=True, exist_ok=True)
        self.log: logging.Logger = logging.getLogger("cohort")
        # NOTE(review): the original passed a second argument to FileHandler
        # and the lines attaching the formatter/handler were lost in
        # extraction; FileHandler's defaults are used here - confirm.
        handler = logging.FileHandler(filename=self.report_path / "info.log")
        handler.setFormatter(
            logging.Formatter('%(asctime)s: %(levelname)-8s: %(message)s',
                              '%Y-%m-%d %H:%M:%S'))
        self.log.addHandler(handler)
        # NOTE(review): the second argument to read_csv was lost in
        # extraction; student_columns() is documented as "suitable for
        # read_csv" so it is presumed to be passed here - confirm.
        student_list = [
            Student(self, rec)
            for rec in read_csv(self.path / "students.csv",
                                self.student_columns())
        ]
        self._students: 'tuple[Student]' = tuple(student_list)

    def student_columns(self) -> List[tuple]:
        """Return a list of student column information for this cohort configuration

        Returns:
          A list of tuples of regex and column titles suitable for read_csv.
          The regex is the cohort setting ``student-column.<name>``, falling
          back to the schema default and then to the column name itself.
        """
        return [
            (self.get(f"student-column.{name}", value.get("default", name)),
             name)
            for name, value in config.SCHEMA["student-column"].items()
        ]

    def students(self,
                 name: Union[str, None, List[str]] = None
                 ) -> 'Union[Student, List[Student]]':
        """Return student or students from a cohort.

        Finds students by full name, student id or username in cohort.

        Args:
          name: Name or names to be found in the cohort.
            May be *username*, *student_id* or *common name*

        Returns:
          If name is not given provides list of all students in cohort.
          If name is a string returns the first matching student found.
          If name is a list return list of students corresponding to list of names.

        Raises:
          KeyError: If a name does not match any student in the cohort.
        """
        if not name:
            return list(self._students)
        if isinstance(name, str):
            for student in self._students:
                # NOTE(review): the third candidate was lost in extraction;
                # the docstring mentions a "common name", presumed to be
                # student.name() - confirm.
                if name in (student.username, student.student_id,
                            student.name()):
                    return student
            raise KeyError(f"Student {name} not found in {self.name} cohort.")
        # NOTE(review): the loop body was lost in extraction; a per-name
        # lookup matches the documented return value - confirm.
        return [self.students(item) for item in name]

    def start_log_section(self, title: str) -> None:
        "Write a section header in cohort log file"
        fix = "=" * (40 - len(title) // 2)
        self.log.info("%s %s %s", fix, title, fix)

    def tests(self) -> Dict[str, Dict]:
        """Return dictionary of tests for this cohort indexed by pytest nodeids

        If a manifest.json is provided in the test directory then this is the "tests"
        value from that file. Otherwise the nodeids are collected by pytest and the values
        fields are empty dictionaries. Future implementations may use the values.

        Returns:
          A dictionary of tests for this cohort indexed by pytest nodeids.
          Value is a dictionary of test attributes from the test manifest if provided.
          Currently "description" is used to provide a human readable description
          and "mark" to provide a numerical mark for this test in the generated template.
        """
        # Load manifest data if present
        test_manifest_path = self.test_path / "manifest.json"
        test_manifest = {}
        if test_manifest_path.exists():
            with open(test_manifest_path, "r", encoding="utf-8") as fid:
                # A manifest without a "tests" entry must still give a dict,
                # otherwise the lookups below fail on None.
                test_manifest = json.load(fid).get("tests") or {}
        # Ensure all tests are included by collecting from pytest
        result = run_pytest(self, '--collect-only', '-q')
        for line in result.stdout.splitlines():
            if len(line) == 0:
                # NOTE(review): the statement under this test was lost in
                # extraction; stopping at the first blank line is presumed
                # (node ids first, then a blank line and a summary) - confirm.
                break
            if line.startswith(self.name + "/"):
                line = line[len(self.name) + 1:]
            if not test_manifest.get(line):
                test_manifest[line] = {}
        return test_manifest
[docs]class Student:
"""A student in a cohort. Initialised from students.csv file in cohort
username (str): Their username
path (Path): The Path to where the students submission resides.
student_id (str): Their official student id
last_name (str): Their family name
first_name (str): their first name
cohort (Cohort): The Cohort in which they reside
course (str): Possible subcohort course name
github_username (str): Github username if specified
def __init__(self, cohort: Cohort, rec: dict):
"""Initialise student into cohort from a csv record rec
cohort: The Cohort object to which this student belongs
rec: A dictionary of values from csv file to initialise student from
self.rec = rec
self.cohort = cohort
self.username = rec["username"]
self.student_id = rec["studentid"]
self.last_name = rec["lastname"]
self.first_name = rec["firstname"]
self.course = rec.get("course", self.cohort.get("course"))
self.github_username = rec.get("github-username", None)
folder = cohort.get("student-folder-name")
if folder:
folder = self.rec[folder]
folder = self.username
self.path = self.cohort.path / folder
def __hash__(self):
return hash((self.cohort.name, self.username))
def __repr__(self):
return f"<Student {self.cohort.name}/{self.student_id}>"
def __lt__(self,other):
return (self.last_name, self.first_name) < (other.last_name, other.first_name)
def __str__(self):
return self.name()
[docs] def name(self, style: str = "ref") -> str:
"""Return common name formats - default is student id first name, last name
style: Style - can be a field from csv file, username or ref(default)
name string in given style. Default is ref - student username, Lastname and firstname
if style:
if style in self.rec:
return self.rec[style]
if style == "ref":
return f"{self.student_id} ({self.last_name}, {self.first_name})"
if style == "username":
return f"{self.username} ({self.last_name}, {self.first_name})"
return f"{self.last_name}, {self.first_name}"
[docs] def check_manifest(self,
files: Union[list, None] = None,
log: bool = False) -> List[str]:
"""Check if student directory contains all files on cohort manifest
files: (optional) List of files to find. If not given use cohort manifest
log (bool): If True log missing files in cohort logger
List of missing files
if not files:
files = self.cohort.get("files", ())
if not self.path.exists():
self.cohort.log.warning("No submission: %s", self.name())
return files.keys()
missing = []
if log:
for rec in files.keys():
if not self.file(rec,False):
if missing and log:
self.cohort.log.warning("Missing Files: %s - %s",
self.name(), missing)
return missing
[docs] def repository_name(self) -> Union[str, None]:
"""Return Github repository name if applicable else False
if self.github_username:
github = self.cohort.get("github.template", None)
if github:
return f"{github}-{self.github_username}"
return None
[docs] def repository_url(self) -> Union[str, None]:
"""Return students github repository url if present else False"""
name = self.repository_name()
if name:
return f"{self.cohort['github.url']}/{self.repository_name()}"
return None
[docs] def git(self,*args,**kwargs):
"""Run git with given args in the student repository.
Return stdout if successful.
if log keyword is set Logs action either as info or as an error depending on success
# pylint: disable=W1510
proc=subprocess.run(("git",*args), cwd=self.path, text=True, check=True, capture_output=True)
if log:
f"Successful {args} {self.repository_name()}"\
+ f" for '{self.name()}': {proc.stdout.strip()}"
return proc.stdout.strip()
except subprocess.CalledProcessError as error:
if log:
f"Unable to {args} {self.repository_name()}"\
+ f" to '{self.path.relative_to(self.cohort.path)}'"\
+f" for '{self.name()}: {error.output.strip()} {error.stderr.strip()}"
return False
[docs] def github_retrieve(self,reset: bool=True,branch=None) -> bool:
"""Clone or pull asssessments for this student from their repository.
Success of retrieval
if not self.repository_url():
self.cohort.log.warning(f"No repository known for '{self.name()}'")
return False
if self.path.exists():
if reset: # by default do a reset hard first to ensure workarea is clean
if branch:
return self.git("pull")
return self.git("git", "clone", self.repository_url(), self.path)
[docs] def github_push(self, files: List[Path], subdir=None, reset: bool=True, branch: str=None, msg: str = "Push from pyAutoMark"):
"""Push given set of files into student repository
files: List of files or directories to copy
subdir: If set - the name of subdirectory in student repository to copy files into
reset: If True, do a github_retrieve first to ensure we are consistent with student repo
branch: If set checkout and push files into this branch
If branch is specified original branch checkou in local repository is restored after push
if reset:
#rensure we are synced with student work if reset is true
if branch:
#save current branch name and checkout specified branch
original_branch = self.git("branch", "--show-current").stdout
self.git("checkout", branch,log=False)
destination = self.path
if subdir:
destination = destination / subdir
if not destination.exists():
destination.mkdir(parents=True, exist_ok=True)
for file in files:
#do stuffd
if file.is_file():
elif file.is_dir():
self.git("commit","-m", msg,log=False)
if branch:
self.git("checkout", original_branch,log=False)
[docs] def checkout(self,until,branch=None):
"""Checkout last repository for student before given date until or to a specified branch"""
if self.path.exists():
if branch:
if until:
result=self.git("log", r"--pretty='%h%'","-1", r"--format=%h","--until", until.isoformat(),log=False)
if result:
[docs] def github_lastcommit(self) -> Union[datetime, None]:
"Return last github commit time if applicable"
if self.path.exists():
result=self.git("log", "-1", r"--format=%cd",log=False)
if result:
return datetime.strptime(result,"%a %b %d %H:%M:%S %Y %z")
[docs] def hash(self) -> int:
"""Return a hash based on students username - this will be first integer of first 8 characters of md5hash of username"""
return int("0x"+hashlib.md5(self.username.encode("utf-8")).hexdigest()[:8],16)
[docs] def file(self, pattern: str, log: Union[logging.Logger,None] = None) -> Union[Path,None]:
"""Attempt to find file matching given pathname pattern based on the configuration setting
pattern: The file pattern to search for - either an exact path, glob or regexp
depending on the configuration setting filematch.pattern
log: If given use to log if no file found or multiple files found
The path to the (first) matching file found or None if none found
if matchtype=="exact":
path=self.path / pattern
if path.exists():
elif matchtype=="glob":
elif matchtype=="regexp":
matcher = re.compile(pattern)
for path in self.path.glob("**/*"):
if matcher.search(str(path)):
raise ValueError("Invalid filematch.pattern", matchtype)
if not files:
if log:
log.warning("File Not Found: %s: '%s'",self.name(), pattern)
return None
if len(files)>1 and log:
log.warning("Multiple files found: %s matching '%s': using %s",
self.name(), pattern, files[0].relative_to(self.path))
return files[0]
[docs] def find_files(self,
pathname: str,
containing: str = None,
recursive: bool = False) -> List[Path]:
"""Return filtered list of files found in student directory.
If containing is given also filter to files containing this regexp
If recursive is true also recurse subdirectories looking for matches
pathname: Unix style pathaname glob
containing: An optional regexp to match against file contents
recursive: If true also look in subdirectories
List of files found - may be empty list
files = glob.glob(str(self.path) + "/" + pathname, recursive=recursive)
if not containing:
return files
matching = []
matcher = re.compile(containing)
for file in files:
with open(file, 'r') as fid:
if matcher.findall(fid.read()):
return matching
def get_cohort(name: Union[str, None] = None) -> Cohort:
    """Return cohort for given name, or the default cohort if name is None

    The loaded cohort is kept in ``CONFIG.cohort`` and reused while the
    requested name is unchanged.

    Args:
      name: Optional name of cohort to load.
        If not present will use default - "cohort" field from configuration
        Or use calculated current_academic_year

    Returns:
      The cohort object
    """
    if name is None:
        # Resolved on each call rather than once at import time, so the
        # current configuration and date are the ones used.
        name = CONFIG.get("cohort", current_academic_year())
    current = CONFIG.cohort
    if not current or current.name != name:
        CONFIG.cohort = Cohort(name)
    return CONFIG.cohort
def list_cohorts() -> List[str]:
    """List the cohorts

    These are subdirectories of the cohorts path which have students.csv and
    manifest.json files

    Returns:
      List of valid cohort names, sorted alphabetically
    """
    # NOTE(review): the second file test and the statement collecting the
    # result were lost in extraction; "students.csv" is the file name the
    # Cohort class reads and the directory name is presumed to be the
    # cohort name - confirm.
    return sorted(
        path.name for path in CONFIG.cohorts_path.iterdir()
        if path.is_dir()
        and (path / "manifest.json").exists()
        and (path / "students.csv").exists())