Source code for pyam.cohort

# Copyright 2023, Dr John A.R. Williams
# SPDX-License-Identifier: GPL-3.0-only
"""Classes to provide access to cohort and student information.

Classes

  :class:`Cohort`
    representing a specific cohort
  :class:`Student`
    representing a student in a cohort

Functions

  :func:`get_cohort`
    Gets a cohort by name (or default cohort)
  :func:`current_academic_year`
    returns a generated default cohort name by current (UK style) academic year

"""
import subprocess
import logging
import json
import shutil
import hashlib
import re
import glob
from os import walk
from typing import Union, Dict, List
from datetime import date, datetime
from pathlib import Path
import pyam.config_manager as config
from pyam.config import CONFIG
from pyam.files import read_csv
from pyam.run_pytest import run_pytest


def current_academic_year(today: Union[date, None] = None) -> str:
    """Return the academic year for a date, for use as a cohort name.

    This is based on common UK naming where the academic year starts in
    September and finishes in June, with referred assessments taking place
    in July/August.

    Args:
        today: The date to classify. Defaults to the current date, which is
            the behaviour of calling the function with no arguments.

    Returns:
        * September--December: the calendar year, e.g. ``"2023"``
        * January--June: the previous calendar year
        * July--August: ``"<previous year>-referred"``
    """
    if today is None:
        today = date.today()
    if today.month >= 9:
        # Autumn term: the academic year is named after the current year.
        return str(today.year)
    # January--August belongs to the academic year that began last September.
    year = today.year - 1
    if today.month > 6:
        # July/August is the referral (resit) period of that academic year.
        return f"{year}-referred"
    return str(year)
class Cohort(config.ConfigManager):
    """Class representing a cohort of students with associated tests and reports.

    Attributes:
        name (str): The cohort name (subdirectory name), e.g. academic year of study
        path (Path): The Path to this cohort
        test_path (Path): The Path to the tests for this cohort
        report_path (Path): The Path to the report directory for this cohort
        log (logging.Logger): The Logger for the cohort
    """

    def __init__(self, name):
        """Load the cohort called *name* and its student list.

        Args:
            name: Cohort name; used as the subdirectory name under the
                configured cohorts, tests and reports paths.

        Raises:
            FileNotFoundError: If the cohort directory does not exist.
        """
        self.name: str = name
        self.path: Path = CONFIG.cohorts_path / name
        self.test_path: Path = CONFIG.tests_path / name
        self.report_path: Path = CONFIG.reports_path / name
        # Validate first so that a mistyped cohort name does not leave stray
        # test/report directories and log files behind.
        if not self.path.exists():
            raise FileNotFoundError(self.path)
        for path in (self.test_path, self.report_path):
            path.mkdir(exist_ok=True)
        super().__init__(self.path / "manifest.json", "cohort")
        self.log: logging.Logger = logging.getLogger("cohort")
        # The "cohort" logger is shared between Cohort instances: detach and
        # close handlers left by a previous cohort so their files are released.
        for old_handler in list(self.log.handlers):
            self.log.removeHandler(old_handler)
            old_handler.close()
        handler = logging.FileHandler(filename=self.report_path / "info.log",
                                      mode="a")
        handler.setFormatter(
            logging.Formatter('%(asctime)s: %(levelname)-8s: %(message)s',
                              '%Y-%m-%d %H:%M:%S'))
        self.log.addHandler(handler)
        records = read_csv(self.path / "students.csv", self.student_columns())
        self._students: 'tuple[Student, ...]' = tuple(
            Student(self, rec) for rec in records)

    def student_columns(self) -> List[tuple]:
        """Return a list of student column information for this cohort configuration.

        Returns:
            A list of ``(regex, column name)`` tuples suitable for read_csv.
            The regex comes from the cohort setting ``student-column.<name>``,
            falling back to the schema default and then to the column name.
        """
        return [
            (self.get(f"student-column.{name}", value.get("default", name)), name)
            for name, value in config.SCHEMA["student-column"].items()
        ]

    def students(self,
                 name: Union[str, None, List[str]] = None
                 ) -> 'Union[Student, List[Student]]':
        """Return student or students from a cohort.

        Finds students by full name, student id or username in cohort.

        Args:
            name: Name or names to be found in the cohort.
                May be *username*, *student_id* or *common name*

        Returns:
            If name is not given, provides a list of all students in the cohort.
            If name is a string, returns the first matching student found.
            If name is a list, returns the list of students corresponding to
            the list of names.

        Raises:
            KeyError: If a requested name matches no student in the cohort.
        """
        if not name:
            return list(self._students)
        if isinstance(name, str):
            for student in self._students:
                if name in (student.username, student.student_id, student.name()):
                    return student
            raise KeyError(f"Student {name} not found in {self.name} cohort.")
        # A collection of names: resolve each one in turn.
        return [self.students(item) for item in name]

    def start_log_section(self, title: str) -> None:
        "Write a section header in cohort log file"
        # Pad with "=" on both sides; long titles simply get no padding.
        fix = "=" * (40 - len(title) // 2)
        self.log.info("%s %s %s", fix, title, fix)

    def tests(self) -> Dict[str, Dict]:
        """Return dictionary of tests for this cohort indexed by pytest nodeids.

        If a manifest.json is provided in the test directory then this starts
        from the "tests" value of that file. Any test collected by pytest that
        is not in the manifest is added with an empty dictionary as its value.

        Returns:
            A dictionary of tests for this cohort indexed by pytest nodeids.
            Value is a dictionary of test attributes from the test manifest if
            provided. Currently "description" is used to provide a human
            readable description and "mark" to provide a numerical mark for
            this test in the generated template.
        """
        # Load manifest data if present
        test_manifest_path = self.test_path / "manifest.json"
        test_manifest = {}
        if test_manifest_path.exists():
            with open(test_manifest_path, "r", encoding="utf-8") as fid:
                # A manifest without a "tests" entry (or with a null one)
                # must still yield a dictionary we can add collected tests to.
                test_manifest = json.load(fid).get("tests") or {}
        # Ensure all tests are included by collecting from pytest
        result = run_pytest(self, '--collect-only', '-q')
        prefix = self.name + "/"
        for line in result.stdout.splitlines():
            if not line:
                # The first blank line ends the list of collected node ids.
                break
            if line.startswith(prefix):
                line = line[len(prefix):]
            if not test_manifest.get(line):
                test_manifest[line] = {}
        return test_manifest
[docs]class Student: """A student in a cohort. Initialised from students.csv file in cohort Attributes: username (str): Their username path (Path): The Path to where the students submission resides. student_id (str): Their official student id last_name (str): Their family name first_name (str): their first name cohort (Cohort): The Cohort in which they reside course (str): Possible subcohort course name github_username (str): Github username if specified """ def __init__(self, cohort: Cohort, rec: dict): """Initialise student into cohort from a csv record rec Args: cohort: The Cohort object to which this student belongs rec: A dictionary of values from csv file to initialise student from """ self.rec = rec self.cohort = cohort self.username = rec["username"] self.student_id = rec["studentid"] self.last_name = rec["lastname"] self.first_name = rec["firstname"] self.course = rec.get("course", self.cohort.get("course")) self.github_username = rec.get("github-username", None) folder = cohort.get("student-folder-name") if folder: folder = self.rec[folder] else: folder = self.username self.path = self.cohort.path / folder def __hash__(self): return hash((self.cohort.name, self.username)) def __repr__(self): return f"<Student {self.cohort.name}/{self.student_id}>" def __lt__(self,other): return (self.last_name, self.first_name) < (other.last_name, other.first_name) def __str__(self): return self.name()
[docs] def name(self, style: str = "ref") -> str: """Return common name formats - default is student id first name, last name Args: style: Style - can be a field from csv file, username or ref(default) Returns: name string in given style. Default is ref - student username, Lastname and firstname """ if style: if style in self.rec: return self.rec[style] if style == "ref": return f"{self.student_id} ({self.last_name}, {self.first_name})" if style == "username": return f"{self.username} ({self.last_name}, {self.first_name})" return f"{self.last_name}, {self.first_name}"
[docs] def check_manifest(self, files: Union[list, None] = None, log: bool = False) -> List[str]: """Check if student directory contains all files on cohort manifest Args: files: (optional) List of files to find. If not given use cohort manifest log (bool): If True log missing files in cohort logger Returns: List of missing files """ if not files: files = self.cohort.get("files", ()) if not self.path.exists(): self.cohort.log.warning("No submission: %s", self.name()) return files.keys() missing = [] if log: log=self.cohort.log for rec in files.keys(): if not self.file(rec,False): missing.append(rec) if missing and log: self.cohort.log.warning("Missing Files: %s - %s", self.name(), missing) return missing
[docs] def repository_name(self) -> Union[str, None]: """Return Github repository name if applicable else False """ if self.github_username: github = self.cohort.get("github.template", None) if github: return f"{github}-{self.github_username}" return None
[docs] def repository_url(self) -> Union[str, None]: """Return students github repository url if present else False""" name = self.repository_name() if name: return f"{self.cohort['github.url']}/{self.repository_name()}" return None
[docs] def git(self,*args,**kwargs): """Run git with given args in the student repository. Return stdout if successful. if log keyword is set Logs action either as info or as an error depending on success """ log=kwargs.get("log",True) try: # pylint: disable=W1510 proc=subprocess.run(("git",*args), cwd=self.path, text=True, check=True, capture_output=True) if log: self.cohort.log.info( f"Successful {args} {self.repository_name()}"\ + f" for '{self.name()}': {proc.stdout.strip()}" ) return proc.stdout.strip() except subprocess.CalledProcessError as error: if log: self.cohort.log.error( f"Unable to {args} {self.repository_name()}"\ + f" to '{self.path.relative_to(self.cohort.path)}'"\ +f" for '{self.name()}: {error.output.strip()} {error.stderr.strip()}" ) return False
[docs] def github_retrieve(self,reset: bool=True,branch=None) -> bool: """Clone or pull asssessments for this student from their repository. Returns: Success of retrieval """ if not self.repository_url(): self.cohort.log.warning(f"No repository known for '{self.name()}'") return False if self.path.exists(): if reset: # by default do a reset hard first to ensure workarea is clean self.git("reset","--hard",log=False) if branch: self.git("checkout",branch) return self.git("pull") else: return self.git("git", "clone", self.repository_url(), self.path)
[docs] def github_push(self, files: List[Path], subdir=None, reset: bool=True, branch: str=None, msg: str = "Push from pyAutoMark"): """Push given set of files into student repository Args: files: List of files or directories to copy subdir: If set - the name of subdirectory in student repository to copy files into reset: If True, do a github_retrieve first to ensure we are consistent with student repo branch: If set checkout and push files into this branch If branch is specified original branch checkou in local repository is restored after push """ if reset: #rensure we are synced with student work if reset is true self.github_retrieve(reset=True) if branch: #save current branch name and checkout specified branch original_branch = self.git("branch", "--show-current").stdout self.git("checkout", branch,log=False) destination = self.path if subdir: destination = destination / subdir if not destination.exists(): destination.mkdir(parents=True, exist_ok=True) for file in files: #do stuffd if file.is_file(): shutil.copyfile(file,destination/file.name) elif file.is_dir(): shutil.copytree(file,destination/file.name,copy_function=shutil.copyfile,dirs_exist_ok=True) self.git("add","--all",log=False) self.git("commit","-m", msg,log=False) self.git("push") if branch: self.git("checkout", original_branch,log=False)
[docs] def checkout(self,until,branch=None): """Checkout last repository for student before given date until or to a specified branch""" if self.path.exists(): if branch: self.git("checkout",branch) if until: result=self.git("log", r"--pretty='%h%'","-1", r"--format=%h","--until", until.isoformat(),log=False) if result: self.git("checkout",result)
[docs] def github_lastcommit(self) -> Union[datetime, None]: "Return last github commit time if applicable" if self.path.exists(): result=self.git("log", "-1", r"--format=%cd",log=False) if result: return datetime.strptime(result,"%a %b %d %H:%M:%S %Y %z")
[docs] def hash(self) -> int: """Return a hash based on students username - this will be first integer of first 8 characters of md5hash of username""" return int("0x"+hashlib.md5(self.username.encode("utf-8")).hexdigest()[:8],16)
[docs] def file(self, pattern: str, log: Union[logging.Logger,None] = None) -> Union[Path,None]: """Attempt to find file matching given pathname pattern based on the configuration setting filematch.pattern. Args: pattern: The file pattern to search for - either an exact path, glob or regexp depending on the configuration setting filematch.pattern log: If given use to log if no file found or multiple files found Returns: The path to the (first) matching file found or None if none found """ matchtype=self.cohort.get("filematch.pattern") files=[] if matchtype=="exact": path=self.path / pattern if path.exists(): files.append(path) elif matchtype=="glob": files=list(self.path.glob(pattern)) elif matchtype=="regexp": matcher = re.compile(pattern) for path in self.path.glob("**/*"): if matcher.search(str(path)): files.append(path) else: raise ValueError("Invalid filematch.pattern", matchtype) if not files: if log: log.warning("File Not Found: %s: '%s'",self.name(), pattern) return None if len(files)>1 and log: log.warning("Multiple files found: %s matching '%s': using %s", self.name(), pattern, files[0].relative_to(self.path)) return files[0]
[docs] def find_files(self, pathname: str, containing: str = None, recursive: bool = False) -> List[Path]: """Return filtered list of files found in student directory. If containing is given also filter to files containing this regexp If recursive is true also recurse subdirectories looking for matches Args: pathname: Unix style pathaname glob containing: An optional regexp to match against file contents recursive: If true also look in subdirectories Returns: List of files found - may be empty list """ files = glob.glob(str(self.path) + "/" + pathname, recursive=recursive) if not containing: return files matching = [] matcher = re.compile(containing) for file in files: with open(file, 'r') as fid: if matcher.findall(fid.read()): matching.append(file) return matching
def get_cohort(name: str = CONFIG.get("cohort", current_academic_year())) -> Cohort:
    """Return cohort for given name or current default cohort if name=None.

    The most recently loaded cohort is cached on ``CONFIG.cohort`` and is
    reused when it already has the requested name.

    Args:
        name: Optional name of cohort to load. If not given, the default is
            the "cohort" field from configuration, or else the calculated
            current_academic_year. If None, the currently loaded cohort is
            returned, falling back to that same default when no cohort has
            been loaded yet.

    Returns:
        The cohort object
    """
    if name is None:
        if CONFIG.cohort:
            # No specific cohort requested: keep using the one already loaded.
            return CONFIG.cohort
        name = CONFIG.get("cohort", current_academic_year())
    if not CONFIG.cohort or CONFIG.cohort.name != name:
        CONFIG.cohort = Cohort(name)
    return CONFIG.cohort
def list_cohorts() -> List[str]:
    """List the cohorts.

    These are subdirectories of the cohorts path which have both
    students.csv and manifest.json files.

    Returns:
        Sorted list of valid cohort names
    """
    results = []
    for path in CONFIG.cohorts_path.iterdir():
        if not path.is_dir():
            continue
        if (path / "manifest.json").exists() and (path / "students.csv").exists():
            # Use the full directory name: ``stem`` would cut a cohort
            # called e.g. "2023.1" down to "2023".
            results.append(path.name)
    # iterdir() yields entries in arbitrary order; sort for stable output.
    return sorted(results)