To protect your data, the CISO has advised users to enable GitLab 2FA as soon as possible.

Commit 7bbed7ec authored by Lincoln Smith
Browse files

Improved handling of original approaches to requirement layout on P&C website.

parent a849dd77
......@@ -50,6 +50,52 @@ ORDER_LABEL = {
'PROGRESSION': 'Progression depends on requirement text',
'UNKNOWN_REQUIREMENT': 'Unknown requirement text'
}
# Initial set of known course codes. Despite the constant-style name this set is mutated
# at runtime: CourseListFilter.__init__ adds every course code it is given, so the contents
# grow as course codes are seen during processing.
COURSE_CODES = {'COMP8173', 'ENGN1217', 'COMP7240', 'COMP1140', 'COMP6262', 'ENGN4511',
'COMP3310', 'VCPG6001', 'COMP6250', 'ENGN8150', 'ENGN2228', 'ENGN3810',
'COMP3620', 'ENGN2707', 'ENGN2222', 'ENGN3013', 'COMP4450', 'ENGN3224',
'COMP1030', 'COMP8460', 'VCPG6100', 'COMP7310', 'COMP6461', 'ENGN2229',
'ENGN2218', 'COMP2560', 'VCUG3001', 'ENGN6213', 'COMP5923', 'ENGN8833',
'ENGN6536', 'ENGN8120', 'VCUG3100', 'ENGN6613', 'ENGN3230', 'ENGN3410',
'COMP4560', 'COMP3650', 'COMP1600', 'ENGN3331', 'COMP1720', 'ENGN2225',
'COMP6340', 'ENGN3200', 'ENGN8180', 'ENGN6511', 'COMP3320', 'COMP8830',
'VCUG2004', 'ENGN6524', 'ENGN3334', 'ENGN6615', 'COMP4630', 'ENGN8526',
'ENGN4420', 'COMP3701', 'COMP8300', 'ENGN6520', 'ENGN6420', 'COMP2310',
'COMP6120', 'COMP4680', 'COMP6361', 'ENGN4536', 'COMP8755', 'ENGN4820',
'ENGN6528', 'COMP6301', 'ENGN1211', 'ENGN6212', 'COMP3610', 'COMP4330',
'COMP2410', 'COMP6710', 'COMP3900', 'COMP6331', 'COMP3430', 'ENGN3226',
'COMP3550', 'ENGN4523', 'COMP8330', 'COMP8670', 'COMP2120', 'ENGN6334',
'ENGN8100', 'COMP8440', 'COMP6420', 'VCUG2003', 'COMP3100', 'COMP6442',
'VCUG3200', 'COMP7230', 'COMP8410', 'ENGN6410', 'COMP2550', 'VCPG8001',
'COMP6330', 'COMP3300', 'ENGN4221', 'COMP8650', 'COMP2130', 'COMP2610',
'ENGN3820', 'INFT4005F', 'COMP8600', 'ENGN8602', 'COMP4650', 'ENGN6525',
'COMP8800', 'COMP6730', 'ENGN8537', 'ENGN6250', 'ENGN6224', 'ENGN6331',
'COMP4130', 'COMP8320', 'COMP3425', 'COMP6719', 'ENGN4625', 'ENGN4525',
'COMP6466', 'ENGN2226', 'ENGN6601', 'COMP4800', 'COMP8701', 'ENGN8104',
'COMP6390', 'ENGN6537', 'COMP4670', 'ENGN4810', 'COMP3630', 'COMP2420',
'ENGN4521', 'ENGN5923', 'ENGN8601', 'ENGN4520', 'ENGN6626', 'ENGN8536',
'COMP3710', 'COMP3702', 'COMP8100', 'ENGN4513', 'COMP8180', 'COMP8715',
'COMP4500', 'COMP3500', 'COMP6310', 'ENGN3512', 'COMP8620', 'ENGN6223',
'COMP1100', 'ENGN4615', 'ENGN3221', 'ENGN6627', 'ENGN4613', 'COMP1710',
'COMP4660', 'COMP6240', 'VCPG6004', 'COMP6353', 'COMP6490', 'COMP4610',
'ENGN8831', 'ENGN6516', 'COMP3120', 'COMP3820', 'COMP6311', 'COMP4540',
'ENGN5920', 'ENGN8820', 'COMP1130', 'INFT4005P', 'COMP6720', 'COMP4005P',
'ENGN4528', 'COMP6780', 'COMP5920', 'ENGN4522', 'ENGN8823', 'COMP6260',
'COMP8260', 'ENGN4627', 'VCUG2001', 'COMP2700', 'ENGN4718', 'ENGN8524',
'ENGN8528', 'ENGN8534', 'COMP3530', 'COMP1110', 'ENGN4516', 'COMP4620',
'COMP6320', 'COMP6300', 'ENGN2706', 'COMP8823', 'ENGN3223', 'ENGN6512',
'COMP8820', 'ENGN3712', 'ENGN8637', 'COMP4340', 'VCUG1001', 'COMP3560',
'COMP3600', 'COMP1040', 'ENGN8830', 'VCUG3002', 'COMP6445', 'ENGN8527',
'COMP8501', 'COMP6363', 'COMP4005F', 'ENGN4027', 'COMP4006', 'ENGN8224',
'ENGN4524', 'COMP1730', 'COMP7500', 'COMP6700', 'COMP2140', 'COMP8430',
'COMP6464', 'COMP6365', 'COMP8190', 'COMP2710', 'ENGN8160', 'COMP3770',
'COMP2300', 'VCPG6002', 'COMP4300', 'VCPG8002', 'ENGN3100', 'ENGN3601',
'ENGN1218', 'ENGN2219', 'ENGN8170', 'ENGN6625', 'COMP2100', 'ENGN3213',
'COMP6261', 'ENGN4706', 'COMP8110', 'ENGN2217', 'COMP6470', 'COMP8420',
'COMP3740', 'COMP2400', 'ENGN3706', 'ENGN4712', 'COMP2620', 'ENGN8538',
'ENGN8832', 'VCPG6200', 'ENGN6521', 'COMP4600', 'ENGN3212', 'ENGN1215',
'ENGN8535', 'COMP8502', 'COMP4550', 'ENGN8260', 'COMP8705', 'ENGN4537',
'ENGN4200'}
class ReqNode:
......@@ -92,6 +138,9 @@ class ReqNode:
yield current_sibling
current_sibling = current_sibling.next_sibling
def __str__(self):
    """Return this node's flattened requirement text as its string form."""
    flattened_text = self.flatten_requirement_text()
    return flattened_text
@property
def is_leaf(self):
    """True when this node's ``children`` attribute is None."""
    has_no_children = self.children is None
    return has_no_children
......@@ -179,7 +228,7 @@ def _get_tag_indent(tag: BeautifulSoup, current_margin: int) -> int:
:return: The alignment of the tag
"""
if 'style' in tag.attrs:
match = re.search(r'(margin|padding)-left:\s?(?P<margin>-?\d{2,3})', tag['style'])
match = re.search(r'(margin|padding)-left:\s?(?P<margin>-?\d{1,3})', tag['style'])
if match:
new_margin = int(match.group('margin'))
else:
......@@ -194,6 +243,54 @@ def _get_tag_indent(tag: BeautifulSoup, current_margin: int) -> int:
return new_margin
def _split_multi_req_paragraph(tag: BeautifulSoup, new_margin: int) -> [BeautifulSoup]:
    """
    Split a paragraph holding several <br>-separated requirements into one paragraph each.

    Apparently sometimes we just jam a whole bunch of requirements in a single <p> tag
    and use <br> to separate them. Everything before the first <br> stays in ``tag``; the
    content that follows is moved into new <p> tags, one per non-blank line, which are
    inserted into the document directly after ``tag`` in their original order. The <br>
    tags are deleted as they are encountered.

    :param tag: The paragraph to split. It is modified in place.
    :param new_margin: The left margin, in pixels, written into the style attribute of every
        paragraph created here.
    :return: A list of the newly created paragraphs, in document order. Empty if ``tag``
        contains no <br> tag.
    """
    def _get_new_p_tag():
        # Parse a throwaway document and detach its <p> so it can be inserted elsewhere.
        return BeautifulSoup(
            '<p style="margin-left: {}px;"></p>'.format(new_margin),
            'html5lib'
        ).p.extract()

    def _has_text(paragraph):
        # stripped_strings only yields non-empty strings, so any() is enough to tell whether
        # the paragraph holds real content (as opposed to whitespace between adjacent <br>s).
        return any(paragraph.stripped_strings)

    br_tag = tag.br  # type: BeautifulSoup
    if br_tag is None:
        # Nothing to split; avoid an AttributeError for callers that did not pre-check.
        return []

    new_paragraphs = []
    current_paragraph = tag  # type: BeautifulSoup
    # Grab the sibling before decomposing, since decompose() destroys the <br>'s links.
    child = br_tag.next_sibling
    br_tag.decompose()
    new_p_tag = _get_new_p_tag()

    # Compare against None explicitly: a text node is a str subclass, so an empty one is
    # falsy and would otherwise end the walk early.
    while child is not None:
        # Remember the successor now; appending or decomposing `child` relinks it.
        next_child = child.next_sibling
        if child.name == 'br':
            # A <br> ends the current line. Only emit the paragraph built so far if it has
            # content, so adjacent <br> tags don't produce effectively empty paragraphs.
            if _has_text(new_p_tag):
                current_paragraph.insert_after(new_p_tag)
                current_paragraph = new_p_tag
                new_paragraphs.append(new_p_tag)
                new_p_tag = _get_new_p_tag()
            # Delete the <br> so it doesn't mess with stuff later.
            child.decompose()
        else:
            new_p_tag.append(child)
        child = next_child

    # Flush whatever followed the final <br>.
    if _has_text(new_p_tag):
        current_paragraph.insert_after(new_p_tag)
        new_paragraphs.append(new_p_tag)
    return new_paragraphs
def build_reqnode_tree(study_block: BeautifulSoup, header_id='program-requirements') -> ReqNode:
"""
Construct a tree to replicate the structure of the Program Orders as rendered on P&C.
......@@ -231,7 +328,6 @@ def build_reqnode_tree(study_block: BeautifulSoup, header_id='program-requiremen
tag = tag.next_sibling
continue
requirement_text = ' '.join(tag.stripped_strings)
new_margin = _get_tag_indent(tag, current_margin)
# Of course there are different margin increments kicking around so normalise them to
......@@ -255,6 +351,26 @@ def build_reqnode_tree(study_block: BeautifulSoup, header_id='program-requiremen
alternative_indent = 0
current_parent = root
# Apparently sometimes we just jam a whole bunch of requirements in a single <p> tag
# and use <br> to separate them. Restructure our html by walking the contents of this tag
# and separating its children into new paragraphs at the same indent when we hit a <br>
# tag. Delete the <br> tags as we encounter them.
if tag.br:
new_paragraphs = _split_multi_req_paragraph(tag, new_margin)
# It's also possible the new paragraphs are actually a list of courses that should be
# indented underneath their parent requirement so try to detect this and adjust
# margins accordingly
new_lines = [' '.join(paragraph.stripped_strings) for paragraph in new_paragraphs]
if (
len(new_paragraphs) > 1
and not re.match(COURSE_REGEX, ' '.join(tag.stripped_strings))
and all([re.match(COURSE_REGEX, line) for line in new_lines])
):
for paragraph in new_paragraphs:
paragraph['style'] = 'margin-left: {}px;'.format(new_margin + 40)
requirement_text = ' '.join(tag.stripped_strings)
# If it's a blank line then that typically indicates the end of a requirement block. Take
# us back to the top level indent
if new_margin < 0 or not ''.join(tag.stripped_strings):
......@@ -340,6 +456,8 @@ class CourseListFilter(CourseFilter):
def __init__(self, course_codes: Iterable[str]):
    """
    Store the given course codes on the filter and add them to COURSE_CODES.

    :param course_codes: The course codes for this filter. A one-shot iterator (e.g. a
        generator) is copied into a list first, so that registering the codes in
        COURSE_CODES does not leave ``self.course_codes`` exhausted. Any other iterable
        is stored as given.
    """
    super().__init__()
    # An iterator returns itself from iter(); reusable containers (list, set, tuple) do
    # not. Only iterators need copying before they are walked below.
    if iter(course_codes) is course_codes:
        course_codes = list(course_codes)
    self.course_codes = course_codes
    # Hack automatic collection of seen course codes
    COURSE_CODES.update(course_codes)
def __repr__(self):
    """Represent the filter by the string form of its stored course codes."""
    codes = self.course_codes
    return str(codes)
......@@ -401,16 +519,16 @@ class ProgramOrder:
req1 & req2 & (req3 | (req4 & req5 & (req6 | req7)) | req8) & req9
Each requirement ultimately specifies some unit value that it contributes towards the
completion of a degree, and a set of courses that can be used to acquire those units.
Courses define their own rules in terms of pre-requisites, co-requisites and incompatible
courses etc. so at the Course level logical expressions will get more complex. See the
CourseFilter classes for more details on this.
completion of a degree and a set of courses that can be used to acquire those units,
or some administrative hurdle/requirement. Courses define their own rules in terms of
pre-requisites, co-requisites and incompatible courses etc. so at the Course level logical
expressions will get more complex.
At the moment there are really two types of ProgramOrder:
* One that stores a specific requirement. These have no children and generally apply a unit
value requirement to a set of courses via some min|max|equals operator.
* One that is a container of sorts and groups ProgramOrders joined by an AND/OR operator.
TODO: Split these up into a parent and subclasses to the role is explicit.
TODO: Split these up into a parent and subclasses so the role is explicit.
The resulting representation is a tree starting with a container ProgramOrder, and it will
have children that may themselves be containers, or could be specific requirement ProgramOrders.
......@@ -619,7 +737,7 @@ class DegreeRuleScraper:
child_text = []
while sibling_text and re.match(AREA_REGEX, sibling_text):
child_text.append(sibling_text)
sibling = node.next_sibling
sibling = sibling.next_sibling
sibling_text = sibling and sibling.flatten_requirement_text()
return ' '.join(child_text)
......@@ -724,6 +842,9 @@ class DegreeRuleScraper:
'single_set': r'(?P<min>A minimum of )?(?P<max>A maximum of )?(?P<units>\d{1,3}) '
r'units(?: may come| must come)? from(?: the)? completion of.*? courses? '
r'from the following(?: list)?',
'single_set2': r'(?P<min>A minimum of )?(?P<max>A maximum of )?(?P<units>\d{1,3}) '
r'units(?: may come| must come)? from( one of)?(?: the)? following.*? '
r'courses?',
'single_course_multi': r'(?P<min>A minimum of )?(?P<max>A maximum of )?(?P<units>\d{1,'
r'3}) units from(?: the)? completion of (?P<code>[A-Z]{4}\d{4}['
r'A-Z]?).*?, which (?:may|must) be completed more than once('
......@@ -828,6 +949,8 @@ class DegreeRuleScraper:
# Single set of courses. This must come after the "multiple_areas" regex due to
# potential overlap.
matches = re.match(principal_req_regex['single_set'], requirement_text)
if not matches:
matches = re.match(principal_req_regex['single_set2'], requirement_text)
if matches:
operator = self._minmax_operator(matches)
child_text = self._extract_child_text(node)
......@@ -1009,9 +1132,6 @@ class DegreeRuleScraper:
elif re.match(r'Either:?$', requirement_text):
return self.process_alternative_sets(node)
# Poorly laid out requirements lack indents for course lists etc.
elif re.match(AREA_REGEX, requirement_text):
return []
# Or: statements were already processed by the Either: statement
elif re.match(r'Or:?$', requirement_text):
return []
......
<html>
<head>
</head>
<body id="top">
<div class="main">
<div class="main-inner">
<div class="body transition">
<div class="tab-content" id="study" style="display: inline;">
<div class="body__inner w-doublewide copy">
<h2 id="program-requirements">
Program Requirements
</h2>
<p>
The Bachelor of Applied Data Analytics requires the completion of 144 units, of which:
</p>
<p style="margin-left: 40px;">
A maximum of 60 units may come from completion of 1000-level courses
</p>
<p>
</p>
<p>
The 144 units must consist of:
</p>
<p style="margin-left: 40px;">
72 units from completion of the following compulsory courses
</p>
<p style="margin-left: 80px;">
<a href="/2019/course/COMP2400">
COMP2400
</a>
Relational Databases
<br/>
<br/>
<a href="/2019/course/COMP3425">
COMP3425
</a>
Data Mining
<br/>
<br/>
<a href="/2019/course/COMP3430">
COMP3430
</a>
Data Wrangling
<br/>
<br/>
<a href="/2019/course/DEMO2002">
DEMO2002
</a>
Population Analysis
<br/>
<br/>
<a href="/2019/course/SOCR1001">
SOCR1001
</a>
Foundations of Social Research
<br/>
<br/>
<a href="/2019/course/SOCY2169">
SOCY2169
</a>
Online Research Methods
<br/>
<br/>
<a href="/2019/course/SOCY2166">
SOCY2166
</a>
Social Science of the Internet
<br/>
<br/>
<a href="/2019/course/SOCR3001">
SOCR3001
</a>
Data for Decision Making
<br/>
<br/>
<a href="/2019/course/STAT2001">
STAT2001
</a>
Introductory Mathematical Statistics
<br/>
<br/>
<a href="/2019/course/STAT2008">
STAT2008
</a>
Regression Modelling
<br/>
<br/>
<a href="/2019/course/STAT3011">
STAT3011
</a>
Graphical Data Analysis
<br/>
<br/>
<a href="/2019/course/STAT3040">
STAT3040
</a>
Statistical Learning
<br/>
</p>
<p style="margin-left: 40px;">
6 units from completion of courses from the following list:
</p>
<p style="margin-left: 80px;">
<a href="/2019/course/COMP1100">
COMP1100
</a>
Programming as Problem Solving
</p>
<p style="margin-left: 80px;">
<a href="/2019/course/COMP1130">
COMP1130
</a>
Programming as Problem Solving (Advanced)
</p>
<p style="margin-left: 80px;">
<a href="/2019/course/COMP1730">
COMP1730
</a>
Programming for Scientists
</p>
<p style="margin-left: 40px;">
<br/>
6 units from completion of courses from the following list:
</p>
<p style="margin-left: 80px;">
<a href="/2019/course/COMP1110">
COMP1110
</a>
Structured Programming
<br/>
<br/>
<a href="/2019/course/COMP1140">
COMP1140
</a>
Structured Programming (Advanced)
<br/>
</p>
<p>
Either:
</p>
<p style="margin-left: 40px;">
6 units from completion of
<a href="/2019/course/MATH1113">
MATH1113
</a>
Mathematical Foundations for Actuarial Studies
<br/>
<br/>
6 units from completion of courses from the following list:
</p>
<p style="margin-left: 80px;">
<a href="/2019/course/STAT1003">
STAT1003
</a>
Statistical Techniques
<br/>
<br/>
<a href="/2019/course/STAT1008">
STAT1008
</a>
Quantitative Research Methods
</p>
<p>
Or:
</p>
<p style="margin-left: 40px;">
12 units from completion of the following courses:
</p>
<p style="margin-left: 80px;">
<a href="/2019/course/MATH1003">
MATH1003
</a>
Algebra and Calculus Methods
<br/>
<br/>
<a href="/2019/course/MATH1113">
MATH1113
</a>
Mathematical Foundations for Actuarial Studies
</p>
<p>
Or:
</p>
<p style="margin-left: 40px;">
12 units from completion of the following courses:
</p>
<p style="margin-left: 80px;">
<a href="/2019/course/MATH1013">
MATH1013
</a>
Mathematics and Applications 1
<br/>
<br/>
<a href="/2019/course/MATH1014">
MATH1014
</a>
Mathematics and Applications 2
<br/>
<br/>
<a href="/2019/course/MATH1115">
MATH1115
</a>
Advanced Mathematics and Applications 1
<br/>
<br/>
<a href="/2019/course/MATH1116">
MATH1116
</a>
Advanced Mathematics and Applications 2
</p>
<p>
<br/>
48 units from completion of elective courses offered by ANU
</p>
<h2 id="electivestudy">
Elective Study
</h2>
<p>
Once you have met the program requirements of your degree, you may have enough electives to complete an additional elective
<a href="http://programsandcourses.anu.edu.au/Search?Source=Catalogue&amp;SearchText=&amp;Search=&amp;FiltersApplied=Majors&amp;Careers[0]=Undergraduate&amp;FilterByMajors=true&amp;SelectedYear=2016&amp;CollegeName=All+Colleges">
major
</a>
,
<a href="http://programsandcourses.anu.edu.au/Search?Source=Catalogue&amp;SearchText=&amp;Search=&amp;FiltersApplied=Minorss&amp;Careers[0]=Undergraduate&amp;FilterByMinors=true&amp;SelectedYear=2016&amp;CollegeName=All+Colleges">
minor
</a>
or
<a href="http://programsandcourses.anu.edu.au/Search?Source=Catalogue&amp;SearchText=&amp;Search=&amp;FiltersApplied=Specialisations&amp;Careers[0]=Undergraduate&amp;FilterBySpecialisations=true&amp;SelectedYear=2016&amp;CollegeName=All+Colleges">
specialisation
</a>
.
</p>
<h2 id="studyoptions">
Study Options
</h2>
<div class="table-container">
<table class="table-program">
<tbody>
<tr>
<td class="table-program__year" rowspan="2">
<span class="table-program__year-label">
Year 1
</span>
<span class="table-program__units-label">
48 units
</span>
</td>
<td class="table-program__course ">
<span class="table-program__course-type">
<a href="/course/COMP1100">
COMP1100
</a>
</span>
<span class="table-program__course-level">
Programming as Problem Solving
</span>
<span class="table-program__course-units">
6 units
</span>
</td>
<td class="table-program__course ">
<span class="table-program__course-type">
<a href="/course/MATH1013">
MATH1013
</a>
</span>
<span class="table-program__course-level">
Mathematics and Applications 1
</span>
<span class="table-program__course-units">
6 units
</span>
</td>
<td class="table-program__course ">
<span class="table-program__course-type">
Elective
</span>
<span class="table-program__course-units">
6 units
</span>
</td>
<td class="table-program__course ">
<span class="table-program__course-type">
Elective
</span>
<span class="table-program__course-units">
6 units
</span>
</td>
</tr>
<tr>
<td class="table-program__course ">
<span class="table-program__course-type">
<a href="/course/COMP1110">
COMP1110
</a>
</span>
<span class="table-program__course-level">
Structured Programming
</span>
<span class="table-program__course-units">
6 units
</span>
</td>
<td class="table-program__course ">
<span class="table-program__course-type">
<a href="/course/COMP2400">
COMP2400
</a>
</span>
<span class="table-program__course-level">
Relational Databases
</span>
<span class="table-program__course-units">
6 units
</span>
</td>
<td class="table-program__course ">
<span class="table-program__course-type">
<a href="/course/SOCR1001">
SOCR1001
</a>
</span>
<span class="table-program__course-level">
Foundations of Social Research
</span>
<span class="table-program__course-units">
6 units
</span>
</td>
<td class="table-program__course ">
<span class="table-program__course-type">
<a href="/course/MATH1014">
MATH1014
</a>
</span>
<span class="table-program__course-level">
Mathematics and Applications 2
</span>
<span class="table-program__course-units">
6 units
</span>
</td>
</tr>
<tr>
<td class="table-program__year" rowspan="2">
<span class="table-program__year-label">
Year 2
</span>
<span class="table-program__units-label">
48 units
</span>
</td>
<td class="table-program__course ">
<span class="table-program__course-type">
<a href="/course/COMP3425">
COMP3425
</a>
</span>
<span class="table-program__course-level">
Data Mining
</span>
<span class="table-program__course-units">
6 units
</span>
</td>
<td class="table-program__course ">
<span class="table-program__course-type">
<a href="/course/STAT2008">
STAT2008
</a>
</span>
<span class="table-program__course-level">
Regression Modelling
</span>
<span class="table-program__course-units">
6 units
</span>
</td>
<td class="table-program__course ">
<span class="table-program__course-type">
<a href="/course/DEMO2002">
DEMO2002
</a>
</span>
<span class="table-program__course-level">
Population Analysis
</span>
<span class="table-program__course-units">
6 units
</span>
</td>
<td class="table-program__course ">
<span class="table-program__course-type">
Elective
</span>