From d1c27f2112ba44770aa6b4c3eccf36a5ad5c1233 Mon Sep 17 00:00:00 2001
From: Tom Weber
Date: Mon, 7 Feb 2022 18:11:10 +0100
Subject: [PATCH] linting

---
 Makefile                          |  2 +-
 src/dotd/__init__.py              |  4 ++--
 src/dotd/data.py                  | 18 +++++++++++-------
 src/dotd/data/markdown_header.txt |  2 +-
 src/dotd/generate.py              | 15 ++++++++++-----
 src/dotd/model.py                 | 10 ++++++----
 6 files changed, 31 insertions(+), 20 deletions(-)

diff --git a/Makefile b/Makefile
index d8d9d76..c5af3ab 100644
--- a/Makefile
+++ b/Makefile
@@ -22,7 +22,7 @@ mypy:
 	. .venv/bin/activate && mypy src
 
 lint:
-	. .venv/bin/activate && pylint src -j 4 --reports=y
+	. .venv/bin/activate && flake8 src
 
 docs: FORCE
 	cd docs; . .venv/bin/activate && sphinx-apidoc -o ./source ./src
diff --git a/src/dotd/__init__.py b/src/dotd/__init__.py
index 245c8fa..758b85a 100644
--- a/src/dotd/__init__.py
+++ b/src/dotd/__init__.py
@@ -1,7 +1,7 @@
 # coding: utf-8
-
+""" Drama of the Day """
 from os.path import dirname, abspath
 
 __version__ = "0.1.0"
 
-ROOT_DIR = dirname(abspath(__file__))
\ No newline at end of file
+ROOT_DIR = dirname(abspath(__file__))
diff --git a/src/dotd/data.py b/src/dotd/data.py
index 54384ac..3132813 100644
--- a/src/dotd/data.py
+++ b/src/dotd/data.py
@@ -1,27 +1,31 @@
 # coding: utf-8
-
+""" data module """
 import torch
 from torch.utils.data import Dataset
 
 
 def get_shakespeare():
-    return open("data/shakespeare.txt", "rt").read()
+    """loads the shakespeare text"""
+    return open("data/shakespeare.txt", "rt", encoding="utf-8").read()
 
 
 def preprocess(text: str) -> tuple:
+    """preprocesses the shakespeare text"""
     alphabet = sorted(set(text))
     letter_to_int = {let: ind for ind, let in enumerate(alphabet)}
-    int_to_letter = {ind: let for ind, let in enumerate(alphabet)}
+    int_to_letter = dict(enumerate(alphabet))
     letter_ints = [letter_to_int[letter] for letter in text]
     alphabet_size = len(alphabet)
     return int_to_letter, letter_to_int, alphabet_size, letter_ints
 
 
 class ShakespeareDataset(Dataset):
+    """Shakespeare Dataset"""
+
     def __init__(self, seq_len: int):
         _, _, _, self.text = preprocess(get_shakespeare())
-        self.x = torch.LongTensor(self.text[:-1])  # get the data
-        self.y = torch.LongTensor(self.text[1:])
+        self.data = torch.LongTensor(self.text[:-1])  # get the data
+        self.labels = torch.LongTensor(self.text[1:])  # targets shifted by one
         self.seq_len = seq_len  # set the sequence length
 
     def __len__(self) -> int:
@@ -31,6 +35,6 @@ class ShakespeareDataset(Dataset):
 
     def __getitem__(self, index: int) -> tuple:
         return (
-            self.x[index : index + self.seq_len],
-            self.x[index : index + self.seq_len],
+            self.data[index : index + self.seq_len],
+            self.labels[index : index + self.seq_len],
         )  # return tuple of (sample, label)
diff --git a/src/dotd/data/markdown_header.txt b/src/dotd/data/markdown_header.txt
index a096913..584e8a4 100644
--- a/src/dotd/data/markdown_header.txt
+++ b/src/dotd/data/markdown_header.txt
@@ -12,7 +12,7 @@ All the (pseudo-)words produced were learnt through imitating Shakespeare. There
 
 Every day, the model is told to use the prompt **TOM:** and then produces the rest up to 10000 characters, thereby creating a unique new drama.
 
-If you like to know more about this, [click here](/DOTD_generator).
+If you like to know more about this, [click here](https://git.weber.codes/tom/DOTD_generator).
 
 ---
 
diff --git a/src/dotd/generate.py b/src/dotd/generate.py
index 595bd6b..b869199 100644
--- a/src/dotd/generate.py
+++ b/src/dotd/generate.py
@@ -1,5 +1,5 @@
 # coding: utf-8
-
+""" generate the drama of the day """
 import re
 import torch
 from dotd.model import GRU
@@ -7,10 +7,12 @@ from dotd.data import get_shakespeare, preprocess
 
 
 def get_header():
-    return open("data/markdown_header.txt", "rt").read()
+    """get markdown header file for hugo static website generator"""
+    return open("data/markdown_header.txt", "rt", encoding="utf-8").read()
 
 
 def load_model(alphabet_size: int, path: str = "saved_models/rnn_2epochs.pth") -> GRU:
+    """load the model, put on cpu and in eval mode"""
     gru = GRU(alphabet_size, 1024, 256, layers=2, batch=1)  # instantiate model
     gru.load_state_dict(
         torch.load(
@@ -22,17 +24,19 @@ def load_model(alphabet_size: int, path: str = "saved_models/rnn_2epochs.pth") -
 
 
 def make_pretty(text: str) -> str:
+    """delete some line breaks for markdown"""
     return re.sub(r"\n", r"<br>\n", text)
 
 
 def write_drama(seq, temp=0.7, max_seq_len=1000):
+    """generate the drama starting from a start sequence"""
     int_to_letter, letter_to_int, alphabet_size, _ = preprocess(get_shakespeare())
     gru = load_model(alphabet_size=alphabet_size)
     hidden = gru.init_hidden()
     input_idx = torch.LongTensor(
         [[letter_to_int[s] for s in seq]]
     )  # input characters to ints
-    for i in range(max_seq_len):
+    for _ in range(max_seq_len):
         output, hidden = gru(
             input_idx, hidden
         )  # predict the logits for the next character
@@ -49,10 +53,11 @@ def write_drama(seq, temp=0.7, max_seq_len=1000):
 
 
 def main():
+    """main function"""
     init_seq = "TOM:"
     output_text = get_header() + make_pretty(write_drama(init_seq))
-    with open("dramaoftheday.md", "w") as f:
-        f.write(output_text)
+    with open("dramaoftheday.md", "w", encoding="utf-8") as file:
+        file.write(output_text)
 
 
 if __name__ == "__main__":
diff --git a/src/dotd/model.py b/src/dotd/model.py
index ef6cad0..c7df003 100644
--- a/src/dotd/model.py
+++ b/src/dotd/model.py
@@ -1,15 +1,17 @@
-# coding: utf-
-
+# coding: utf-8
+""" module for training and creating the model """
 import torch
 from torch.utils.data import DataLoader
-from tqdm import tqdm
+from tqdm import tqdm  # type: ignore
 import numpy as np
 from dotd.data import ShakespeareDataset, preprocess, get_shakespeare
 
 
 class GRU(torch.nn.Module):
+    """simple GRU model"""
+
     def __init__(self, vocab_size, hidden_size, embedding_size, batch=32, layers=2):
-        super(GRU, self).__init__()
+        super().__init__()
         self.hidden_size = hidden_size  # size of the GRU layers
         self.batch = batch
         self.layers = layers  # how many GRU layers