commit d1c27f2112 on master (parent 1c817c315f)
Tom Weber, 3 years ago

@@ -22,7 +22,7 @@ mypy:
. .venv/bin/activate && mypy src
lint:
. .venv/bin/activate && pylint src -j 4 --reports=y
. .venv/bin/activate && flake8 src
docs: FORCE
cd docs; . .venv/bin/activate && sphinx-apidoc -o ./source ./src

@@ -1,7 +1,7 @@
# coding: utf-8
""" Drama of the Day """
from os.path import dirname, abspath
__version__ = "0.1.0"
ROOT_DIR = dirname(abspath(__file__))
ROOT_DIR = dirname(abspath(__file__))

@@ -1,27 +1,31 @@
# coding: utf-8
""" data module """
import torch
from torch.utils.data import Dataset
def get_shakespeare():
return open("data/shakespeare.txt", "rt").read()
"""loads the shakespeare text"""
return open("data/shakespeare.txt", "rt", encoding="utf-8").read()
def preprocess(text: str) -> tuple:
"""preprocesses the shakespeare text"""
alphabet = sorted(set(text))
letter_to_int = {let: ind for ind, let in enumerate(alphabet)}
int_to_letter = {ind: let for ind, let in enumerate(alphabet)}
int_to_letter = dict(enumerate(alphabet))
letter_ints = [letter_to_int[letter] for letter in text]
alphabet_size = len(alphabet)
return int_to_letter, letter_to_int, alphabet_size, letter_ints
class ShakespeareDataset(Dataset):
"""Shakespeare Dataset"""
def __init__(self, seq_len: int):
_, _, _, self.text = preprocess(get_shakespeare())
self.x = torch.LongTensor(self.text[:-1]) # get the data
self.y = torch.LongTensor(self.text[1:])
self.data = torch.LongTensor(self.text[:-1]) # get the data
self.labels = torch.LongTensor(self.text[1:])
self.seq_len = seq_len # set the sequence length
def __len__(self) -> int:
@@ -31,6 +35,6 @@ class ShakespeareDataset(Dataset):
def __getitem__(self, index: int) -> tuple:
return (
self.x[index : index + self.seq_len],
self.y[index : index + self.seq_len],
self.data[index : index + self.seq_len],
self.labels[index : index + self.seq_len],
) # return tuple of (sample, label)
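As a usage note on this hunk (not part of the commit): a minimal sketch of how the renamed data/labels pair might be consumed through a DataLoader; the seq_len and batch_size values below are illustrative assumptions.

```python
from torch.utils.data import DataLoader

from dotd.data import ShakespeareDataset

dataset = ShakespeareDataset(seq_len=100)                  # seq_len chosen for illustration
loader = DataLoader(dataset, batch_size=32, shuffle=True)  # batch size is an assumption

samples, labels = next(iter(loader))
# labels holds the same windows of character ids shifted one step ahead,
# since self.labels = text[1:] while self.data = text[:-1].
print(samples.shape, labels.shape)
```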

@@ -12,7 +12,7 @@ All the (pseudo-)words produced were learnt through imitating Shakespeare. There
Every day, the model is given the prompt **TOM:** and then generates up to 10000 further characters, thereby creating a unique new drama.
If you would like to know more about this, [click here](/DOTD_generator).
If you would like to know more about this, [click here](https://git.weber.codes/tom/DOTD_generator).
---

@@ -1,5 +1,5 @@
# coding: utf-8
""" generate the drama of the day """
import re
import torch
from dotd.model import GRU
@@ -7,10 +7,12 @@ from dotd.data import get_shakespeare, preprocess
def get_header():
return open("data/markdown_header.txt", "rt").read()
"""get markdown header file for hugo static website generator"""
return open("data/markdown_header.txt", "rt", encoding="utf-8").read()
def load_model(alphabet_size: int, path: str = "saved_models/rnn_2epochs.pth") -> GRU:
"""load the model, put on cpu and in eval mode"""
gru = GRU(alphabet_size, 1024, 256, layers=2, batch=1) # instantiate model
gru.load_state_dict(
torch.load(
@@ -22,17 +24,19 @@ def load_model(alphabet_size: int, path: str = "saved_models/rnn_2epochs.pth") -
def make_pretty(text: str) -> str:
"""delete some line breaks for markdown"""
return re.sub(r"\n", r"<br>\n", text)
def write_drama(seq, temp=0.7, max_seq_len=1000):
"""generate the drama starting from a start sequence"""
int_to_letter, letter_to_int, alphabet_size, _ = preprocess(get_shakespeare())
gru = load_model(alphabet_size=alphabet_size)
hidden = gru.init_hidden()
input_idx = torch.LongTensor(
[[letter_to_int[s] for s in seq]]
) # input characters to ints
for i in range(max_seq_len):
for _ in range(max_seq_len):
output, hidden = gru(
input_idx, hidden
) # predict the logits for the next character
@@ -49,10 +53,11 @@ def write_drama(seq, temp=0.7, max_seq_len=1000):
def main():
"""main function"""
init_seq = "TOM:"
output_text = get_header() + make_pretty(write_drama(init_seq))
with open("dramaoftheday.md", "w") as f:
f.write(output_text)
with open("dramaoftheday.md", "w", encoding="utf-8") as file:
file.write(output_text)
if __name__ == "__main__":
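The sampling step of write_drama falls between the two hunks above and is not shown. For orientation, here is a hedged sketch of how the logits from the gru call are commonly turned into the next character, assuming temperature-scaled softmax plus multinomial sampling; the helper below is illustrative and not the repository's actual code.

```python
import torch

def sample_next(logits: torch.Tensor, temp: float = 0.7) -> int:
    """Pick the next character id from unnormalised logits (illustrative helper)."""
    probs = torch.softmax(logits / temp, dim=-1)         # temperature-scaled distribution
    return int(torch.multinomial(probs, num_samples=1))  # draw one character id

# Assumed wiring: take the logits for the last position, sample an id,
# map it back through int_to_letter, and feed it in as the next input_idx.
```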

@@ -1,15 +1,17 @@
# coding: utf-
# coding: utf-8
""" module for training and creating the model """
import torch
from torch.utils.data import DataLoader
from tqdm import tqdm
from tqdm import tqdm # type: ignore
import numpy as np
from dotd.data import ShakespeareDataset, preprocess, get_shakespeare
class GRU(torch.nn.Module):
"""simple GRU model"""
def __init__(self, vocab_size, hidden_size, embedding_size, batch=32, layers=2):
super(GRU, self).__init__()
super().__init__()
self.hidden_size = hidden_size # size of the GRU layers
self.batch = batch
self.layers = layers # how many GRU layers
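The hunk ends before the layer definitions, so for orientation here is a minimal sketch of how a character-level GRU with this constructor signature is commonly wired up (embedding, GRU stack, linear head, and the init_hidden helper that write_drama calls). The layer layout and forward signature are assumptions, not taken from this commit.

```python
import torch

class CharGRU(torch.nn.Module):
    """Illustrative stand-in for the repository's GRU class; layout is assumed."""

    def __init__(self, vocab_size, hidden_size, embedding_size, batch=32, layers=2):
        super().__init__()
        self.hidden_size = hidden_size
        self.batch = batch
        self.layers = layers
        self.embed = torch.nn.Embedding(vocab_size, embedding_size)  # char ids -> vectors
        self.gru = torch.nn.GRU(embedding_size, hidden_size,
                                num_layers=layers, batch_first=True)  # recurrent core
        self.out = torch.nn.Linear(hidden_size, vocab_size)           # logits per character

    def forward(self, x, hidden):
        emb = self.embed(x)                      # (batch, seq, embedding_size)
        output, hidden = self.gru(emb, hidden)   # (batch, seq, hidden_size)
        return self.out(output), hidden          # logits shaped (batch, seq, vocab_size)

    def init_hidden(self):
        # zero initial state, matching the gru.init_hidden() call in write_drama
        return torch.zeros(self.layers, self.batch, self.hidden_size)
```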
