commit
2056b207f6
@ -0,0 +1,2 @@
|
||||
dramaoftheday.md
|
||||
.venv/
|
@ -0,0 +1,66 @@
|
||||
#!/usr/bin/env python
|
||||
# coding: utf-8
|
||||
import torch
|
||||
import re
|
||||
|
||||
class RNN(torch.nn.Module):
|
||||
def __init__(self, vocab_size, hidden_size, embedding_size, batch=32, layers=2):
|
||||
super(RNN, self).__init__()
|
||||
self.hidden_size = hidden_size # size of the GRU layers
|
||||
self.batch = batch
|
||||
self.layers = layers # how many GRU layers
|
||||
self.word_embeds = torch.nn.Embedding(vocab_size, embedding_size) # Embedding layer
|
||||
self.gru = torch.nn.GRU(embedding_size, hidden_size, layers, batch_first=True) # GRU layer(s)
|
||||
self.output_layer = torch.nn.Linear(hidden_size, vocab_size)
|
||||
|
||||
def forward(self, inputs, hidden):
|
||||
x = self.word_embeds(inputs) # transform the input integer into a high dimensional embedding
|
||||
output, hidden = self.gru(x, hidden) # Compute the output of the GRU layer(s)
|
||||
output = self.output_layer(output) # compute the logits
|
||||
return output, hidden
|
||||
|
||||
def initHidden(self):
|
||||
return torch.zeros(self.layers, self.batch, self.hidden_size)
|
||||
|
||||
def preprocess(text):
|
||||
alphabet = sorted(set(text))
|
||||
letter_to_int = {let: ind for ind, let in enumerate(alphabet)}
|
||||
int_to_letter = {ind: let for ind, let in enumerate(alphabet)}
|
||||
letter_ints = [letter_to_int[letter] for letter in text]
|
||||
alphabet_size = len(alphabet)
|
||||
return int_to_letter, letter_to_int, alphabet_size, letter_ints
|
||||
|
||||
|
||||
def markdown_header():
|
||||
return open("/home/tux/shakespeare_generator/markdown_header.txt", "rt").read()
|
||||
|
||||
text = open("/home/tux/shakespeare_generator/shakespeare.txt", "rt").read()
|
||||
init_seq = "TOM:"
|
||||
int_to_letter, letter_to_int, alphabet_size, letter_ints = preprocess(text)
|
||||
rnn = RNN(alphabet_size, 1024, 256, layers=2, batch=1) # instantiate model
|
||||
rnn.load_state_dict(torch.load("/home/tux/shakespeare_generator/rnn_2epochs.pth", map_location=torch.device('cpu'))) # load weights
|
||||
rnn.eval() # tell model its time to evaluate
|
||||
|
||||
def write_drama(seq, temp=0.7, max_seq_len=1000):
|
||||
hidden = rnn.initHidden()
|
||||
input_idx = torch.LongTensor([[letter_to_int[s] for s in seq]]) # input characters to ints
|
||||
for i in range(max_seq_len):
|
||||
output, hidden = rnn(input_idx, hidden) # predict the logits for the next character
|
||||
pred = torch.squeeze(output, 0)[-1]
|
||||
pred = pred / temp # apply temperature
|
||||
pred_id = torch.distributions.categorical.Categorical(logits=pred).sample() # sample from the distribution
|
||||
input_idx = torch.cat((input_idx[:,1:], pred_id.reshape(1,-1)), 1) # predicted character is added to our input
|
||||
seq += int_to_letter[pred_id.item()] # add predicted character to sequence
|
||||
return seq
|
||||
|
||||
def stylise_drama(drama):
|
||||
return re.sub(r"\n", r"<br>\n", drama)
|
||||
|
||||
output_text = markdown_header() + stylise_drama(write_drama(init_seq))
|
||||
|
||||
with open("/home/tux/shakespeare_generator/dramaoftheday.md", "w") as f:
|
||||
f.write(output_text)
|
||||
|
||||
|
||||
|
||||
|
@ -0,0 +1,14 @@
|
||||
+++
|
||||
title = "Drama of the Day"
|
||||
date = "2021-11-07"
|
||||
+++
|
||||
|
||||
|
||||
This excerpt of a drama, although inspired by Shakespeare, is entirely artifically generated.
|
||||
The language model that was used to produce this text only had prior knowledge about the latin alphabet. This means that all the (pseudo-)words produced were learnt through imitating Shakespeare. Therefore, you might find quite a few neologisms, in true Shakespearean spirit.
|
||||
Everyday, the model is told to use the prompt **TOM:** and then produces the rest up to 10000 characters, thereby creating a unique new drama.
|
||||
|
||||
|
||||
---
|
||||
|
||||
|
Binary file not shown.
File diff suppressed because it is too large
Load Diff
Loading…
Reference in new issue