Max Sánchez
Published © GPL3+

Lightweight Multi-use Generative Model

This model embodies the maxim that 'simplicity is the ultimate sophistication.' It's designed to run on most computers, making AI accessible to all.

Advanced · Showcase (no instructions) · 15 hours · 223

Things used in this project

Hardware components

AMD Accelerator Cloud
×1

Software apps and online services

TensorFlow
Python

Story


Schematics

Model summary

An image of the Keras model summary, showing each layer's output shape and parameter count (the same summary is printed by model.summary() in the notebook below).

Code

Mollie.ipynb

Python
This is a Jupyter notebook that contains both the code to train the model and the code to use it.
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "4083e7df-90e9-4d49-86d8-882c771e7b56",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Dataset installed!\n",
      "Text tokenized!\n",
      "Input sequences fitted\n",
      "Created predictors and labels\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_1461/3731491159.py:63: VisibleDeprecationWarning: Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray.\n",
      "  filtered_predictors = np.array(filtered_predictors)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Predictors padded to dense tensors\n"
     ]
    },
    {
     "ename": "KeyboardInterrupt",
     "evalue": "",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mKeyboardInterrupt\u001b[0m                         Traceback (most recent call last)",
      "Cell \u001b[0;32mIn[4], line 72\u001b[0m\n\u001b[1;32m     70\u001b[0m \u001b[38;5;66;03m# Convert label to categorical format\u001b[39;00m\n\u001b[1;32m     71\u001b[0m total_words \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mmin\u001b[39m(\u001b[38;5;28mlen\u001b[39m(tokenizer\u001b[38;5;241m.\u001b[39mword_index) \u001b[38;5;241m+\u001b[39m \u001b[38;5;241m1\u001b[39m, max_vocab_size)\n\u001b[0;32m---> 72\u001b[0m label_categorical \u001b[38;5;241m=\u001b[39m \u001b[43mtf\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mkeras\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mutils\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mto_categorical\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfiltered_labels\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mnum_classes\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtotal_words\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m     73\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mAll sequences converted\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m     75\u001b[0m \u001b[38;5;66;03m# Define custom lightweight transformer-based model\u001b[39;00m\n",
      "File \u001b[0;32m~/.local/lib/python3.9/site-packages/keras/src/utils/np_utils.py:74\u001b[0m, in \u001b[0;36mto_categorical\u001b[0;34m(y, num_classes, dtype)\u001b[0m\n\u001b[1;32m     72\u001b[0m n \u001b[38;5;241m=\u001b[39m y\u001b[38;5;241m.\u001b[39mshape[\u001b[38;5;241m0\u001b[39m]\n\u001b[1;32m     73\u001b[0m categorical \u001b[38;5;241m=\u001b[39m np\u001b[38;5;241m.\u001b[39mzeros((n, num_classes), dtype\u001b[38;5;241m=\u001b[39mdtype)\n\u001b[0;32m---> 74\u001b[0m \u001b[43mcategorical\u001b[49m\u001b[43m[\u001b[49m\u001b[43mnp\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43marange\u001b[49m\u001b[43m(\u001b[49m\u001b[43mn\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43my\u001b[49m\u001b[43m]\u001b[49m \u001b[38;5;241m=\u001b[39m \u001b[38;5;241m1\u001b[39m\n\u001b[1;32m     75\u001b[0m output_shape \u001b[38;5;241m=\u001b[39m input_shape \u001b[38;5;241m+\u001b[39m (num_classes,)\n\u001b[1;32m     76\u001b[0m categorical \u001b[38;5;241m=\u001b[39m np\u001b[38;5;241m.\u001b[39mreshape(categorical, output_shape)\n",
      "\u001b[0;31mKeyboardInterrupt\u001b[0m: "
     ]
    }
   ],
   "source": [
    "#This is the code to train the model\n",
    "import os\n",
    "os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'\n",
    "import tensorflow as tf\n",
    "from datasets import load_from_disk\n",
    "import numpy as np\n",
    "from tensorflow.keras.preprocessing.text import Tokenizer\n",
    "from tensorflow.keras.preprocessing.sequence import pad_sequences\n",
    "\n",
    "dataset = load_from_disk(\"Dolma\")\n",
    "texts = dataset['train'].select(range(1000))  # Use more data if possible\n",
    "del dataset\n",
    "print(\"Dataset installed!\")\n",
    "\n",
    "max_sequence_len = 500\n",
    "# Prepare tokenizer\n",
    "max_vocab_size = 20000  # Limit vocabulary size\n",
    "tokenizer = Tokenizer()  \n",
    "tokenizer.fit_on_texts(texts['text'])\n",
    "print(\"Text tokenized!\")\n",
    "\n",
    "import os\n",
    "import tensorflow as tf\n",
    "from tensorflow.keras.preprocessing.text import Tokenizer\n",
    "import pickle\n",
    "\n",
    "\n",
    "\n",
    "# Save the tokenizer to a file\n",
    "with open('tokenizer.pkl', 'wb') as file:\n",
    "    pickle.dump(tokenizer, file)\n",
    "\n",
    "\n",
    "sequences = tokenizer.texts_to_sequences(texts['text'])\n",
    "\n",
    "input_sequences = []\n",
    "for seq in sequences:\n",
    "    padded_seq = pad_sequences([seq], maxlen=max_sequence_len, padding='pre')[0]\n",
    "    for i in range(1, len(padded_seq)):\n",
    "        n_gram_sequence = padded_seq[:i+1]\n",
    "        input_sequences.append(n_gram_sequence)\n",
    "\n",
    "print(\"Input sequences fitted\")\n",
    "input_sequences = np.array(input_sequences, dtype=object)\n",
    "\n",
    "max_sequence_len = 50  # Adjust based on your data\n",
    "predictors = [seq[:-1] for seq in input_sequences]\n",
    "label = [seq[-1] for seq in input_sequences]\n",
    "print(\"Created predictors and labels\")\n",
    "\n",
    "filtered_predictors = []\n",
    "filtered_labels = []\n",
    "for i in range(len(label)):\n",
    "    if label[i] < max_vocab_size:\n",
    "        filtered_predictors.append(predictors[i])\n",
    "        filtered_labels.append(label[i])\n",
    "\n",
    "filtered_predictors = np.array(filtered_predictors)\n",
    "filtered_labels = np.array(filtered_labels)\n",
    "\n",
    "predictors_padded = pad_sequences(filtered_predictors, maxlen=max_sequence_len-1, padding='pre')\n",
    "print(\"Predictors padded to dense tensors\")\n",
    "\n",
    "total_words = min(len(tokenizer.word_index) + 1, max_vocab_size)\n",
    "label_categorical = tf.keras.utils.to_categorical(filtered_labels, num_classes=total_words)\n",
    "print(\"All sequences converted\")\n",
    "\n",
    "class TransformerBlock(tf.keras.layers.Layer):\n",
    "    def __init__(self, num_heads, key_dim, ff_dim):\n",
    "        super(TransformerBlock, self).__init__()\n",
    "        self.att = tf.keras.layers.MultiHeadAttention(num_heads=num_heads, key_dim=key_dim)\n",
    "        self.ffn = tf.keras.Sequential([\n",
    "            tf.keras.layers.Dense(ff_dim, activation='relu'),\n",
    "            tf.keras.layers.Dense(key_dim)\n",
    "        ])\n",
    "        self.layernorm1 = tf.keras.layers.LayerNormalization(epsilon=1e-6)\n",
    "        self.layernorm2 = tf.keras.layers.LayerNormalization(epsilon=1e-6)\n",
    "        self.dropout1 = tf.keras.layers.Dropout(0.1)\n",
    "        self.dropout2 = tf.keras.layers.Dropout(0.1)\n",
    "\n",
    "    def call(self, inputs, training):\n",
    "        attn_output = self.att(inputs, inputs)\n",
    "        attn_output = self.dropout1(attn_output, training=training)\n",
    "        out1 = self.layernorm1(inputs + attn_output)\n",
    "        ffn_output = self.ffn(out1)\n",
    "        ffn_output = self.dropout2(ffn_output, training=training)\n",
    "        return self.layernorm2(out1 + ffn_output)\n",
    "\n",
    "def build_custom_model(vocab_size, max_seq_len):\n",
    "    embedding_dim = 64  # Adjust the embedding dimension\n",
    "    inputs = tf.keras.Input(shape=(max_seq_len-1,))\n",
    "    embedding_layer = tf.keras.layers.Embedding(vocab_size, embedding_dim)(inputs)\n",
    "    transformer_block = TransformerBlock(num_heads=2, key_dim=embedding_dim, ff_dim=embedding_dim*2)(embedding_layer)\n",
    "    flatten_layer = tf.keras.layers.Flatten()(transformer_block)\n",
    "    dropout_layer = tf.keras.layers.Dropout(0.2)(flatten_layer)\n",
    "    outputs = tf.keras.layers.Dense(vocab_size, activation='softmax')(dropout_layer)\n",
    "\n",
    "    model = tf.keras.Model(inputs=inputs, outputs=outputs)\n",
    "    return model\n",
    "\n",
    "model = build_custom_model(total_words, max_sequence_len)\n",
    "\n",
    "optimizer = tf.keras.optimizers.Adam(learning_rate=1e-3)\n",
    "model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])\n",
    "print(\"Model compiled\")\n",
    "\n",
    "callbacks = [\n",
    "    tf.keras.callbacks.ModelCheckpoint(\"best_model.h5\", save_best_only=True, monitor='loss'),\n",
    "    tf.keras.callbacks.EarlyStopping(monitor='loss', patience=10),\n",
    "    tf.keras.callbacks.ReduceLROnPlateau(monitor='loss', factor=0.5, patience=5)\n",
    "]\n",
    "\n",
    "history = model.fit(predictors_padded, label_categorical, epochs=150, verbose=1, batch_size=256, callbacks=callbacks)\n",
    "print(\"Model training completed\")\n",
    "\n",
    "model.save('lightweight_text_gen_model_V2.h5')\n",
    "print(\"Model saved and ready for text generation\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "2bf6775c-6f28-4741-82ec-37da60fb44dd",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Of course i'll be starting to blog again next monday fresh and spirited up thanks for being such a great company in my alone time life's been great so far i think\n"
     ]
    }
   ],
   "source": [
    "#This is the code to load the model\n",
    "import os\n",
    "import tensorflow as tf\n",
    "from tensorflow.keras.preprocessing.sequence import pad_sequences\n",
    "import numpy as np\n",
    "import pickle\n",
    "\n",
    "class TransformerBlock(tf.keras.layers.Layer):\n",
    "    def __init__(self, num_heads, key_dim, ff_dim):\n",
    "        super(TransformerBlock, self).__init__()\n",
    "        self.att = tf.keras.layers.MultiHeadAttention(num_heads=num_heads, key_dim=key_dim)\n",
    "        self.ffn = tf.keras.Sequential([\n",
    "            tf.keras.layers.Dense(ff_dim, activation='relu'),\n",
    "            tf.keras.layers.Dense(key_dim)\n",
    "        ])\n",
    "        self.layernorm1 = tf.keras.layers.LayerNormalization(epsilon=1e-6)\n",
    "        self.layernorm2 = tf.keras.layers.LayerNormalization(epsilon=1e-6)\n",
    "        self.dropout1 = tf.keras.layers.Dropout(0.1)\n",
    "        self.dropout2 = tf.keras.layers.Dropout(0.1)\n",
    "\n",
    "    def call(self, inputs, training):\n",
    "        attn_output = self.att(inputs, inputs)\n",
    "        attn_output = self.dropout1(attn_output, training=training)\n",
    "        out1 = self.layernorm1(inputs + attn_output)\n",
    "        ffn_output = self.ffn(out1)\n",
    "        ffn_output = self.dropout2(ffn_output, training=training)\n",
    "        return self.layernorm2(out1 + ffn_output)\n",
    "\n",
    "def build_custom_model(vocab_size, max_seq_len):\n",
    "    embedding_dim = 64  # Adjust the embedding dimension\n",
    "    inputs = tf.keras.Input(shape=(max_seq_len-1,))\n",
    "    embedding_layer = tf.keras.layers.Embedding(vocab_size, embedding_dim)(inputs)\n",
    "    transformer_block = TransformerBlock(num_heads=2, key_dim=embedding_dim, ff_dim=embedding_dim*2)(embedding_layer)\n",
    "    flatten_layer = tf.keras.layers.Flatten()(transformer_block)\n",
    "    dropout_layer = tf.keras.layers.Dropout(0.2)(flatten_layer)\n",
    "    outputs = tf.keras.layers.Dense(vocab_size, activation='softmax')(dropout_layer)\n",
    "\n",
    "    model = tf.keras.Model(inputs=inputs, outputs=outputs)\n",
    "    return model\n",
    "\n",
    "\n",
    "with open('tokenizer.pkl', 'rb') as file:\n",
    "    tokenizer = pickle.load(file)\n",
    "\n",
    "model = tf.keras.models.load_model('lightweight_text_gen_model_V2.h5', custom_objects={'TransformerBlock': TransformerBlock})\n",
    "\n",
    "def generate_text(seed_text, next_words, max_sequence_len):\n",
    "    for _ in range(next_words):\n",
    "        token_list = tokenizer.texts_to_sequences([seed_text])[0]\n",
    "        token_list = pad_sequences([token_list], maxlen=max_sequence_len-1, padding='pre')  # Adjust padding based on max_sequence_len\n",
    "        predicted = model.predict(token_list, verbose=0)\n",
    "        predicted_word_index = np.argmax(predicted, axis=-1)[0]\n",
    "        \n",
    "        # Append the predicted word to the seed text\n",
    "        for word, index in tokenizer.word_index.items():\n",
    "            if index == predicted_word_index:\n",
    "                seed_text += \" \" + word\n",
    "                break\n",
    "    return seed_text\n",
    "\n",
    "seed_text = \"Of course\"\n",
    "next_words = 30\n",
    "max_sequence_len = 50  \n",
    "\n",
    "generated_text = generate_text(seed_text, next_words, max_sequence_len)\n",
    "print(generated_text)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "id": "128ebe99-83e0-4e35-824b-1f5518a2ff72",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Model: \"model_2\"\n",
      "_________________________________________________________________\n",
      " Layer (type)                Output Shape              Param #   \n",
      "=================================================================\n",
      " input_3 (InputLayer)        [(None, 49)]              0         \n",
      "                                                                 \n",
      " embedding_2 (Embedding)     (None, 49, 64)            1280000   \n",
      "                                                                 \n",
      " transformer_block_33 (Tran  (None, 49, 64)            50048     \n",
      " sformerBlock)                                                   \n",
      "                                                                 \n",
      " flatten_2 (Flatten)         (None, 3136)              0         \n",
      "                                                                 \n",
      " dropout_34 (Dropout)        (None, 3136)              0         \n",
      "                                                                 \n",
      " dense_34 (Dense)            (None, 20000)             62740000  \n",
      "                                                                 \n",
      "=================================================================\n",
      "Total params: 64070048 (244.41 MB)\n",
      "Trainable params: 64070048 (244.41 MB)\n",
      "Non-trainable params: 0 (0.00 Byte)\n",
      "_________________________________________________________________\n"
     ]
    }
   ],
   "source": [
    "model.summary()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.18"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
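
The generation cell above decodes greedily with argmax, so the same seed text always yields the same continuation. Below is a minimal sketch of temperature sampling as an alternative; it assumes the model, tokenizer and max_sequence_len = 50 from the notebook are already in scope, and the generate_text_sampled helper and temperature parameter are illustrative additions rather than part of the original notebook.

# Temperature-sampling sketch (assumes `model` and `tokenizer` from the notebook are loaded)
import numpy as np
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Build the reverse lookup once instead of scanning word_index on every step
index_word = {index: word for word, index in tokenizer.word_index.items()}

def generate_text_sampled(seed_text, next_words, max_sequence_len, temperature=0.8):
    for _ in range(next_words):
        token_list = tokenizer.texts_to_sequences([seed_text])[0]
        token_list = pad_sequences([token_list], maxlen=max_sequence_len - 1, padding='pre')
        probs = model.predict(token_list, verbose=0)[0]

        # Rescale the distribution by temperature, renormalize, and sample one word index
        logits = np.log(probs + 1e-9) / temperature
        probs = np.exp(logits) / np.sum(np.exp(logits))
        predicted_word_index = np.random.choice(len(probs), p=probs)

        word = index_word.get(predicted_word_index)
        if word is None:  # index 0 is padding / out of vocabulary
            break
        seed_text += " " + word
    return seed_text

print(generate_text_sampled("Of course", next_words=30, max_sequence_len=50))

Lower temperatures stay close to the greedy output above; higher temperatures give more varied but less coherent text.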

Model File

Python
This is the file with the trained model
No preview (download only).

Tokenizer file

Python
This is the file that contains the tokenizer for the model.
No preview (download only).
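
If you download the model and tokenizer files instead of retraining, they can be loaded directly. This is a minimal sketch, assuming both files keep the names used in the notebook ('lightweight_text_gen_model_V2.h5' and 'tokenizer.pkl'), sit in the working directory, and that the TransformerBlock class from the notebook has been defined first, since Keras needs it to rebuild the custom layer.

# Quick-start for the attached files (assumes the TransformerBlock class from the notebook is defined)
import pickle
import tensorflow as tf

with open('tokenizer.pkl', 'rb') as file:
    tokenizer = pickle.load(file)

model = tf.keras.models.load_model(
    'lightweight_text_gen_model_V2.h5',
    custom_objects={'TransformerBlock': TransformerBlock}
)
model.summary()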

Credits

Max Sánchez
6 projects • 4 followers
I am a student who is passionate about technology, hardware and software. I like making video games and experimenting with new technology.
