Ryan Zhang, Alice Zhang, Bernice Zhang, Hannah Xu, Sophie
Published © MIT

Silencing the Noise Precisely: NPU AI for Cancer Screening

A solution to enhance cancer screening accuracy and efficiency by leveraging the power of AMD Ryzen AI PCs with NPUs.

Advanced • Full instructions provided • 7 hours • 294 views

Things used in this project

Hardware components

Minisforum Venus UM790 Pro with AMD Ryzen™ 9
×1
NVIDIA GeForce RTX 4060 Ti
×1
AMD Ryzen 9 7940HS
×1

Software apps and online services

Microsoft Visual Studio Code
Anaconda
AMD Ryzen AI
Ubuntu 22.04
Microsoft WSL

Story


Code

Full Code/Database:

Python
Full Code and Database
The database and full code were too large to upload to GitHub, so they are hosted on Google Drive instead:

Database: https://drive.google.com/drive/folders/1CvOVql5FJvsDLAAV8hRh9fmnhlhJqSUr?usp=sharing

Full code: https://drive.google.com/drive/folders/1kDyZbObYyhLT34NHT4waRNS_TFTaygmh?usp=sharing

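If you prefer to fetch these folders from a script, the gdown package (pip install gdown) can mirror shared Google Drive folders. The snippet below is a small convenience sketch, not part of the original project: the local output directory names are arbitrary placeholders, and note that gdown can only fetch up to 50 files per folder.

# Sketch: mirror the shared Google Drive folders with gdown.
import gdown

DATABASE_URL = "https://drive.google.com/drive/folders/1CvOVql5FJvsDLAAV8hRh9fmnhlhJqSUr?usp=sharing"
FULL_CODE_URL = "https://drive.google.com/drive/folders/1kDyZbObYyhLT34NHT4waRNS_TFTaygmh?usp=sharing"

# Download each shared folder into a local directory (names are placeholders).
gdown.download_folder(DATABASE_URL, output="database", quiet=False)
gdown.download_folder(FULL_CODE_URL, output="full_code", quiet=False)
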
train.py

Python
Training Code
from dataclasses import dataclass, field
from typing import Optional, Dict

import torch
import transformers
from datasets import load_from_disk
from transformers import DataCollatorForLanguageModeling, Trainer, TrainingArguments, TrainerCallback
import os

class MemoryUsageCallback(TrainerCallback):
    """A callback to log memory usage at each training step and epoch."""

    def on_step_end(self, args, state, control, **kwargs):
        if torch.cuda.is_available():
            torch.cuda.synchronize()
            allocated = torch.cuda.memory_allocated(0) / (1024 ** 3)
            cached = torch.cuda.memory_reserved(0) / (1024 ** 3)
            print(f"Step {state.global_step}: CUDA Memory Allocated: {allocated:.2f} GB, Cached: {cached:.2f} GB")

    def on_epoch_end(self, args, state, control, **kwargs):
        if torch.cuda.is_available():
            torch.cuda.synchronize()
            allocated = torch.cuda.memory_allocated(0) / (1024 ** 3)
            cached = torch.cuda.memory_reserved(0) / (1024 ** 3)
            print(f"Epoch {state.epoch}: CUDA Memory Allocated: {allocated:.2f} GB, Cached: {cached:.2f} GB")

@dataclass
class ModelArguments:
    model_name_or_path: Optional[str] = field(default="facebook/opt-125m")
    tokenizer_name_or_path: Optional[str] = field(default="facebook/opt-125m")

@dataclass
class DataArguments:
    data_path: str = field(
        default=None, metadata={"help": "Path to the training data."}
    )

def make_data_module(tokenizer: transformers.PreTrainedTokenizer, data_args) -> Dict:
    tokenized_datasets = load_from_disk(data_args.data_path)
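    # mlm=False configures the collator for causal language modeling:
    # labels are a copy of input_ids (shifted inside the model), so the
    # mlm_probability value is ignored.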
    data_collator = DataCollatorForLanguageModeling(
        tokenizer=tokenizer, mlm=False, mlm_probability=0.0
    )
    return dict(
        train_dataset=tokenized_datasets["train"],
        eval_dataset=None,
        data_collator=data_collator,
    )

def main():
    parser = transformers.HfArgumentParser((ModelArguments, DataArguments, TrainingArguments))
    model_args, data_args, training_args = parser.parse_args_into_dataclasses()

    print(model_args, data_args, training_args)
    model = transformers.AutoModelForCausalLM.from_pretrained(model_args.model_name_or_path)
    tokenizer = transformers.AutoTokenizer.from_pretrained(model_args.tokenizer_name_or_path)

    data_module = make_data_module(tokenizer=tokenizer, data_args=data_args)
    trainer = Trainer(
        model=model,
        tokenizer=tokenizer,
        args=training_args,
        **data_module,
        callbacks=[MemoryUsageCallback()]
    )
    trainer.train(resume_from_checkpoint=training_args.resume_from_checkpoint)
    trainer.save_state()
    trainer.save_model(output_dir=training_args.output_dir)

if __name__ == "__main__":
    main()

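train.py expects --data_path to point at a dataset that was tokenized and saved to disk with the datasets library (it is reloaded with load_from_disk and read through its "train" split). A minimal preparation sketch is shown below; the raw corpus file name, the "text" column, and max_length=512 are illustrative assumptions, and the base OPT tokenizer stands in for the project's opt-seq-pubmed-tokenizer.

# prepare_data.py -- hedged sketch of building the tokenized_data directory
# that train.py consumes. File names and the sequence length are assumptions.
from datasets import load_dataset
from transformers import AutoTokenizer

# The project uses a custom opt-seq-pubmed-tokenizer; the base OPT tokenizer
# is used here only as a stand-in.
tokenizer = AutoTokenizer.from_pretrained("facebook/opt-125m")

# Assumed raw corpus: one training example per line of a plain-text file.
raw = load_dataset("text", data_files={"train": "corpus.txt"})

def tokenize(batch):
    return tokenizer(batch["text"], truncation=True, max_length=512)

tokenized = raw.map(tokenize, batched=True, remove_columns=["text"])

# Saved layout matches load_from_disk() in train.py: a DatasetDict with "train".
tokenized.save_to_disk("data_Cristiano/tokenized_data")
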
train_finaledb.sh

SH
Shell script to launch train.py
#!/bin/bash

SHELL_SCRIPT=$(readlink -f "$0")
RUN_PATH=$(dirname "$SHELL_SCRIPT")
SRC_PATH=""

echo "RUN_PATH: ${RUN_PATH}"
echo "SRC_PATH: ${SRC_PATH}"

torchrun --nproc_per_node=1 ${SRC_PATH}/train.py \
  --model_name_or_path "facebook/opt-125m" \
  --tokenizer_name_or_path ${SRC_PATH}/opt-seq-pubmed-tokenizer \
  --data_path ${RUN_PATH}/data_Cristiano/tokenized_data \
  --output_dir ${RUN_PATH}/train_output_Cristiano \
  --num_train_epochs 30 \
  --per_device_train_batch_size 7 \
  --per_device_eval_batch_size 7 \
  --gradient_accumulation_steps 7 \
  --evaluation_strategy "no" \
  --save_strategy "steps" \
  --save_steps 1000 \
  --save_total_limit 1 \
  --learning_rate 1e-4 \
  --weight_decay 0.01 \
  --warmup_ratio 0.03 \
  --lr_scheduler_type "cosine" \
  --logging_steps 32 \
  --full_determinism \
  --fp16 False \
  --dataloader_num_workers 8 \
  --fsdp "full_shard auto_wrap" \
  --fsdp_config ${SRC_PATH}/fsdp_config_opt.json

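Training runs on the GPU; the Ryzen AI NPU comes into play at inference time through ONNX Runtime. The sketch below outlines that hand-off using Hugging Face Optimum for the ONNX export and ONNX Runtime's Vitis AI execution provider, which the AMD Ryzen AI software installs. The checkpoint and output paths, the exported ONNX file name, and the vaip_config.json location are assumptions that depend on your Ryzen AI installation, as does how much of the decoder is actually offloaded to the NPU.

# deploy_npu.py -- hedged sketch of exporting the fine-tuned model and opening
# it with the Vitis AI (Ryzen AI NPU) execution provider. Paths and the ONNX
# file name below are assumptions, not values from the original project.
import onnxruntime as ort
from optimum.onnxruntime import ORTModelForCausalLM

# 1) Export the checkpoint written by train.py (its output_dir) to ONNX.
ort_model = ORTModelForCausalLM.from_pretrained("train_output_Cristiano", export=True)
ort_model.save_pretrained("onnx_model")

# 2) Open the exported graph with the Vitis AI execution provider, falling
#    back to CPU for any operators the NPU cannot take. The exported file may
#    be named model.onnx or decoder_model_merged.onnx depending on the
#    Optimum version; adjust the path accordingly.
session = ort.InferenceSession(
    "onnx_model/model.onnx",
    providers=["VitisAIExecutionProvider", "CPUExecutionProvider"],
    provider_options=[{"config_file": "vaip_config.json"}, {}],
)
print("Active providers:", session.get_providers())
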
Credits

Ryan Zhang
1 project • 0 followers
Alice Zhang
0 projects • 1 follower
Bernice Zhang
0 projects • 1 follower
Hannah Xu
0 projects • 1 follower
Sophie
0 projects • 1 follower
