Learning Python
I'm starting to understand how useful Python is for certain tasks. This is where I'll collect what I learn as I build things.
Extracting PDF to text
Instead of sending the whole PDF file to an LLM, it's better to extract the text first and send that instead, since plain text is much lighter.
import pdfplumber

def extract_text_from_pdfs(pdf_files):
    extracted_text = []
    for pdf_file in pdf_files:
        with pdfplumber.open(pdf_file) as pdf:
            text = ''
            for page in pdf.pages:
                # extract_text() can return None for pages without a text layer
                text += (page.extract_text() or '') + '\n'
            extracted_text.append(text)
    return extracted_text
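A quick usage sketch (the file paths here are just placeholders); the resulting pdf_texts list feeds the OpenAI summarizer further down:
# Hypothetical input files, adjust the paths to your own PDFs
pdf_texts = extract_text_from_pdfs(['input/report1.pdf', 'input/report2.pdf'])
print(pdf_texts[0][:500])  # peek at the first 500 characters of the first document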
Converting using FFMPEG
Download FFmpeg and add its bin folder to your PATH environment variable.
import subprocess

CurrentFileName = 'input/audio.m4a'
FinalFileName = 'output/audio.mp3'

try:
    # FFmpeg infers the output format from the file extension
    subprocess.call(['ffmpeg', '-i', CurrentFileName, FinalFileName])
except Exception as e:
    print(e)
    print('Error While Converting Audio')

ch = input('Press Enter to Close')
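The speech-to-text example below reads output/audio.wav, so the same call can produce a WAV instead (a small sketch reusing the input path above):
# Convert to WAV for the speech recognition step (same FFmpeg pattern)
subprocess.call(['ffmpeg', '-i', 'input/audio.m4a', 'output/audio.wav'])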
Extracting audio to text using free Google Web Speech API
The free service only processes up to about 60 seconds of audio per request.
import speech_recognition as sr

# Initialize the recognizer
recognizer = sr.Recognizer()

# Path to your audio file
audio_file = "output/audio.wav"

# Convert audio to text
try:
    # Load the audio file
    with sr.AudioFile(audio_file) as source:
        print("Processing audio...")
        audio_data = recognizer.record(source, duration=60)  # Record the first 60 seconds

    # Recognize the speech (Indonesian in this case)
    text = recognizer.recognize_google(audio_data, language="id-ID")
    print("Text extracted from audio:")
    print(text)
except sr.UnknownValueError:
    print("Google Web Speech API could not understand the audio.")
except sr.RequestError as e:
    print(f"Error with the request to Google API: {e}")
except Exception as e:
    print(f"Unexpected error: {e}")
Using OpenAI
At some point we'll need to use OpenAI; here's one way to do it.
import openai

# Note: this snippet uses the pre-1.0 openai SDK (pip install "openai<1")
openai.api_key = 'YOUR_OPENAI_API_KEY'

def summarize_text(text):
    try:
        response = openai.ChatCompletion.create(
            model="gpt-3.5-turbo",  # use GPT-3.5-turbo or a newer available model
            messages=[
                {"role": "system", "content": "You are an AI assistant that summarizes texts."},
                {"role": "user", "content": f"Please provide a summary for the following text:\n{text}"}
            ],
            max_tokens=150,
            temperature=0.7
        )
        return response['choices'][0]['message']['content'].strip()
    except openai.error.OpenAIError as e:
        return f"An error occurred: {str(e)}"

# pdf_texts comes from the PDF extraction step above
summarized_data = [summarize_text(text) for text in pdf_texts]
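If you're on the 1.x version of the openai package instead, the equivalent call looks roughly like this (a sketch, same model and parameters as above):
from openai import OpenAI

client = OpenAI(api_key='YOUR_OPENAI_API_KEY')

def summarize_text_v1(text):
    response = client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[
            {"role": "system", "content": "You are an AI assistant that summarizes texts."},
            {"role": "user", "content": f"Please provide a summary for the following text:\n{text}"}
        ],
        max_tokens=150,
        temperature=0.7
    )
    return response.choices[0].message.content.strip()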
Sending Email Using SMTP
Sending email over plain SMTP is one option when you don't have a transactional email service to rely on.
import smtplib
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText

def send_email(subject, body, to_email):
    from_email = "your_email@example.com"
    password = "your_password"  # for Gmail this needs to be an app password, not the account password

    # Set up the MIME message
    message = MIMEMultipart()
    message['From'] = from_email
    message['To'] = to_email
    message['Subject'] = subject
    message.attach(MIMEText(body, 'plain'))

    # Send the email
    with smtplib.SMTP('smtp.gmail.com', 587) as server:
        server.starttls()
        server.login(from_email, password)
        server.send_message(message)

email_body = "\n\n".join(summarized_data)
send_email("Summarized PDF Data", email_body, "recipient@example.com")
Making an exe from a .py file
Make sure you have your icon ready
py -m venv venv
.\venv\Scripts\activate
pip install pyinstaller
pyinstaller --onefile --icon=icon-console.ico index.py
Adding requirements.txt
A requirements.txt file lists all the packages the project needs. After adding the packages to the file, install them with:
pip install -r requirements.txt
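As an illustration, a requirements.txt for the scripts in this post could look something like this (versions omitted; pin them if you need reproducible builds). You can also generate one from the current environment with pip freeze > requirements.txt.
pdfplumber
SpeechRecognition
openai
pandas
statsmodels
pyinstaller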
Changing Python version
I've had trouble with some packages; sometimes you need to switch to a different Python version that the package supports.
After installing another version, run the command below to list the Python versions the Windows launcher can see:
py -0
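From there, a virtual environment can be created with a specific version through the launcher (assuming, say, Python 3.10 shows up in that list):
py -3.10 -m venv venv
.\venv\Scripts\activate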
Forecasting
Time Series Models - Traditional statistical methods that explicitly use temporal data patterns such as trends, seasonality, and autoregressive behavior.
Ex: ARIMA (AutoRegressive Integrated Moving Average), SARIMA (Seasonal ARIMA), ETS (Exponential Smoothing State Space); a minimal ARIMA sketch appears after the install command below.
Machine Learning Models - Predictive models that learn patterns from historical data without explicitly modeling time dependencies.
Ex: Linear Regression, Random Forests, Gradient Boosting (XGBoost, LightGBM, CatBoost)
Deep Learning Models - Neural network-based methods that excel in capturing complex patterns in data, including long-term temporal dependencies.
Ex: LSTM (Long Short-Term Memory), GRU (Gated Recurrent Unit), Transformer-based models (e.g., Attention mechanisms)
Hybrid Models - Combines the strengths of traditional time series models with machine learning or deep learning models.
Ex: ARIMA combined with a neural network for residual forecasting.
SARIMAX for trend/seasonality and gradient boosting for residuals.
pip install statsmodels
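As a minimal sketch of the time series category above, here's a univariate ARIMA forecast with statsmodels (the series values and the (p, d, q) order are placeholders, not real production data):
import pandas as pd
from statsmodels.tsa.arima.model import ARIMA

# Placeholder series: any numeric pandas Series with a DatetimeIndex works here
series = pd.Series(
    [10, 12, 13, 12, 15, 16, 18, 17, 19, 21, 22, 24],
    index=pd.date_range('2024-01-01', periods=12, freq='D')
)

model = ARIMA(series, order=(1, 1, 1))  # (p, d, q) chosen arbitrarily for the sketch
fitted = model.fit()
forecast = fitted.forecast(steps=7)  # forecast the next 7 days
print(forecast)
The fuller example that follows fits a VAR model per machine to forecast several metrics at once.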
import pandas as pd
from statsmodels.tsa.api import VAR

# 'data' is assumed to be a DataFrame loaded earlier,
# e.g. data = pd.read_csv(...) with the columns listed below

# Select the columns to use
columns_to_use = ['date', 'machine_id', 'cycle_time', 'downtime', 'production_count']
data = data[columns_to_use]
# Convert 'date' to datetime format
data['date'] = pd.to_datetime(data['date'])
# Sort data by date and machine_id
data = data.sort_values(by=['date', 'machine_id'])
# Initialize a DataFrame to store all forecasts
all_forecasts = []
# Group the data by 'machine_id' and apply the ARIMA forecast for each machine
grouped = data.groupby('machine_id')
for machine_id, group in grouped:
    # Sort the group by date
    group = group.sort_values(by='date')

    # Train a VAR model for multivariate forecasting
    # (maxlags=15 needs a reasonably long history per machine)
    model = VAR(group[['production_count', 'cycle_time', 'downtime']])
    model_fit = model.fit(maxlags=15)

    forecast_steps = 30
    forecast = model_fit.forecast(y=group[['production_count', 'cycle_time', 'downtime']].values, steps=forecast_steps)

    # Create a new DataFrame for the forecasted data
    forecast_dates = pd.date_range(start=group['date'].max() + pd.Timedelta(days=1), periods=forecast_steps, freq='D')
    forecast_df = pd.DataFrame(forecast, columns=['production_count', 'cycle_time', 'downtime'])
    forecast_df['date'] = forecast_dates
    forecast_df['machine_id'] = machine_id

    # Append the forecast_df to the list
    all_forecasts.append(forecast_df)
# Concatenate all forecasts into a single DataFrame
forecast_result = pd.concat(all_forecasts)
# Round values for better readability
forecast_result['cycle_time'] = forecast_result['cycle_time'].round(2)
forecast_result['downtime'] = forecast_result['downtime'].round(2)
forecast_result['production_count'] = forecast_result['production_count'].round(1)
# Rearrange columns to the correct order
forecast_result = forecast_result[['date', 'machine_id', 'cycle_time', 'downtime', 'production_count']]
# Sort the forecasted data by date and machine_id
forecast_result = forecast_result.sort_values(by=['date', 'machine_id'])
# Save the forecasted data to a new CSV file with the correct column order
forecast_result.to_csv('assets/result/forecasted_production_data_reordered.csv', index=False)
Setting up Docker
# Use an official Python runtime as a parent image
FROM python:3.10-slim
# Set the working directory in the container
WORKDIR /app
# Copy the project files into the container
COPY . .
# Install dependencies from requirements.txt
RUN pip install --upgrade pip
RUN pip install -r requirements.txt
# Make Python output unbuffered so logs appear in the container output immediately
# (runtime settings from a .env file are better passed at run time, see below)
ENV PYTHONUNBUFFERED=1
# Run the Python script manually (modify as per your need)
CMD ["python", "manual.py"]
docker build -t takehome .
docker run -d --name takehome_container takehome
docker run -it takehome /bin/bash
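If the project keeps its settings in a .env file, one way to supply them to the container is at run time with --env-file (assuming the .env file sits in the project root):
docker run -d --env-file .env --name takehome_container takehome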