import tiktoken
from openai import OpenAI

# Chat model used for every translation request.
model = "gpt-4o"

# System message sent with every request.
# NOTE(review): "Your translate" looks like a typo for "You translate"; it is
# left unchanged here because the string is sent to the model verbatim.
system_prompt = "Your translate parts of an article from Russian to English. It contains markdown; leave the markup, links and other formatting intact, translating the actual text."

# Upper bound, in tokens, for the paragraphs batched into one request.
input_token_limit = 2000


def translate(text):
    """Translate *text* chunk by chunk and return the joined translation.

    The text is split into paragraphs on blank lines ("\\n\\n"), the
    paragraphs are batched into chunks of at most ``input_token_limit``
    tokens, and each chunk is sent to the chat completions API together
    with ``system_prompt``. The translated chunks are joined back with
    blank lines.

    Args:
        text: The source text to translate.

    Returns:
        The translated text, or an empty string when *text* is empty or
        contains only whitespace (no request is made in that case).
    """
    if not text.strip():
        # Nothing to translate; avoid sending an empty message to the API.
        return ""

    client = OpenAI()
    translation = []
    for chunk in _chunk_paragraphs(text, input_token_limit):
        if chunk.strip():
            translation.append(_translate_chunk(client, chunk))
        else:
            # A chunk made only of empty paragraphs carries no text; keep it
            # as-is so the paragraph spacing survives, without an API call.
            translation.append(chunk)
    return "\n\n".join(translation)


def _chunk_paragraphs(text, token_limit):
    """Yield "\\n\\n"-joined groups of paragraphs that fit in *token_limit*."""
    buf = []
    bufsize = 0
    for paragraph in text.split("\n\n"):
        size = num_tokens_from_string(paragraph)
        # Flush *before* adding a paragraph that would push the buffer over
        # the limit. A single paragraph larger than the limit is still
        # emitted on its own, since paragraphs are never split.
        if buf and bufsize + size > token_limit:
            yield "\n\n".join(buf)
            buf = []
            bufsize = 0
        buf.append(paragraph)
        # The "\n\n" separators are not counted towards the total.
        bufsize += size
    if buf:
        yield "\n\n".join(buf)


def _translate_chunk(client, chunk):
    """Send one chunk to the chat completions API and return the reply text."""
    chat_completion = client.chat.completions.create(
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": chunk},
        ],
        model=model,
    )
    return chat_completion.choices[0].message.content


def num_tokens_from_string(string, encoding_name="o200k_base"):
    """Return the number of tokens *string* encodes to.

    Args:
        string: The text to measure.
        encoding_name: Name of the tiktoken encoding to use.

    Returns:
        The length of the token sequence produced by the encoding.
    """
    encoding = tiktoken.get_encoding(encoding_name)
    return len(encoding.encode(string))