Thanks for visiting codestin.com
Credit goes to www.scribd.com

0% found this document useful (0 votes)
23 views4 pages

Print

The document contains a JavaScript function for extracting questions from a text input, specifically formatted for a PDF file. It also includes an asynchronous upload function that handles PDF file uploads to the Mathpix API, checks the processing status, and extracts questions from the processed data. The extracted questions are then returned as a JSON response, along with metadata about the processing status and time taken.

Uploaded by

Debasmit Bose
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as TXT or PDF, or read online on Scribd
0% found this document useful (0 votes)
23 views4 pages

Print

The document contains a JavaScript function for extracting questions from a text input, specifically formatted for a PDF file. It also includes an asynchronous upload function that handles PDF file uploads to the Mathpix API, checks the processing status, and extracts questions from the processed data. The extracted questions are then returned as a JSON response, along with metadata about the processing status and time taken.

Uploaded by

Debasmit Bose
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as TXT or PDF, or read online on Scribd
You are on page 1/ 4

import axios from 'axios';

import FormData from 'form-data';


import * as fs from 'fs';
import path from 'path';

/**
 * Extract multiple-choice questions from a block of text.
 *
 * The text is cut into sections in front of every "<number>. " marker. A
 * section yields a question only when its first non-empty line starts with
 * such a marker and at least four "(A)".."(D)" style options follow; the
 * first four options found are kept. Sections that do not meet both
 * conditions are skipped.
 *
 * @param {string} text - Raw text to parse.
 * @returns {Array<{question: string, diagram: null, options: string[]}>}
 *   One entry per recognised question, in order of appearance. Returns an
 *   empty array when `text` is not a string or is blank.
 */
function extractQuestionsImproved(text) {
  if (typeof text !== 'string' || text.trim() === '') {
    return [];
  }

  // A line that opens with an option marker ends the question stem.
  const optionLinePattern = /^\([A-D]\)/;

  // One option: a marker followed by text that runs up to the next marker or
  // the end of the input. The text may contain parentheses (e.g. "f(x)") as
  // long as they are not themselves an option marker.
  const optionPattern =
    /\([A-D]\)\s*((?:(?!\([A-D]\))[\s\S])+?)(?=\s*\([A-D]\)|$)/g;

  const questions = [];

  // Split into sections and process each
  const sections = text.split(/(?=\d+\.\s)/);

  for (const section of sections) {
    const lines = section
      .split('\n')
      .map((line) => line.trim())
      .filter((line) => line.length > 0);
    if (lines.length === 0) continue;

    // Find question number and text
    const qMatch = lines[0].match(/^(\d+)\.\s*(.+)/);
    if (!qMatch) continue;

    let questionText = qMatch[2];
    let currentLineIndex = 1;

    // Continue collecting question text until we hit options
    while (
      currentLineIndex < lines.length &&
      !optionLinePattern.test(lines[currentLineIndex])
    ) {
      const line = lines[currentLineIndex];
      // Lines that look like another numbered item, contain a backslash, or
      // contain the word 'স্তম্ভ' are left out of the stem (presumably LaTeX
      // and table-header residue — TODO confirm against real input).
      const isExcluded =
        /^\d+\./.test(line) || line.includes('\\') || line.includes('স্তম্ভ');
      if (!isExcluded) {
        questionText += ` ${line}`;
      }
      currentLineIndex++;
    }

    // Extract options
    const remainingText = lines.slice(currentLineIndex).join(' ');
    const optionMatches = [...remainingText.matchAll(optionPattern)];
    if (optionMatches.length < 4) continue;

    const options = optionMatches.slice(0, 4).map((match) => match[1].trim());

    // Clean question text: collapse whitespace and normalise one known
    // spelling variant in the stem.
    questionText = questionText
      .replace(/\s+/g, ' ')
      .replace(/হলে\s*নীচের\s*কোন্টি/g, 'হলে নীচের কোনটি')
      .trim();

    questions.push({
      question: questionText,
      diagram: null,
      options,
    });
  }

  return questions;
}

/**
 * Build the Mathpix credential headers from the environment.
 *
 * @returns {{app_id: (string|undefined), app_key: (string|undefined)}}
 */
function mathpixAuthHeaders() {
  return {
    'app_id': process.env.MATHPIX_API_ID,
    'app_key': process.env.MATHPIX_API_KEY,
  };
}

/**
 * Resolve after the given number of milliseconds.
 *
 * @param {number} ms - Delay in milliseconds.
 * @returns {Promise<void>}
 */
function pauseFor(ms) {
  return new Promise((resolve) => setTimeout(resolve, ms));
}

/**
 * Request handler: send the uploaded PDF to Mathpix, poll until processing
 * finishes, extract questions from the resulting MMD text and reply with JSON.
 *
 * Responses:
 *  - 200 `{ success: true, message, data: { pdf_id, total_questions,
 *    questions, processing_time } }` on success.
 *  - 400 `{ success: false, message, error }` when `req.file` is missing.
 *  - 500 `{ success: false, message, error }` on any upload, polling,
 *    download or extraction failure, including a polling timeout.
 *
 * @param {object} req - Request; `req.file.path` must point at the uploaded
 *   PDF on disk (presumably populated by upload middleware — confirm).
 * @param {object} res - Response exposing chainable `status()` and `json()`.
 * @returns {Promise<void>}
 */
const upload = async (req, res) => {
  const pdf_file = req.file;

  // Reject early instead of failing later with an opaque TypeError.
  if (!pdf_file?.path) {
    res.status(400).json({
      success: false,
      message: "Failed to process PDF",
      error: "No PDF file was uploaded"
    });
    return;
  }

  const pdfEndpoint = 'https://api.mathpix.com/v3/pdf';
  const maxAttempts = 5;
  const pollInterval = 7000;

  try {
    // Upload PDF
    let pdf_id;
    try {
      const form = new FormData();
      form.append('file', fs.readFileSync(pdf_file.path), pdf_file.path);
      form.append(
        'options_json',
        '{ "math_inline_delimiters": ["$", "$"], "rm_spaces": true}'
      );

      const postResponse = await axios.post(pdfEndpoint, form, {
        headers: {
          ...form.getHeaders(),
          ...mathpixAuthHeaders()
        }
      });
      pdf_id = postResponse.data.pdf_id;
    } finally {
      // Clean up uploaded file whether or not the upload succeeded.
      fs.rmSync(pdf_file.path, { force: true });
    }

    if (!pdf_id) {
      throw new Error('Upload response did not contain a pdf_id');
    }

    console.log(`PDF uploaded successfully. PDF ID: ${pdf_id}`);

    // Poll for completion
    let attempts = 0;
    let isComplete = false;

    while (!isComplete && attempts < maxAttempts) {
      attempts++;
      console.log(`Checking processing status... Attempt ${attempts}`);

      let status;
      try {
        const statusResponse = await axios.get(`${pdfEndpoint}/${pdf_id}`, {
          headers: mathpixAuthHeaders()
        });
        status = statusResponse.data.status;
      } catch (pollError) {
        // Only failures of the status request itself are retried here; a 404
        // aborts immediately.
        console.error(`Error checking status: ${pollError.message}`);
        if (pollError.response?.status === 404) {
          throw new Error('PDF not found - may have expired', {
            cause: pollError
          });
        }
        await pauseFor(pollInterval);
        continue;
      }

      console.log(`Status: ${status}`);

      if (status === 'completed') {
        isComplete = true;
      } else if (status === 'error' || status === 'failed') {
        // Thrown outside the retry handler so a terminal status stops polling.
        throw new Error(`PDF processing failed with status: ${status}`);
      } else {
        // Still processing, wait before next poll
        await pauseFor(pollInterval);
      }
    }

    if (!isComplete) {
      throw new Error(
        `PDF processing timeout after ${maxAttempts * pollInterval / 1000} seconds`
      );
    }

    // Get the processed data directly in memory. Failures from here on reach
    // the outer handler, so the client always receives a response.
    const resultResponse = await axios.get(`${pdfEndpoint}/${pdf_id}.mmd`, {
      headers: mathpixAuthHeaders()
    });

    // Extract questions from the MMD content
    const extractedQuestions = extractQuestionsImproved(resultResponse.data);
    console.log(`Extracted ${extractedQuestions.length} questions`);

    // Send JSON response with extracted questions
    res.status(200).json({
      success: true,
      message: "PDF processed and questions extracted successfully",
      data: {
        pdf_id: pdf_id,
        total_questions: extractedQuestions.length,
        questions: extractedQuestions,
        // Derived from the number of polls, not from a measured duration.
        processing_time: `${attempts * pollInterval / 1000} seconds`
      }
    });
  } catch (error) {
    console.error('Upload error:', error);
    res.status(500).json({
      success: false,
      message: "Failed to process PDF",
      error: error.message
    });
  }
};

export { upload };

You might also like