Source code for wildnlp.datasets.squad

import copy
import json

from .base import Dataset, file_exists_check


class SQuAD(Dataset):
    """The SQuAD dataset.

    For details see: https://rajpurkar.github.io/SQuAD-explorer/
    """

    def __init__(self):
        # Parsed SQuAD JSON document; remains None until load() is called.
        self._data = None

    # NOTE(review): file_exists_check comes from .base; presumably it
    # validates that ``path`` exists before the method body runs -- confirm.
    @file_exists_check
    def load(self, path):
        """Loads a SQuAD dataset.

        :param path: A path to a SQuAD data file. The file is parsed as a
                     single JSON document (not line-delimited JSON).
        :return: None
        """
        # Decode explicitly as UTF-8 rather than relying on the platform's
        # locale encoding, which would make reading non-ASCII text
        # machine-dependent.
        with open(path, 'r', encoding='utf-8') as f:
            self._data = json.load(f)

    def apply(self, aspect):
        """Modifies questions in the dataset leaving other data intact.

        :param aspect: A callable that receives a question and returns
                       its modified version.
        :return: A deep copy of the loaded data in which every question
                 has been replaced by the result of ``aspect``.
        :raises TypeError: If no dataset has been loaded yet, because the
                           stored data is still ``None``.
        """
        # Work on a copy so the loaded dataset can be reused with other
        # aspects.
        modified = copy.deepcopy(self._data)

        for entry in modified['data']:
            for paragraph in entry['paragraphs']:
                for qa in paragraph['qas']:
                    qa['question'] = aspect(qa['question'])

        return modified

    def save(self, data, path):
        """Saves data in the SQuAD format.

        :param data: A JSON-serializable object, e.g. the result
                     of ``apply``.
        :param path: A path of the output file; it is overwritten
                     if it already exists.
        :return: None
        """
        # Explicit encoding keeps the output independent of the platform's
        # locale settings.
        with open(path, 'w', encoding='utf-8') as f:
            json.dump(data, f)