Skip to content

Commit

Permalink
Add encoding='utf-8' to all non-binary open calls
Browse files (browse the repository at this point in the history)
  • Loading branch information
henchaves committed Dec 11, 2024
1 parent 70a1978 commit fc827bf
Show file tree
Hide file tree
Showing 15 changed files with 37 additions and 36 deletions.
2 changes: 1 addition & 1 deletion giskard/core/model_validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,7 @@ def validate_model_loading_and_saving(model: BaseModel):
with tempfile.TemporaryDirectory(prefix="giskard-model-") as f:
model.save(f)

with open(f + "/giskard-model-meta.yaml") as yaml_f:
with open(f + "/giskard-model-meta.yaml", encoding="utf-8") as yaml_f:
saved_meta = yaml.load(yaml_f, Loader=yaml.Loader)

meta = ModelMeta(
Expand Down
10 changes: 4 additions & 6 deletions giskard/core/savable.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,13 +33,11 @@ def dependencies(self) -> Set["Artifact"]:
return set()

@abstractmethod
def _save_locally(self, local_dit: Path):
...
def _save_locally(self, local_dit: Path): ...

@classmethod
@abstractmethod
def load(cls, local_dir: Path, uuid: str, meta: SMT) -> "Artifact":
...
def load(cls, local_dir: Path, uuid: str, meta: SMT) -> "Artifact": ...

@classmethod
def _get_meta_class(cls) -> type(SMT):
Expand All @@ -54,7 +52,7 @@ def _get_meta_endpoint(cls, uuid: str, project_key: str) -> str:
return posixpath.join("project", project_key, cls._get_name(), uuid)

def _save_meta_locally(self, local_dir):
with open(Path(local_dir) / "meta.yaml", "w") as f:
with open(Path(local_dir) / "meta.yaml", "w", encoding="utf-8") as f:
yaml.dump(self.meta, f)


Expand All @@ -70,7 +68,7 @@ def _load_meta_locally(cls, local_dir, uuid: str) -> Optional[SMT]:
if meta is not None:
return meta

with open(local_dir / "meta.yaml", "r") as f:
with open(local_dir / "meta.yaml", "r", encoding="utf-8") as f:
return yaml.load(f, Loader=yaml.Loader)

@classmethod
Expand Down
6 changes: 3 additions & 3 deletions giskard/core/suite.py
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,7 @@ def to_json(self, filename=None):
"metric_value": suite_result.result.metric,
}
if filename is not None:
with open(filename, "w") as json_file:
with open(filename, "w", encoding="utf-8") as json_file:
json.dump(results, json_file, indent=4, ensure_ascii=False)
else:
return json.dumps(results, indent=4)
Expand Down Expand Up @@ -628,7 +628,7 @@ def save(self, folder: str):

json_content = self._to_json(folder_path, saved_uuid_status)

with open(folder_path / "suite.json", "w") as f:
with open(folder_path / "suite.json", "w", encoding="utf-8") as f:
json.dump(json_content, f, ensure_ascii=False)

analytics.track("lib:test_suite:saved")
Expand Down Expand Up @@ -843,7 +843,7 @@ def _contains_test(self, test: TestFunctionMeta):
def load(cls, folder: str) -> "Suite":
folder_path = Path(folder)

with open(folder_path / "suite.json", "r") as f:
with open(folder_path / "suite.json", "r", encoding="utf-8") as f:
suite_json = json.load(f)

suite = Suite(name=suite_json.get("name", "Unnamed test suite"))
Expand Down
4 changes: 2 additions & 2 deletions giskard/datasets/base/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -525,7 +525,7 @@ def cast_column_to_dtypes(df, column_dtypes):
@classmethod
def load(cls, local_path: str):
# load metadata
with open(Path(local_path) / "giskard-dataset-meta.yaml", "r") as meta_f:
with open(Path(local_path) / "giskard-dataset-meta.yaml", "r", encoding="utf-8") as meta_f:
meta = yaml.safe_load(meta_f)

# load data
Expand Down Expand Up @@ -560,7 +560,7 @@ def save(self, local_path: str):
f.write(compressed_bytes)
original_size_bytes, compressed_size_bytes = len(uncompressed_bytes), len(compressed_bytes)

with open(Path(local_path) / "giskard-dataset-meta.yaml", "w") as meta_f:
with open(Path(local_path) / "giskard-dataset-meta.yaml", "w", encoding="utf-8") as meta_f:
yaml.dump(
{
"id": str(self.id),
Expand Down
4 changes: 2 additions & 2 deletions giskard/models/base/wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -230,7 +230,7 @@ def save_model_postprocessing_function(self, local_path: Union[str, Path], *_arg
cloudpickle.dump(self.model_postprocessing_function, f, protocol=pickle.DEFAULT_PROTOCOL)

def save_wrapper_meta(self, local_path, *_args, **_kwargs):
with open(Path(local_path) / "giskard-model-wrapper-meta.yaml", "w") as f:
with open(Path(local_path) / "giskard-model-wrapper-meta.yaml", "w", encoding="utf-8") as f:
yaml.dump(
{
"batch_size": self.batch_size,
Expand Down Expand Up @@ -313,7 +313,7 @@ def load_model_postprocessing_function(cls, local_path: Union[str, Path], *_args
def load_wrapper_meta(cls, local_dir, *args, **kwargs):
wrapper_meta_file = Path(local_dir) / "giskard-model-wrapper-meta.yaml"
if wrapper_meta_file.exists():
with open(wrapper_meta_file) as f:
with open(wrapper_meta_file, encoding="utf-8") as f:
wrapper_meta = yaml.load(f, Loader=yaml.Loader)
wrapper_meta["batch_size"] = int(wrapper_meta["batch_size"]) if wrapper_meta["batch_size"] else None
return wrapper_meta
Expand Down
5 changes: 3 additions & 2 deletions giskard/models/huggingface.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,7 @@ class explicitly using :class:`giskard.models.huggingface.HuggingFaceModel`.
the `model_postprocessing_function` argument. This function should take the
raw output of your model and return a numpy array of probabilities.
"""

from typing import Any, Callable, Iterable, Optional, Tuple, Union

import logging
Expand Down Expand Up @@ -199,7 +200,7 @@ def __init__(
def load_model(cls, local_path, model_py_ver: Optional[Tuple[str, str, str]] = None, *args, **kwargs):
huggingface_meta_file = Path(local_path) / "giskard-model-huggingface-meta.yaml"
if huggingface_meta_file.exists():
with open(huggingface_meta_file) as f:
with open(huggingface_meta_file, encoding="utf-8") as f:
huggingface_meta = yaml.load(f, Loader=yaml.Loader)

if huggingface_meta["pipeline_task"]:
Expand All @@ -208,7 +209,7 @@ def load_model(cls, local_path, model_py_ver: Optional[Tuple[str, str, str]] = N
return huggingface_meta["huggingface_module"].from_pretrained(local_path)

def save_huggingface_meta(self, local_path, *args, **kwargs):
with open(Path(local_path) / "giskard-model-huggingface-meta.yaml", "w") as f:
with open(Path(local_path) / "giskard-model-huggingface-meta.yaml", "w", encoding="utf-8") as f:
yaml.dump(
{
"huggingface_module": self.huggingface_module,
Expand Down
4 changes: 2 additions & 2 deletions giskard/models/pytorch.py
Original file line number Diff line number Diff line change
Expand Up @@ -200,7 +200,7 @@ def _convert_to_numpy(self, raw_predictions):
return super()._convert_to_numpy(raw_predictions)

def save_pytorch_meta(self, local_path, *_args, **_kwargs):
with open(Path(local_path) / "giskard-model-pytorch-meta.yaml", "w") as f:
with open(Path(local_path) / "giskard-model-pytorch-meta.yaml", "w", encoding="utf-8") as f:
yaml.dump(
{
"device": self.device,
Expand All @@ -224,7 +224,7 @@ def load(cls, local_dir, model_py_ver: Optional[Tuple[str, str, str]] = None, *a
def load_pytorch_meta(cls, local_dir):
pytorch_meta_file = Path(local_dir) / "giskard-model-pytorch-meta.yaml"
if pytorch_meta_file.exists():
with open(pytorch_meta_file) as f:
with open(pytorch_meta_file, encoding="utf-8") as f:
pytorch_meta = yaml.load(f, Loader=yaml.Loader)
pytorch_meta["device"] = pytorch_meta.get("device")
pytorch_meta["torch_dtype"] = pytorch_meta.get("torch_dtype")
Expand Down
8 changes: 4 additions & 4 deletions giskard/rag/report.py
Original file line number Diff line number Diff line change
Expand Up @@ -193,11 +193,11 @@ def load(
The embedding model to use inside the knowledge base. If not provided, the default model will be used.
"""
path = Path(folder_path)
knowledge_base_meta = json.load(open(path / "knowledge_base_meta.json", "r"))
knowledge_base_meta = json.load(open(path / "knowledge_base_meta.json", "r", encoding="utf-8"))
knowledge_base_data = pd.read_json(path / "knowledge_base.jsonl", orient="records", lines=True)
testset = QATestset.load(path / "testset.jsonl")

answers = json.load(open(path / "agent_answer.json", "r"))
answers = json.load(open(path / "agent_answer.json", "r", encoding="utf-8"))
model_outputs = [AgentAnswer(**answer) for answer in answers]

topics = {int(k): topic for k, topic in knowledge_base_meta.pop("topics", None).items()}
Expand All @@ -219,9 +219,9 @@ def load(

metrics_results = {}
if (path / "metrics_results.json").exists():
metrics_results = json.load(open(path / "metrics_results.json", "r"))
metrics_results = json.load(open(path / "metrics_results.json", "r", encoding="utf-8"))

report_details = json.load(open(path / "report_details.json", "r"))
report_details = json.load(open(path / "report_details.json", "r", encoding="utf-8"))
testset._dataframe.index = testset._dataframe.index.astype(str)

report = cls(testset, model_outputs, metrics_results, knowledge_base)
Expand Down
2 changes: 1 addition & 1 deletion giskard/registry/giskard_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ def _load_meta_locally(cls, local_dir, uuid: str) -> Optional[TestFunctionMeta]:
if meta is not None:
return meta

with open(local_dir / "meta.yaml", "r") as f:
with open(local_dir / "meta.yaml", "r", encoding="utf-8") as f:
return yaml.load(f, Loader=yaml.Loader)

@classmethod
Expand Down
10 changes: 5 additions & 5 deletions giskard/scanner/report.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ def to_json(self, filename=None):
results[issue.detector_name][issue.level] = []
results[issue.detector_name][issue.level].append(issue.description)
if filename is not None:
with open(filename, "w") as json_file:
with open(filename, "w", encoding="utf-8") as json_file:
json.dump(results, json_file, indent=4, ensure_ascii=False)
else:
return json.dumps(results, indent=4)
Expand All @@ -115,7 +115,7 @@ def to_html(self, filename=None, embed=False):
html = widget.render_html(embed=embed)

if filename is not None:
with open(filename, "w") as f:
with open(filename, "w", encoding="utf-8") as f:
f.write(html)
return

Expand All @@ -139,7 +139,7 @@ def to_markdown(self, filename=None, template="summary"):
markdown = widget.render_markdown(template=template)

if filename is not None:
with open(filename, "w") as f:
with open(filename, "w", encoding="utf-8") as f:
f.write(markdown)
return

Expand Down Expand Up @@ -349,7 +349,7 @@ def to_avid(self, filename=None):
]

if filename is not None:
with open(filename, "w") as f, warnings.catch_warnings():
with open(filename, "w", encoding="utf-8") as f, warnings.catch_warnings():
warnings.filterwarnings("ignore", category=DeprecationWarning) # we need to support both pydantic 1 & 2
f.writelines(r.json() + "\n" for r in reports)
return
Expand All @@ -373,7 +373,7 @@ def generate_rails(self, filename=None, colang_version="1.0"):
_rails = generate_rails_from_scan_report(self, colang_version=colang_version)

if filename:
with open(filename, "a") as f:
with open(filename, "a", encoding="utf-8") as f:
f.write(_rails)
return

Expand Down
2 changes: 1 addition & 1 deletion giskard/scanner/robustness/text_transformations.py
Original file line number Diff line number Diff line change
Expand Up @@ -341,7 +341,7 @@ class TextNationalityTransformation(TextLanguageBasedTransformation):
name = "Switch countries from high- to low-income and vice versa"

def _load_dictionaries(self):
with Path(__file__).parent.joinpath("nationalities.json").open("r") as f:
with Path(__file__).parent.joinpath("nationalities.json").open("r", encoding="utf-8") as f:
nationalities_dict = json.load(f)
self._lang_dictionary = {"en": nationalities_dict["en"], "fr": nationalities_dict["fr"]}

Expand Down
8 changes: 5 additions & 3 deletions giskard/visualization/widget.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,9 +98,11 @@ def render_html(self, template="full", embed=False) -> str:
escaped = escape(html)
uid = id(self)

with Path(__file__).parent.joinpath("templates", "scan_report", "html", "static", "external.js").open(
"r"
) as f:
with (
Path(__file__)
.parent.joinpath("templates", "scan_report", "html", "static", "external.js")
.open("r", encoding="utf-8") as f
):
js_lib = f.read()

html = f"""<iframe id="scan-{uid}" srcdoc="{escaped}" style="width: 100%; border: none;" class="gsk-scan"></iframe>
Expand Down
4 changes: 2 additions & 2 deletions tests/fixtures/enron_multilabel_classification.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@

# get_labels returns a dictionary representation of these labels.
def get_labels(filename):
with open(filename + ".cats") as f:
with open(filename + ".cats", encoding="utf-8") as f:
labels = defaultdict(dict)
line = f.readline()
while line:
Expand Down Expand Up @@ -99,7 +99,7 @@ def enron_raw_data_full() -> pd.DataFrame:

# Features are metadata from the email object
filename = email_file + ".txt"
with open(filename) as f:
with open(filename, encoding="utf-8") as f:
message = email.message_from_string(f.read())

values_to_add["Subject"] = str(message["Subject"])
Expand Down
2 changes: 1 addition & 1 deletion tests/integrations/test_avid.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ def test_scan_report_can_be_exported_to_avid():
dest_path = Path(tmpdir).joinpath("test_report.avid")
report.to_avid(dest_path)

with dest_path.open("r") as f:
with dest_path.open("r", encoding="utf-8") as f:
avid_reports_read = [json.loads(line) for line in f.readlines()]

assert len(avid_reports_read) == len(avid_reports)
Expand Down
2 changes: 1 addition & 1 deletion tests/registry/module_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ class PythonModule:

def _write_file(dir: Path, file: Union[str, Path], content: str):
os.makedirs(os.path.dirname(dir / file), exist_ok=True)
with open(dir / file, "w") as f:
with open(dir / file, "w", encoding="utf-8") as f:
f.write(content)


Expand Down

0 comments on commit fc827bf

Please sign in to comment.