Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support for UD_OLD_FRENCH added #1964

Merged
merged 5 commits into from
Nov 12, 2020
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions flair/datasets/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,7 @@
from .treebanks import UD_NORTH_SAMI
from .treebanks import UD_MALTESE
from .treebanks import UD_AFRIKAANS
from .treebanks import UD_OLD_FRENCH
from .treebanks import UD_GOTHIC

# Expose all text-text datasets
Expand Down
32 changes: 31 additions & 1 deletion flair/datasets/treebanks.py
Original file line number Diff line number Diff line change
Expand Up @@ -1226,6 +1226,7 @@ def __init__(self, base_path: Union[str, Path] = None, in_memory: bool = True):

super(UD_NORTH_SAMI, self).__init__(data_folder, in_memory=in_memory)


class UD_MALTESE(UniversalDependenciesCorpus):
def __init__(self, base_path: Union[str, Path] = None, in_memory: bool = True):

Expand All @@ -1252,6 +1253,7 @@ def __init__(self, base_path: Union[str, Path] = None, in_memory: bool = True):

super(UD_MALTESE, self).__init__(data_folder, in_memory=in_memory)


class UD_AFRIKAANS(UniversalDependenciesCorpus):
def __init__(self, base_path: Union[str, Path] = None, in_memory: bool = True):

Expand All @@ -1277,8 +1279,9 @@ def __init__(self, base_path: Union[str, Path] = None, in_memory: bool = True):
)

super(UD_AFRIKAANS, self).__init__(data_folder, in_memory=in_memory)


class UD_GOTHIC(UniversalDependenciesCorpus):
class UD_GOTHIC(UniversalDependenciesCorpus):
def __init__(self, base_path: Union[str, Path] = None, in_memory: bool = True):

if type(base_path) == str:
Expand All @@ -1303,3 +1306,30 @@ def __init__(self, base_path: Union[str, Path] = None, in_memory: bool = True):
)

super(UD_GOTHIC, self).__init__(data_folder, in_memory=in_memory)


class UD_OLD_FRENCH(UniversalDependenciesCorpus):
    """Universal Dependencies corpus built from the UD_Old_French-SRCMF treebank.

    On construction the dev, test and train ``.conllu`` files of the treebank
    are fetched through ``cached_path`` and the parent corpus is initialised
    on the resulting dataset folder.
    """

    def __init__(self, base_path: Union[str, Path] = None, in_memory: bool = True):
        """Fetch the treebank files and initialise the corpus.

        :param base_path: Folder that contains (or will contain) the dataset
            folder. Strings are converted to ``Path``. When empty or ``None``,
            ``<flair.cache_root>/datasets`` is used.
        :param in_memory: Forwarded unchanged to the parent corpus constructor.
        """
        if isinstance(base_path, str):
            base_path = Path(base_path)

        # this dataset name (lower-cased class name, i.e. "ud_old_french")
        dataset_name = self.__class__.__name__.lower()

        # default dataset folder is the cache root
        if not base_path:
            base_path = Path(flair.cache_root) / "datasets"
        data_folder = base_path / dataset_name

        # download data if necessary
        # NOTE: the files must come from the raw-content host. Review feedback
        # on the earlier "github.com/.../tree/master" URL was that it did not
        # download the correct file (that URL serves the HTML tree view, not
        # the CoNLL-U data).
        web_path = "https://raw.githubusercontent.com/UniversalDependencies/UD_Old_French-SRCMF/master"
        for split in ("dev", "test", "train"):
            cached_path(
                f"{web_path}/fro_srcmf-ud-{split}.conllu",
                Path("datasets") / dataset_name,
            )

        super(UD_OLD_FRENCH, self).__init__(data_folder, in_memory=in_memory)