From f0af559ede50b5afdac69c981f88637080f3d89c Mon Sep 17 00:00:00 2001
From: Zifan Jiang
Date: Wed, 8 Jan 2025 14:10:07 +0000
Subject: [PATCH 1/2] bobsl islr dataset

---
 sign_language_datasets/datasets/__init__.py   |   1 +
 .../datasets/bobsl_islr/__init__.py           |   3 +
 .../datasets/bobsl_islr/bobsl_islr.py         | 252 ++++++++++++++++++
 .../TODO-add_fake_data_in_this_directory.txt  |   0
 .../datasets/bobsl_islr/holistic.poseheader   | Bin 0 -> 14805 bytes
 5 files changed, 256 insertions(+)
 create mode 100644 sign_language_datasets/datasets/bobsl_islr/__init__.py
 create mode 100644 sign_language_datasets/datasets/bobsl_islr/bobsl_islr.py
 create mode 100644 sign_language_datasets/datasets/bobsl_islr/dummy_data/TODO-add_fake_data_in_this_directory.txt
 create mode 100644 sign_language_datasets/datasets/bobsl_islr/holistic.poseheader

diff --git a/sign_language_datasets/datasets/__init__.py b/sign_language_datasets/datasets/__init__.py
index a177e0b..edfe5f2 100644
--- a/sign_language_datasets/datasets/__init__.py
+++ b/sign_language_datasets/datasets/__init__.py
@@ -1,6 +1,7 @@
 from .aslg_pc12 import AslgPc12
 from .asl_lex import AslLex
 from .autsl import AUTSL
+from .bobsl_islr import BobslIslr
 from .chicago_fs_wild import ChicagoFSWild
 from .config import SignDatasetConfig
 from .dgs_types import DgsTypes
diff --git a/sign_language_datasets/datasets/bobsl_islr/__init__.py b/sign_language_datasets/datasets/bobsl_islr/__init__.py
new file mode 100644
index 0000000..4e73f81
--- /dev/null
+++ b/sign_language_datasets/datasets/bobsl_islr/__init__.py
@@ -0,0 +1,3 @@
+"""BOBSL ISLR dataset."""
+
+from .bobsl_islr import BobslIslr
diff --git a/sign_language_datasets/datasets/bobsl_islr/bobsl_islr.py b/sign_language_datasets/datasets/bobsl_islr/bobsl_islr.py
new file mode 100644
index 0000000..c8dda16
--- /dev/null
+++ b/sign_language_datasets/datasets/bobsl_islr/bobsl_islr.py
@@ -0,0 +1,252 @@
+"""BOBSL is a large-scale dataset of British Sign Language (BSL)."""
+
+import csv
+import json
+import pickle
+import os
+from os import path
+from tqdm import tqdm
+from typing import Union
+from collections import defaultdict
+
+import numpy as np
+
+import tensorflow as tf
+import tensorflow_datasets as tfds
+
+from pose_format import Pose
+
+from sign_language_datasets.utils.features import PoseFeature
+
+from ..warning import dataset_warning
+from ...datasets.config import SignDatasetConfig, cloud_bucket_file
+
+_DESCRIPTION = """
+~5M noisy isolated signs collected from sign spottings on BOBSL
+"""
+
+_CITATION = """
+@inproceedings{momeni2022automatic,
+  title={Automatic dense annotation of large-vocabulary sign language videos},
+  author={Momeni, Liliane and Bull, Hannah and Prajwal, KR and Albanie, Samuel and Varol, G{\"u}l and Zisserman, Andrew},
+  booktitle={European Conference on Computer Vision},
+  pages={671--690},
+  year={2022}
+}
+
+@Article{Albanie2021bobsl,
+  author       = "Samuel Albanie and G{\"u}l Varol and Liliane Momeni and Hannah Bull and Triantafyllos Afouras and Himel Chowdhury and Neil Fox and Bencie Woll and Rob Cooper and Andrew McParland and Andrew Zisserman",
+  title        = "{BOBSL}: {BBC}-{O}xford {B}ritish {S}ign {L}anguage {D}ataset",
+  howpublished = "https://www.robots.ox.ac.uk/~vgg/data/bobsl",
+  year         = "2021",
+  journal      = "arXiv"
+}
+"""
+
+_DOWNLOAD_URL = 'https://www.robots.ox.ac.uk/~vgg/data/bobsl/'
+
+_POSE_HEADERS = {"holistic": path.join(path.dirname(path.realpath(__file__)), "holistic.poseheader")}
+
+
+class BobslIslr(tfds.core.GeneratorBasedBuilder):
+    """DatasetBuilder for the BOBSL ISLR dataset."""
+
+    VERSION = tfds.core.Version("1.4.0")
+    RELEASE_NOTES = {"1.4.0": "v1.4"}
+
+    BUILDER_CONFIGS = [SignDatasetConfig(name="default", include_pose="holistic")]
+
+    def _info(self) -> tfds.core.DatasetInfo:
+        """Returns the dataset metadata."""
+
+        features = {
+            "id": tfds.features.Text(),
+            "text": tfds.features.Text(),
+        }
+
+        # TODO: add videos
+
+        if self._builder_config.include_pose == "holistic":
+            pose_header_path = _POSE_HEADERS[self._builder_config.include_pose]
+            stride = 1 if self._builder_config.fps is None else 25 / self._builder_config.fps
+            features["pose"] = PoseFeature(shape=(None, 1, 576, 3), header_path=pose_header_path, stride=stride)
+
+        return tfds.core.DatasetInfo(
+            builder=self,
+            description=_DESCRIPTION,
+            features=tfds.features.FeaturesDict(features),
+            homepage=_DOWNLOAD_URL,
+            supervised_keys=None,
+            citation=_CITATION,
+        )
+
+    def _split_generators(self, dl_manager: tfds.download.DownloadManager):
+        """Returns SplitGenerators."""
+        dataset_warning(self)
+
+        # too expensive to host the poses at the moment, need to specify a local path
+        # poses_dir = str(dl_manager.download_and_extract(_POSE_URLS["holistic"]))
+        poses_dir = self._builder_config.extra["poses_dir"]
+        poses_dir_test = self._builder_config.extra["poses_dir_test"]
+
+        print(f'Generating train and valid set ...')
+        # Copy aggregated annotations from the vgg_islr repo
+        # Originated from K R Prajwal
+        # bobsl_unfiltered_mouthings_praj_hannah_sent_plus_dict_ha_align_syn_plus_attention_plus_i3d_top1_pseudo_labels_clean_8697.pkl
+
+        type2offsets = {
+            "prajwal_mouthing" : (-9, 11), "dict" : (-3, 22), "attention" : (-8, 18),
+            "i3d_pseudo_label" : (0, 19), "mouthing" : (-15, 4),
+            "swin_pseudo_label" : (5, 25), "other" : (-8, 8), "cos_sim" : (0, 19)
+        }
+        type2prob_thresh = {
+            0 :  # train
+            {"prajwal_mouthing" : 0.8, "dict" : 0.8, "attention" : 0., "i3d_pseudo_label" : 0.5,
+             "swin_pseudo_label" : 0.3, "cos_sim" : 2., "other" : 0.},
+
+            1 : {"prajwal_mouthing" : 0.8, "dict" : 0.9, "attention" : 0., "i3d_pseudo_label" : 0.5,
+                 "swin_pseudo_label" : 0.3, "cos_sim" : 2., "other" : 0.},
+
+            3 : {"prajwal_mouthing" : 0.8, "dict" : 0.8, "mouthing" : 0.8, "other" : 0.},
+        }
+        TRAIN_SPLIT_NUM = 0
+        VAL_SPLIT_NUM = 1
+        vocab_file = "/work/sign-language/haran/bobsl/vocab/8697_vocab.pkl"
+        spotting_file = "/work/sign-language/youngjoon/islr/anno.pkl"
+        fps = 25
+
+        print('Load vocab ...')
+        with open(vocab_file, 'rb') as f:
+            vocab = pickle.load(f)["words_to_id"]
+        id2word = {id : word for word, id in vocab.items()}
+        print('Vocab size:', len(vocab))
+        print('Load spotting annotations ...')
+        with open(spotting_file, 'rb') as f:
+            data = pickle.load(f)
+            data = data["videos"]
+
+        examples = {
+            TRAIN_SPLIT_NUM: defaultdict(list),
+            VAL_SPLIT_NUM: defaultdict(list),
+        }
+        for split_idx in [TRAIN_SPLIT_NUM,VAL_SPLIT_NUM]:
+            count = 0
+
+            print('Load examples from anno.pkl ...')
+            for i in tqdm(range(len(data["name"]))):
+                if data["split"][i] == split_idx:
+                    if data["word"][i] in ['_fingerspelling', '_nosigning', '_pointing', '_lexical_signing']:
+                        continue
+
+                    if data["word"][i] in vocab:
+                        if data["mouthing_prob"][i] >= type2prob_thresh[split_idx][data["anno_type"][i]]:
+                            pose_filename = data["name"][i].replace('.mp4', '.pose')
+                            pose_path = path.join(poses_dir, pose_filename)
+
+                            if os.path.exists(pose_path):
+                                time = int(data["mouthing_time"][i] * fps)
+                                start_offset, end_offset = type2offsets[data["anno_type"][i]]
+                                s, e = max(0, time + start_offset), time + end_offset
+
+                                w_l = data["word"][i]
+                                w_l = w_l if isinstance(w_l, tuple) else [w_l]
+
+                                for w in w_l:
+                                    examples[split_idx][pose_filename].append({
+                                        'idx': f"{w}-{pose_filename.replace('.pose', '')}-{i}",
+                                        'text': w,
+                                        'start_frame': s,
+                                        'end_frame': e,
+                                    })
+
+                                # count = count + 1
+                                # if count >= 20:
+                                #     break
+                            else:
+                                print(f'{pose_path} does not exist, skipping ...')
+
+        print(f'Generating test set ...')
+        # Read the 25K ISOLATED SIGNS annotation files from BOBSL website
+
+        BOBSL_PATH = '/athenahomes/zifan/BOBSL/v1.4'
+        annotations = {
+            'test': {
+                'dict': {
+                    'spottings_path': f"{BOBSL_PATH}/manual_annotations/isolated_signs/verified_dict_spottings.json",
+                    'range': [-3, 22],
+                },
+                'mouthing': {
+                    'spottings_path': f"{BOBSL_PATH}/manual_annotations/isolated_signs/verified_mouthing_spottings.json",
+                    'range': [-15, 4],
+                },
+            },
+        }
+
+        test_examples = []
+        for annotation_source, annotation in annotations['test'].items():
+            print(f'Loading {annotation_source} ...')
+
+            file_path = annotation['spottings_path']
+            if file_path.endswith('.json'):
+                with open(file_path, "r") as file:
+                    data = json.load(file)
+                    data = data['test']
+
+            annotation['total_num'] = sum([len(d['names']) for d in data.values()])
+            annotation['vocab'] = len(data)
+
+            for gloss, value in tqdm(list(data.items())):
+                for i, name in enumerate(value['names']):
+                    global_time = value['global_times'][i]
+                    filename = f"{name}-{str(global_time).replace('.', '_')}"
+                    idx = f"{gloss}-{filename}"
+                    pose_path = f"{annotation_source}/{gloss}/{filename}.pose"
+
+                    test_examples.append({
+                        'idx': idx,
+                        'pose_path': pose_path,
+                        'text': gloss,
+                    })
+
+        print('Train:', sum([len(d) for d in examples[TRAIN_SPLIT_NUM].values()]))
+        print('Valid:', sum([len(d) for d in examples[VAL_SPLIT_NUM].values()]))
+        print('Test:', len(test_examples))
+
+        return [
+            tfds.core.SplitGenerator(name=tfds.Split.TRAIN, gen_kwargs={"poses_dir": poses_dir, "examples": examples[TRAIN_SPLIT_NUM]}),
+            tfds.core.SplitGenerator(name=tfds.Split.VALIDATION, gen_kwargs={"poses_dir": poses_dir, "examples": examples[VAL_SPLIT_NUM]}),
+            tfds.core.SplitGenerator(name=tfds.Split.TEST, gen_kwargs={"poses_dir": poses_dir_test, "examples": test_examples}),
+        ]
+
+    def _generate_examples(self, poses_dir: str, examples: Union[list, dict]):
+        """Yields examples."""
+
+        if isinstance(examples, list):
+            for example in examples:
+                datum = {"id": example["idx"], "text": example["text"]}
+                mediapipe_path = path.join(poses_dir, example['pose_path'])
+
+                with open(mediapipe_path, "rb") as f:
+                    try:
+                        pose = Pose.read(f.read())
+                        datum["pose"] = pose
+
+                        yield datum["id"], datum
+                    except Exception as e:
+                        print(e)
+        elif isinstance(examples, dict):
+            for pose_path, episode_examples in examples.items():
+                mediapipe_path = path.join(poses_dir, pose_path)
+
+                with open(mediapipe_path, "rb") as f:
+                    buffer = f.read()
+
+                for episode_example in episode_examples:
+                    try:
+                        datum = {"id": episode_example["idx"], "text": episode_example["text"]}
+                        datum["pose"] = Pose.read(buffer, start_frame=episode_example["start_frame"], end_frame=episode_example["end_frame"])
+
+                        yield datum["id"], datum
+                    except Exception as e:
+                        print(e)
+
diff --git a/sign_language_datasets/datasets/bobsl_islr/dummy_data/TODO-add_fake_data_in_this_directory.txt b/sign_language_datasets/datasets/bobsl_islr/dummy_data/TODO-add_fake_data_in_this_directory.txt
new file mode 100644
index 0000000..e69de29
diff --git a/sign_language_datasets/datasets/bobsl_islr/holistic.poseheader b/sign_language_datasets/datasets/bobsl_islr/holistic.poseheader
new file mode 100644
index 0000000000000000000000000000000000000000..ae663d67dd39b9a7ca7ee8fb863c72d19a6c6b6f
GIT binary patch
literal 14805
[... base85-encoded contents of holistic.poseheader (14805 bytes) omitted ...]
z{COF#>I~Zx*z0Ypcif>oEJ&oo7)X`r&HmbRb|{3ZJo2^`p7vy~JJ{Do_H>+T+Zw&D z<+BfZs<@RdIYg2qt#rygLH?Zj$SY zARGcO=dtqBAajtsypPtWf^=b2_LJ1WTZiI(CvnsAxK=QXi=ajy($(Z@JDF+$k2CpM zN+&S^uNb0($eBH@>;)&yU0>H{afFt{v}G+pcy$%d0VQz!>trehH!4ooQU~Ac!%9xz z@0oPer>S2G2iDU;4yWqvhSyQ}u!mmv;8j)8_$AKqbx`7Tuv%lT$S&^TiG5geH$3cX zybhPeTRtH7WAV0GAU;{j@$799Oi0JskD$j&~`QYT#Uu{_+R* z8VDDHaqeJzW;i=P4bN-Av9&03AJu9jCz@gOf~~plJUDZdJH_E%@o>MOhRRR4T`sa2 z3nIPA|8Ba=GIR_hsh?%(HTKh0Re=Y}d2JJ3Hx4f4Br~({<3M=QlY9>2S>jdn@-jJE zK#dBb?>UV&zC_zs$i?%x`XS~_(ib(8-mWv6UXDI`v$mb!U6LAk0v{hv4sO7+qwHiZ zoj@0M5Kqm$k7w0`r!P?juhM5P;pyi&I-CP=^r2Kh=YCXBm#iG37WT#WPU`dMzYzWJ ztMt4D*wZdJZ~qU;DtP!S5j$ilyDN*@zv0xl0@tjA!lIblk9POc3Do4w`+zgjd>HbO zT$h(YoF!V2*F;T+qr;>-JUGRPPE~3^+bdoJO@1NjV<5tcd zWq9VAfg(%MB^*T?yErGMaZakm8Y`lKcUargsBJ!8@DRVqr^8rRed^BwX6#44`jh8H zto=N`G6!dR9}b znl&_~>!?C?7)|cZp_l#WZ7}*>#Q7;7I1M4D2CnbJk3{m|Sf}B7CsQp4A6kNTu2&i^*6+STF~6b;EHJSe2jta1-d} zMuBJ0&mKE9p^$IzfH7RJOs{sCUT8e=Q{mW$^!4}P`%*lhD_ZXa5(PmxicTSx{ReSE zZBN|}=G1i&z9o>QLgaHi`3_>=<HVE~;?R%scL*L)gTAdij!CzNzc}>E zo#FFr>dz3Z&TdMBUt!p{o4G$i5v9OWKnh;K3 zlb8M6pu^q86Sued_a?f?q(*GwT+<8vTm_vOT9a&Ugr~J&M+NSG75$8XaW(0&$5FYq zqWfx8sOqq>06AWOQ_khiHNh({`?$@z9aj0Pl(aol+q z+&BlaXK{o)tfQmur$&EIJsV0Ny@w~F*BJX5-qv0(F(S;_vc z;G@eq)15)dEpX)DKG{48a?S9@CER5Xp5BsdRYuoqC5(Oa1jCt}mg~Z(yc!7$CV^le zOvWA1Wj3B0#WG?beMCO?{{ed3M+HkoUxD=e2jTEjAUc=1uhZG}WBxT{V>*aWc;ri2&rUvN{p&crr!#9DTFb;&UA#ISt`^4Y&f>mx zG&k>WLFc8YB;`1J4`7WUX!a`A;W{Yy2b(uRcq+Po8;^UA^Y=7*(v~RhE6zwe=@Gw% zF>eub2b~XrpW9LEC|>P{OP|sYzlg7HhcAs;!8)E61mO<7&`}AxEtYf+I^PDDfAT$as2*NSPxhHf-IJofAQ?IKhL8+!XNYVtmHj(nu_lgzPY8#R^n0+P2Hv|K zmnhEXC|yzZ>#Y0%pH8IF;dMmY33R?Y@bN)Z;d8989DPIso^4%(N4vAIg004AG;sNO4e_wSDo>Qb#(j9aJC>kv^>4#JKSp*{JDizR#2IzQemf{mqT-697xQ_LA_<9WIJJHH?ruE3t zN;puB-o{R@->}xB^jtToCnK1#9Vem%JP-Jsj-weJ^+Y@)lH9$F8(f3S$9bIx?l-31 zwIX--(O4xsVIiZMz=Sk*c!+b$P@Hiyu9A%oDFsivMpyARSxo1Q+81X($6jBeE1ksd zKEn%EQiCtz1C8l*ro+nhxN3Vcok4x7&1^Hl=M-B1f_j=nPq>BW(BIOvO@`lg>bge$ ztFgZotmi7)e-qC0NgW=NmkQHZDuDZBcDjs8o|n2GPreS4om#kJFL16$MvE}^964@H zzMA3`WzoVko)jH~g{A1RmNRxQyIRH`K7^64!>h5>o4L5pOVq#dIPN@L@-P`#PbE7^ zy_?0ZkAP7lSk{~TjzP;HME8=wYP&M(Q#dn&eyBcKxdh8ggZ4DBlZ!p^>_QE4aMNtK z$3c3T3-r>TFlH!q=b!WsDfH1Z!F(%TaDzT?7U!urcy*DpMMrj+2eiK7Zja&z z`MmKax$;q8XVXhxq?b;^xuZe!0^F=dl`DkjSK+Mt4gPS1HOztEd3hpL3C-33jYY%; zF~e}qHrDADhimuni`HbS2{X+F^9J0rCp9FE+?T{JqESvQysjR5;|U`;~D zh#T+B>J--UT=04PC7w*>=6pOGt>mIB+>I}dM|G*}c8@H_4*ZeTp#T?Jp558o6&97mPO?A=8LYS4w*)5%C zJnPXxPOk4q*M~vn1y1)3$bp}1*{9X7(B~)Vk1*#w8Ou53bJjQ%Hb$Y(RWpb@-&368hp^P;Z!P$)HQAwAqa#tx?ExlMIy49cdbGYBhPR<_A8e;-&4F)SW?!G-I1k8u1vGq_=cn@| zo@Y`QK({>m*+6D~qAS0PH*LlzqtWvzcwLP%$7K2?fw5U%)@a3dWB&XFnZg`J3q!@M z?~(*b*7rbqG>dEY7si@Kln?<8Xcz$@}EkY=?70(=Hpo#dZgw|4@eja|2QB z!mghr)<)R%Kw@nK1G28$J2tyrw|5FX{!-5WnDR$r{EsPrWlGl9?K&hR#{R`q{`aN3 N|9$E1|No`C{{s=!CENf2 literal 0 HcmV?d00001 From b01beb081faa53fa53be649048c2da2c209a65e0 Mon Sep 17 00:00:00 2001 From: Zifan Jiang Date: Mon, 24 Feb 2025 12:32:48 +0000 Subject: [PATCH 2/2] add lip reading features and more meta data --- .../datasets/bobsl_islr/bobsl_islr.py | 98 +++++++++++++------ 1 file changed, 67 insertions(+), 31 deletions(-) diff --git a/sign_language_datasets/datasets/bobsl_islr/bobsl_islr.py b/sign_language_datasets/datasets/bobsl_islr/bobsl_islr.py index c8dda16..d277c44 100644 --- a/sign_language_datasets/datasets/bobsl_islr/bobsl_islr.py +++ b/sign_language_datasets/datasets/bobsl_islr/bobsl_islr.py @@ -62,6 +62,9 @@ def _info(self) -> tfds.core.DatasetInfo: features = { "id": tfds.features.Text(), "text": tfds.features.Text(), + "episode_id": tfds.features.Text(), + "start_frame": 
+            "end_frame": tfds.features.Scalar(dtype=np.int64),
         }
 
         # TODO: add videos
@@ -71,6 +74,9 @@ def _info(self) -> tfds.core.DatasetInfo:
             stride = 1 if self._builder_config.fps is None else 25 / self._builder_config.fps
             features["pose"] = PoseFeature(shape=(None, 1, 576, 3), header_path=pose_header_path, stride=stride)
 
+        if "lip_feature_dir" in self._builder_config.extra:
+            features["lip"] = tfds.features.Tensor(shape=(None, 768), dtype=np.float32)
+
         return tfds.core.DatasetInfo(
             builder=self,
             description=_DESCRIPTION,
@@ -87,7 +93,6 @@ def _split_generators(self, dl_manager: tfds.download.DownloadManager):
         # too expensive to host the poses at the moment, need to specify a local path
         # poses_dir = str(dl_manager.download_and_extract(_POSE_URLS["holistic"]))
         poses_dir = self._builder_config.extra["poses_dir"]
-        poses_dir_test = self._builder_config.extra["poses_dir_test"]
 
         print(f'Generating train and valid set ...')
         # Copy aggregated annotations from the vgg_islr repo
@@ -129,6 +134,7 @@ def _split_generators(self, dl_manager: tfds.download.DownloadManager):
             TRAIN_SPLIT_NUM: defaultdict(list),
             VAL_SPLIT_NUM: defaultdict(list),
         }
+        # for split_idx in [VAL_SPLIT_NUM]:
         for split_idx in [TRAIN_SPLIT_NUM,VAL_SPLIT_NUM]:
             count = 0
 
@@ -152,15 +158,19 @@ def _split_generators(self, dl_manager: tfds.download.DownloadManager):
                                 w_l = w_l if isinstance(w_l, tuple) else [w_l]
 
                                 for w in w_l:
-                                    examples[split_idx][pose_filename].append({
-                                        'idx': f"{w}-{pose_filename.replace('.pose', '')}-{i}",
+                                    episode_id = pose_filename.replace('.pose', '')
+                                    idx = f"{'train' if split_idx == TRAIN_SPLIT_NUM else 'val'}-{w}-{episode_id}-{i}"
+                                    examples[split_idx][episode_id].append({
+                                        'idx': idx,
                                         'text': w,
                                         'start_frame': s,
                                         'end_frame': e,
+                                        'episode_id': episode_id,
                                     })
 
+                                # DEBUG
                                 # count = count + 1
-                                # if count >= 20:
+                                # if count >= 10:
                                 #     break
                             else:
                                 print(f'{pose_path} does not exist, skipping ...')
@@ -181,6 +191,7 @@ def _split_generators(self, dl_manager: tfds.download.DownloadManager):
                 },
             },
         }
+        fps = 25
 
         test_examples = []
         for annotation_source, annotation in annotations['test'].items():
@@ -199,54 +210,79 @@ def _split_generators(self, dl_manager: tfds.download.DownloadManager):
                 for i, name in enumerate(value['names']):
                     global_time = value['global_times'][i]
                     filename = f"{name}-{str(global_time).replace('.', '_')}"
-                    idx = f"{gloss}-{filename}"
+                    idx = f"test-{gloss}-{filename}"
                     pose_path = f"{annotation_source}/{gloss}/{filename}.pose"
 
+                    start_offset, end_offset = annotation['range']
+                    s, e = max(0, int(global_time * fps + start_offset)), int(global_time * fps + end_offset)
+
                     test_examples.append({
                         'idx': idx,
-                        'pose_path': pose_path,
+                        # 'pose_path': pose_path,
                         'text': gloss,
+                        'start_frame': s,
+                        'end_frame': e,
+                        'episode_id': filename.split('-')[0],
                     })
 
+        idxs = [item["idx"] for item in test_examples]
+        assert len(idxs) == len(set(idxs)), "Duplicate 'idx' values found!"
+
+        # test_examples = test_examples[:10] # DEBUG
+
+        # Group examples by episode_id
+        grouped_examples = defaultdict(list)
+        for example in test_examples:
+            grouped_examples[example['episode_id']].append(example)
+        test_examples = grouped_examples
+
         print('Train:', sum([len(d) for d in examples[TRAIN_SPLIT_NUM].values()]))
         print('Valid:', sum([len(d) for d in examples[VAL_SPLIT_NUM].values()]))
-        print('Test:', len(test_examples))
+        print('Test:', sum([len(d) for d in test_examples.values()]))
 
         return [
             tfds.core.SplitGenerator(name=tfds.Split.TRAIN, gen_kwargs={"poses_dir": poses_dir, "examples": examples[TRAIN_SPLIT_NUM]}),
             tfds.core.SplitGenerator(name=tfds.Split.VALIDATION, gen_kwargs={"poses_dir": poses_dir, "examples": examples[VAL_SPLIT_NUM]}),
-            tfds.core.SplitGenerator(name=tfds.Split.TEST, gen_kwargs={"poses_dir": poses_dir_test, "examples": test_examples}),
+            tfds.core.SplitGenerator(name=tfds.Split.TEST, gen_kwargs={"poses_dir": poses_dir, "examples": test_examples}),
         ]
 
-    def _generate_examples(self, poses_dir: str, examples: Union[list, dict]):
+    def _generate_examples(self, poses_dir: str, examples: dict):
         """Yields examples."""
 
-        if isinstance(examples, list):
-            for example in examples:
-                datum = {"id": example["idx"], "text": example["text"]}
-                mediapipe_path = path.join(poses_dir, example['pose_path'])
+        lip_dir = self._builder_config.extra["lip_feature_dir"] if "lip_feature_dir" in self._builder_config.extra else None
 
-                with open(mediapipe_path, "rb") as f:
+        for episode_id, episode_examples in examples.items():
+            mediapipe_path = path.join(poses_dir, episode_id + '.pose')
+
+            with open(mediapipe_path, "rb") as f:
+                buffer = f.read()
+
+            if lip_dir:
+                feat_path = path.join(lip_dir, episode_id + ".npy")
+                lip_feat = np.load(feat_path) if os.path.exists(feat_path) else None
+
+            for example in episode_examples:
                     try:
-                        pose = Pose.read(f.read())
+                        datum = {
+                            "id": example["idx"],
+                            "text": example["text"],
+                            "episode_id": example["episode_id"],
+                            "start_frame": example["start_frame"],
+                            "end_frame": example["end_frame"],
+                        }
+
+                        pose = Pose.read(buffer, start_frame=example["start_frame"], end_frame=example["end_frame"])
                         datum["pose"] = pose
+
+                        if lip_dir:
+                            if lip_feat is not None:
+                                datum['lip'] = lip_feat[example['start_frame']:example['end_frame']]
+                                assert datum['lip'].shape[0] == pose.body.data.shape[0], \
+                                    f"lip reading feature should have the same number of frames as pose: {datum['lip'].shape[0]} vs. {pose.body.data.shape[0]}"
+                            else:
+                                print(f'WARNING: {feat_path} not found ...')
+                                datum['lip'] = np.zeros((pose.body.data.shape[0], 768), dtype=np.float32)
 
                         yield datum["id"], datum
                     except Exception as e:
                         print(e)
-        elif isinstance(examples, dict):
-            for pose_path, episode_examples in examples.items():
-                mediapipe_path = path.join(poses_dir, pose_path)
-
-                with open(mediapipe_path, "rb") as f:
-                    buffer = f.read()
-
-                for episode_example in episode_examples:
-                    try:
-                        datum = {"id": episode_example["idx"], "text": episode_example["text"]}
-                        datum["pose"] = Pose.read(buffer, start_frame=episode_example["start_frame"], end_frame=episode_example["end_frame"])
-
-                        yield datum["id"], datum
-                    except Exception as e:
-                        print(e)
-
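Usage note (reviewer sketch, not part of the patch): a minimal example of how the new builder might be loaded once both patches are applied. It assumes the usual TFDS snake_case registration name ("bobsl_islr" for class BobslIslr), that SignDatasetConfig forwards an `extra` dict (the builder reads extra["poses_dir"] and, optionally, extra["lip_feature_dir"]), and placeholder local paths.

import tensorflow_datasets as tfds

import sign_language_datasets.datasets  # noqa: F401 -- importing registers the builders, including bobsl_islr
from sign_language_datasets.datasets.config import SignDatasetConfig

# Placeholder paths: the builder does not download poses, it reads them from `extra`.
config = SignDatasetConfig(
    name="holistic-local",
    include_pose="holistic",
    extra={
        "poses_dir": "/path/to/bobsl/poses",              # placeholder, required
        "lip_feature_dir": "/path/to/bobsl/lip_features", # placeholder, optional: enables the "lip" feature
    },
)

bobsl_islr = tfds.load("bobsl_islr", builder_kwargs=dict(config=config))

for datum in bobsl_islr["train"].take(3):
    print(datum["id"].numpy().decode(), datum["text"].numpy().decode())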